NMS在目标检测中非常常用,核心原理很容易理解,2017年开始相继出现了soft-NMS等等NMS的变体,优化的核心都是尽可能去除重复框的同时尽可能保证不漏检,以下是NMS的C++实现
static void sort(int n, const float* x, int* indices)
{
// 排序函数(降序排序),排序后进行交换的是indices中的数据
// n:排序总数// x:带排序数// indices:初始为0~n-1数目
int i, j;
for (i = 0; i < n; i++)
for (j = i + 1; j < n; j++)
{
if (x[indices[j]] > x[indices[i]])
{
//float x_tmp = x[i];
int index_tmp = indices[i];
//x[i] = x[j];
indices[i] = indices[j];
//x[j] = x_tmp;
indices[j] = index_tmp;
}
}
}
int nonMaximumSuppression(int numBoxes, const CvPoint *points,
const CvPoint *oppositePoints, const float *score,
float overlapThreshold,
int *numBoxesOut, CvPoint **pointsOut,
CvPoint **oppositePointsOut, float **scoreOut)
{
// numBoxes:窗口数目// points:窗口左上角座标点// oppositePoints:窗口右下角座标点
// score:窗口得分// overlapThreshold:重叠阈值控制// numBoxesOut:输出窗口数目
// pointsOut:输出窗口左上角座标点// oppositePoints:输出窗口右下角座标点
// scoreOut:输出窗口得分
int i, j, index;
float* box_area = (float*)malloc(numBoxes * sizeof(float)); // 定义窗口面积变量并分配空间
int* indices = (int*)malloc(numBoxes * sizeof(int)); // 定义窗口索引并分配空间
int* is_suppressed = (int*)malloc(numBoxes * sizeof(int)); // 定义是否抑制表标志并分配空间
// 初始化indices、is_supperssed、box_area信息
for (i = 0; i < numBoxes; i++)
{
indices[i] = i;
is_suppressed[i] = 0;
box_area[i] = (float)( (oppositePoints[i].x - points[i].x + 1) *
(oppositePoints[i].y - points[i].y + 1));
}
// 对输入窗口按照分数比值进行排序,排序后的编号放在indices中
sort(numBoxes, score, indices);
for (i = 0; i < numBoxes; i++) // 循环所有窗口
{
if (!is_suppressed[indices[i]]) // 判断窗口是否被抑制
{
for (j = i + 1; j < numBoxes; j++) // 循环当前窗口之后的窗口
{
if (!is_suppressed[indices[j]]) // 判断窗口是否被抑制
{
int x1max = max(points[indices[i]].x, points[indices[j]].x); // 求两个窗口左上角x座标最大值
int x2min = min(oppositePoints[indices[i]].x, oppositePoints[indices[j]].x); // 求两个窗口右下角x座标最小值
int y1max = max(points[indices[i]].y, points[indices[j]].y); // 求两个窗口左上角y座标最大值
int y2min = min(oppositePoints[indices[i]].y, oppositePoints[indices[j]].y); // 求两个窗口右下角y座标最小值
int overlapWidth = x2min - x1max + 1; // 计算两矩形重叠的宽度
int overlapHeight = y2min - y1max + 1; // 计算两矩形重叠的高度
if (overlapWidth > 0 && overlapHeight > 0)
{
float overlapPart = (overlapWidth * overlapHeight) / box_area[indices[j]]; // 计算重叠的比率
if (overlapPart > overlapThreshold) // 判断重叠比率是否超过重叠阈值
{
is_suppressed[indices[j]] = 1; // 将窗口j标记为抑制
}
}
}
}
}
}
*numBoxesOut = 0; // 初始化输出窗口数目0
for (i = 0; i < numBoxes; i++)
{
if (!is_suppressed[i]) (*numBoxesOut)++; // 统计输出窗口数目
}
*pointsOut = (CvPoint *)malloc((*numBoxesOut) * sizeof(CvPoint)); // 分配输出窗口左上角座标空间
*oppositePointsOut = (CvPoint *)malloc((*numBoxesOut) * sizeof(CvPoint)); // 分配输出窗口右下角座标空间
*scoreOut = (float *)malloc((*numBoxesOut) * sizeof(float)); // 分配输出窗口得分空间
index = 0;
for (i = 0; i < numBoxes; i++) // 遍历所有输入窗口
{
if (!is_suppressed[indices[i]]) // 将未发生抑制的窗口信息保存到输出信息中
{
(*pointsOut)[index].x = points[indices[i]].x;
(*pointsOut)[index].y = points[indices[i]].y;
(*oppositePointsOut)[index].x = oppositePoints[indices[i]].x;
(*oppositePointsOut)[index].y = oppositePoints[indices[i]].y;
(*scoreOut)[index] = score[indices[i]];
index++;
}
}
free(indices); // 释放indices空间
free(box_area); // 释放box_area空间
free(is_suppressed); // 释放is_suppressed空间
return LATENT_SVM_OK;
}
以下是python版本的实现
import numpy as np
def py_cpu_nms(dets, thresh):
"""Pure Python NMS baseline."""
x1 = dets[:, 0]
y1 = dets[:, 1]
x2 = dets[:, 2]
y2 = dets[:, 3]
scores = dets[:, 4] #bbox打分
areas = (x2 - x1 + 1) * (y2 - y1 + 1)
#打分从大到小排列,取index
order = scores.argsort()[::-1]
#keep为最后保留的边框
keep = []
while order.size > 0:
#order[0]是当前分数最大的窗口,肯定保留
i = order[0]
keep.append(i)
#计算窗口i与其他所有窗口的交叠部分的面积
xx1 = np.maximum(x1[i], x1[order[1:]])
yy1 = np.maximum(y1[i], y1[order[1:]])
xx2 = np.minimum(x2[i], x2[order[1:]])
yy2 = np.minimum(y2[i], y2[order[1:]])
w = np.maximum(0.0, xx2 - xx1 + 1)
h = np.maximum(0.0, yy2 - yy1 + 1)
inter = w * h
#交/并得到iou值
ovr = inter / (areas[i] + areas[order[1:]] - inter)
#inds为所有与窗口i的iou值小于threshold值的窗口的index,其他窗口此次都被窗口i吸收
inds = np.where(ovr <= thresh)[0]
#order里面只保留与窗口i交叠面积小于threshold的那些窗口,由于ovr长度比order长度少1(不包含i),所以inds+1对应到保留的窗口
order = order[inds + 1]
return keep
以下是soft-NMS的具体实现,由于score每次都需要改变,因为取消了排序的过程,两次循环中,先寻找当前score的最大值,和外层循环位置进行交换,再在第二层循环中对后续的框进行score衰减
def cpu_soft_nms(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0):
cdef unsigned int N = boxes.shape[0]
cdef float iw, ih, box_area
cdef float ua
cdef int pos = 0
cdef float maxscore = 0
cdef int maxpos = 0
cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov
for i in range(N):
maxscore = boxes[i, 4]
maxpos = i
tx1 = boxes[i,0]
ty1 = boxes[i,1]
tx2 = boxes[i,2]
ty2 = boxes[i,3]
ts = boxes[i,4]
pos = i + 1
# get max box
while pos < N:
if maxscore < boxes[pos, 4]:
maxscore = boxes[pos, 4]
maxpos = pos
pos = pos + 1
# add max box as a detection
boxes[i,0] = boxes[maxpos,0]
boxes[i,1] = boxes[maxpos,1]
boxes[i,2] = boxes[maxpos,2]
boxes[i,3] = boxes[maxpos,3]
boxes[i,4] = boxes[maxpos,4]
# swap ith box with position of max box
boxes[maxpos,0] = tx1
boxes[maxpos,1] = ty1
boxes[maxpos,2] = tx2
boxes[maxpos,3] = ty2
boxes[maxpos,4] = ts
tx1 = boxes[i,0]
ty1 = boxes[i,1]
tx2 = boxes[i,2]
ty2 = boxes[i,3]
ts = boxes[i,4]
pos = i + 1
# NMS iterations, note that N changes if detection boxes fall below threshold
while pos < N:
x1 = boxes[pos, 0]
y1 = boxes[pos, 1]
x2 = boxes[pos, 2]
y2 = boxes[pos, 3]
s = boxes[pos, 4]
area = (x2 - x1 + 1) * (y2 - y1 + 1)
iw = (min(tx2, x2) - max(tx1, x1) + 1)
if iw > 0:
ih = (min(ty2, y2) - max(ty1, y1) + 1)
if ih > 0:
ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)
ov = iw * ih / ua #iou between max box and detection box
if method == 1: # linear
if ov > Nt:
weight = 1 - ov
else:
weight = 1
elif method == 2: # gaussian
weight = np.exp(-(ov * ov)/sigma)
else: # original NMS
if ov > Nt:
weight = 0
else:
weight = 1
boxes[pos, 4] = weight*boxes[pos, 4]
# if box score falls below threshold, discard the box by swapping with last box
# update N
if boxes[pos, 4] < threshold:
boxes[pos,0] = boxes[N-1, 0]
boxes[pos,1] = boxes[N-1, 1]
boxes[pos,2] = boxes[N-1, 2]
boxes[pos,3] = boxes[N-1, 3]
boxes[pos,4] = boxes[N-1, 4]
N = N - 1
pos = pos - 1
pos = pos + 1
keep = [i for i in range(N)]
return keep