1.候选框分组
2. 确定每个分组的座标
1.候选框的分组:
im_size[1]:传入图片的宽度
box[0]:细粒度候选窗口的左上角横座标
boxes_table:把所有左上角横座标相同的候选窗口放在一块,方便下面根据候选窗口左上角的横座标定位候选窗口的索引。
# Bucket proposal indices by the integer x-coordinate of their left edge
# (box[0]), one bucket per pixel column up to self.im_size[1], so that
# neighbours at a given column can be looked up directly.
boxes_table = [[] for _ in range(self.im_size[1])]
for index, box in enumerate(text_proposals):
    boxes_table[int(box[0])].append(index)
self.boxes_table = boxes_table
graph:方形矩阵,里面元素值为bool类型,行和列均为候选窗口的个数。初始化为0(False),如果两个候选窗口相关联则设置为1(True)。
# Square boolean adjacency matrix with one row and one column per text
# proposal (text_proposals.shape[0] is the number of proposals).
# The builtin `bool` is used as dtype because the `np.bool` alias was
# deprecated in NumPy 1.20 and removed in NumPy 1.24.
graph = np.zeros((text_proposals.shape[0], text_proposals.shape[0]), dtype=bool)
get_successions(index):找到跟index候选窗口相关联的所有候选窗口
succession_index:这些候选窗口中得分最大的那个。
is_succession_node(index, succession_index):跟一个窗口相关联的候选窗口可能有很多,前向搜索只保留最大分值的那个。
for index, box in enumerate(text_proposals):
#successions proposals的num
successions=self.get_successions(index)
if len(successions)==0:
continue
succession_index=successions[np.argmax(scores[successions])]
if self.is_succession_node(index, succession_index):
# NOTE: a box can have multiple successions(precursors) if multiple successions(precursors)
# have equal scores.
graph[index, succession_index]=True
return Graph(graph)
前面调用的get_successions(index)函数。找到最靠近该候选窗口的一组候选窗口。meet_v_iou为比较的方法。
def get_successions(self, index):
    """Return the nearest right-hand neighbours of proposal ``index``.

    Pixel columns to the right of the proposal's left edge are scanned in
    increasing order, at most ``TextLineCfg.MAX_HORIZONTAL_GAP`` columns
    away and never reaching ``self.im_size[1]``. The scan stops at the first
    column containing at least one proposal accepted by ``meet_v_iou``.

    Args:
        index: Index of the reference proposal in ``self.text_proposals``.

    Returns:
        A list with every matching proposal index found in that first
        non-empty column, or an empty list when nothing in range matches.
    """
    box = self.text_proposals[index]
    left_edge = int(box[0])
    scan_end = min(left_edge + TextLineCfg.MAX_HORIZONTAL_GAP + 1, self.im_size[1])
    for left in range(left_edge + 1, scan_end):
        results = [
            adj_box_index
            for adj_box_index in self.boxes_table[left]
            if self.meet_v_iou(adj_box_index, index)
        ]
        # Only the closest column with matches is kept; farther columns are ignored.
        if results:
            return results
    return []
size_similarity(index1, index2):比较两个窗口的高度之比。
overlaps_v(index1, index2):高度上重合部分与短高之比。
def meet_v_iou(self, index1, index2):
    """Check whether two proposals are vertically compatible.

    Two criteria must both hold:

    * vertical overlap: the overlapping extent of the two boxes along y
      (``y1 - y0 + 1``, floored at 0) divided by the smaller of the two
      heights must be at least ``TextLineCfg.MIN_V_OVERLAPS``;
    * size similarity: the smaller height divided by the larger height must
      be at least ``TextLineCfg.MIN_SIZE_SIM``.

    Args:
        index1: Index of the first proposal.
        index2: Index of the second proposal.

    Returns:
        A truthy value when both criteria are satisfied, falsy otherwise.
    """
    h1 = self.heights[index1]
    h2 = self.heights[index2]
    shorter = min(h1, h2)
    taller = max(h1, h2)

    box1 = self.text_proposals[index1]
    box2 = self.text_proposals[index2]
    # Overlapping y-interval: lowest top edge to highest bottom edge.
    y0 = max(box2[1], box1[1])
    y1 = min(box2[3], box1[3])
    overlaps_v = max(0, y1 - y0 + 1) / shorter

    size_similarity = shorter / taller

    # Author's note in the original: both thresholds are 0.6 -- confirm
    # against the TextLineCfg actually in use.
    return (overlaps_v >= TextLineCfg.MIN_V_OVERLAPS
            and size_similarity >= TextLineCfg.MIN_SIZE_SIM)
is_succession_node(index, succession_index)前面所调用的函数。与get_successions(index)同理。
def is_succession_node(self, index, succession_index):
    """Tell whether ``index`` is a top-scoring precursor of ``succession_index``.

    Args:
        index: Index of the candidate precursor proposal.
        succession_index: Index of the proposal chosen as its successor.

    Returns:
        True when the score of ``index`` is at least the maximum score among
        the precursors of ``succession_index`` (ties count), else False.

    Raises:
        ValueError: From ``np.max`` if ``get_precursors`` returns an empty
            list. NOTE(review): callers appear to pass a successor obtained
            from ``get_successions(index)``, which should make this
            unreachable -- confirm.
    """
    precursors = self.get_precursors(succession_index)
    return bool(self.scores[index] >= np.max(self.scores[precursors]))
def get_precursors(self, index):
    """Return the nearest left-hand neighbours of proposal ``index``.

    Mirror image of ``get_successions``: pixel columns to the left of the
    proposal's left edge are scanned in decreasing order, at most
    ``TextLineCfg.MAX_HORIZONTAL_GAP`` columns away and not below column 0.
    The scan stops at the first column containing at least one proposal
    accepted by ``meet_v_iou``.

    Args:
        index: Index of the reference proposal in ``self.text_proposals``.

    Returns:
        A list with every matching proposal index found in that first
        non-empty column, or an empty list when nothing in range matches.
    """
    box = self.text_proposals[index]
    # Leftmost column to visit (inclusive), clamped to the image border.
    leftmost = max(int(box[0] - TextLineCfg.MAX_HORIZONTAL_GAP), 0)
    for left in range(int(box[0]) - 1, leftmost - 1, -1):
        results = [
            adj_box_index
            for adj_box_index in self.boxes_table[left]
            if self.meet_v_iou(adj_box_index, index)
        ]
        # Only the closest column with matches is kept; farther columns are ignored.
        if results:
            return results
    return []
2. 确定每个分组的座标
def get_text_lines(self, text_proposals, scores, im_size):
    """Merge grouped text proposals into one quadrilateral per text line.

    Args:
        text_proposals: Array of proposal boxes; columns 0-3 are read as
            left x, top y, right x, bottom y.
        scores: Array of per-proposal scores, indexed like ``text_proposals``.
        im_size: Image size, forwarded to ``group_text_proposals`` and
            ``clip_boxes``.

    Returns:
        The result of ``clip_boxes`` applied to a float32 array with one row
        per text line and 9 columns: ``x1, y1`` (top-left), ``x2, y2``
        (top-right), ``x3, y3`` (bottom-left), ``x4, y4`` (bottom-right) and
        the text-line score.
    """
    # tp = text proposal. Build the graph first to learn which proposals
    # make up each text line.
    tp_groups = self.group_text_proposals(text_proposals, scores, im_size)

    # Per-line intermediate record:
    # [x_min, top_y, x_max, bottom_y, score, slope, intercept, height].
    text_lines = np.zeros((len(tp_groups), 8), np.float32)

    for index, tp_indices in enumerate(tp_groups):
        member_ids = list(tp_indices)
        text_line_boxes = text_proposals[member_ids]  # all proposals of this line

        # Centre point of every proposal.
        X = (text_line_boxes[:, 0] + text_line_boxes[:, 2]) / 2
        Y = (text_line_boxes[:, 1] + text_line_boxes[:, 3]) / 2
        # Least-squares straight line through the centres: z1 = (slope, intercept).
        z1 = np.polyfit(X, Y, 1)

        x0 = np.min(text_line_boxes[:, 0])  # leftmost x of the line
        x1 = np.max(text_line_boxes[:, 2])  # rightmost x of the line

        # Half the width of the first proposal. The fitted edges below are
        # evaluated half a box inside each end of the line.
        # NOTE(review): presumably at the centres of the end boxes -- confirm.
        offset = (text_line_boxes[0, 2] - text_line_boxes[0, 0]) * 0.5

        # Fit a line through the top-left corners and evaluate it near both ends.
        lt_y, rt_y = self.fit_y(text_line_boxes[:, 0], text_line_boxes[:, 1], x0 + offset, x1 - offset)
        # Same for the bottom-left corners.
        lb_y, rb_y = self.fit_y(text_line_boxes[:, 0], text_line_boxes[:, 3], x0 + offset, x1 - offset)

        # The line score is the mean score of its proposals.
        score = scores[member_ids].sum() / float(len(tp_indices))

        text_lines[index, 0] = x0
        text_lines[index, 1] = min(lt_y, rt_y)  # smaller y of the top edge
        text_lines[index, 2] = x1
        text_lines[index, 3] = max(lb_y, rb_y)  # larger y of the bottom edge
        text_lines[index, 4] = score
        text_lines[index, 5] = z1[0]  # slope k of the centre line
        text_lines[index, 6] = z1[1]  # intercept b of the centre line
        height = np.mean(text_line_boxes[:, 3] - text_line_boxes[:, 1])  # mean proposal height
        text_lines[index, 7] = height + 2.5

    text_recs = np.zeros((len(text_lines), 9), np.float32)
    for index, line in enumerate(text_lines):
        # Intercepts of the top and bottom edges: the centre line shifted by
        # half the line height up and down.
        b1 = line[6] - line[7] / 2
        b2 = line[6] + line[7] / 2
        x1 = line[0]
        y1 = line[5] * line[0] + b1  # top-left
        x2 = line[2]
        y2 = line[5] * line[2] + b1  # top-right
        x3 = line[0]
        y3 = line[5] * line[0] + b2  # bottom-left
        x4 = line[2]
        y4 = line[5] * line[2] + b2  # bottom-right

        disX = x2 - x1
        disY = y2 - y1
        width = np.sqrt(disX * disX + disY * disY)  # length of the top edge

        fTmp0 = y3 - y1  # vertical extent of the line
        # (x, y) is the magnitude of the projection of the vertical side
        # (0, fTmp0) onto the direction of the top edge. Shifting two
        # opposite corners by it makes the short sides perpendicular to the
        # top and bottom edges instead of vertical.
        fTmp1 = fTmp0 * disY / width
        x = np.fabs(fTmp1 * disX / width)
        y = np.fabs(fTmp1 * disY / width)
        if line[5] < 0:
            x1 -= x
            y1 += y
            x4 += x
            y4 -= y
        else:
            x2 += x
            y2 += y
            x3 -= x
            y3 -= y

        text_recs[index, 0] = x1
        text_recs[index, 1] = y1
        text_recs[index, 2] = x2
        text_recs[index, 3] = y2
        text_recs[index, 4] = x3
        text_recs[index, 5] = y3
        text_recs[index, 6] = x4
        text_recs[index, 7] = y4
        text_recs[index, 8] = line[4]

    text_recs = clip_boxes(text_recs, im_size)
    return text_recs