CTPN细粒度候选框合并

2024年6月1日 148次阅读

1.候选框分组

2. 确定每个分组的座标

1.候选框的分组：

im_size[1]:传入图片的宽度

box[0]:细粒度候选窗口的左上角横座标

boxes_table:把所有左上角横座标相同的候选窗口放在一块，方便下面根据候选窗口左上角的横座标定位候选窗口的索引。

        # One bucket per integer x position in [0, self.im_size[1]); each bucket
        # collects the indices of the proposals whose box[0] truncates to that x.
        boxes_table=[[] for _ in range(self.im_size[1])]
        for index, box in enumerate(text_proposals):
            boxes_table[int(box[0])].append(index)
        # Stored on the instance because get_successions / get_precursors look
        # proposals up through self.boxes_table.
        self.boxes_table=boxes_table

graph:方形矩阵，里面元素值为bool类型，行数和列数均为候选窗口的个数。初始化为0（False），如果两个候选窗口相关联则设置为1（True）。

# Square boolean adjacency matrix with one row and one column per proposal
# (text_proposals.shape[0] is the number of proposals); every entry starts False.
# The builtin bool is used as the dtype because the np.bool alias was deprecated
# in NumPy 1.20 and removed in NumPy 1.24.
graph=np.zeros((text_proposals.shape[0], text_proposals.shape[0]), bool)

get_successions(index)：找到跟index候选窗口相关联的所有候选窗口

succession_index：这些候选窗口中得分最大的那个。

is_succession_node(index, succession_index)：跟一个窗口相关联的候选窗口可能有很多，前向搜索只保留最大分值的那个。

        for index, box in enumerate(text_proposals):
            # Indices of the proposals that get_successions pairs with this one.
            successions=self.get_successions(index)
            if len(successions)==0:
                continue
            # Among those candidates keep only the one with the highest score.
            succession_index=successions[np.argmax(scores[successions])]
            # Record the edge only when is_succession_node also accepts the pair.
            if self.is_succession_node(index, succession_index):
                # NOTE: a box can have multiple successions(precursors) if multiple successions(precursors)
                # have equal scores.
                graph[index, succession_index]=True
        return Graph(graph)

前面调用的get_successions(index)函数。找到最靠近该候选窗口的一组候选窗口。meet_v_iou为比较的方法。

    def get_successions(self, index):
        """Return the closest proposals to the right that pair with ``index``.

        Columns are scanned from one past the proposal's integer left x up to
        ``TextLineCfg.MAX_HORIZONTAL_GAP`` columns away, never reaching
        ``self.im_size[1]``. The scan stops at the first column that holds at
        least one proposal accepted by ``meet_v_iou``.

        Returns:
            A list with the indices accepted in that first matching column, or
            an empty list when no column in range has a match.
        """
        left_edge = int(self.text_proposals[index][0])
        scan_end = min(left_edge + TextLineCfg.MAX_HORIZONTAL_GAP + 1, self.im_size[1])
        for column in range(left_edge + 1, scan_end):
            matches = [
                candidate
                for candidate in self.boxes_table[column]
                if self.meet_v_iou(candidate, index)
            ]
            # Only the nearest non-empty column counts; farther ones are ignored.
            if matches:
                return matches
        return []

size_similarity(index1, index2)：比较两个窗口的高度之比。

overlaps_v(index1, index2)：高度上重合部分与短高之比。

    def meet_v_iou(self, index1, index2):
        """Tell whether two proposals are vertically compatible.

        Two ratios are checked, both built from ``self.heights`` and from
        entries 1 and 3 of the two rows of ``self.text_proposals``:

        * vertical overlap: the overlapping extent (computed with a +1 and
          floored at 0) divided by the smaller of the two heights, compared
          against ``TextLineCfg.MIN_V_OVERLAPS``;
        * size similarity: smaller height divided by larger height, compared
          against ``TextLineCfg.MIN_SIZE_SIM``.

        The similarity ratio is only evaluated when the overlap test passes.
        NOTE(review): the original inline note gives 0.6 for both thresholds;
        confirm against TextLineCfg.
        """
        first = self.text_proposals[index1]
        second = self.text_proposals[index2]
        h1, h2 = self.heights[index1], self.heights[index2]
        shorter = min(h1, h2)

        # Shared vertical span of the two boxes.
        top = max(second[1], first[1])
        bottom = min(second[3], first[3])
        v_overlap = max(0, bottom - top + 1) / shorter

        return v_overlap >= TextLineCfg.MIN_V_OVERLAPS and \
            shorter / max(h1, h2) >= TextLineCfg.MIN_SIZE_SIM

is_succession_node(index, succession_index)前面所调用的函数。与get_successions(index)同理。

    def is_succession_node(self, index, succession_index):
        """Check that ``index`` scores at least as high as every precursor.

        The precursors are those ``get_precursors`` returns for
        ``succession_index``. Returns True when ``self.scores[index]`` is
        greater than or equal to the highest score among them, else False.
        """
        precursor_scores = self.scores[self.get_precursors(succession_index)]
        return bool(self.scores[index] >= np.max(precursor_scores))
    def get_precursors(self, index):
        """Return the closest proposals to the left that pair with ``index``.

        Columns are scanned leftwards from one before the proposal's integer
        left x, going at most ``TextLineCfg.MAX_HORIZONTAL_GAP`` columns back
        and never below column 0. The scan stops at the first column that
        holds at least one proposal accepted by ``meet_v_iou``.

        Returns:
            A list with the indices accepted in that first matching column, or
            an empty list when no column in range has a match.
        """
        left_edge = self.text_proposals[index][0]
        scan_start = int(left_edge) - 1
        scan_end = max(int(left_edge - TextLineCfg.MAX_HORIZONTAL_GAP), 0) - 1
        for column in range(scan_start, scan_end, -1):
            matches = [
                candidate
                for candidate in self.boxes_table[column]
                if self.meet_v_iou(candidate, index)
            ]
            # Only the nearest non-empty column counts; farther ones are ignored.
            if matches:
                return matches
        return []

2. 确定每个分组的座标

    def get_text_lines(self, text_proposals, scores, im_size):
        """
        Merge fine-grained proposals into text lines and build one quadrilateral per line.

        text_proposals: proposal boxes; columns 0..3 are used here as
            left x, top y, right x, bottom y.
        scores: per-proposal scores, indexed with the same proposal indices.
        im_size: forwarded unchanged to group_text_proposals and clip_boxes.

        Returns whatever clip_boxes yields for a float32 array holding nine values
        per text line: x1, y1 (top-left), x2, y2 (top-right), x3, y3 (bottom-left),
        x4, y4 (bottom-right) and the line score.
        """
        # tp=text proposal
        # Build the graph first to learn which small boxes make up each text line.
        tp_groups=self.group_text_proposals(text_proposals, scores, im_size)

        # Per line: x0, top y, x1, bottom y, score, slope k, intercept b, height.
        text_lines=np.zeros((len(tp_groups), 8), np.float32)

        for index, tp_indices in enumerate(tp_groups):
            text_line_boxes=text_proposals[list(tp_indices)]# all small boxes of this text line
            X = (text_line_boxes[:,0] + text_line_boxes[:,2]) / 2# centre x and centre y of every small box
            Y = (text_line_boxes[:,1] + text_line_boxes[:,3]) / 2

            z1 = np.polyfit(X,Y,1)# degree-1 least-squares fit of a line through the box centres

            x0=np.min(text_line_boxes[:, 0])# smallest x of the text line
            x1=np.max(text_line_boxes[:, 2])# largest x of the text line

            offset=(text_line_boxes[0, 2]-text_line_boxes[0, 0])*0.5# half the width of the first small box

            # Fit a line through the top-left corners of all small boxes, then evaluate y
            # near the leftmost and rightmost x of the text line.
            # NOTE(review): the original author left an open question here asking why
            # offset is added / subtracted.
            lt_y, rt_y=self.fit_y(text_line_boxes[:, 0], text_line_boxes[:, 1], x0+offset, x1-offset)
            # Same fit through the bottom-left corners of all small boxes.
            lb_y, rb_y=self.fit_y(text_line_boxes[:, 0], text_line_boxes[:, 3], x0+offset, x1-offset)

            score=scores[list(tp_indices)].sum()/float(len(tp_indices))# mean score of the small boxes becomes the line score

            text_lines[index, 0]=x0
            text_lines[index, 1]=min(lt_y, rt_y)# smaller y of the fitted top edge
            text_lines[index, 2]=x1
            text_lines[index, 3]=max(lb_y, rb_y)# larger y of the fitted bottom edge
            text_lines[index, 4]=score# text line score
            text_lines[index, 5]=z1[0]# slope k and intercept b of the line fitted through the centres
            text_lines[index, 6]=z1[1]
            height = np.mean( (text_line_boxes[:,3]-text_line_boxes[:,1]) )# mean height of the small boxes
            # 2.5 is a fixed constant added to the mean height (presumably padding -- TODO confirm).
            text_lines[index, 7]= height + 2.5

        # The loop below reads columns 0, 2, 4, 5, 6 and 7 of text_lines only;
        # columns 1 and 3 are not used when the quadrilaterals are built.
        text_recs = np.zeros((len(text_lines), 9), np.float32)
        index = 0
        for line in text_lines:
            b1 = line[6] - line[7] / 2  # intercepts of the upper and lower edges, from the centre line and the height
            b2 = line[6] + line[7] / 2
            x1 = line[0]
            y1 = line[5] * line[0] + b1  # top-left
            x2 = line[2]
            y2 = line[5] * line[2] + b1  # top-right
            x3 = line[0]
            y3 = line[5] * line[0] + b2  # bottom-left
            x4 = line[2]
            y4 = line[5] * line[2] + b2  # bottom-right
            disX = x2 - x1
            disY = y2 - y1
            width = np.sqrt(disX * disX + disY * disY)  # length of the top edge (text line width)

            fTmp0 = y3 - y1  # text line height
            fTmp1 = fTmp0 * disY / width
            x = np.fabs(fTmp1 * disX / width)  # compensation applied to the corners below
            # NOTE(review): the original author asks for an explanation of why the
            # compensation is computed this way.
            y = np.fabs(fTmp1 * disY / width)
            # Which pair of opposite corners is shifted depends on the sign of the slope.
            if line[5] < 0:
                x1 -= x
                y1 += y
                x4 += x
                y4 -= y
            else:
                x2 += x
                y2 += y
                x3 -= x
                y3 -= y
            text_recs[index, 0] = x1
            text_recs[index, 1] = y1
            text_recs[index, 2] = x2
            text_recs[index, 3] = y2
            text_recs[index, 4] = x3
            text_recs[index, 5] = y3
            text_recs[index, 6] = x4
            text_recs[index, 7] = y4
            text_recs[index, 8] = line[4]
            index = index + 1

        text_recs=clip_boxes(text_recs, im_size)

        return text_recs