# Source: https://github.com/tensorboy/pytorch_Realtime_Multi-Person_Pose_Estimation
# -*- coding: utf-8 -*-
"""Single-image demo of multi-person pose estimation with a two-branch CNN.

The script builds the network from layer tables, loads weights from
``weight_name``, runs the model on one image at several scales, averages
the resulting joint heatmaps and part-affinity fields (PAFs), extracts joint
peaks, links them into limbs, groups limbs into people and writes the
rendering to ``result.png``.

A CUDA device is required.  The statements are written so that they behave
the same on Python 2 and Python 3.
"""
import os
import re
import sys
import math
import time
import argparse
from collections import OrderedDict

import cv2
import scipy
import matplotlib
import numpy as np
import pylab as plt
from joblib import Parallel, delayed
import torch
import torch as T
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from scipy.ndimage import gaussian_filter

import util
from config_reader import config_reader

torch.set_num_threads(torch.get_num_threads())
weight_name = './model/pose_model.pth'

blocks = {}

# Limbs as pairs of 1-based joint indices (see the "Pose Output Format"
# figure of the upstream project).
# find connection in the specified sequence, center 29 is in the position 15
limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10],
           [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17],
           [1, 16], [16, 18], [3, 17], [6, 18]]

# For each limb in limbSeq, the pair of PAF channels that describes it.
# The values are offset by 19; 19 is subtracted before indexing paf_avg.
mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22],
          [23, 24], [25, 26], [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52],
          [55, 56], [37, 38], [45, 46]]

# Drawing colors, one per joint / limb index.
colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0],
          [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255],
          [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]

# Layer tables.  A conv entry is [in, out, kernel, stride, padding]; a pool
# entry is [kernel, stride, padding].
# The "_L2" branch ends in 19 channels (joint score heatmaps) and the "_L1"
# branch ends in 38 channels (limb vector fields, i.e. PAFs).
block0 = [{'conv1_1': [3, 64, 3, 1, 1]}, {'conv1_2': [64, 64, 3, 1, 1]}, {'pool1_stage1': [2, 2, 0]},
          {'conv2_1': [64, 128, 3, 1, 1]}, {'conv2_2': [128, 128, 3, 1, 1]}, {'pool2_stage1': [2, 2, 0]},
          {'conv3_1': [128, 256, 3, 1, 1]}, {'conv3_2': [256, 256, 3, 1, 1]}, {'conv3_3': [256, 256, 3, 1, 1]},
          {'conv3_4': [256, 256, 3, 1, 1]}, {'pool3_stage1': [2, 2, 0]},
          {'conv4_1': [256, 512, 3, 1, 1]}, {'conv4_2': [512, 512, 3, 1, 1]},
          {'conv4_3_CPM': [512, 256, 3, 1, 1]}, {'conv4_4_CPM': [256, 128, 3, 1, 1]}]

blocks['block1_1'] = [{'conv5_1_CPM_L1': [128, 128, 3, 1, 1]}, {'conv5_2_CPM_L1': [128, 128, 3, 1, 1]},
                      {'conv5_3_CPM_L1': [128, 128, 3, 1, 1]}, {'conv5_4_CPM_L1': [128, 512, 1, 1, 0]},
                      {'conv5_5_CPM_L1': [512, 38, 1, 1, 0]}]

blocks['block1_2'] = [{'conv5_1_CPM_L2': [128, 128, 3, 1, 1]}, {'conv5_2_CPM_L2': [128, 128, 3, 1, 1]},
                      {'conv5_3_CPM_L2': [128, 128, 3, 1, 1]}, {'conv5_4_CPM_L2': [128, 512, 1, 1, 0]},
                      {'conv5_5_CPM_L2': [512, 19, 1, 1, 0]}]

# Refinement stages 2..6.  Their input has 185 = 38 + 19 + 128 channels
# (both previous branch outputs concatenated with the block0 features).
for i in range(2, 7):
    for branch, out_channels in ((1, 38), (2, 19)):
        stage_specs = ([[185, 128, 7, 1, 3]] + [[128, 128, 7, 1, 3]] * 4 +
                       [[128, 128, 1, 1, 0], [128, out_channels, 1, 1, 0]])
        blocks['block%d_%d' % (i, branch)] = [
            {'Mconv%d_stage%d_L%d' % (n, i, branch): spec}
            for n, spec in enumerate(stage_specs, 1)]


def _conv(spec):
    """Build a Conv2d from ``[in, out, kernel, stride, padding]``."""
    return nn.Conv2d(in_channels=spec[0], out_channels=spec[1],
                     kernel_size=spec[2], stride=spec[3], padding=spec[4])


def _activated_layers(cfg_entries):
    """Turn layer-table entries into modules, adding a ReLU after every conv."""
    layers = []
    for entry in cfg_entries:
        for name, spec in entry.items():
            if 'pool' in name:
                layers.append(nn.MaxPool2d(kernel_size=spec[0], stride=spec[1], padding=spec[2]))
            else:
                layers += [_conv(spec), nn.ReLU(inplace=True)]
    return layers


def make_layers(cfg_dict):
    """Build an ``nn.Sequential`` from a list of single-entry layer dicts.

    Every entry but the last becomes a max-pool (name contains ``'pool'``)
    or a conv followed by an in-place ReLU.  The last entry is always built
    as a conv with no activation after it.
    """
    layers = _activated_layers(cfg_dict[:-1])
    last_spec = next(iter(cfg_dict[-1].values()))
    layers.append(_conv(last_spec))
    return nn.Sequential(*layers)


models = {}
# block0 keeps the ReLU after its last conv, unlike the blocks built by
# make_layers.
models['block0'] = nn.Sequential(*_activated_layers(block0))

for k, v in blocks.items():
    models[k] = make_layers(v)


class pose_model(nn.Module):
    """Six-stage, two-branch network assembled from prebuilt blocks.

    ``model_dict`` must provide ``'block0'`` and ``'block<s>_<b>'`` for
    stages ``s`` in 1..6 and branches ``b`` in 1..2.  ``transform_input`` is
    accepted but not used.
    """

    def __init__(self, model_dict, transform_input=False):
        super(pose_model, self).__init__()
        self.model0 = model_dict['block0']
        self.model1_1 = model_dict['block1_1']
        self.model2_1 = model_dict['block2_1']
        self.model3_1 = model_dict['block3_1']
        self.model4_1 = model_dict['block4_1']
        self.model5_1 = model_dict['block5_1']
        self.model6_1 = model_dict['block6_1']

        self.model1_2 = model_dict['block1_2']
        self.model2_2 = model_dict['block2_2']
        self.model3_2 = model_dict['block3_2']
        self.model4_2 = model_dict['block4_2']
        self.model5_2 = model_dict['block5_2']
        self.model6_2 = model_dict['block6_2']

    def forward(self, x):
        """Return the last stage's ``(branch 1, branch 2)`` outputs for ``x``."""
        out1 = self.model0(x)
        branch1 = self.model1_1(out1)
        branch2 = self.model1_2(out1)
        for stage in range(2, 7):
            # Each refinement stage sees both previous outputs plus the
            # shared block0 features, concatenated along the channel axis.
            stage_in = torch.cat([branch1, branch2, out1], 1)
            branch1 = getattr(self, 'model%d_1' % stage)(stage_in)
            branch2 = getattr(self, 'model%d_2' % stage)(stage_in)
        return branch1, branch2


model = pose_model(models)
model.load_state_dict(torch.load(weight_name))
model.cuda()
model.float()
model.eval()

param_, model_ = config_reader()

tic = time.time()
test_image = './sample_image/ski.jpg'
oriImg = cv2.imread(test_image)  # B,G,R order
if oriImg is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise IOError('could not read image: %s' % test_image)

# One resize factor per entry of the configured scale search.
multiplier = [x * model_['boxsize'] / oriImg.shape[0] for x in param_['scale_search']]

# Per-scale network outputs, upsampled to the original image size.
heatmap_avg = torch.zeros((len(multiplier), 19, oriImg.shape[0], oriImg.shape[1])).cuda()
paf_avg = torch.zeros((len(multiplier), 38, oriImg.shape[0], oriImg.shape[1])).cuda()

toc = time.time()
print('time is %.5f' % (toc - tic))
tic = time.time()

# Upsamples a network output to the original image size.
upsample = nn.UpsamplingBilinear2d((oriImg.shape[0], oriImg.shape[1])).cuda()

for m, scale in enumerate(multiplier):
    imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
    # NOTE(review): `pad` is not used afterwards, so any padded border is
    # upsampled together with the image region -- confirm whether it should
    # be cropped away first.
    imageToTest_padded, pad = util.padRightDownCorner(imageToTest, model_['stride'], model_['padValue'])
    # HWC -> 1 x C x H x W, with pixel values mapped to roughly (-0.5, 0.5).
    imageToTest_padded = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
    feed = Variable(T.from_numpy(imageToTest_padded)).cuda()
    output1, output2 = model(feed)
    print(output1.size())
    print(output2.size())
    heatmap_avg[m] = upsample(output2)[0].data
    paf_avg[m] = upsample(output1)[0].data

toc = time.time()
print('time is %.5f' % (toc - tic))
tic = time.time()

# Average heatmaps and PAFs over the scales, then reorder C,H,W -> H,W,C.
heatmap_avg = T.transpose(T.transpose(T.squeeze(T.mean(heatmap_avg, 0)), 0, 1), 1, 2)
paf_avg = T.transpose(T.transpose(T.squeeze(T.mean(paf_avg, 0)), 0, 1), 1, 2)
heatmap_avg = heatmap_avg.cpu().numpy()
paf_avg = paf_avg.cpu().numpy()
toc = time.time()
print('time is %.5f' % (toc - tic))
tic = time.time()

# ---------------------------------------------------------------------------
# Joint candidates: local maxima of each smoothed joint heatmap.
# ---------------------------------------------------------------------------
all_peaks = []
peak_counter = 0

for part in range(18):  # the first 18 heatmap channels, one per joint
    map_ori = heatmap_avg[:, :, part]
    heat = gaussian_filter(map_ori, sigma=3)

    # Copies of the smoothed map shifted by one pixel in each direction.
    map_left = np.zeros(heat.shape)
    map_left[1:, :] = heat[:-1, :]
    map_right = np.zeros(heat.shape)
    map_right[:-1, :] = heat[1:, :]
    map_up = np.zeros(heat.shape)
    map_up[:, 1:] = heat[:, :-1]
    map_down = np.zeros(heat.shape)
    map_down[:, :-1] = heat[:, 1:]

    # A peak is >= its four neighbours and above the detection threshold.
    peaks_binary = np.logical_and.reduce(
        (heat >= map_left, heat >= map_right, heat >= map_up, heat >= map_down, heat > param_['thre1']))

    rows, cols = np.nonzero(peaks_binary)
    peaks = list(zip(cols, rows))  # note reverse: stored as (x, y)

    # The score is read from the unsmoothed map; ids are unique over all joints.
    peaks_with_score_and_id = [
        (x, y, map_ori[y, x], peak_counter + n) for n, (x, y) in enumerate(peaks)]

    all_peaks.append(peaks_with_score_and_id)  # per joint: [(x, y, score, id), ...]
    peak_counter += len(peaks)

# ---------------------------------------------------------------------------
# Limb candidates: score each joint pair by sampling the PAF along the
# segment between the two joints (approximate line integral).
# ---------------------------------------------------------------------------
connection_all = []
special_k = []
mid_num = 10  # number of sample points per segment

for k in range(len(mapIdx)):
    score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]]  # the 2 PAF channels of limb k
    candA = all_peaks[limbSeq[k][0] - 1]  # candidates for the first joint of limb k
    candB = all_peaks[limbSeq[k][1] - 1]  # candidates for the second joint of limb k
    nA = len(candA)
    nB = len(candB)
    if nA != 0 and nB != 0:
        connection_candidate = []
        for i in range(nA):
            for j in range(nB):
                vec = np.subtract(candB[j][:2], candA[i][:2])
                norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
                if norm == 0:
                    # Both joints sit on the same pixel: the limb has no
                    # direction and the divisions below would fail.
                    continue
                vec = np.divide(vec, norm)  # unit vector from A to B

                # mid_num evenly spaced points on the segment A[i] -> B[j],
                # rounded to (row, col) pixel indices.
                sample_x = np.linspace(candA[i][0], candB[j][0], num=mid_num)
                sample_y = np.linspace(candA[i][1], candB[j][1], num=mid_num)
                samples = [(int(round(sy)), int(round(sx))) for sx, sy in zip(sample_x, sample_y)]

                # PAF vector at every sample point.
                vec_x = np.array([score_mid[r, c, 0] for r, c in samples])
                vec_y = np.array([score_mid[r, c, 1] for r, c in samples])

                # Dot product of each PAF vector with the limb direction.
                score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
                # Mean score, penalised when the limb is longer than half
                # the image height.
                score_with_dist_prior = (sum(score_midpts) / len(score_midpts) +
                                         min(0.5 * oriImg.shape[0] / norm - 1, 0))
                criterion1 = np.count_nonzero(score_midpts > param_['thre2']) > 0.8 * len(score_midpts)
                criterion2 = score_with_dist_prior > 0
                if criterion1 and criterion2:
                    # (i, j, score, score + both joint scores)
                    connection_candidate.append(
                        [i, j, score_with_dist_prior,
                         score_with_dist_prior + candA[i][2] + candB[j][2]])

        # Greedy matching, best score first; each joint is used at most once.
        connection_candidate.sort(key=lambda cand: cand[2], reverse=True)
        connection = np.zeros((0, 5))
        for i, j, s, _ in connection_candidate:
            if i not in connection[:, 3] and j not in connection[:, 4]:
                connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])  # A_id, B_id, score, i, j
                if len(connection) >= min(nA, nB):
                    break

        connection_all.append(connection)  # rows: [A_id, B_id, score, i, j]
    else:
        special_k.append(k)
        connection_all.append([])

# ---------------------------------------------------------------------------
# Assemble limbs into people.
# subset: one row per person; columns 0..17 hold the candidate id of each
#         joint (-1 if missing), the second-to-last column is the overall
#         score and the last column is the number of joints found.
# candidate: all joint candidates as rows (x, y, score, id).
# ---------------------------------------------------------------------------
subset = -1 * np.ones((0, 20))
candidate = np.array([item for sublist in all_peaks for item in sublist])

for k in range(len(mapIdx)):
    if k not in special_k:
        partAs = connection_all[k][:, 0]  # candidate ids of the first joint
        partBs = connection_all[k][:, 1]  # candidate ids of the second joint
        indexA, indexB = np.array(limbSeq[k]) - 1  # 0-based joint indices

        for i in range(len(connection_all[k])):
            # Find the people that already contain either end of this limb.
            found = 0
            subset_idx = [-1, -1]
            for j in range(len(subset)):
                if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
                    subset_idx[found] = j
                    found += 1

            if found == 1:  # attach the limb to that person
                j = subset_idx[0]
                if subset[j][indexB] != partBs[i]:
                    subset[j][indexB] = partBs[i]
                    subset[j][-1] += 1
                    subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
                elif subset[j][indexA] != partAs[i]:
                    subset[j][indexA] = partAs[i]
                    subset[j][-1] += 1
                    subset[j][-2] += candidate[partAs[i].astype(int), 2] + connection_all[k][i][2]

            elif found == 2:  # if found 2 and disjoint, merge them
                j1, j2 = subset_idx
                print("found = 2")
                membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
                if len(np.nonzero(membership == 2)[0]) == 0:
                    # The two people share no joint, so fold j2 into j1.
                    # The +1 cancels the -1 placeholders.
                    subset[j1][:-2] += (subset[j2][:-2] + 1)
                    subset[j1][-2:] += subset[j2][-2:]
                    subset[j1][-2] += connection_all[k][i][2]
                    subset = np.delete(subset, j2, 0)
                # TODO: overlapping people are left untouched here; it is
                # unresolved whether the limb should instead be attached to
                # j1 as in the found == 1 case.

            # if find no partA in the subset, create a new subset
            elif not found and k < 17:
                row = -1 * np.ones(20)
                row[indexA] = partAs[i]
                row[indexB] = partBs[i]
                row[-1] = 2
                row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
                subset = np.vstack([subset, row])

# Drop people with fewer than 4 joints or a low mean score per joint.
deleteIdx = [i for i in range(len(subset))
             if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4]
subset = np.delete(subset, deleteIdx, axis=0)

# ---------------------------------------------------------------------------
# Rendering.
# ---------------------------------------------------------------------------
canvas = oriImg.copy()  # B,G,R order
for i in range(18):
    for peak in all_peaks[i]:
        # Plain ints: the coordinates are NumPy scalars at this point.
        cv2.circle(canvas, (int(peak[0]), int(peak[1])), 4, colors[i], thickness=-1)

stickwidth = 4

for i in range(17):
    for n in range(len(subset)):
        index = subset[n][np.array(limbSeq[i]) - 1]  # candidate ids of the limb's two joints
        if -1 in index:
            continue
        cur_canvas = canvas.copy()
        xs = candidate[index.astype(int), 0]
        ys = candidate[index.astype(int), 1]
        center = (int(np.mean(xs)), int(np.mean(ys)))
        length = ((ys[0] - ys[1]) ** 2 + (xs[0] - xs[1]) ** 2) ** 0.5
        angle = math.degrees(math.atan2(ys[0] - ys[1], xs[0] - xs[1]))
        polygon = cv2.ellipse2Poly(center, (int(length / 2), stickwidth), int(angle), 0, 360, 1)
        cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
        # Blend so that overlapping limbs stay visible.
        canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)

toc = time.time()
print('time is %.5f' % (toc - tic))
cv2.imwrite('result.png', canvas)