This post has been sitting in my drafts for two months; I've been too lazy lately and owe quite a few write-ups. Tonight I'll pay back a little of that debt.
1. Data loading in PyTorch
The main work is subclassing the Dataset class and overriding its methods, namely __init__, __getitem__, and __len__:
import csv
import os
import random

import cv2
import numpy as np
import torch.utils.data as data
from PIL import Image

import config as cf  # the project's config module (assumed name), provides dataset_path


class dataset(data.Dataset):
    def __init__(self, root, usage='train', limit=0, train_ratio=0,
                 transform=None, target_transform=None):
        super(dataset, self).__init__()
        self.root = root
        self.transform = transform
        self.target_transform = target_transform
        self.usage = usage  # train, validation, test, mining
        self.limit = limit  # 0 means "no limit"
        self.image_name = []
        self.labels = []
        self.data = []
        if self.usage == 'train':
            csv_path = self.root + 'label/train_label.csv'
        elif self.usage == 'validation':
            csv_path = self.root + 'label/valid_label.csv'
        elif self.usage == 'mining':
            self.ratio = train_ratio
            csv_path = self.root + 'label/mining_label.csv'
            csv_path2 = self.root + 'label/train_label.csv'
        if self.usage == 'test':
            # test images sit directly under root and have no labels,
            # so just load the pixel arrays
            for img in os.listdir(self.root):
                self.data.append(cv2.imread(self.root + img, cv2.IMREAD_COLOR))
        else:
            cnt = 0
            with open(csv_path, 'r', encoding='utf-8') as csv_file:
                for img, label in csv.reader(csv_file):
                    self.image_name.append(img)
                    self.labels.append(label)
                    cnt += 1
                    if self.limit and cnt >= self.limit:
                        break
            if self.usage == 'mining':
                # top up with training samples drawn uniformly at random,
                # until the total reaches ratio times the mined count
                self.limit = len(self.labels) * self.ratio
                rat = max(1, int(len(os.listdir(cf.dataset_path + 'train')) / self.limit))
                with open(csv_path2, 'r', encoding='utf-8') as csv_file:
                    for img, label in csv.reader(csv_file):
                        if random.randint(1, rat) == rat:  # keep roughly 1/rat of the rows
                            self.image_name.append(img)
                            self.labels.append(label)
                            cnt += 1
                            if cnt >= self.limit:
                                break
    def __getitem__(self, index):
        """
        Args:
            index (int): Index
        Returns:
            tuple: (image, target) where target is the label of the sample.
        """
        if self.usage == 'test':
            img = self.data[index]
        elif self.usage == 'mining':
            # mining samples are re-read from the train folder
            img = cv2.imread(self.root + 'train/' + self.image_name[index],
                             cv2.IMREAD_COLOR)
        else:
            img = cv2.imread(self.root + self.usage + '/' + self.image_name[index],
                             cv2.IMREAD_COLOR)
        # OpenCV loads BGR; convert to RGB before handing the array to PIL
        img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        if self.usage != 'test':
            target = self.labels[index]
        if self.transform is not None:
            # random multiple-of-90-degree rotation as cheap augmentation
            num_rotate = np.random.randint(0, 4)
            img = img.rotate(90 * num_rotate)
            img = self.transform(img)
        if self.usage != 'test' and self.target_transform is not None:
            target = self.target_transform(target)
        if self.usage == 'train':
            return img, target, self.image_name[index]
        elif self.usage in ('validation', 'subtest', 'mining'):
            return img, target
        else:  # self.usage == 'test'
            return img

    def __len__(self):
        if self.usage == 'test':
            return len(self.data)
        return len(self.labels)
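For completeness, here is a minimal sketch of how the class above would be used. The directory layout under data_root and the torchvision transform are assumptions made up for this example, not something the original code specifies:

import torchvision.transforms as transforms

data_root = '/data/my_dataset/'  # hypothetical path: expects root/train/, root/label/, ...

train_transform = transforms.Compose([
    transforms.ToTensor(),  # the random rotation already happens inside __getitem__
])

trainset = dataset(root=data_root, usage='train', limit=0, transform=train_transform)
print(len(trainset))  # number of (image, label) pairs read from the CSV
img, target, filename = trainset[0]  # train mode also returns the filename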
2. Raising GPU utilization
During training I noticed that GPU utilization stayed stubbornly low, and I suspected the bottleneck was data loading. A quick test confirmed it: the number of loader workers matters a lot, and more is not always better. With too many workers they start competing with each other, and the communication overhead eats up the gains.
import time

import numpy as np
import torch
from torch.autograd import Variable

# trainset is the dataset defined above; hp (hyper-parameters) and USE_CUDA
# come from the surrounding training script
def test_dataloader():
    '''
    This function is designed for choosing the best num_workers.
    :return: None
    '''
    for i in range(4, 16):
        trainloader = torch.utils.data.DataLoader(trainset, batch_size=hp.batch_size,
                                                  shuffle=True, num_workers=i,
                                                  pin_memory=True)
        start_time = time.time()
        for epoch in range(1, 2):
            steps = len(trainloader)
            print(steps)
            data_iter = iter(trainloader)
            for step in range(steps):
                inputs, targets, filename = next(data_iter)
                if USE_CUDA:
                    inputs = inputs.cuda()
                    targets = torch.FloatTensor(np.array(targets).astype(float)).cuda()
                # Variable is a no-op in PyTorch >= 0.4; kept for compatibility
                inputs, targets = Variable(inputs), Variable(targets)
        end_time = time.time()
        print("Finish with: {} seconds, num_workers={}".format(end_time - start_time, i))
3. A poor man's large batch size
If you're a single-GPU pauper who wants a large batch size but has no money for more cards, the only option is software. In PyTorch the trick is to stop updating the weights on every step: accumulate gradients in a loop and only apply the update once the loop completes. Be aware, though, that BatchNorm still computes its statistics over the small per-step batch, so try to avoid BN here; it gets too unstable.
for step in range(steps):
    inputs, targets, filename = next(data_iter)
    if USE_CUDA:
        inputs = inputs.cuda()
        targets = torch.FloatTensor(np.array(targets).astype(float)).cuda()
    inputs, targets = Variable(inputs), Variable(targets)
    outputs = net(inputs)
    outputs = torch.squeeze(outputs)
    loss = criterion(outputs, targets)
    # scale the loss so the summed gradients match the average over
    # the large effective batch
    loss = loss / accumulate_steps
    loss.backward()  # gradients accumulate in .grad between optimizer steps
    if (step + 1) % accumulate_steps == 0:
        optimizer.step()
        optimizer.zero_grad()
    batch_size = targets.shape[0]
    filename_list = filename
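A note on why the loss is divided by accumulate_steps above: averaging the loss over each micro-batch and scaling by 1/k makes the summed gradients identical to those of one k-times-larger batch. Here is a self-contained toy check of that equivalence; the linear model and random data are made up purely for the demonstration:

import torch
import torch.nn as nn

torch.manual_seed(0)
model = nn.Linear(10, 1)
criterion = nn.MSELoss()
x = torch.randn(8, 10)
y = torch.randn(8, 1)

# one big batch of 8
model.zero_grad()
criterion(model(x), y).backward()
big_grad = model.weight.grad.clone()

# the same 8 samples as 4 accumulated micro-batches of 2
model.zero_grad()
accumulate_steps = 4
for chunk_x, chunk_y in zip(x.chunk(accumulate_steps), y.chunk(accumulate_steps)):
    loss = criterion(model(chunk_x), chunk_y) / accumulate_steps
    loss.backward()
print(torch.allclose(big_grad, model.weight.grad, atol=1e-6))  # True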