PyTorch from Getting Started to Giving Up: Dataset Loading and Large Batch Sizes on a Single GPU

This blog post has been put off for two months; I've been too lazy lately and racked up quite a backlog. Tonight I'll pay a tiny bit of it back.
1. Loading data in PyTorch
The main idea is to subclass torch.utils.data.Dataset and override its methods: __init__, __getitem__, and __len__.

import csv
import os
import random

import cv2
import numpy as np
import torch.utils.data
from PIL import Image


class dataset(torch.utils.data.Dataset):
    def __init__(self, root, usage='train', limit=0, train_ratio=0,
                 transform=None, target_transform=None):
        super(dataset, self).__init__()
        self.root = root
        self.transform = transform
        self.target_transform = target_transform
        self.usage = usage  # train, validation, test, mining
        self.limit = limit  # caps how many labeled rows are read
        self.image_name = []
        self.labels = []

        if self.usage == 'train':
            self.img_name_list = os.listdir(self.root + usage + '/')
            csv_path = self.root + 'label/train_label.csv'

        elif self.usage == 'validation':
            csv_path = self.root + 'label/valid_label.csv'

        elif self.usage == 'mining':
            self.ratio = train_ratio
            csv_path = self.root + 'label/mining_label.csv'
            csv_path2 = self.root + 'label/train_label.csv'

        if self.usage == 'test':
            # The test set is unlabeled; just collect the image names and
            # defer the actual reads to __getitem__.
            for img in os.listdir(self.root):
                self.image_name.append(img)

        else:
            csv_file = open(csv_path, 'r', encoding='utf-8')
            csv_reader = csv.reader(csv_file)
            cnt = 0

            for img, label in csv_reader:
                self.image_name.append(img)
                self.labels.append(label)
                cnt += 1
                if cnt > self.limit:
                    break
            csv_file.close()

        if self.usage == 'mining':
            # Mix a random subset of the training set into the mining set,
            # sized at train_ratio times the number of mining samples.
            self.limit = len(self.labels) * self.ratio
            csv_file = open(csv_path2, 'r', encoding='utf-8')
            csv_reader = csv.reader(csv_file)
            # cf is the project's config module (imported elsewhere);
            # keep roughly 1 in rat training samples so that about
            # self.limit of them survive.
            rat = int(len(os.listdir(cf.dataset_path + 'train')) / self.limit)
            cnt = 0  # reset so we count only the extra training samples

            for img, label in csv_reader:
                # randint(1, rat) hits rat with probability 1/rat
                if random.randint(1, rat) == rat:
                    self.image_name.append(img)
                    self.labels.append(label)
                    cnt += 1
                    if cnt > self.limit:
                        break
            csv_file.close()

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where target is index of the target class.
        """
        img_name = self.image_name[index]
        if self.usage == 'mining':
            # Mining samples are drawn from the training folder.
            img = cv2.imread(self.root + 'train/' + img_name, cv2.IMREAD_COLOR)
        elif self.usage == 'test':
            # Test images were listed directly under root in __init__.
            img = cv2.imread(self.root + img_name, cv2.IMREAD_COLOR)
        else:
            img = cv2.imread(self.root + self.usage + '/' + img_name, cv2.IMREAD_COLOR)

        # OpenCV loads BGR; convert to RGB before handing off to PIL.
        img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

        if self.usage == 'train':
            target, filename = self.labels[index], img_name

        elif self.usage == 'validation' or self.usage == 'subtest' or self.usage == 'mining':
            target = self.labels[index]

        if self.transform is not None:
            # Random 90-degree rotation as a simple augmentation.
            num_rotate = np.random.randint(0, 4)
            img = img.rotate(90 * num_rotate)
            img = self.transform(img)

        if self.target_transform is not None:
            target = self.target_transform(target)

        if self.usage == 'train':
            return img, target, filename

        elif self.usage == 'validation' or self.usage == 'subtest' or self.usage == 'mining':
            return img, target

        else:  # self.usage == 'test'
            return img

    def __len__(self):
        # image_name is filled for every usage, including the unlabeled test set.
        return len(self.image_name)
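
With the class in place, using it is just a matter of wrapping it in a DataLoader. A minimal usage sketch, assuming the directory layout above (<root>/train/ images plus <root>/label/train_label.csv); the root path, limit, and batch size below are placeholders, not values from the original project:

import torch
import torchvision.transforms as T

# transform must produce tensors so the default DataLoader collate can batch them
trainset = dataset(root='/path/to/data/', usage='train', limit=10000,
                   transform=T.ToTensor())
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64,
                                          shuffle=True, num_workers=4)

img, target, filename = trainset[0]  # 'train' usage returns a triple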

2. Improving GPU utilization
While training I noticed that GPU utilization would never climb, and I suspected the bottleneck was data loading. A quick test confirmed it: the number of DataLoader workers matters a lot, and more is not always better; with too many workers you start paying for contention and inter-process communication overhead.

import time

import numpy as np
import torch
from torch.autograd import Variable  # deprecated since PyTorch 0.4; tensors work directly


def test_dataloader():
    """
    Time one epoch of pure data loading for each candidate num_workers
    to pick the best value. Assumes trainset, hp.batch_size, and USE_CUDA
    are defined elsewhere in the script.
    """
    for i in range(4, 16):
        trainloader = torch.utils.data.DataLoader(trainset, batch_size=hp.batch_size,
                                                  shuffle=True, num_workers=i, pin_memory=True)
        start_time = time.time()
        for epoch in range(1, 2):
            steps = len(trainloader)
            print(steps)
            data_iter = iter(trainloader)

            for step in range(steps):
                inputs, targets, filename = next(data_iter)
                if USE_CUDA:
                    inputs = inputs.cuda()
                    targets = torch.FloatTensor(np.array(targets).astype(float)).cuda()

                inputs, targets = Variable(inputs), Variable(targets)
        end_time = time.time()
        print("Finish with: {} seconds, num_workers={}".format(end_time - start_time, i))

3. How the poor can use a large batch size
If you only have a single GPU, want a large batch size, but have no money for more cards, the only option is software. In PyTorch that means not updating the weights on every step: run a loop, let the gradients accumulate across iterations, and only call the optimizer once the loop finishes. Note, though, that BatchNorm still computes its statistics over the small per-step batch, so use BN as little as possible here; it is too unstable.

    for step in range(steps):
        inputs, targets, filename = next(data_iter)
        if USE_CUDA:
            inputs = inputs.cuda()
            targets = torch.FloatTensor(np.array(targets).astype(float)).cuda()

        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)
        outputs = torch.squeeze(outputs)
        # Scale the loss so the accumulated gradient matches the average
        # over one big batch of batch_size * accumulate_steps samples.
        loss = criterion(outputs, targets) / accumulate_steps

        loss.backward()  # gradients accumulate in .grad until we step
        if (step + 1) % accumulate_steps == 0:
            optimizer.step()
            optimizer.zero_grad()
        batch_size = targets.shape[0]  # bookkeeping for the rest of the loop
        filename_list = filename
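
For reference, the same trick in modern PyTorch (0.4+) no longer needs Variable. A self-contained toy sketch, with a made-up linear model and random data standing in for the original net and loader:

import torch
import torch.nn as nn

net = nn.Linear(10, 1)
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
accumulate_steps = 4  # effective batch size = 4 x the per-step batch

optimizer.zero_grad()
for step in range(100):
    inputs = torch.randn(8, 10)   # toy per-step mini-batch of 8 samples
    targets = torch.randn(8, 1)
    loss = criterion(net(inputs), targets)
    (loss / accumulate_steps).backward()  # gradients accumulate in .grad
    if (step + 1) % accumulate_steps == 0:
        optimizer.step()
        optimizer.zero_grad()

Only the optimizer update is delayed, so memory use stays at the small-batch level while the gradient averages over accumulate_steps times as many samples.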
Original author: 涂山容容
Original link: https://www.jianshu.com/p/9dac9c9ea825
This article was reposted from the web to share knowledge; if it infringes any rights, please contact the blogger for removal.