Caffe用起来太笨重了,最近转到PyTorch,用起来实在是太方便了,上手也非常快,这里贴一下PyTorch官网上的两个小例程,借此掌握一下它的基本用法:
例程一:利用nn 这个module构建网络,实现一个图像分类的小功能;
链接:http://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
# -*- coding:utf-8 -*-
"""Train a small CNN on CIFAR-10 and report overall and per-class test accuracy.

The script downloads CIFAR-10 into ./data, shows one training batch, trains
``Net`` for two epochs with SGD, and then evaluates it on the test split.
It runs on the GPU when CUDA is available and on the CPU otherwise.
"""
import matplotlib.pyplot as plt
import numpy as np
import pylab
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Preprocessing: convert each image to a tensor, then normalize it.
# The first tuple passed to Normalize holds the per-channel means, the second
# the per-channel standard deviations.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Training and test splits; num_workers=2 loads batches in two subprocesses.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=True, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

# Pick the device once so the script also runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def imshow(img):
    """Display an image tensor laid out as (channels, height, width).

    The ``img / 2 + 0.5`` step inverts the Normalize transform above
    (mean 0.5, std 0.5) before the tensor is handed to matplotlib.
    """
    img = img / 2 + 0.5
    npimg = img.numpy()
    # matplotlib wants (height, width, channels).
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    pylab.show()


# Show the images of one training batch together with their labels.
dataiter = iter(trainloader)
images, labels = next(dataiter)  # next(it) instead of the removed it.next()
for i in range(len(labels)):
    p = plt.subplot()
    p.set_title("label: %5s" % classes[labels[i]])
    imshow(images[i])


# Build the network.
class Net(nn.Module):
    """Two conv + max-pool stages followed by three fully connected layers."""

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the 10 raw class scores for each image in batch ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten the 16*5*5 feature maps produced after conv2 into vectors
        # of 400 values so they can feed the fully connected layers.
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net().to(device)

# Define the loss function and the optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Train the network.
for epoch in range(2):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # .item() extracts the Python float; indexing a 0-dim loss tensor
        # with [0] is an error on current PyTorch.
        running_loss += loss.item()
        if i % 2000 == 1999:
            print('[%d , %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
print('Finished Training')

# Show one test batch with its ground truth and the predicted classes.
dataiter = iter(testloader)
images, labels = next(dataiter)
imshow(torchvision.utils.make_grid(images))
print('GroundTruth:', ' '.join(classes[labels[j]] for j in range(len(labels))))

with torch.no_grad():  # inference only, no autograd graph needed
    outputs = net(images.to(device))
_, predicted = torch.max(outputs, 1)
print('Predicted: ', ' '.join('%5s' % classes[predicted[j]] for j in range(len(labels))))

# Single pass over the test set collecting per-class counts; the overall
# accuracy is derived from the same counts. Counters start at zero so the
# reported percentages are exact.
class_correct = [0] * len(classes)
class_total = [0] * len(classes)
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images.to(device))
        _, predicted = torch.max(outputs, 1)
        hits = predicted.cpu() == labels
        # Iterate over the real batch contents rather than assuming 4 items.
        for label, hit in zip(labels.tolist(), hits.tolist()):
            class_correct[label] += int(hit)
            class_total[label] += 1

correct = sum(class_correct)
total = sum(class_total)
print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

for i in range(len(classes)):
    # Guard against a class that never occurred in the evaluated data.
    class_acc = 100 * class_correct[i] / class_total[i] if class_total[i] else 0
    print('Accuracy of %5s : %2d %%' % (classes[i], class_acc))
例程二:在resnet18的预训练模型上进行finetune,然后实现一个ants和bees的二分类功能:
链接:http://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html
# -*- coding:utf-8 -*- from __future__ import print_function , division import torch import torch.nn as nn import torch.optim as optim from torch.optim import lr_scheduler from torch.autograd import Variable import numpy as np import torchvision from torchvision import datasets , models , transforms import matplotlib.pyplot as plt import time import os import pylab #data process data_transforms = { 'train' : transforms.Compose([ transforms.RandomSizedCrop(224) , transforms.RandomHorizontalFlip() , transforms.ToTensor() , transforms.Normalize([0.485 , 0.456 , 0.406] , [0.229 , 0.224 , 0.225]) ]) , 'val' : transforms.Compose([ transforms.Scale(256) , transforms.CenterCrop(224) , transforms.ToTensor() , transforms.Normalize([0.485 , 0.456 , 0.406] , [0.229 , 0.224 , 0.225]) ]) , } data_dir = 'hymenoptera_data' image_datasets = {x : datasets.ImageFolder(os.path.join(data_dir , x) , data_transforms[x]) for x in ['train' , 'val']} dataloders = {x : torch.utils.data.DataLoader(image_datasets[x] , batch_size = 4 , shuffle = True , num_workers = 4) for x in ['train' , 'val']} dataset_sizes = {x : len(image_datasets[x]) for x in ['train' , 'val']} class_names = image_datasets['train'].classes print(class_names) use_gpu = torch.cuda.is_available() #show several images def imshow(inp , title = None): inp = inp.numpy().transpose((1 , 2 , 0)) mean = np.array([0.485 , 0.456 , 0.406]) std = np.array([0.229 , 0.224 , 0.225]) inp = std * inp + mean inp = np.clip(inp , 0 , 1) plt.imshow(inp) if title is not None: plt.title(title) pylab.show() plt.pause(0.001) inputs , classes = next(iter(dataloders['train'])) out = torchvision.utils.make_grid(inputs) imshow(out , title = [class_names[x] for x in classes]) #train the model def train_model(model , criterion , optimizer , scheduler , num_epochs = 25): since = time.time() best_model_wts = model.state_dict() #Returns a dictionary containing a whole state of the module. 
best_acc = 0.0 for epoch in range(num_epochs): print('Epoch {}/{}'.format(epoch , num_epochs - 1)) print('-' * 10) #set the mode of model for phase in ['train' , 'val']: if phase == 'train': scheduler.step() #about lr and gamma model.train(True) #set model to training mode else: model.train(False) #set model to evaluate mode running_loss = 0.0 running_corrects = 0 #Iterate over data for data in dataloders[phase]: inputs , labels = data if use_gpu: inputs = Variable(inputs.cuda()) labels = Variable(labels.cuda()) else: inputs = Variable(inputs) lables = Variable(labels) optimizer.zero_grad() #forward outputs = model(inputs) _ , preds = torch.max(outputs , 1) loss = criterion(outputs , labels) #backward if phase == 'train': loss.backward() #backward of gradient optimizer.step() #strategy to drop running_loss += loss.data[0] running_corrects += torch.sum(preds.data == labels.data) epoch_loss = running_loss / dataset_sizes[phase] epoch_acc = running_corrects / dataset_sizes[phase] print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase , epoch_loss , epoch_acc)) if phase == 'val' and epoch_acc > best_acc: best_acc = epoch_acc best_model_wts = model.state_dict() print() time_elapsed = time.time() - since print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60 , time_elapsed % 60)) print('Best val Acc: {:4f}'.format(best_acc)) model.load_state_dict(best_model_wts) return model #visualizing the model predictions def visualize_model(model , num_images = 6): images_so_far = 0 fig = plt.figure() for i , data in enumerate(dataloders['val']): inputs , labels = data if use_gpu: inputs , labels = Variable(inputs.cuda()) , Variable(labels.cuda()) else: inputs , labels = Variable(inputs) , Variable(labels) outputs = model(inputs) _ , preds = torch.max(outputs.data , 1) for j in range(inputs.size()[0]): images_so_far += 1 ax = plt.subplot(num_images // 2 , 2 , images_so_far) ax.axis('off') ax.set_title('predicted: {}'.format(class_names[preds[j]])) 
imshow(inputs.cpu().data[j]) if images_so_far == num_images: return #Finetuning the convnet from torchvision.models.resnet import model_urls model_urls['resnet18'] = model_urls['resnet18'].replace('https://' , 'http://') model_ft = models.resnet18(pretrained = True) num_ftrs = model_ft.fc.in_features model_ft.fc = nn.Linear(num_ftrs , 2) if use_gpu: model_ft = model_ft.cuda() criterion = nn.CrossEntropyLoss() optimizer_ft = optim.SGD(model_ft.parameters() , lr = 0.001 , momentum = 0.9) exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft , step_size = 7 , gamma = 0.1) #start finetuning model_ft = train_model(model_ft , criterion , optimizer_ft , exp_lr_scheduler , num_epochs = 25) torch.save(model_ft.state_dict() , '/home/zf/resnet18.pth') visualize_model(model_ft)
当然,finetune有两种方式,以这个例子来说:
(1)只修改最后一层全连接层,输出类数改为2,然后在预训练模型上进行finetune;
(2)固定全连接层前面所有层的参数,也就是不为它们计算梯度、也不更新它们,只训练最后一层全连接层;实现的时候把前面这些层参数的requires_grad设为False就OK了;
代码见下:
# Variant 2: keep the pretrained ResNet-18 weights fixed and train only a
# new final layer.
model_conv = torchvision.models.resnet18(pretrained=True)

# Turn off gradient tracking for every pretrained parameter.
for param in model_conv.parameters():
    param.requires_grad = False

# A newly constructed module has requires_grad=True on its parameters by
# default, so the replacement head below stays trainable.
num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Linear(num_ftrs, 2)

model_conv = model_conv.cuda() if use_gpu else model_conv

criterion = nn.CrossEntropyLoss()

# Unlike the full fine-tuning run, only the parameters of the final layer
# are handed to the optimizer.
optimizer_conv = optim.SGD(
    model_conv.fc.parameters(),
    lr=0.001,
    momentum=0.9,
)

# Decay the learning rate by a factor of 0.1 every 7 epochs.
exp_lr_scheduler = lr_scheduler.StepLR(
    optimizer_conv,
    step_size=7,
    gamma=0.1,
)

model_conv = train_model(
    model_conv,
    criterion,
    optimizer_conv,
    exp_lr_scheduler,
    num_epochs=25,
)
可以说,从构建网络,到训练网络,再到测试,由于完全是python风格,实在是太方便了~