import torch import torch.nn as nn import torchvision import torchvision.transforms as transforms # Device configuration device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') # Hyper parameters num_epochs = 5 num_classes = 10 batch_size = 100 learning_rate = 0.001 # MNIST dataset train_dataset = torchvision.datasets.MNIST(root='../../data/', train=True, transform=transforms.ToTensor(), download=True) test_dataset = torchvision.datasets.MNIST(root='../../data/', train=False, transform=transforms.ToTensor()) # Data loader train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True) test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False) # Convolutional neural network (two convolutional layers) class ConvNet(nn.Module): def __init__(self, input_channel, num_classes): super(ConvNet, self).__init__() self.layer1 = nn.Sequential( nn.Conv2d(input_channel, 16, kernel_size=5, stride=1, padding=2), nn.BatchNorm2d(16), nn.ReLU(), nn.MaxPool2d(kernel_size=2, stride=2) ) # 28*28*1 -> 14*14*16 self.layer2 = nn.Sequential( nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2), nn.BatchNorm2d(32), nn.ReLU(), nn.MaxPool2d(kernel_size=2, stride=2) ) # 14*14*16 -> 7*7*32 self.fc = nn.Linear(7*7*32, num_classes) def forward(self, input): out = self.layer1(input) out = self.layer2(out) out = out.reshape(out.size(0), -1) out = self.fc(out) return out model = ConvNet(1, num_classes).to(device) # Construct Loss and optimizer criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) # Train the model model.train() total_step = len(train_loader) for epoch in range(num_epochs): for i, (images, labels) in enumerate(train_loader): images = images.to(device) labels = labels.to(device) # Forward pass outputs = model(images) loss = criterion(outputs, labels) # Backward and optimize optimizer.zero_grad() loss.backward() optimizer.step() if (i+1) % 100 == 0: print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' .format(epoch+1, num_epochs, i+1, total_step, loss.item())) # Test the model model.eval() # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance) with torch.no_grad(): correct = 0 total = 0 for images, labels in test_loader: images = images.to(device) labels = labels.to(device) outputs = model(images) _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted == labels).sum().item() print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total)) # Save the model checkpoint torch.save(model.state_dict(), 'model.ckpt')
# ---------------------------------------------------------------------------- # # An implementation of https://arxiv.org/pdf/1512.03385.pdf # # See section 4.2 for the model architecture on CIFAR-10 # # Some part of the code was referenced from below # # https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py # # ---------------------------------------------------------------------------- # import torch import torch.nn as nn import torchvision import torchvision.transforms as transforms # Device configuration device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # Hyper-parameters num_epochs = 80 learning_rate = 0.001 # Image preprocessing modules transform = transforms.Compose([ transforms.Pad(4), transforms.RandomHorizontalFlip(), transforms.RandomCrop(32), transforms.ToTensor()]) # CIFAR-10 dataset train_dataset = torchvision.datasets.CIFAR10(root='../../data/', train=True, transform=transform, download=True) test_dataset = torchvision.datasets.CIFAR10(root='../../data/', train=False, transform=transforms.ToTensor()) # Data loader train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=100, shuffle=True) test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=100, shuffle=False) # 3x3 convolution def conv3x3(in_channels, out_channels, stride=1): return nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False) # Residual block class ResidualBlock(nn.Module): def __init__(self, in_channels, out_channels, stride=1, downsample=None): super(ResidualBlock, self).__init__() self.conv1 = conv3x3(in_channels, out_channels, stride) self.bn1 = nn.BatchNorm2d(out_channels) self.relu = nn.ReLU(inplace=True) self.conv2 = conv3x3(out_channels, out_channels) self.bn2 = nn.BatchNorm2d(out_channels) self.downsample = downsample def forward(self, x): residual = x out = self.conv1(x) out = self.bn1(out) out = self.relu(out) out = self.conv2(out) out = self.bn2(out) if self.downsample: residual = self.downsample(x) out += residual out = self.relu(out) return out # ResNet class ResNet(nn.Module): def __init__(self, block, layers, num_classes=10): super(ResNet, self).__init__() self.in_channels = 16 self.conv = conv3x3(3, 16) self.bn = nn.BatchNorm2d(16) self.relu = nn.ReLU(inplace=True) self.layer1 = self.make_layer(block, 16, layers[0]) self.layer2 = self.make_layer(block, 32, layers[0], 2) self.layer3 = self.make_layer(block, 64, layers[1], 2) self.avg_pool = nn.AvgPool2d(8) self.fc = nn.Linear(64, num_classes) def make_layer(self, block, out_channels, blocks, stride=1): downsample = None if (stride != 1) or (self.in_channels != out_channels): downsample = nn.Sequential( conv3x3(self.in_channels, out_channels, stride=stride), nn.BatchNorm2d(out_channels)) layers = [] layers.append(block(self.in_channels, out_channels, stride, downsample)) self.in_channels = out_channels for i in range(1, blocks): layers.append(block(out_channels, out_channels)) return nn.Sequential(*layers) def forward(self, x): out = self.conv(x) out = self.bn(out) out = self.relu(out) out = self.layer1(out) out = self.layer2(out) out = self.layer3(out) out = self.avg_pool(out) out = out.view(out.size(0), -1) out = self.fc(out) return out model = ResNet(ResidualBlock, [2, 2, 2, 2]).to(device) # Loss and optimizer criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) # For updating learning rate def update_lr(optimizer, lr): for param_group in optimizer.param_groups: param_group['lr'] = lr # Train the model total_step = len(train_loader) curr_lr = learning_rate for epoch in range(num_epochs): for i, (images, labels) in enumerate(train_loader): images = images.to(device) labels = labels.to(device) # Forward pass outputs = model(images) loss = criterion(outputs, labels) # Backward and optimize optimizer.zero_grad() loss.backward() optimizer.step() if (i+1) % 100 == 0: print ("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}" .format(epoch+1, num_epochs, i+1, total_step, loss.item())) # Decay learning rate if (epoch+1) % 20 == 0: curr_lr /= 3 update_lr(optimizer, curr_lr) # Test the model model.eval() with torch.no_grad(): correct = 0 total = 0 for images, labels in test_loader: images = images.to(device) labels = labels.to(device) outputs = model(images) _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted == labels).sum().item() print('Accuracy of the model on the test images: {} %'.format(100 * correct / total)) # Save the model checkpoint torch.save(model.state_dict(), 'resnet.ckpt')
# from torchsummary import summary
# summary(model, (3, 32, 32))
# https://github.com/lanpa/tensorboardX
# from tensorboardX import SummaryWriter
# dummy_input = torch.rand(1, 3, 32, 32).to(device)
# with SummaryWriter(comment='residual') as w:
# w.add_graph(model, (dummy_input, ))
---------------------------------------------------------------- Layer (type) Output Shape Param # ================================================================ Conv2d-1 [-1, 16, 32, 32] 432 BatchNorm2d-2 [-1, 16, 32, 32] 32 ReLU-3 [-1, 16, 32, 32] 0 Conv2d-4 [-1, 16, 32, 32] 2,304 BatchNorm2d-5 [-1, 16, 32, 32] 32 ReLU-6 [-1, 16, 32, 32] 0 Conv2d-7 [-1, 16, 32, 32] 2,304 BatchNorm2d-8 [-1, 16, 32, 32] 32 ReLU-9 [-1, 16, 32, 32] 0 ResidualBlock-10 [-1, 16, 32, 32] 0 Conv2d-11 [-1, 16, 32, 32] 2,304 BatchNorm2d-12 [-1, 16, 32, 32] 32 ReLU-13 [-1, 16, 32, 32] 0 Conv2d-14 [-1, 16, 32, 32] 2,304 BatchNorm2d-15 [-1, 16, 32, 32] 32 ReLU-16 [-1, 16, 32, 32] 0 ResidualBlock-17 [-1, 16, 32, 32] 0 Conv2d-18 [-1, 32, 16, 16] 4,608 BatchNorm2d-19 [-1, 32, 16, 16] 64 ReLU-20 [-1, 32, 16, 16] 0 Conv2d-21 [-1, 32, 16, 16] 9,216 BatchNorm2d-22 [-1, 32, 16, 16] 64 Conv2d-23 [-1, 32, 16, 16] 4,608 BatchNorm2d-24 [-1, 32, 16, 16] 64 ReLU-25 [-1, 32, 16, 16] 0 ResidualBlock-26 [-1, 32, 16, 16] 0 Conv2d-27 [-1, 32, 16, 16] 9,216 BatchNorm2d-28 [-1, 32, 16, 16] 64 ReLU-29 [-1, 32, 16, 16] 0 Conv2d-30 [-1, 32, 16, 16] 9,216 BatchNorm2d-31 [-1, 32, 16, 16] 64 ReLU-32 [-1, 32, 16, 16] 0 ResidualBlock-33 [-1, 32, 16, 16] 0 Conv2d-34 [-1, 64, 8, 8] 18,432 BatchNorm2d-35 [-1, 64, 8, 8] 128 ReLU-36 [-1, 64, 8, 8] 0 Conv2d-37 [-1, 64, 8, 8] 36,864 BatchNorm2d-38 [-1, 64, 8, 8] 128 Conv2d-39 [-1, 64, 8, 8] 18,432 BatchNorm2d-40 [-1, 64, 8, 8] 128 ReLU-41 [-1, 64, 8, 8] 0 ResidualBlock-42 [-1, 64, 8, 8] 0 Conv2d-43 [-1, 64, 8, 8] 36,864 BatchNorm2d-44 [-1, 64, 8, 8] 128 ReLU-45 [-1, 64, 8, 8] 0 Conv2d-46 [-1, 64, 8, 8] 36,864 BatchNorm2d-47 [-1, 64, 8, 8] 128 ReLU-48 [-1, 64, 8, 8] 0 ResidualBlock-49 [-1, 64, 8, 8] 0 AvgPool2d-50 [-1, 64, 1, 1] 0 Linear-51 [-1, 10] 650 ================================================================ Total params: 195,738 Trainable params: 195,738 Non-trainable params: 0 ---------------------------------------------------------------- Input size (MB): 0.01 Forward/backward pass size (MB): 3.63 Params size (MB): 0.75 Estimated Total Size (MB): 4.38 ----------------------------------------------------------------
import torch import torch.nn as nn import torchvision import torchvision.transforms as transforms # Device configuration device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # Hyper-parameters sequence_length = 28 input_size = 28 hidden_size = 128 num_layers = 2 num_classes = 10 batch_size = 100 num_epochs = 2 learning_rate = 0.01 # MNIST dataset train_dataset = torchvision.datasets.MNIST(root='../../data/', train=True, transform=transforms.ToTensor(), download=True) test_dataset = torchvision.datasets.MNIST(root='../../data/', train=False, transform=transforms.ToTensor()) # Data loader train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True) test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False) # Recurrent neural network (many-to-one) class RNN(nn.Module): def __init__(self, input_size, hidden_size, num_layers, num_classes): super(RNN, self).__init__() self.hidden_size = hidden_size self.num_layers = num_layers self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True) self.fc = nn.Linear(hidden_size, num_classes) def forward(self, x): # Set initial hidden and cell states h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device) c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device) # Forward propagate LSTM out, _ = self.lstm(x, (h0, c0)) # out: tensor of shape (batch_size, seq_length, hidden_size) # Decode the hidden state of the last time step out = self.fc(out[:, -1, :]) return out model = RNN(input_size, hidden_size, num_layers, num_classes).to(device) # Loss and optimizer criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) # Train the model total_step = len(train_loader) for epoch in range(num_epochs): for i, (images, labels) in enumerate(train_loader): images = images.reshape(-1, sequence_length, input_size).to(device) labels = labels.to(device) # Forward pass outputs = model(images) loss = criterion(outputs, labels) # Backward and optimize optimizer.zero_grad() loss.backward() optimizer.step() if (i+1) % 100 == 0: print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' .format(epoch+1, num_epochs, i+1, total_step, loss.item())) # Test the model with torch.no_grad(): correct = 0 total = 0 for images, labels in test_loader: images = images.reshape(-1, sequence_length, input_size).to(device) labels = labels.to(device) outputs = model(images) _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted == labels).sum().item() print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total)) # Save the model checkpoint torch.save(model.state_dict(), 'model.ckpt')
import torch import torch.nn as nn import torchvision import torchvision.transforms as transforms # Device configuration device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # Hyper-parameters sequence_length = 28 input_size = 28 hidden_size = 128 num_layers = 2 num_classes = 10 batch_size = 100 num_epochs = 2 learning_rate = 0.003 # MNIST dataset train_dataset = torchvision.datasets.MNIST(root='../../data/', train=True, transform=transforms.ToTensor(), download=True) test_dataset = torchvision.datasets.MNIST(root='../../data/', train=False, transform=transforms.ToTensor()) # Data loader train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True) test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False) # Bidirectional recurrent neural network (many-to-one) class BiRNN(nn.Module): def __init__(self, input_size, hidden_size, num_layers, num_classes): super(BiRNN, self).__init__() self.hidden_size = hidden_size self.num_layers = num_layers self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True) self.fc = nn.Linear(hidden_size*2, num_classes) # 2 for bidirection def forward(self, x): # Set initial states h0 = torch.zeros(self.num_layers*2, x.size(0), self.hidden_size).to(device) # 2 for bidirection c0 = torch.zeros(self.num_layers*2, x.size(0), self.hidden_size).to(device) # Forward propagate LSTM out, _ = self.lstm(x, (h0, c0)) # out: tensor of shape (batch_size, seq_length, hidden_size*2) # Decode the hidden state of the last time step out = self.fc(out[:, -1, :]) return out model = BiRNN(input_size, hidden_size, num_layers, num_classes).to(device) # Loss and optimizer criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) # Train the model total_step = len(train_loader) for epoch in range(num_epochs): for i, (images, labels) in enumerate(train_loader): images = images.reshape(-1, sequence_length, input_size).to(device) labels = labels.to(device) # Forward pass outputs = model(images) loss = criterion(outputs, labels) # Backward and optimize optimizer.zero_grad() loss.backward() optimizer.step() if (i+1) % 100 == 0: print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' .format(epoch+1, num_epochs, i+1, total_step, loss.item())) # Test the model with torch.no_grad(): correct = 0 total = 0 for images, labels in test_loader: images = images.reshape(-1, sequence_length, input_size).to(device) labels = labels.to(device) outputs = model(images) _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted == labels).sum().item() print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total)) # Save the model checkpoint torch.save(model.state_dict(), 'model.ckpt')
# Some part of the code was referenced from below. # https://github.com/pytorch/examples/tree/master/word_language_model import torch import torch.nn as nn import numpy as np from torch.nn.utils import clip_grad_norm from data_utils import Dictionary, Corpus # Device configuration device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # Hyper-parameters embed_size = 128 hidden_size = 1024 num_layers = 1 num_epochs = 5 num_samples = 1000 # number of words to be sampled batch_size = 20 seq_length = 30 learning_rate = 0.002 # Load "Penn Treebank" dataset corpus = Corpus() ids = corpus.get_data('data/train.txt', batch_size) vocab_size = len(corpus.dictionary) num_batches = ids.size(1) // seq_length # RNN based language model class RNNLM(nn.Module): def __init__(self, vocab_size, embed_size, hidden_size, num_layers): super(RNNLM, self).__init__() self.embed = nn.Embedding(vocab_size, embed_size) self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True) self.linear = nn.Linear(hidden_size, vocab_size) def forward(self, x, h): # Embed word ids to vectors x = self.embed(x) # Forward propagate LSTM out, (h, c) = self.lstm(x, h) # Reshape output to (batch_size*sequence_length, hidden_size) out = out.reshape(out.size(0)*out.size(1), out.size(2)) # Decode hidden states of all time steps out = self.linear(out) return out, (h, c) model = RNNLM(vocab_size, embed_size, hidden_size, num_layers).to(device) # Loss and optimizer criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) # Truncated backpropagation def detach(states): return [state.detach() for state in states] # Train the model for epoch in range(num_epochs): # Set initial hidden and cell states states = (torch.zeros(num_layers, batch_size, hidden_size).to(device), torch.zeros(num_layers, batch_size, hidden_size).to(device)) for i in range(0, ids.size(1) - seq_length, seq_length): # Get mini-batch inputs and targets inputs = ids[:, i:i+seq_length].to(device) targets = ids[:, (i+1):(i+1)+seq_length].to(device) # Forward pass states = detach(states) outputs, states = model(inputs, states) loss = criterion(outputs, targets.reshape(-1)) # Backward and optimize model.zero_grad() loss.backward() clip_grad_norm(model.parameters(), 0.5) optimizer.step() step = (i+1) // seq_length if step % 100 == 0: print ('Epoch [{}/{}], Step[{}/{}], Loss: {:.4f}, Perplexity: {:5.2f}' .format(epoch+1, num_epochs, step, num_batches, loss.item(), np.exp(loss.item()))) # Test the model with torch.no_grad(): with open('sample.txt', 'w') as f: # Set intial hidden ane cell states state = (torch.zeros(num_layers, 1, hidden_size).to(device), torch.zeros(num_layers, 1, hidden_size).to(device)) # Select one word id randomly prob = torch.ones(vocab_size) input = torch.multinomial(prob, num_samples=1).unsqueeze(1).to(device) for i in range(num_samples): # Forward propagate RNN output, state = model(input, state) # Sample a word id prob = output.exp() word_id = torch.multinomial(prob, num_samples=1).item() # Fill input with sampled word id for the next time step input.fill_(word_id) # File write word = corpus.dictionary.idx2word[word_id] word = '\n' if word == '<eos>' else word + ' ' f.write(word) if (i+1) % 100 == 0: print('Sampled [{}/{}] words and save to {}'.format(i+1, num_samples, 'sample.txt')) # Save the model checkpoints torch.save(model.state_dict(), 'model.ckpt')
import torch import os class Dictionary(object): def __init__(self): self.word2idx = {} self.idx2word = {} self.idx = 0 def add_word(self, word): if not word in self.word2idx: self.word2idx[word] = self.idx self.idx2word[self.idx] = word self.idx += 1 def __len__(self): return len(self.word2idx) class Corpus(object): def __init__(self): self.dictionary = Dictionary() def get_data(self, path, batch_size=20): # Add words to the dictionary with open(path, 'r') as f: tokens = 0 for line in f: words = line.split() + ['<eos>'] tokens += len(words) for word in words: self.dictionary.add_word(word) # Tokenize the file content ids = torch.LongTensor(tokens) token = 0 with open(path, 'r') as f: for line in f: words = line.split() + ['<eos>'] for word in words: ids[token] = self.dictionary.word2idx[word] token += 1 num_batches = ids.size(0) // batch_size ids = ids[:num_batches*batch_size] return ids.view(batch_size, -1)