In [13]:
import torch
from torch import nn
import torchvision
from torchvision import models
from torchvision import datasets, transforms
from datetime import datetime
from utils import get_mean_and_std

In [2]:
# Report library versions and GPU availability so results can be reproduced.
print(torch.__version__)
print(torchvision.__version__)
print(torch.cuda.is_available())
# get_device_name() raises when no CUDA device is usable, so only query it
# when CUDA is actually available; otherwise report that we are on the CPU.
print(torch.cuda.get_device_name() if torch.cuda.is_available() else 'cpu')

1.10.2
0.11.3
True
NVIDIA A10


## vgg

In [3]:
# Instantiate torchvision's VGG-16 and load its pretrained weights.
vgg = models.vgg16(pretrained=True)

In [4]:
# Display the model's module tree (layer types and their hyperparameters).
vgg

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [5]:
from torchsummary import summary

In [6]:
# Per-layer output shapes and parameter counts for a 3x224x224 input, computed on the CPU.
summary(vgg, input_size=(3, 224, 224), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
            Conv2d-3         [-1, 64, 224, 224]          36,928
              ReLU-4         [-1, 64, 224, 224]               0
         MaxPool2d-5         [-1, 64, 112, 112]               0
            Conv2d-6        [-1, 128, 112, 112]          73,856
              ReLU-7        [-1, 128, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]         147,584
              ReLU-9        [-1, 128, 112, 112]               0
        MaxPool2d-10          [-1, 128, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]         295,168
             ReLU-12          [-1, 256, 56, 56]               0
           Conv2d-13          [-1, 256, 56, 56]         590,080
             ReLU-14          [-1, 256,

In [8]:
# Swap the last classifier layer (index 6) for a new Linear layer with 10 outputs,
# keeping the same input width, then display the modified model.
in_features = vgg.classifier[6].in_features
vgg.classifier[6] = nn.Linear(in_features, 10)
vgg

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [9]:
# Re-run the layer summary after replacing the classifier head.
summary(vgg, input_size=(3, 224, 224), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
            Conv2d-3         [-1, 64, 224, 224]          36,928
              ReLU-4         [-1, 64, 224, 224]               0
         MaxPool2d-5         [-1, 64, 112, 112]               0
            Conv2d-6        [-1, 128, 112, 112]          73,856
              ReLU-7        [-1, 128, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]         147,584
              ReLU-9        [-1, 128, 112, 112]               0
        MaxPool2d-10          [-1, 128, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]         295,168
             ReLU-12          [-1, 256, 56, 56]               0
           Conv2d-13          [-1, 256, 56, 56]         590,080
             ReLU-14          [-1, 256,

## parameters

In [10]:
# Dataset settings
# input_shape = 32
num_classes = 10

# Training hyperparameters
batch_size, num_epochs, learning_rate = 64, 5, 1e-3

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [11]:
# Show which device was selected.
device

device(type='cuda')

## dataset and dataloader

In [12]:
# Load both CIFAR-10 splits with only a ToTensor conversion applied
# (no resizing or normalisation at this stage).
train_dataset = datasets.CIFAR10(
    '../data/',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = datasets.CIFAR10(
    '../data/',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

Files already downloaded and verified
Files already downloaded and verified


In [14]:
# Compute the training set's mean and std with the project helper from `utils`;
# the displayed values are the ones hard-coded into the Normalize transform.
get_mean_and_std(train_dataset)

==> Computing mean and std..


100%|██████████| 50000/50000 [00:37<00:00, 1321.94it/s]


(tensor([0.4914, 0.4822, 0.4465]), tensor([0.2023, 0.1994, 0.2010]))

In [16]:
# Preprocessing pipeline: upscale to 224x224, convert to a tensor, then
# normalise each channel with the mean/std measured on the training set.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            (0.4914, 0.4822, 0.4465),
            (0.2023, 0.1994, 0.2010),
        ),
    ]
)

In [17]:
# Rebuild both CIFAR-10 splits, this time with the full preprocessing pipeline.
train_dataset = datasets.CIFAR10(
    '../data/',
    train=True,
    transform=transform,
    download=True,
)
test_dataset = datasets.CIFAR10(
    '../data/',
    train=False,
    transform=transform,
    download=True,
)

Files already downloaded and verified
Files already downloaded and verified


In [18]:
# Batch the datasets: the training loader reshuffles every epoch,
# the test loader keeps a fixed order.
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_dataloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [19]:
# Pull a single batch from the training loader to sanity-check the pipeline output.
images, labels = next(iter(train_dataloader))

In [20]:
# Shape layout: (batch_size, channels, height, width)
images.shape

torch.Size([64, 3, 224, 224])

## model arch

In [21]:
# Build a fresh pretrained VGG-16 for fine-tuning: replace the last classifier
# layer so its output width equals the configured number of classes
# (num_classes from the parameters cell, instead of a hard-coded 10),
# then move the whole model to the selected device.
vgg = models.vgg16(pretrained=True)
in_features = vgg.classifier[6].in_features
vgg.classifier[6] = nn.Linear(in_features, num_classes)
vgg = vgg.to(device)

## model train/fine-tune

In [24]:
# Loss: cross-entropy between the model outputs and the integer class labels.
criterion = nn.CrossEntropyLoss()

# Optimiser: SGD with momentum and weight decay over all model parameters.
optimizer = torch.optim.SGD(
    vgg.parameters(),
    lr=learning_rate,
    momentum=0.9,
    weight_decay=5e-4,
)

# Number of mini-batches per epoch, shown in the training progress log.
total_batch = len(train_dataloader)

In [25]:
# Fine-tune the model: one optimiser step per mini-batch, logging every 100 batches.
for epoch in range(num_epochs):
    # Set training mode explicitly so mode-dependent layers (e.g. Dropout) behave
    # correctly even if the model was previously switched to eval mode.
    vgg.train()
    for batch_idx, (images, labels) in enumerate(train_dataloader):
        images = images.to(device)
        labels = labels.to(device)

        # forward
        out = vgg(images)
        loss = criterion(out, labels)

        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()   # update the model parameters

        if (batch_idx+1) % 100 == 0:
            # Accuracy of the current training batch, computed only when logging so
            # that .item() does not force a device-to-host sync on every step.
            # Note: standard practice is to monitor held-out validation data instead;
            # that is what one would use to supervise training and for early stopping.
            n_corrects = (out.argmax(dim=1) == labels).sum().item()
            acc = n_corrects/labels.size(0)
            print(f'{datetime.now()}, {epoch+1}/{num_epochs}, {batch_idx+1}/{total_batch}: {loss.item():.4f}, acc: {acc}')

2023-02-22 00:13:56.818252, 1/5, 100/782: 0.4080, acc: 0.859375
2023-02-22 00:14:28.367012, 1/5, 200/782: 0.4214, acc: 0.90625
2023-02-22 00:15:00.193995, 1/5, 300/782: 0.5746, acc: 0.828125
2023-02-22 00:15:32.204011, 1/5, 400/782: 0.3419, acc: 0.875
2023-02-22 00:16:04.007379, 1/5, 500/782: 0.3597, acc: 0.859375
2023-02-22 00:16:35.717492, 1/5, 600/782: 0.4721, acc: 0.796875
2023-02-22 00:17:07.485007, 1/5, 700/782: 0.1224, acc: 0.953125
2023-02-22 00:18:05.079364, 2/5, 100/782: 0.1760, acc: 0.921875
2023-02-22 00:18:36.859823, 2/5, 200/782: 0.1521, acc: 0.921875
2023-02-22 00:19:08.638372, 2/5, 300/782: 0.2511, acc: 0.90625
2023-02-22 00:19:40.422421, 2/5, 400/782: 0.1399, acc: 0.9375
2023-02-22 00:20:12.218806, 2/5, 500/782: 0.1915, acc: 0.90625
2023-02-22 00:20:44.003433, 2/5, 600/782: 0.1382, acc: 0.9375


KeyboardInterrupt: 

## model evaluation

In [27]:
# Measure top-1 accuracy on the test set.
# - eval() puts mode-dependent layers (torchvision's VGG classifier contains
#   Dropout) into inference behaviour, so predictions are deterministic.
# - no_grad() skips autograd bookkeeping, cutting memory use and run time.
# The model's previous train/eval mode is restored afterwards, even if the
# cell is interrupted, so re-running the training cell keeps working as before.
total = 0
correct = 0
was_training = vgg.training
vgg.eval()
try:
    with torch.no_grad():
        for images, labels in test_dataloader:
            images = images.to(device)
            labels = labels.to(device)
            out = vgg(images)
            preds = torch.argmax(out, dim=1)

            total += images.size(0)
            correct += (preds == labels).sum().item()
finally:
    vgg.train(was_training)
print(f'{correct}/{total}={correct/total}')

KeyboardInterrupt: 

## model save

In [None]:
# Persist only the model's weights (its state_dict), not the full module object;
# the file is written to the current working directory.
torch.save(vgg.state_dict(), 'cnn_cifar.ckpt')