(I) ResNet: Residual Networks
ResNet is one of the most influential convolutional neural network architectures: it makes it practical to train networks that are hundreds of layers deep, i.e., much deeper and larger models. So how is it designed?
(1) Does stacking more layers always improve accuracy?
Not necessarily. As the figure illustrates, an ordinary network behaves like the left panel: as the model grows deeper and wider, it can represent more functions, but the larger function classes easily drift away from the smaller ones and may end up learning something far from what we want. What we would prefer is the situation in the right panel, where each larger model contains the smaller ones as special cases, so making the model bigger can never make it worse. How does ResNet achieve this?
(2) Structure of the residual block
To use a circuit analogy: the networks we have seen so far, such as LeNet, AlexNet, and VGG, are built by wiring layers in series, and the Inception block in GoogLeNet adds a parallel design. A residual network instead uses a "shortcut" (skip connection): the larger function is the input plus a learned residual. Intuitively, even in the worst case the larger network can fall back to the identity mapping, so it should be no worse than the smaller one.
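In symbols, if the stacked layers learn a function g(x), the block outputs f(x) = x + g(x) rather than g(x) alone. A minimal sketch of the idea (the helper below is purely illustrative, not the Residual class implemented in the code section):

import torch

# Skip connection: add the block's input back onto the output of the learned part.
# When g(x) is close to zero the whole block reduces to the identity mapping.
def residual_forward(x, g):
    return x + g(x)

x = torch.ones(2, 3)
print(residual_forward(x, lambda t: torch.zeros_like(t)))  # identical to x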
(3) Details of the ResNet block
There are two kinds of ResNet blocks:
- (1) a ResNet block that halves the height and width (stride=2), which also uses a 1x1 convolution on the shortcut so the shapes match;
- (2) followed by several ResNet blocks that keep the height and width unchanged.
The figure shows a variety of different residual block designs.
(4) Overall architecture of ResNet
The overall architecture is still very similar to VGG: it can be divided into 5 stages, with each stage built from ResNet blocks instead of plain convolutional blocks. The concrete details are easiest to see through the code.
(II) Code Implementation
import torch
from torch import nn
from torchvision import transforms
import torchvision
from torch.utils import data
from d2l import torch as d2l
import numpy as np
import matplotlib.pyplot as plt
from torch.nn import functional as F
class Residual(nn.Module):
    """A residual block: two 3x3 convolutions plus a shortcut connection."""
    def __init__(self, in_channels, num_channels, use_1xconv=False, stride=1) -> None:
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, num_channels, kernel_size=3, stride=stride, padding=1)
        self.conv2 = nn.Conv2d(num_channels, num_channels, kernel_size=3, padding=1)
        if use_1xconv:
            # 1x1 convolution on the shortcut so its channels (and stride) match the main path
            self.conv3 = nn.Conv2d(in_channels, num_channels, kernel_size=1, stride=stride)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)
        # self.relu = nn.ReLU(inplace=True)  # inplace=True modifies the tensor in place to save memory
    def forward(self, x):
        y = F.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))
        if self.conv3:
            x = self.conv3(x)  # transform the shortcut so the shapes match
        y += x  # add the shortcut back onto the main path
        return F.relu(y)
# Check the input and output shapes after passing through a residual block
res_blk = Residual(3,6,use_1xconv=True,stride=2)
x = torch.rand(4,3,6,6)
y = res_blk(x)
print(y.shape)
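# For comparison, a small sketch of the other case: a block that keeps the
# channel count and height/width unchanged needs no 1x1 convolution on the
# shortcut, so the input can be added back directly.
res_blk_same = Residual(3, 3)
print(res_blk_same(x).shape)  # expected: torch.Size([4, 3, 6, 6]), same as the input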
# The first_block flag keeps the height/width unchanged; it is used for b2,
# where in_channels == num_channels (the max-pool in b1 already downsampled).
# Every other stage halves the height/width in its first residual block.
def resnet_block(in_channels, num_channels,num_residuals,first_block=False):
blk = []
for i in range(num_residuals):
if i == 0 and not first_block:
blk.append(Residual(in_channels,num_channels,use_1xconv=True, stride=2))
else:
blk.append(Residual(num_channels,num_channels))
return blk
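# Quick sanity check (a small sketch): the first block of a non-first stage
# should halve the height/width and change the channel count, while the
# remaining blocks keep both unchanged.
stage = nn.Sequential(*resnet_block(64, 128, 2))
print(stage(torch.rand(1, 64, 56, 56)).shape)  # expected: torch.Size([1, 128, 28, 28])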
# Build the 5 stages
b1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
nn.BatchNorm2d(64), nn.ReLU(),
nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
b2 = nn.Sequential(*resnet_block(64,64,2,first_block=True))
b3 = nn.Sequential(*resnet_block(64,128,2))
b4 = nn.Sequential(*resnet_block(128,256,2))
b5 = nn.Sequential(*resnet_block(256,512,2))
net = nn.Sequential(b1,b2,b3,b4,b5,
nn.AdaptiveAvgPool2d((1,1)),
nn.Flatten(),
nn.Linear(512,10)
)
print(net)
# Observe how the tensor shape changes as it passes through each stage
x = torch.randn(2, 1, 224, 224)
for layer in net:
x = layer(x)
print(layer.__class__.__name__,"output shape:\t\t",x.shape)
# Now test the result on the Fashion-MNIST dataset
def load_data_fashion_mnist(batch_size, resize=None):
    """Download (or load) the Fashion-MNIST dataset and return the train/test iterators."""
    trans = [transforms.ToTensor()]
    if resize:
        # resize the 28x28 images (e.g. to 96x96) before converting them to tensors
        trans.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(
        root="../data/",
        train=True,
        transform=trans,
        download=False  # set this to True if the dataset has not been downloaded yet
    )
    mnist_test = torchvision.datasets.FashionMNIST(
        root="../data/",
        train=False,
        transform=trans,
        download=False  # set this to True if the dataset has not been downloaded yet
    )
    return (data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=0),
            data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=0))
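# A quick check (sketch) that the resize argument takes effect: with resize=96,
# each batch of images should come out with shape (batch_size, 1, 96, 96).
tmp_train, _ = load_data_fashion_mnist(32, resize=96)
for X, y in tmp_train:
    print(X.shape, y.shape)  # expected: torch.Size([32, 1, 96, 96]) torch.Size([32])
    break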
# Hyperparameters
batch_size = 128
learning_rate = 0.01
epochs = 20
# Load the data
train_iter, test_iter = load_data_fashion_mnist(batch_size,resize=96)
d2l.train_ch6(net, train_iter, test_iter, epochs, learning_rate, d2l.try_gpu())
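# d2l.train_ch6 handles the training loop, evaluation, and plotting. If the d2l
# package is not available, a bare-bones equivalent (a sketch only, without the
# accuracy evaluation and plotting that train_ch6 provides) looks roughly like this:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net.to(device)
optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate)
loss_fn = nn.CrossEntropyLoss()
for epoch in range(epochs):
    net.train()
    for X, y in train_iter:
        X, y = X.to(device), y.to(device)
        optimizer.zero_grad()
        loss = loss_fn(net(X), y)
        loss.backward()
        optimizer.step()
    print(f'epoch {epoch + 1}, last batch loss {loss.item():.4f}')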