'''MobileNetV2 in PyTorch.

See the paper "Inverted Residuals and Linear Bottlenecks:
Mobile Networks for Classification, Detection and Segmentation" for more details.
'''

import torch
import torch.nn as nn
import torch.nn.functional as F
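
# Block implements the paper's inverted residual unit: a 1x1 convolution expands
# the channel count by `expansion`, a 3x3 depthwise convolution (groups == channels)
# filters each channel spatially, and a final 1x1 convolution projects back down
# with no activation (the "linear bottleneck"). The residual shortcut is only
# applied for stride-1 blocks, as checked in forward().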
class Block(nn.Module):
    '''expand + depthwise + pointwise'''
    def __init__(self, in_planes, out_planes, expansion, stride):
        super(Block, self).__init__()
        self.stride = stride

        planes = expansion * in_planes
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, groups=planes, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        self.shortcut = nn.Sequential()
        if stride == 1 and in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out = out + self.shortcut(x) if self.stride == 1 else out
        return out
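
# MobileNetV2 stacks these blocks according to `cfg`: each tuple gives the
# expansion factor, output channels, number of blocks, and the stride of the
# first block in that stage. A 3x3 stem convolution precedes the stages; a
# 1x1 convolution to 1280 channels, global average pooling, and a linear
# classifier follow them.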
class MobileNetV2(nn.Module):
    # (expansion, out_planes, num_blocks, stride)
    cfg = [(1,  16, 1, 1),
           (6,  24, 2, 1),  # NOTE: change stride 2 -> 1 for CIFAR10
           (6,  32, 3, 2),
           (6,  64, 4, 2),
           (6,  96, 3, 1),
           (6, 160, 3, 2),
           (6, 320, 1, 1)]

    def __init__(self, input_channels, output_num):
        super(MobileNetV2, self).__init__()
        # NOTE: change conv1 stride 2 -> 1 for CIFAR10
        self.conv1 = nn.Conv2d(input_channels, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_planes=32)
        self.conv2 = nn.Conv2d(320, 1280, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(1280)
        self.linear = nn.Linear(1280, output_num)

    def _make_layers(self, in_planes):
        layers = []
        for expansion, out_planes, num_blocks, stride in self.cfg:
            # The first block of each stage uses the configured stride; the rest use 1.
            strides = [stride] + [1] * (num_blocks - 1)
            for stride in strides:
                layers.append(Block(in_planes, out_planes, expansion, stride))
                in_planes = out_planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layers(out)
        out = F.relu(self.bn2(self.conv2(out)))
        # NOTE: change pooling kernel_size 7 -> 4 for CIFAR10
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
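
# NOTE: as configured (stride-1 stem and stride-1 first stage), a 32x32 CIFAR10
# input reaches the pooling layer as a 4x4 feature map, which is why
# F.avg_pool2d(out, 4) reduces it to 1x1 before the classifier. For 224x224
# ImageNet-style inputs, the original strides and the kernel_size-7 pooling
# noted above would need to be restored.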

if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='MobileNetV2 Implementation')
    parser.add_argument('--input_channels', default=3, type=int)
    parser.add_argument('--output_num', default=10, type=int)
    # parser.add_argument('--input_size', default=32, type=int)
    args = parser.parse_args()

    model = MobileNetV2(args.input_channels, args.output_num)
    tensor = torch.rand(1, args.input_channels, 32, 32)
    pred = model(tensor)
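    # Sanity check (illustrative): with a 32x32 input and the CIFAR10-style
    # strides above, the model should emit one logit vector per image.
    assert pred.shape == (1, args.output_num)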

    print(model)
    print("Predictions shape:", pred.shape)