- 前言
- 一、什么是ConvMixer?
- 1.网络结构图:
- 二、实现步骤
- 1.pytorch实现
- 2.keras实现
- 总结
前言
继现在炒得火热的Mixer 结构和Vit 结构,最近又出来了一种叫ConvMixer 的结构。真的是服气,刚出没多久,感觉是个新坑,可以踩一踩~。我记得9月10号的时候看见出来的,这两天代码刚开源。
官方链接:
论文地址:https://openreview.net/pdf?id=TVHS5Y4dNvM
Github 地址:https://github.com/tmp-iclr/convmixer
官方给的代码有点难懂,所以这里我给它重构了一下。看起来通俗易懂
GitHub 地址如下:https://github.com/jiantenggei/ConvMixer
一、什么是ConvMixer?ConvMixer 在思想上与 ViT 和 MLP-Mixer 一致,都是先把图片通过卷积映射成一个个特征块(patch),然后再输入到 Mixer 或者 ViT 结构中。
如下图所示:
表面上Vit 和MLP-Mixer 不包含卷积,但大多数实现方式在 embedding时,都会采用卷积。h代表hidden_dim 也就是隐藏层维度,n表示原图像的长宽,p代表patch_size。
这就是ConvMixer的网络结构图,结构很简单。在ConvMixer Layer 中, 使用了深度可分离卷积,GELU 激活函数,逐点卷积。
除了激活函数,卷积结构不就是MobileNet 里面的东西吗?
可能是为了降低模型的复杂性吧!
但是我看见的还是残差结构(残差:一个能打的都没有~)
首先我们来定义 ConvMixer Layer 结构,代码如下所示:
class ConvMixerLayer(nn.Module):
def __init__(self,dim,kernel_size = 9):
super().__init__()
#残差结构
self.Resnet = nn.Sequential(
nn.Conv2d(dim,dim,kernel_size=kernel_size,groups=dim,padding='same'),
nn.GELU(),
nn.BatchNorm2d(dim)
)
#逐点卷积
self.Conv_1x1 = nn.Sequential(
nn.Conv2d(dim,dim,kernel_size=1),
nn.GELU(),
nn.BatchNorm2d(dim)
)
def forward(self,x):
x = x +self.Resnet(x)
x = self.Conv_1x1(x)
return x
实现过程非常简单,但这里它用的卷积核貌似有点大——9x9 的卷积核。
定义好之后就可以直接用它来组建我们的完整网络了。代码如下所示:
class ConvMixer(nn.Module):
    """ConvMixer network: patch-embedding conv stem, `depth` ConvMixerLayer
    blocks, then a global-average-pool + linear classification head.
    """

    def __init__(self, dim, depth, kernel_size=9, patch_size=7, n_classes=1000,
                 in_channels=3):
        """
        Args:
            dim: hidden channel width (h in the paper).
            depth: number of ConvMixerLayer blocks.
            kernel_size: depthwise kernel size inside each block.
            patch_size: kernel size AND stride of the patch-embedding conv (p).
            n_classes: number of classifier outputs.
            in_channels: channels of the input image. Defaults to 3 (RGB),
                so existing callers are unaffected; pass 1 for grayscale, etc.
        """
        super().__init__()
        # Patch embedding: kernel == stride == patch_size splits the image
        # into non-overlapping patches and projects each to `dim` channels.
        self.conv2d1 = nn.Sequential(
            nn.Conv2d(in_channels, dim, kernel_size=patch_size, stride=patch_size),
            nn.GELU(),
            nn.BatchNorm2d(dim),
        )
        # Stack of `depth` identical ConvMixer blocks.
        self.ConvMixer_blocks = nn.ModuleList(
            [ConvMixerLayer(dim=dim, kernel_size=kernel_size) for _ in range(depth)]
        )
        # Classification head: global average pool -> flatten -> linear.
        self.head = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(dim, n_classes),
        )

    def forward(self, x):
        # Patch-embedding stem.
        x = self.conv2d1(x)
        # Run the ConvMixer blocks in order.
        for block in self.ConvMixer_blocks:
            x = block(x)
        # Classifier output (logits).
        return self.head(x)
分为编码时的卷积、多层 ConvMixer_block 的计算和分类输出三部分。这就复现完了,来看看完整代码:
import torch
import torch.nn as nn
from torchsummary import summary
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
class ConvMixerLayer(nn.Module):
def __init__(self,dim,kernel_size = 9):
super().__init__()
self.Resnet = nn.Sequential(
nn.Conv2d(dim,dim,kernel_size=kernel_size,groups=dim,padding='same'),
nn.GELU(),
nn.BatchNorm2d(dim)
)
self.Conv_1x1 = nn.Sequential(
nn.Conv2d(dim,dim,kernel_size=1),
nn.GELU(),
nn.BatchNorm2d(dim)
)
def forward(self,x):
x = x +self.Resnet(x)
x = self.Conv_1x1(x)
return x
class ConvMixer(nn.Module):
    """ConvMixer network: patch-embedding conv stem, `depth` ConvMixerLayer
    blocks, then a global-average-pool + linear classification head.
    """

    def __init__(self, dim, depth, kernel_size=9, patch_size=7, n_classes=1000,
                 in_channels=3):
        """
        Args:
            dim: hidden channel width (h in the paper).
            depth: number of ConvMixerLayer blocks.
            kernel_size: depthwise kernel size inside each block.
            patch_size: kernel size AND stride of the patch-embedding conv (p).
            n_classes: number of classifier outputs.
            in_channels: channels of the input image. Defaults to 3 (RGB),
                so existing callers are unaffected; pass 1 for grayscale, etc.
        """
        super().__init__()
        # Patch embedding: kernel == stride == patch_size splits the image
        # into non-overlapping patches and projects each to `dim` channels.
        self.conv2d1 = nn.Sequential(
            nn.Conv2d(in_channels, dim, kernel_size=patch_size, stride=patch_size),
            nn.GELU(),
            nn.BatchNorm2d(dim),
        )
        # Stack of `depth` identical ConvMixer blocks.
        self.ConvMixer_blocks = nn.ModuleList(
            [ConvMixerLayer(dim=dim, kernel_size=kernel_size) for _ in range(depth)]
        )
        # Classification head: global average pool -> flatten -> linear.
        self.head = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(dim, n_classes),
        )

    def forward(self, x):
        x = self.conv2d1(x)
        for block in self.ConvMixer_blocks:
            x = block(x)
        return self.head(x)
if __name__ == '__main__':
    # Build a minimal one-block model and print a per-layer summary
    # for a standard 224x224 RGB input.
    net = ConvMixer(dim=512, depth=1)
    net = net.to(device)
    summary(net, (3, 224, 224))
2.keras实现
代码如下:
from keras.models import Model
from keras import backend as K
from tensorflow.keras.layers import DepthwiseConv2D,Add,BatchNormalization, Dense,Conv2D,GlobalAveragePooling2D,Flatten,Softmax,Activation
from tensorflow.python.keras.engine.input_layer import Input
# 还没有去实现
def ConvMixerLayer(input_layer, dim, kernel_size=9):
    """One ConvMixer block in Keras functional style.

    Depthwise conv + GELU + BatchNorm with a residual (skip) connection,
    followed by a pointwise 1x1 conv + GELU + BatchNorm.
    """
    # Depthwise (spatial mixing) stage with a skip connection.
    dw = DepthwiseConv2D(kernel_size=kernel_size, padding='same')(input_layer)
    dw = Activation('gelu')(dw)
    dw = BatchNormalization()(dw)
    merged = Add()([input_layer, dw])
    # Pointwise (channel mixing) stage.
    pw = Conv2D(filters=dim, kernel_size=1)(merged)
    pw = Activation('gelu')(pw)
    return BatchNormalization()(pw)
def ConvMixer(input_shape=(224, 224, 3), dim=512, depth=1, kernel_size=9,
              patch_size=7, n_classes=1000):
    """Build the full ConvMixer classifier as a Keras functional model."""
    inputs = Input(shape=input_shape)
    # Patch-embedding stem: conv with kernel == stride == patch_size.
    x = Conv2D(filters=dim, kernel_size=patch_size, strides=patch_size)(inputs)
    x = Activation('gelu')(x)
    x = BatchNormalization()(x)
    # Stack of `depth` ConvMixer blocks.
    for _ in range(depth):
        x = ConvMixerLayer(x, dim, kernel_size=kernel_size)
    # Classification head: global average pooling, then softmax over classes.
    x = GlobalAveragePooling2D()(x)
    x = Flatten()(x)
    x = Dense(n_classes)(x)
    outputs = Softmax()(x)
    return Model(inputs, outputs, name='ConvMixer')
if __name__ == '__main__':
    # Instantiate with default hyper-parameters and print the architecture.
    net = ConvMixer()
    net.summary()
总结
论文还没看,还没用个数据集去试试。后期做完工作再在这里补全。



