
A minimal DeiT (Data-efficient Image Transformer) implementation in PaddlePaddle: patch embedding with class and distillation tokens, a pre-norm Transformer encoder, and two classification heads whose outputs are averaged at inference.

import paddle
import paddle.nn as nn


class Identity(nn.Layer):
    # Placeholder layer (e.g., for drop path); returns its input unchanged.
    def __init__(self):
        super().__init__()

    def forward(self, x):
        return x


class MLP(nn.Layer):
    def __init__(self, embed_dim, mlp_ratio, dropout=0.):
        super().__init__()
        self.fc1 = nn.Linear(embed_dim, int(embed_dim * mlp_ratio))
        self.fc2 = nn.Linear(int(embed_dim * mlp_ratio), embed_dim)
        self.act = nn.GELU()
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.dropout(x)
        return x


# [4, 3, 224, 224] -> [4, 196 + 2 (tokens), 768 (embed_dim)]
class PatchEmbedding(nn.Layer):
    def __init__(self, img_size=224, in_channels=3, patch_size=16, embed_dim=768, dropout=0.):
        super().__init__()
        n_patches = (img_size // patch_size) ** 2  # 14 * 14 = 196 patches
        self.patch_embedding = nn.Conv2D(in_channels=in_channels,
                                         out_channels=embed_dim,
                                         kernel_size=patch_size,
                                         stride=patch_size,
                                         bias_attr=False)
        self.dropout = nn.Dropout(dropout)
        self.cls_token = paddle.create_parameter(
            shape=[1, 1, embed_dim],
            dtype="float32",
            default_initializer=nn.initializer.Constant(0))
        self.distill_token = paddle.create_parameter(
            shape=[1, 1, embed_dim],
            dtype="float32",
            default_initializer=nn.initializer.TruncatedNormal(std=.02))
        self.position_embeddings = paddle.create_parameter(
            shape=[1, n_patches + 2, embed_dim],
            dtype="float32",
            default_initializer=nn.initializer.TruncatedNormal(std=0.02))

    def forward(self, x):
        # x: [N, C, H, W]
        cls_tokens = self.cls_token.expand([x.shape[0], -1, -1])          # [N, 1, embed_dim]
        distill_tokens = self.distill_token.expand([x.shape[0], -1, -1])  # [N, 1, embed_dim]
        x = self.patch_embedding(x)  # [N, embed_dim, H', W'] = [4, 768, 14, 14]
        x = x.flatten(2)             # [N, embed_dim, num_patches] = [4, 768, 196]
        x = x.transpose([0, 2, 1])   # [N, num_patches, embed_dim] = [4, 196, 768]
        x = paddle.concat([cls_tokens, distill_tokens, x], axis=1)  # [4, 198, 768]
        out = x + self.position_embeddings
        out = self.dropout(out)
        return out


class Attention(nn.Layer):
    def __init__(self, embed_dim, num_heads, qkv_bias=True, dropout=0., attention_dropout=0.):
        super().__init__()
        self.num_heads = num_heads
        self.head_dim = int(embed_dim / num_heads)  # cast to int: the division returns a float
        self.all_head_dim = self.head_dim * self.num_heads
        self.scales = self.head_dim ** -0.5  # scale by the per-head dimension, not all_head_dim
        self.qkv = nn.Linear(embed_dim, self.all_head_dim * 3)
        self.proj = nn.Linear(embed_dim, embed_dim)
        self.dropout = nn.Dropout(dropout)
        self.attention_dropout = nn.Dropout(attention_dropout)
        self.softmax = nn.Softmax(axis=-1)

    def transpose_multihead(self, x):
        # x: [N, num_patches, all_head_dim] -> [N, num_heads, num_patches, head_dim]
        new_shape = x.shape[:-1] + [self.num_heads, self.head_dim]  # e.g. [4, 198, 4, 192]
        x = x.reshape(new_shape)
        x = x.transpose([0, 2, 1, 3])
        return x

    def forward(self, x):
        # x: [N, num_patches, embed_dim]
        qkv = self.qkv(x).chunk(3, axis=-1)
        q, k, v = map(self.transpose_multihead, qkv)
        attn = paddle.matmul(q, k, transpose_y=True)
        attn = attn * self.scales
        attn = self.softmax(attn)
        attn = self.attention_dropout(attn)
        out = paddle.matmul(attn, v)
        # Merge the heads back: [N, num_heads, num_patches, head_dim] -> [N, num_patches, all_head_dim]
        out = out.transpose([0, 2, 1, 3])
        out = out.reshape(out.shape[:-2] + [self.all_head_dim])
        out = self.proj(out)
        out = self.dropout(out)
        return out


class EncoderLayer(nn.Layer):
    # Pre-norm: LayerNorm before each sub-layer, with residual connections.
    def __init__(self, embed_dim=768, num_heads=4, qkv_bias=True, mlp_ratio=4.0, dropout=0., attention_dropout=0.):
        super().__init__()
        self.attn_norm = nn.LayerNorm(embed_dim)
        self.attn = Attention(embed_dim, num_heads)
        self.mlp_norm = nn.LayerNorm(embed_dim)
        self.mlp = MLP(embed_dim, mlp_ratio)

    def forward(self, x):
        h = x
        x = self.attn_norm(x)
        x = self.attn(x)
        x = h + x

        h = x
        x = self.mlp_norm(x)
        x = self.mlp(x)
        x = x + h
        return x


class Encoder(nn.Layer):
    def __init__(self, embed_dim, depth):
        super().__init__()
        layer_list = []
        for i in range(depth):
            layer_list.append(EncoderLayer(embed_dim))
        self.layers = nn.LayerList(layer_list)
        self.norm = nn.LayerNorm(embed_dim)

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        x = self.norm(x)
        # x: [N, num_patches + 2, embed_dim]; positions 0 and 1 are the cls and distill tokens
        return x[:, 0], x[:, 1]


class DeiT(nn.Layer):
    def __init__(self,
                 img_size=224,
                 patch_size=16,
                 in_channels=3,
                 num_classes=1000,
                 embed_dim=768,
                 depth=3,
                 num_heads=8,
                 mlp_ratio=4,
                 qkv_bias=True,
                 dropout=0.,
                 attention_dropout=0.,
                 droppath=0.):
        super().__init__()
        # [4, 3, 224, 224] -> [4, 196 + 2 (tokens), 768 (embed_dim)]
        self.patch_embedding = PatchEmbedding(img_size=img_size,
                                              in_channels=in_channels,
                                              patch_size=patch_size,
                                              embed_dim=embed_dim,
                                              dropout=dropout)
        self.encoder = Encoder(embed_dim=embed_dim, depth=depth)
        self.head = nn.Linear(embed_dim, num_classes)
        self.head_distill = nn.Linear(embed_dim, num_classes)

    def forward(self, x):
        x = self.patch_embedding(x)
        x, x_distill = self.encoder(x)
        x = self.head(x)
        x_distill = self.head_distill(x_distill)
        if self.training:
            return x, x_distill
        else:
            return (x + x_distill) / 2


def main():
    model = DeiT()
    # print(model)
    paddle.summary(model, (4, 3, 224, 224))


if __name__ == "__main__":
    main()
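Since forward() returns both the class-token logits and the distillation-token logits while self.training is true, a DeiT-style training step pairs a standard cross-entropy loss on the class head with a distillation loss against a frozen teacher. Below is a minimal hard-label distillation sketch, not part of the original code: the teacher here is an untrained resnet18 stand-in (in practice you would load a pretrained teacher), and images/labels are random placeholders.

import paddle
import paddle.nn.functional as F
from paddle.vision.models import resnet18

model = DeiT(num_classes=1000)        # the DeiT class defined above
teacher = resnet18(num_classes=1000)  # stand-in teacher; use a pretrained model in practice
teacher.eval()                        # teacher stays frozen during distillation
optimizer = paddle.optimizer.AdamW(learning_rate=1e-4, parameters=model.parameters())

def train_step(images, labels):
    model.train()
    logits_cls, logits_distill = model(images)  # two heads in training mode

    with paddle.no_grad():  # hard labels from the teacher
        teacher_labels = teacher(images).argmax(axis=-1)

    # DeiT-style hard distillation: average the two cross-entropy terms
    loss = 0.5 * F.cross_entropy(logits_cls, labels) \
         + 0.5 * F.cross_entropy(logits_distill, teacher_labels)

    loss.backward()
    optimizer.step()
    optimizer.clear_grad()
    return loss

images = paddle.randn([4, 3, 224, 224])  # placeholder batch
labels = paddle.randint(0, 1000, [4])
print(train_step(images, labels).item())

At inference time, calling model.eval() switches forward() to return the average of the two heads, so the trained model can be used like an ordinary classifier.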