|
- #!/usr/bin/env python
- # coding: utf-8
- # # 尝试复现BEiT论文
- # !pip install yacs
-
- # # 开始论文复现
- # # 首先复现模型部分
- #
# 先查找替换,把常见的api替换掉!要注意,MindSpore大部分是元组,而飞桨里面很多习惯写列表
- # ```python
- # 飞桨 MindSpore
- # nn.Layer nn.Cell
- # nn.functional mindspore.ops
- # nn.Linear nn.Dense
- # F.linear 写个MindSpore版本的,叫mslinear ,原来mindspore有Dense啊!
- # forward construct
- # nn.Conv2D nn.Conv2d
- # dropout 0.0。dropout 1.0 有个1-x的关系
- # F.softmax(x) ops.Softmax(axis=-1)(x)
- # transpose 这个不用转换
- # @符号就是matmul ops.matmul
- # paddle.shape mindspore.ops.shape
- #
- # paddle.create_parameter(
- # shape=[self.num_relative_distance, num_heads],
- # dtype="float32",
- # default_initializer=zeros_,
- # 改成 mindspore.Parameter(ops.Zeros()((1, 1, embed_dim),mindspore.float32))`
# paddle.linspace mindspore.ops.LinSpace() mindspore的LinSpace不能运行在cpu下
- # paddle.zeros([2,3]) ops.Zeros()((2, 3), mindspore.float32)
- # paddle.zeros_like ops.zeros_like
- # coords = paddle.stack(paddle.meshgrid([coords_h, coords_w])) # 2, Wh, Ww
- # paddle.meshgrid ops.Meshgrid(indexing="ij") 并将列表换成元组
- # paddle.stack ops.Stack()
- # paddle.flatten ops.Flatten()
- # coords_flatten.unsqueeze(axis=2 ) ops.ExpandDims()(coords_flatten, 2) 最终效果
- # relative_coords = ops.ExpandDims()(coords_flatten, 2) - ops.ExpandDims()(coords_flatten, 1)
# concat 用ops.Concat
- # expand ops.BroadcastTo
- # ```
- # In[54]:
-
-
- import numpy as np
- from mindspore import ops
- import mindspore.nn as nn
- from mindspore import Tensor
- from mindspore.ops import Identity
- # import mindspore.nn.functional as F
- import mindspore.ops as F
- import mindspore
import mindspore.context as context
# Run eagerly (PyNative mode) on Ascend.  For local debugging switch
# device_target to "CPU" — but note ops.LinSpace used below lacks CPU support.
# context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")
context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
# print(f"MindSpore版本:{mindspore.__version__}")
-
- import numpy as np
- # 加入计算时间代码
- import time
class Timer: #@save
    """Accumulate wall-clock durations over repeated start/stop cycles."""

    def __init__(self):
        # Timing starts immediately on construction.
        self.times = []
        self.start()

    def start(self):
        """Begin (or restart) the current timing interval."""
        self.tik = time.time()

    def stop(self):
        """Close the current interval, record its duration and return it."""
        elapsed = time.time() - self.tik
        self.times.append(elapsed)
        return elapsed

    def avg(self):
        """Mean of all recorded durations."""
        return sum(self.times) / len(self.times)

    def sum(self):
        """Total of all recorded durations."""
        return sum(self.times)

    def cumsum(self):
        """Running total of recorded durations, as a plain list."""
        return np.array(self.times).cumsum().tolist()
-
-
class Benchmark:
    """Context manager that prints how long its body took to run."""

    def __init__(self, description='Done'):
        # Label printed alongside the elapsed time.
        self.description = description

    def __enter__(self):
        # A fresh Timer starts counting immediately.
        self.timer = Timer()
        return self

    def __exit__(self, *args):
        print(f'{self.description}: {self.timer.stop():.4f} sec')
- # In[55]:
-
-
def mslinear(x, weight, bias=None):
    """MindSpore counterpart of paddle's F.linear: x @ weight.T (+ bias)."""
    result = mindspore.ops.matmul(x, weight.T)
    if bias is None:
        return result
    return result + bias
-
-
-
- # Copyright (c) 2021 PPViT Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- """
- Droppath, reimplement from https://github.com/yueatsprograms/Stochastic_Depth
- """
- import mindspore
- import mindspore.nn as nn
-
-
class DropPath(nn.Cell):
    """DropPath (stochastic depth): randomly zero whole samples of a batch.

    Reimplemented from https://github.com/yueatsprograms/Stochastic_Depth.
    Each sample along axis 0 is dropped independently with probability
    ``drop_prob``; surviving samples are rescaled by 1/keep_prob so the
    expected output is unchanged.  Identity in eval mode.
    """
    def __init__(self, drop_prob=0.0):
        super().__init__()
        # BUGFIX: the default used to be None, which made `1 - self.drop_prob`
        # raise a TypeError the first time drop_path ran in training mode.
        # None is still accepted and treated as "no drop".
        self.drop_prob = 0.0 if drop_prob is None else drop_prob

    def drop_path(self, inputs):
        """Apply stochastic depth to `inputs`.

        Args:
            inputs: tensor of arbitrary shape; axis 0 is the batch axis.
        Returns:
            tensor of the same shape, with a random subset of samples zeroed
            and the rest scaled by 1/keep_prob.
        """
        # If prob is 0 or in eval mode, return the original input.
        if self.drop_prob == 0. or not self.training:
            return inputs
        keep_prob = 1 - self.drop_prob
        # BUGFIX: mindspore.Tensor expects a mindspore dtype object, not the
        # string 'float32' — TODO confirm against the installed version.
        keep_prob = mindspore.Tensor(keep_prob, dtype=mindspore.float32)
        # One Bernoulli draw per sample: shape (N, 1, 1, ..., 1).
        shape = (inputs.shape[0], ) + (1, ) * (inputs.ndim - 1)
        random_tensor = keep_prob + mindspore.numpy.rand(shape, dtype=inputs.dtype)
        random_tensor = random_tensor.floor()  # 0/1 mask
        # Divide to keep the same output expectation.
        output = inputs.divide(keep_prob) * random_tensor
        return output

    def construct(self, inputs):
        return self.drop_path(inputs)
-
-
-
- # def main():
- # import numpy as np
- # tmp = mindspore.Tensor(np.random.rand(8, 16, 8, 8), dtype=mindspore.float32)
- # dp = DropPath(0.5)
- # out = dp(tmp)
- # print(out.shape)
-
- # if __name__ == "__main__":
- # main()
-
-
- # ## 主代码部分
-
- # In[58]:
-
-
- # Copyright (c) 2021 PPViT Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
"""
BEiT in MindSpore

A MindSpore implementation of BEiT (ported from the Paddle/PPViT code) as described in:

"BEiT: BERT Pre-Training of Image Transformers"
- Paper Link: https://arxiv.org/abs/2106.08254
"""
- import math
- import copy
- from functools import partial
- import mindspore
- import mindspore.nn as nn
- # import mindspore.nn.functional as F
- import mindspore.ops as F
- # from droppath import DropPath
-
- # 初始化先去掉,以后再做
- # trunc_normal_ = nn.initializer.TruncatedNormal(std=0.02)
- # zeros_ = nn.initializer.Constant(value=0.0)
- # ones_ = nn.initializer.Constant(value=1.0)
-
-
class Mlp(nn.Cell):
    """MLP module.

    Ops: fc1 -> act -> dropout -> fc2 -> dropout.

    Note: `drop` is a drop probability; mindspore's nn.Dropout takes the
    keep probability, hence the `1 - drop` below.
    """

    def __init__(self,
                 in_features,
                 hidden_features=None,
                 out_features=None,
                 act_layer=nn.GELU,
                 drop=0.0):
        super().__init__()
        hidden_features = hidden_features or in_features
        out_features = out_features or in_features
        self.fc1 = nn.Dense(in_features, hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Dense(hidden_features, out_features)
        self.drop = nn.Dropout(1 - drop)

    def construct(self, x):
        hidden = self.drop(self.act(self.fc1(x)))
        return self.drop(self.fc2(hidden))
-
def main():
    """Smoke-test Mlp on a random (8, 16) batch."""
    import numpy as np
    sample = mindspore.Tensor(np.random.rand(8, 16), dtype=mindspore.float32)
    mlp = Mlp(16, 32, 512)
    print(mlp(sample).shape)

if __name__ == "__main__":
    main()
-
-
- # In[59]:
-
-
class PatchEmbed(nn.Cell):
    """2D image to patch embedding.

    Splits an image into non-overlapping patches and linearly embeds each
    patch, implemented as a Conv2d with kernel == stride == patch size.

    Args:
        img_size: input image side length (square images assumed).
        patch_size: patch side length.
        in_chans: number of input channels.
        embed_dim: embedding dimension per patch.
        norm_layer: optional normalization applied to the embeddings.
        flatten: if True, output is (B, num_patches, embed_dim); otherwise
            the raw (B, embed_dim, H', W') conv feature map.
    """
    def __init__(self,
                 img_size=224,
                 patch_size=16,
                 in_chans=3,
                 embed_dim=768,
                 norm_layer=None,
                 flatten=True):
        super().__init__()
        # CLEANUP: removed the unused tmpimgsize/tmppatchsize temporaries —
        # the scalar patch_size is used directly before being tupled.
        self.img_size = (img_size, img_size)
        self.patch_size = (patch_size, patch_size)
        self.grid_size = (self.img_size[0] // self.patch_size[0],
                          self.img_size[1] // self.patch_size[1])
        self.num_patches = self.grid_size[0] * self.grid_size[1]
        self.flatten = flatten

        # Conv with kernel == stride == patch size performs the patch split
        # and the linear projection in a single op.
        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
        self.norm = norm_layer((embed_dim,)) if norm_layer else ops.Identity()

    def construct(self, x):
        """x: (B, C, H, W) image batch -> patch embeddings."""
        x = self.proj(x)
        if self.flatten:
            B, C, H, W = x.shape
            x = x.reshape([B, C, H * W])
            # (B, embed_dim, num_patches) -> (B, num_patches, embed_dim)
            transpose = ops.Transpose()
            x = transpose(x, (0, 2, 1))
        x = self.norm(x)
        return x
-
-
-
-
def main():
    """Smoke-test PatchEmbed on a random (16, 3, 224, 224) batch."""
    import numpy as np
    images = mindspore.Tensor(np.random.rand(16, 3, 224, 224), dtype=mindspore.float32)

    patchembed = PatchEmbed(flatten=True)
    print(patchembed(images).shape)

if __name__ == "__main__":
    main()
-
-
- # ## 开始注意力模块
-
- # In[60]:
-
-
class Attention(nn.Cell):
    """Multi-head self-attention with optional relative position bias.

    CLEANUP: removed ~90 lines of commented-out Paddle code, including an
    entire duplicate commented `construct`; live logic is unchanged.

    Args:
        dim: token embedding dimension.
        num_heads: number of attention heads.
        qkv_bias: if True, add learnable bias to q and v (k bias fixed at 0).
        attn_drop: drop probability for the attention weights.
        proj_drop: drop probability after the output projection.
        window_size: (Wh, Ww) patch grid; when given, a learnable relative
            position bias table is added to the attention logits.
        attn_head_dim: override per-head dimension (default dim // num_heads).
    """
    def __init__(self,
                 dim,
                 num_heads=8,
                 qkv_bias=False,
                 attn_drop=0.1,
                 proj_drop=0.1,
                 window_size=None,
                 attn_head_dim=None):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        if attn_head_dim is not None:
            head_dim = attn_head_dim
        all_head_dim = head_dim * self.num_heads
        self.scale = head_dim ** -0.5

        # qkv has no built-in bias: a (q, 0, v) bias is assembled by hand in
        # construct so that k stays bias-free, as in the original BEiT code.
        self.qkv = nn.Dense(dim, all_head_dim * 3, has_bias=False)
        if qkv_bias:
            self.q_bias = mindspore.Parameter(ops.Zeros()((all_head_dim), mindspore.float32))
            self.v_bias = mindspore.Parameter(ops.Zeros()((all_head_dim), mindspore.float32))
        else:
            self.q_bias = None
            self.v_bias = None

        if window_size:
            self.window_size = window_size
            # (2*Wh-1)*(2*Ww-1) in-window offsets + 3 extra entries for
            # cls->token, token->cls and cls->cls.
            self.num_relative_distance = (2 * window_size[0] - 1) * (
                2 * window_size[1] - 1
            ) + 3

            # Learnable bias table: (num_relative_distance, num_heads).
            self.relative_position_bias_table = mindspore.Parameter(ops.Zeros()((self.num_relative_distance, num_heads), mindspore.float32))
            # Pair-wise relative position index for each token in the window.
            coords_h = Tensor(np.arange(window_size[0]), mindspore.int64)
            coords_w = Tensor(np.arange(window_size[1]), mindspore.int64)
            coords = ops.Stack()(ops.Meshgrid(indexing="ij")((coords_h, coords_w)))  # 2, Wh, Ww
            coords_flatten = ops.Flatten()(coords)  # 2, Wh*Ww
            relative_coords = ops.ExpandDims()(coords_flatten, 2) - ops.ExpandDims()(coords_flatten, 1)
            relative_coords = relative_coords.transpose([1, 2, 0])  # Wh*Ww, Wh*Ww, 2
            # Cast to float so the in-place slice updates below are supported.
            relative_coords = relative_coords.astype(mindspore.float32)
            relative_coords[:, :, 0] += window_size[0] - 1  # shift to start from 0
            relative_coords[:, :, 1] += window_size[1] - 1
            relative_coords[:, :, 0] *= 2 * window_size[1] - 1
            relative_position_index = ops.Zeros()(
                (
                    window_size[0] * window_size[1] + 1,
                    window_size[0] * window_size[1] + 1,
                ),
                relative_coords.dtype,
            )
            # Wh*Ww, Wh*Ww
            relative_position_index[1:, 1:] = relative_coords.sum(-1)
            relative_position_index[0, 0:] = self.num_relative_distance - 3
            relative_position_index[0:, 0] = self.num_relative_distance - 2
            relative_position_index[0, 0] = self.num_relative_distance - 1

            # Stored as a plain attribute (no register_buffer in mindspore).
            self.relative_position_index = relative_position_index.astype(mindspore.int64)
        else:
            self.window_size = None
            self.relative_position_bias_table = None
            self.relative_position_index = None

        # nn.Dropout takes the KEEP probability, hence 1 - drop prob.
        self.attn_drop = nn.Dropout(1-attn_drop)
        self.proj = nn.Dense(all_head_dim, dim)
        self.proj_drop = nn.Dropout(1-proj_drop)

    def construct(self, x, rel_pos_bias):
        """x: (B, N, C) tokens; rel_pos_bias: shared bias tensor or None."""
        B, N, C = x.shape
        qkv_bias = None
        if self.q_bias is not None:
            # Bias layout (q, 0, v): k gets no bias.
            qkv_bias = ops.Concat()((self.q_bias, ops.zeros_like(self.v_bias), self.v_bias))
        qkv = mslinear(x=x, weight=self.qkv.weight, bias=qkv_bias)

        # (B, N, 3*D) -> (3, B, heads, N, head_dim)
        qkv = qkv.reshape([B, N, 3, self.num_heads, -1]).transpose([2, 0, 3, 1, 4])
        # make torchscript happy (cannot use tensor as tuple)
        q, k, v = qkv[0], qkv[1], qkv[2]

        q = q * self.scale
        attn = ops.matmul(q, k.transpose([0, 1, 3, 2]))

        if self.relative_position_bias_table is not None:
            relative_position_bias = self.relative_position_bias_table[
                self.relative_position_index.reshape([-1])
            ].reshape(
                [
                    self.window_size[0] * self.window_size[1] + 1,
                    self.window_size[0] * self.window_size[1] + 1,
                    -1,
                ]
            )  # Wh*Ww,Wh*Ww,nH
            relative_position_bias = relative_position_bias.transpose(
                [2, 0, 1]
            )  # nH, Wh*Ww, Wh*Ww

            attn = attn + ops.ExpandDims()(relative_position_bias, 0)

        if rel_pos_bias is not None:
            # Model-level shared bias (see RelativePositionBias).
            attn = attn + rel_pos_bias

        attn = nn.Softmax(axis=-1)(attn)
        attn = self.attn_drop(attn)

        x = ops.matmul(attn, v).transpose([0, 2, 1, 3]).reshape([B, N, -1])
        x = self.proj(x)
        x = self.proj_drop(x)
        return x
-
-
def main():
    """Smoke-test Attention on random (196, 16, 768) tokens."""
    import numpy as np
    tokens = mindspore.Tensor(np.random.rand(196, 16, 768), dtype=mindspore.float32)
    attention = Attention(dim=768 )
    print(attention(tokens, rel_pos_bias=0.1).shape)

if __name__ == "__main__":
    main()
-
-
-
- # In[61]:
-
-
class Block(nn.Cell):
    """Transformer encoder block: pre-norm attention + MLP with DropPath.

    WARNING (review note): `drop`/`attn_drop` are forwarded as `1 - x` below,
    yet Attention and Mlp apply `1 - x` AGAIN when building nn.Dropout.  The
    Beit model compensates by passing `1 - rate` in (three inversions = one),
    so the full chain is correct — but constructing Block directly with drop
    probabilities (as main() below does) yields inverted dropout rates.
    Confirm and untangle before reusing Block standalone.
    """
    def __init__(self,
                 dim,
                 num_heads,
                 mlp_ratio=4.0,
                 qkv_bias=False,
                 drop=0.1,
                 attn_drop=0.1,
                 drop_path=0.1,
                 init_values=None,
                 act_layer=nn.GELU,
                 norm_layer=nn.LayerNorm,
                 window_size=None,
                 attn_head_dim=None):
        super().__init__()
        self.norm1 = norm_layer((dim,))
        self.attn = Attention(
            dim,
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            attn_drop=1-attn_drop,  # NOTE: double inversion — see class docstring
            proj_drop=1-drop,
            window_size=window_size,
            attn_head_dim=attn_head_dim,
        )
        # Identity when drop_path is 0 (or a 0-valued tensor from LinSpace).
        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else Identity()
        self.norm2 = norm_layer((dim,))
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp(
            in_features=dim,
            hidden_features=mlp_hidden_dim,
            act_layer=act_layer,
            drop=1-drop,  # NOTE: double inversion — see class docstring
        )

        if init_values:
            # Layer-scale parameters.  Zero-initialized here: weight init was
            # deliberately dropped in this port (see the note by the imports);
            # presumably they should be filled with init_values — TODO confirm.
            self.gamma_1 = mindspore.Parameter(ops.Zeros()((dim), mindspore.float32))

            self.gamma_2 = mindspore.Parameter(ops.Zeros()((dim), mindspore.float32))
        else:
            self.gamma_1, self.gamma_2 = None, None

    def construct(self, x, rel_pos_bias):
        # Pre-norm residual branches, optionally scaled by gamma (layer scale).
        if self.gamma_1 is None:
            x = x + self.drop_path(self.attn(self.norm1(x), rel_pos_bias=rel_pos_bias))
            x = x + self.drop_path(self.mlp(self.norm2(x)))
        else:
            x = x + self.drop_path(
                self.gamma_1 * self.attn(self.norm1(x), rel_pos_bias=rel_pos_bias)
            )
            x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x)))
        return x
-
def main():
    """Smoke-test Block on random (196, 16, 768) tokens."""
    import numpy as np
    tokens = mindspore.Tensor(np.random.rand(196, 16, 768), dtype=mindspore.float32)
    block = Block(dim=768, num_heads=12)
    print(block(tokens, rel_pos_bias=0.1).shape)

if __name__ == "__main__":
    main()
-
-
- # In[62]:
-
-
class RelativePositionBias(nn.Cell):
    """Shared relative position bias table (BEiT).

    Produces a (num_heads, N+1, N+1) bias added to every block's attention
    logits, where N = Wh*Ww patches plus one cls token.
    """
    def __init__(self, window_size, num_heads):
        super().__init__()
        self.window_size = window_size
        # (2*Wh-1)*(2*Ww-1) in-window offsets + 3 entries for cls->token,
        # token->cls and cls->cls.
        self.num_relative_distance = (2 * window_size[0] - 1) * (
            2 * window_size[1] - 1
        ) + 3

        # Learnable bias table: (num_relative_distance, num_heads).
        self.relative_position_bias_table = mindspore.Parameter(ops.Zeros()((self.num_relative_distance, num_heads), mindspore.float32))

        # Pair-wise relative position index for each token inside the window.
        coords_h = Tensor(np.arange(window_size[0]), mindspore.int64)
        # BUGFIX: this was assigned to `coords_2`, leaving `coords_w` on the
        # next line undefined — NameError the moment the module was built.
        coords_w = Tensor(np.arange(window_size[1]), mindspore.int64)
        coords = ops.Stack()(ops.Meshgrid(indexing="ij")((coords_h, coords_w)))  # 2, Wh, Ww
        coords_flatten = ops.Flatten()(coords)  # 2, Wh*Ww
        relative_coords = ops.ExpandDims()(coords_flatten, 2) - ops.ExpandDims()(coords_flatten, 1)
        relative_coords = relative_coords.transpose([1, 2, 0])  # Wh*Ww, Wh*Ww, 2
        relative_coords[:, :, 0] += window_size[0] - 1  # shift to start from 0
        relative_coords[:, :, 1] += window_size[1] - 1
        relative_coords[:, :, 0] *= 2 * window_size[1] - 1
        relative_position_index = ops.Zeros()(
            (window_size[0] * window_size[1] + 1, window_size[0] * window_size[1] + 1), mindspore.int64)
        relative_position_index[1:, 1:] = relative_coords.sum(-1)  # Wh*Ww, Wh*Ww
        relative_position_index[0, 0:] = self.num_relative_distance - 3
        relative_position_index[0:, 0] = self.num_relative_distance - 2
        relative_position_index[0, 0] = self.num_relative_distance - 1

        # Plain attribute (no register_buffer equivalent in mindspore here).
        self.relative_position_index = relative_position_index.astype(mindspore.int64)

    def construct(self):
        relative_position_bias = self.relative_position_bias_table[
            self.relative_position_index.reshape([-1])].reshape(
            self.window_size[0] * self.window_size[1] + 1,
            self.window_size[0] * self.window_size[1] + 1, -1)  # Wh*Ww,Wh*Ww,nH
        return relative_position_bias.transpose([2, 0, 1])  # nH, Wh*Ww, Wh*Ww
-
-
-
- # In[63]:
-
-
class Beit(nn.Cell):
    """BEiT vision transformer (MindSpore port).

    Pipeline: patch embedding -> prepend cls token (+ optional absolute
    position embedding) -> `depth` transformer Blocks with optional (shared)
    relative position bias -> mean-pool or cls-token readout -> linear head.

    NOTE(review): drop_rate/attn_drop_rate are forwarded to Block as `1 - x`;
    Block applies `1 - x` again before Attention/Mlp, which apply it a third
    time when building nn.Dropout — three inversions net out to the intended
    drop probability, but the convention is fragile.  Confirm before changing
    any of the three layers independently.
    """
    def __init__(self,
                 img_size=224,
                 patch_size=16,
                 in_chans=3,
                 num_classes=1000,
                 embed_dim=768,
                 depth=12,
                 num_heads=12,
                 mlp_ratio=4.0,
                 qkv_bias=True,
                 drop_rate=0.0,
                 attn_drop_rate=0.0,
                 drop_path_rate=0.0,
                 norm_layer=partial(nn.LayerNorm, epsilon=1e-6),
                 init_values=None,
                 use_abs_pos_emb=True,
                 use_rel_pos_bias=False,
                 use_shared_rel_pos_bias=False,
                 use_mean_pooling=True,
                 init_scale=0.001):
        super().__init__()
        self.num_classes = num_classes
        # num_features for consistency with other models
        self.num_features = self.embed_dim = embed_dim

        self.patch_embed = PatchEmbed(
            img_size=img_size,
            patch_size=patch_size,
            in_chans=in_chans,
            embed_dim=embed_dim,
        )
        num_patches = self.patch_embed.num_patches

        # Learnable cls token; zero-init (truncated-normal init from the
        # Paddle original was deliberately dropped in this port).
        self.cls_token = mindspore.Parameter(ops.Zeros()((1, 1, embed_dim), mindspore.float32))
        if use_abs_pos_emb:
            # Absolute position embedding for cls token + patches.
            self.pos_embed = mindspore.Parameter(ops.Zeros()((1, num_patches + 1, embed_dim), mindspore.float32))
        else:
            self.pos_embed = None
        self.pos_drop = nn.Dropout(1-drop_rate)  # nn.Dropout takes keep-prob

        if use_shared_rel_pos_bias:
            # One relative-position bias table shared by every block.
            self.rel_pos_bias = RelativePositionBias(
                window_size=self.patch_embed.grid_size, num_heads=num_heads
            )
        else:
            self.rel_pos_bias = None

        # Stochastic depth decay rule: per-block drop-path rates 0..rate.
        # NOTE(review): ops.LinSpace does not run on CPU (see header notes);
        # also confirm a float `drop_path_rate` is accepted as the stop arg.
        dpr = [x for x in mindspore.ops.LinSpace()(Tensor(0, mindspore.float32), drop_path_rate, depth)]
        self.use_rel_pos_bias = use_rel_pos_bias
        self.blocks = nn.CellList(
            [
                Block(
                    dim=embed_dim,
                    num_heads=num_heads,
                    mlp_ratio=mlp_ratio,
                    qkv_bias=qkv_bias,
                    drop=1-drop_rate,  # see NOTE in class docstring
                    attn_drop=1-attn_drop_rate,
                    drop_path=dpr[i],
                    norm_layer=norm_layer,
                    init_values=init_values,
                    window_size=self.patch_embed.grid_size if use_rel_pos_bias else None,
                )
                for i in range(depth)
            ]
        )
        self.norm = Identity() if use_mean_pooling else norm_layer((embed_dim,))
        self.fc_norm = norm_layer((embed_dim,)) if use_mean_pooling else None
        self.head = nn.Dense(embed_dim, num_classes) if num_classes > 0 else Identity()

        # Weight initialization from the Paddle original, disabled for now.
        # (The helpers below still reference undefined names — see NOTEs.)
        # self.apply(self._init_weights)
        # self.fix_init_weight()
        # if isinstance(self.head, nn.Dense):
        #     trunc_normal_(self.head.weight)
        #     self.head.weight.set_value(
        #         self.head.weight.multiply(paddle.to_tensor(init_scale))
        #     )
        #     self.head.bias.set_value(
        #         self.head.bias.multiply(paddle.to_tensor(init_scale))
        #     )

    def fix_init_weight(self):
        # NOTE(review): dead code in this port — still calls paddle.to_tensor,
        # which is undefined here; would raise NameError if ever invoked.
        def rescale(param, layer_id):
            param.set_value(param.divide(paddle.to_tensor(math.sqrt(2.0 * layer_id))))

        for layer_id, layer in enumerate(self.blocks):
            rescale(layer.attn.proj.weight, layer_id + 1)
            rescale(layer.mlp.fc2.weight, layer_id + 1)

    def _init_weights(self, m):
        # NOTE(review): dead code in this port — trunc_normal_/zeros_/ones_
        # are commented out near the imports and nn.CellNorm does not exist in
        # mindspore; would raise NameError/AttributeError if ever invoked.
        if isinstance(m, nn.Dense):
            trunc_normal_(m.weight)
        if isinstance(m, nn.Dense) and m.bias is not None:
            zeros_(m.bias)
        elif isinstance(m, nn.CellNorm):
            zeros_(m.bias)
            ones_(m.weight)

    def get_num_layers(self):
        """Number of transformer blocks."""
        return len(self.blocks)

    def get_classifier(self):
        """Return the classification head."""
        return self.head

    def reset_classifier(self, num_classes):
        """Replace the head for a new number of classes (0 -> identity)."""
        self.num_classes = num_classes
        self.head = (
            nn.Dense(self.embed_dim, num_classes) if num_classes > 0 else Identity()
        )

    def construct_features(self, x):
        """Image batch (B, C, H, W) -> pooled feature (B, embed_dim)."""
        x = self.patch_embed(x)
        batch_size, seq_len, _ = x.shape

        # Broadcast the single cls token across the batch and prepend it.
        cls_tokens = ops.BroadcastTo((mindspore.ops.shape(x)[0], 1, self.embed_dim))(self.cls_token)

        x = ops.Concat(axis=1)((cls_tokens, x))

        if self.pos_embed is not None:
            x = x + self.pos_embed
        x = self.pos_drop(x)

        # Shared relative position bias (or None) passed to every block.
        rel_pos_bias = self.rel_pos_bias() if self.rel_pos_bias is not None else None
        for blk in self.blocks:
            x = blk(x, rel_pos_bias=rel_pos_bias)

        x = self.norm(x)
        if self.fc_norm is not None:
            # Mean-pool the patch tokens (cls token excluded).
            t = x[:, 1:, :]
            return self.fc_norm(t.mean(1))

        # Otherwise the cls token is the representation.
        return x[:, 0]

    def construct(self, x):
        x = self.construct_features(x)
        x = self.head(x)
        return x
-
-
def build_beit(config):
    """Instantiate a Beit model from a yacs config node."""
    return Beit(
        img_size=config.DATA.IMAGE_SIZE,
        num_classes=config.MODEL.NUM_CLASSES,
        patch_size=config.MODEL.PATCH_SIZE,
        embed_dim=config.MODEL.EMBED_DIM,
        depth=config.MODEL.DEPTH,
        num_heads=config.MODEL.NUM_HEADS,
        mlp_ratio=config.MODEL.MLP_RATIO,
        use_abs_pos_emb=config.MODEL.USE_ABS_POS_EMB,
        use_rel_pos_bias=config.MODEL.USE_REL_POS_BIAS,
        init_values=config.MODEL.INIT_VALUES,
        qkv_bias=config.MODEL.QKV_BIAS,
    )
-
def main():
    """Smoke-test Beit() once, then benchmark batch sizes 1..256, 3 runs each."""
    import mindspore
    import numpy
    sample = mindspore.Tensor(np.random.rand(8, 3, 224, 224), dtype=mindspore.float32)
    net = Beit()
    print(net(sample).shape)

    for i in range(9):
        images = Tensor(numpy.random.rand(2**i, 3, 224, 224), mindspore.float32)
        for j in range(3):
            with Benchmark("MindSpore速度"):
                output = net(images)
                print(output.shape)

if __name__ == "__main__":
    main()
- # # 测试模型
-
- # In[64]:
-
-
- # !pip install yacs
-
-
- # In[65]:
-
-
- # Copyright (c) 2021 PPViT Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- import numpy as np
- # 加入计算时间代码
- import time
class Timer: #@save
    """Record the durations of multiple runs, in seconds."""
    def __init__(self):
        self.times = []
        self.start()  # begin timing on construction

    def start(self):
        """Mark the beginning of a timed interval."""
        self.tik = time.time()

    def stop(self):
        """Close the current interval; store and return its length."""
        self.times.append(time.time() - self.tik)
        return self.times[-1]

    def avg(self):
        """Average duration over all recorded runs."""
        return sum(self.times) / len(self.times)

    def sum(self):
        """Sum of all recorded durations."""
        return sum(self.times)

    def cumsum(self):
        """Cumulative durations as a Python list."""
        return np.array(self.times).cumsum().tolist()
-
-
class Benchmark:
    """Context manager printing the elapsed time of its body."""
    def __init__(self, description='Done'):
        # Label printed with the elapsed time on exit.
        self.description = description

    def __enter__(self):
        self.timer = Timer()
        return self

    def __exit__(self, *args):
        print(f'{self.description}: {self.timer.stop():.4f} sec')
-
- # 测试模型
- # Copyright (c) 2021 PPViT Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- """Configuration
- Configurations for (1) data processing, (2) model archtecture, and (3) training settings, etc.
- Config can be set by .yaml file or by argparser
- """
- import os
- from yacs.config import CfgNode as CN
- import yaml
-
# Default configuration; values may be overridden from a .yaml file or by
# argparse options (see _update_config_from_file / update_config below).
_C = CN()
_C.BASE = ['']

# data settings
_C.DATA = CN()
_C.DATA.BATCH_SIZE = 2 # train batch_size on single GPU = 256
_C.DATA.BATCH_SIZE_EVAL = None # (disabled in update_config) val batch_size on single GPU
_C.DATA.DATA_PATH = '/dataset/imagenet/' # path to dataset
_C.DATA.DATASET = 'imagenet2012' # dataset name, currently only support imagenet2012
_C.DATA.IMAGE_SIZE = 224 # input image size e.g., 224
_C.DATA.SECOND_IMAGE_SIZE = 112 # 2nd input image size e.g., 112
_C.DATA.IMAGE_CHANNELS = 3 # input image channels: e.g., 3
_C.DATA.CROP_PCT = 0.875 # input image scale ratio, scale is applied before centercrop in eval mode
_C.DATA.NUM_WORKERS = 1 # number of data loading threads
_C.DATA.IMAGENET_MEAN = [0.5, 0.5, 0.5] # [0.485, 0.456, 0.406] # imagenet mean values
_C.DATA.IMAGENET_STD = [0.5, 0.5, 0.5] # [0.229, 0.224, 0.225] # imagenet std values

# model general settings
_C.MODEL = CN()
_C.MODEL.TYPE = 'beit'
_C.MODEL.VAE_TYPE = 'dall-e'
_C.MODEL.NAME = 'beit'
_C.MODEL.RESUME = None # full model path for resume training
_C.MODEL.PRETRAINED = None # full model path for finetuning
_C.MODEL.NUM_CLASSES = 1000 # num of classes for classifier # 1000
_C.MODEL.DROPOUT = 0.0
_C.MODEL.ATTENTION_DROPOUT = 0.0
_C.MODEL.DROPPATH = 0.1
# model transformer settings
_C.MODEL.PATCH_SIZE = 16
_C.MODEL.EMBED_DIM = 768
_C.MODEL.NUM_HEADS = 12
_C.MODEL.ATTN_HEAD_SIZE = None # if None, use embed_dim // num_heads as head dim
_C.MODEL.DEPTH = 12
_C.MODEL.QK_SCALE = None
_C.MODEL.QKV_BIAS = True
_C.MODEL.MLP_RATIO = 4.0 # for cait class_token ratio also set to MLP_RATIO
_C.MODEL.USE_ABS_POS_EMB = False
_C.MODEL.USE_REL_POS_BIAS = True
_C.MODEL.INIT_VALUES = 1e-4


# training settings
_C.TRAIN = CN()
_C.TRAIN.LAST_EPOCH = 0
_C.TRAIN.NUM_EPOCHS = 100
_C.TRAIN.WARMUP_EPOCHS = 20
_C.TRAIN.WEIGHT_DECAY = 0.05
_C.TRAIN.LAYER_DECAY = 0.65
_C.TRAIN.BASE_LR = 4e-3
_C.TRAIN.WARMUP_START_LR = 0.0
_C.TRAIN.END_LR = 1e-6
_C.TRAIN.GRAD_CLIP = None
_C.TRAIN.ACCUM_ITER = 1
_C.TRAIN.LINEAR_SCALED_LR = 512

# optimizer
_C.TRAIN.OPTIMIZER = CN()
_C.TRAIN.OPTIMIZER.NAME = 'AdamWDL'
_C.TRAIN.OPTIMIZER.EPS = 1e-8
_C.TRAIN.OPTIMIZER.BETAS = (0.9, 0.999)

# model ema
_C.TRAIN.MODEL_EMA = True
_C.TRAIN.MODEL_EMA_DECAY = 0.9999
_C.TRAIN.MODEL_EMA_FORCE_CPU = False

# data augmentation (optional, check datasets.py)
_C.TRAIN.SMOOTHING = 0.1
_C.TRAIN.COLOR_JITTER = 0.4 # if both auto augment and rand augment are False, use color jitter
_C.TRAIN.AUTO_AUGMENT = False # rand augment is used if both rand and auto augment are set True
_C.TRAIN.RAND_AUGMENT = True
_C.TRAIN.RAND_AUGMENT_LAYERS = 2
_C.TRAIN.RAND_AUGMENT_MAGNITUDE = 9 # scale from 0 to 9
# mixup params (optional, check datasets.py)
_C.TRAIN.MIXUP_ALPHA = 0.8
_C.TRAIN.MIXUP_PROB = 1.0
_C.TRAIN.MIXUP_SWITCH_PROB = 0.5
_C.TRAIN.MIXUP_MODE = 'batch'
_C.TRAIN.CUTMIX_ALPHA = 1.0
_C.TRAIN.CUTMIX_MINMAX = None
# random erase params (optional, check datasets.py)
_C.TRAIN.RANDOM_ERASE_PROB = 0.25
_C.TRAIN.RANDOM_ERASE_MODE = 'pixel'
_C.TRAIN.RANDOM_ERASE_COUNT = 1
_C.TRAIN.RANDOM_ERASE_SPLIT = False

# misc
_C.SAVE = "./output" # output folder, saves logs and weights
_C.SAVE_FREQ = 15 # freq to save chpt
_C.REPORT_FREQ = 20 # freq to logging info
_C.VALIDATE_FREQ = 1 # freq to do validation
_C.SEED = 0 # random seed
_C.EVAL = False # run evaluation only
_C.AMP = False # auto mix precision training
-
-
def _update_config_from_file(config, cfg_file):
    """Merge a .yaml file (and the files listed under its BASE key) into config.

    Args:
        config: yacs config object, updated in place.
        cfg_file: path to a .yaml config file.
    Return:
        None
    """
    config.defrost()
    with open(cfg_file, 'r') as infile:
        yaml_cfg = yaml.load(infile, Loader=yaml.FullLoader)
    # Recursively merge any base configs first, so cfg_file takes precedence.
    for base in yaml_cfg.setdefault('BASE', ['']):
        if base:
            _update_config_from_file(
                config, os.path.join(os.path.dirname(cfg_file), base)
            )
    config.merge_from_file(cfg_file)
    config.freeze()
-
-
def update_config(config, args):
    """Overwrite config fields from parsed command-line arguments.

    Precedence: defaults < yaml file (args.cfg) < explicit argument values.

    Args:
        config: yacs config node to update (mutated and returned).
        args: namespace produced by get_arguments().
    Return:
        config: updated config
    """
    if args.cfg:
        _update_config_from_file(config, args.cfg)
    config.defrost()
    # Straightforward "if given, copy" options.
    direct = (
        (args.dataset, config.DATA, 'DATASET'),
        (args.image_size, config.DATA, 'IMAGE_SIZE'),
        (args.accum_iter, config.TRAIN, 'ACCUM_ITER'),
        (args.data_path, config.DATA, 'DATA_PATH'),
        (args.output, config, 'SAVE'),
        (args.pretrained, config.MODEL, 'PRETRAINED'),
        (args.resume, config.MODEL, 'RESUME'),
        (args.last_epoch, config.TRAIN, 'LAST_EPOCH'),
    )
    for value, node, field in direct:
        if value:
            setattr(node, field, value)
    if args.batch_size:
        config.DATA.BATCH_SIZE = args.batch_size
        # Eval batch size follows train batch size unless set explicitly below.
        config.DATA.BATCH_SIZE_EVAL = args.batch_size
    if args.batch_size_eval:
        config.DATA.BATCH_SIZE_EVAL = args.batch_size_eval
    if args.eval:
        config.EVAL = True
    if args.amp:  # only for training
        config.AMP = not config.EVAL
    # config.freeze()
    return config
-
-
def get_config(cfg_file=None):
    """Return a fresh clone of the default config, optionally merged with a yaml file."""
    config = _C.clone()
    if cfg_file:
        _update_config_from_file(config, cfg_file)
    return config
-
import argparse
def get_arguments():
    """Build and parse BEiT finetune options.

    Note: argv is hard-coded for notebook use — sys.argv is ignored and the
    base yaml config is always selected.
    """
    parser = argparse.ArgumentParser('BEiT finetune')
    for opt in ('-cfg', '-dataset', '-data_path', '-output'):
        parser.add_argument(opt, type=str, default=None)
    for opt in ('-batch_size', '-batch_size_eval', '-image_size', '-accum_iter'):
        parser.add_argument(opt, type=int, default=None)
    parser.add_argument('-pretrained', type=str, default=None)
    parser.add_argument('-resume', type=str, default=None)
    parser.add_argument('-last_epoch', type=int, default=None)
    parser.add_argument('-eval', action='store_true')
    parser.add_argument('-amp', action='store_true')
    return parser.parse_args(['-cfg', "beit_base_patch16_224.yaml"])
-
# Build the global config (defaults + beit_base_patch16_224.yaml + CLI
# options) and instantiate the model once at import time.
config = update_config(get_config(), get_arguments())
# config = args[0]
build_model = build_beit
model = build_model(config)
-
def main():
    """Run the built model once, then benchmark batch sizes 1..256."""
    # images = paddle.randn([8, 3, 224, 224])
    import numpy
    images = Tensor(numpy.random.rand(2, 3, 224, 224), mindspore.float32)
    label = 2  # unused placeholder
    with Benchmark("MindSpore速度"):
        output = model(images)
        print(output.shape)

    # Doubling batch sizes: 1, 2, 4, ..., 256.
    for i in range(9):
        batch = Tensor(numpy.random.rand(2**i, 3, 224, 224), mindspore.float32)
        with Benchmark("MindSpore速度"):
            logits = model(batch)
            print(logits.shape)




if __name__ == "__main__":
    main()
|