1. 规定格式 正如[convolutional]等已有层在cfg文件中以“[层名]”加若干行“key=value”的形式来描述一样,这里也按同样的方式规定CBAM在cfg文件中的格式:
[cbam]
ratio=16
kernelsize=7
2. 修改解析部分 由于添加的这些参数都是自定义的,所以需要修改解析cfg文件的函数,之前讲过,需要修改parse_config.py
def parse_model_cfg(path):
    """Parse a darknet-style ``.cfg`` file into a list of module definitions.

    Each ``[section]`` header starts a new dict whose ``'type'`` is the section
    name; every following ``key=value`` line is stored in that dict. Values are
    kept as stripped strings, except keys containing ``'anchors'``, which are
    converted to a float ``numpy`` array reshaped to ``(-1, 2)``.
    ``[convolutional]`` sections get ``batch_normalize`` preset to ``0`` so the
    key is always present.

    Args:
        path: Path to the cfg file. ``'.cfg'`` is appended when missing, and
            the ``cfg`` directory is tried when the path does not exist as
            given.

    Returns:
        A list of dicts, one per section, in file order.

    Raises:
        AssertionError: If any section after the first uses a key that is not
            in the ``supported`` whitelist.
    """
    # Accept a bare model name and fall back to the local ``cfg`` directory.
    if not path.endswith('.cfg'):
        path += '.cfg'
    if not os.path.exists(path) and os.path.exists('cfg' + os.sep + path):
        path = 'cfg' + os.sep + path

    with open(path, 'r') as cfg_file:
        raw_lines = cfg_file.read().split('\n')

    # Strip BEFORE filtering: otherwise whitespace-only lines and indented
    # comments survive the filter and break the ``key=value`` parsing below.
    lines = [raw.strip() for raw in raw_lines]
    lines = [line for line in lines if line and not line.startswith('#')]

    mdefs = []  # one dict per [section]
    for line in lines:
        if line.startswith('['):
            # Section header, e.g. "[shortcut]" followed by "from=-3" and
            # "activation=linear" lines.
            mdefs.append({})
            mdefs[-1]['type'] = line[1:-1].rstrip()
            if mdefs[-1]['type'] == 'convolutional':
                # Default that an explicit batch_normalize line overrides.
                mdefs[-1]['batch_normalize'] = 0
        else:
            # Split on the first '=' only so values may themselves contain '='.
            key, val = line.split('=', 1)
            key = key.rstrip()
            if 'anchors' in key:
                anchors = [float(x) for x in val.split(',')]
                mdefs[-1][key] = np.array(anchors).reshape((-1, 2))
            else:
                mdefs[-1][key] = val.strip()

    # Whitelist of keys the model builder understands. 'ratio', 'reduction'
    # and 'kernelsize' are the custom keys used by the [cbam] / [se] sections.
    supported = [
        'type', 'batch_normalize', 'filters', 'size',
        'stride', 'pad', 'activation', 'layers',
        'groups', 'from', 'mask', 'anchors',
        'classes', 'num', 'jitter', 'ignore_thresh',
        'truth_thresh', 'random',
        'stride_x', 'stride_y',
        'ratio', 'reduction', 'kernelsize',
    ]

    # Collect every distinct key used after the first section (the first
    # section is skipped, exactly as before).
    seen = []
    for mdef in mdefs[1:]:
        for key in mdef:
            if key not in seen:
                seen.append(key)
    u = [key for key in seen if key not in supported]
    assert not any(u), "Unsupported fields %s in %s. See https://github.com/ultralytics/yolov3/issues/631" % (u, path)

    return mdefs
# cfg keys accepted by the parser's whitelist check. The last row holds the
# custom keys added for the attention sections: 'ratio' and 'kernelsize' for
# [cbam], 'reduction' for [se].
supported = [
    'type', 'batch_normalize', 'filters', 'size',
    'stride', 'pad', 'activation', 'layers',
    'groups', 'from', 'mask', 'anchors',
    'classes', 'num', 'jitter', 'ignore_thresh',
    'truth_thresh', 'random',
    'stride_x', 'stride_y',
    'ratio', 'reduction', 'kernelsize',
]
class SELayer(nn.Module):
    """Squeeze-and-Excitation gate that rescales each channel of its input.

    The input is average-pooled to one value per channel, passed through a
    two-layer bias-free MLP (``channel -> channel // reduction -> channel``)
    ending in a sigmoid, and the resulting per-channel weights multiply the
    input.

    Args:
        channel: Number of channels of the input feature map.
        reduction: Divisor applied to ``channel`` to get the hidden width.
    """

    def __init__(self, channel, reduction=16):
        super(SELayer, self).__init__()
        hidden = channel // reduction
        # Squeeze: collapse every channel's spatial map to a single value.
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        # Excitation: bottleneck MLP producing one weight in (0, 1) per channel.
        excitation = [
            nn.Linear(channel, hidden, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, channel, bias=False),
            nn.Sigmoid(),
        ]
        self.fc = nn.Sequential(*excitation)

    def forward(self, x):
        """Return ``x`` scaled channel-wise by the learned gate.

        ``x`` must be 4-dimensional; the first two dimensions are used as
        batch and channel sizes.
        """
        batch, channels, _, _ = x.size()
        squeezed = self.avg_pool(x).view(batch, channels)
        gate = self.fc(squeezed).view(batch, channels, 1, 1)
        # Expand the (batch, channels, 1, 1) gate to the full shape of x.
        return x * gate.expand_as(x)
class SpatialAttention(nn.Module):
    """Spatial attention map of CBAM.

    Produces one weight in (0, 1) per spatial location from the channel-wise
    mean and channel-wise max of the input.

    Args:
        kernel_size: Size of the convolution applied to the stacked
            mean/max maps; must be 3 or 7.
    """

    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()
        assert kernel_size in (3, 7), "kernel size must be 3 or 7"
        # Padding chosen so the output keeps the input's spatial size.
        padding = 3 if kernel_size == 7 else 1

        self.conv = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return a single-channel attention map for ``x``."""
        # Reduce over the channel dimension (dim=1), keeping it as size 1.
        avgout = torch.mean(x, dim=1, keepdim=True)
        maxout, _ = torch.max(x, dim=1, keepdim=True)
        stacked = torch.cat([avgout, maxout], dim=1)
        return self.sigmoid(self.conv(stacked))


class ChannelAttention(nn.Module):
    """Channel attention map of CBAM.

    Average-pooled and max-pooled channel descriptors are passed through one
    shared bottleneck MLP (built from 1x1 convolutions); the two results are
    summed and squashed with a sigmoid.

    Args:
        in_planes: Number of input channels.
        ratio: Divisor applied to ``in_planes`` to get the bottleneck width.
        rotio: Deprecated misspelled alias of ``ratio``, kept so keyword
            callers using the old name keep working; overrides ``ratio``
            when given.
    """

    def __init__(self, in_planes, ratio=16, rotio=None):
        super(ChannelAttention, self).__init__()
        # The original signature spelled the parameter "rotio" while the body
        # also referenced "ratio", which raised NameError on construction.
        if rotio is not None:
            ratio = rotio
        hidden = in_planes // ratio

        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)

        # One MLP shared by both pooled descriptors.
        self.sharedMLP = nn.Sequential(
            nn.Conv2d(in_planes, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, in_planes, 1, bias=False))
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return per-channel attention weights with 1x1 spatial size."""
        avgout = self.sharedMLP(self.avg_pool(x))
        maxout = self.sharedMLP(self.max_pool(x))
        return self.sigmoid(avgout + maxout)
4. 设计cfg文件 这里以yolov3-tiny.cfg为例,在backbone结束的地方添加SE模块,修改后的cfg文件如下:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 [net] # Testing batch=1 subdivisions=1 # Training # batch=64 # subdivisions=2 width=416 height=416 channels=3 momentum=0.9 decay=0.0005 angle=0 saturation = 1.5 exposure = 1.5 hue=.1 learning_rate=0.001 burn_in=1000 max_batches = 500200 policy=steps steps=400000,450000 scales=.1,.1 [convolutional] batch_normalize=1 filters=16 size=3 stride=1 pad=1 activation=leaky [maxpool] size=2 stride=2 [convolutional] batch_normalize=1 filters=32 size=3 stride=1 pad=1 activation=leaky [maxpool] size=2 stride=2 [convolutional] batch_normalize=1 filters=64 size=3 stride=1 pad=1 activation=leaky [maxpool] size=2 stride=2 [convolutional] batch_normalize=1 filters=128 size=3 stride=1 pad=1 activation=leaky [maxpool] size=2 stride=2 [convolutional] batch_normalize=1 filters=256 size=3 stride=1 pad=1 activation=leaky [maxpool] size=2 stride=2 [convolutional] batch_normalize=1 filters=512 size=3 stride=1 pad=1 activation=leaky [maxpool] size=2 stride=1 [convolutional] batch_normalize=1 filters=1024 size=3 stride=1 pad=1 activation=leaky [se] reduction=16 # 在backbone结束的地方添加se模块 #####backbone###### [convolutional] batch_normalize=1 filters=256 size=1 stride=1 pad=1 activation=leaky [convolutional] batch_normalize=1 filters=512 size=3 stride=1 pad=1 activation=leaky [convolutional] size=1 stride=1 pad=1 filters=18 activation=linear [yolo] mask = 3,4,5 anchors = 
10,14, 23,27, 37,58, 81,82, 135,169, 344,319 classes=1 num=6 jitter=.3 ignore_thresh = .7 truth_thresh = 1 random=1 [route] layers = -4 [convolutional] batch_normalize=1 filters=128 size=1 stride=1 pad=1 activation=leaky [upsample] stride=2 [route] layers = -1, 8 [convolutional] batch_normalize=1 filters=256 size=3 stride=1 pad=1 activation=leaky [convolutional] size=1 stride=1 pad=1 filters=18 activation=linear [yolo] mask = 0,1,2 anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 classes=1 num=6 jitter=.3 ignore_thresh = .7 truth_thresh = 1 random=1
5. 模型构建 以上都是准备工作,以SE为例,我们修改model.py
1 2 3 4 elif mdef['type' ] == 'se' : modules.add_module( 'se_module' , SELayer(output_filters[-1 ], reduction=int (mdef['reduction' ])))
def forward(self, x, var=None):
    """Run ``x`` through every module in order, dispatching on layer type.

    Only the layer loop of the method is shown here; it ends after the loop
    without a return statement. ``var`` is not used in this part.

    Per layer type:
      * convolutional / upsample / maxpool: apply the module to ``x``.
      * route: take one stored output, or concatenate several along dim 1.
      * shortcut: add a stored output to ``x``.
      * yolo: call the module with ``x`` and the input's last two dimensions
        and collect the result in ``output``.
    """
    img_size = x.shape[-2:]
    layer_outputs = []
    output = []

    for i, (mdef, module) in enumerate(zip(self.module_defs, self.module_list)):
        mtype = mdef['type']

        if mtype in ['convolutional', 'upsample', 'maxpool']:
            x = module(x)

        elif mtype == 'route':
            indices = [int(tok) for tok in mdef['layers'].split(',')]
            if len(indices) == 1:
                x = layer_outputs[indices[0]]
            else:
                try:
                    x = torch.cat([layer_outputs[j] for j in indices], 1)
                except:  # noqa: E722
                    # NOTE(review): bare except kept as-is; presumably meant
                    # for a spatial-size mismatch in torch.cat - confirm.
                    # Fallback: halve the second routed map and retry.
                    second = indices[1]
                    layer_outputs[second] = F.interpolate(
                        layer_outputs[second], scale_factor=[0.5, 0.5])
                    x = torch.cat([layer_outputs[j] for j in indices], 1)

        elif mtype == 'shortcut':
            x = x + layer_outputs[int(mdef['from'])]

        elif mtype == 'yolo':
            output.append(module(x, img_size))

        # Keep the output only for layers listed in self.routs; others get an
        # empty placeholder so list indices still line up with layer indices.
        kept = x if i in self.routs else []
        layer_outputs.append(kept)
for i, (mdef, module) in enumerate(zip(self.module_defs, self.module_list)):
    mtype = mdef['type']
    # 'se' joins the layer types that are simply applied to the running
    # tensor, so the SE module is actually executed in the forward pass.
    if mtype in ('convolutional', 'upsample', 'maxpool', 'se'):
        x = module(x)