文章目录：概要、CoordConv、SAConv

概要
CoordConv（Coordinate Convolution）和 SAConv（Spatial Attention Convolution）是两种用于神经网络中的特殊卷积操作，用于处理图像数据或其他多维数据。以下是它们的简要介绍。
CoordConv（Coordinate Convolution）和 SAConv（Spatial Attention Convolution）是两种用于神经网络中的特殊卷积操作，用于处理图像数据或其他多维数据。以下是它们的简要介绍。

CoordConv（Coordinate Convolution）
CoordConv 是由 Uber AI Labs 的研究人员提出的一种卷积操作，用于处理图像中的坐标信息。在传统的卷积操作中，卷积核在图像上滑动并执行卷积操作，但是它们对于图像中的位置信息是不敏感的。CoordConv 的目标是使卷积操作变得位置敏感：它在输入特征图中加入了位置信息作为额外的通道。这个位置信息可以是像素的坐标，也可以是归一化的坐标值，具体取决于应用的场景。
通过将坐标信息与输入特征图拼接在一起，CoordConv 能够帮助神经网络更好地学习到输入数据中的空间关系，从而提高模型的性能。它在需要考虑输入数据的空间位置信息时特别有用。

SAConv（Spatial Attention Convolution）
SAConv 是一种引入了空间注意力机制的卷积操作。传统的卷积操作在所有位置都应用相同的卷积核，而 SAConv 具有可学习的空间注意力权重，这意味着它能够动态地调整不同位置的卷积核权重。
SAConv 的关键思想是在进行卷积操作之前，先计算每个位置的空间注意力权重。这些权重由神经网络学习得出，然后被用来加权输入特征图的不同位置，从而生成具有位置敏感性的特征表示。这种机制使得神经网络在处理输入数据时能够更加关注重要的区域，从而提高了模型的感知能力和性能。
总的来说，CoordConv 和 SAConv 都是为了增强神经网络对输入数据的空间信息处理能力而提出的方法。CoordConv 引入了位置信息通道，使得网络对位置信息更敏感；而 SAConv 引入了空间注意力机制，使得网络能够动态地调整卷积核的权重，提高了对不同位置信息的关注度。这两种方法在特定的任务和场景下都能够带来性能的提升。
CoordConv
在 common.py 中添加如下代码：
class AddCoords(nn.Module):
    """Append normalized coordinate channels to a feature map (CoordConv, Uber AI).

    Adds an x-coordinate channel and a y-coordinate channel, each normalized
    to [-1, 1], and optionally a radial-distance channel.
    """

    def __init__(self, with_r=False):
        super().__init__()
        # with_r: also append a radius channel sqrt((x-0.5)^2 + (y-0.5)^2)
        self.with_r = with_r

    def forward(self, input_tensor):
        """
        Args:
            input_tensor: shape (batch, channel, x_dim, y_dim)

        Returns:
            Tensor with 2 (or 3, if with_r) extra channels concatenated.
        """
        batch_size, _, x_dim, y_dim = input_tensor.size()

        xx_channel = torch.arange(x_dim).repeat(1, y_dim, 1)
        yy_channel = torch.arange(y_dim).repeat(1, x_dim, 1).transpose(1, 2)

        # Normalize indices to [0, 1] ...
        xx_channel = xx_channel.float() / (x_dim - 1)
        yy_channel = yy_channel.float() / (y_dim - 1)

        # ... then rescale to [-1, 1].
        xx_channel = xx_channel * 2 - 1
        yy_channel = yy_channel * 2 - 1

        # Broadcast over the batch and match (batch, 1, x_dim, y_dim) layout.
        xx_channel = xx_channel.repeat(batch_size, 1, 1, 1).transpose(2, 3)
        yy_channel = yy_channel.repeat(batch_size, 1, 1, 1).transpose(2, 3)

        ret = torch.cat([
            input_tensor,
            xx_channel.type_as(input_tensor),
            yy_channel.type_as(input_tensor)], dim=1)

        if self.with_r:
            rr = torch.sqrt(torch.pow(xx_channel.type_as(input_tensor) - 0.5, 2) +
                            torch.pow(yy_channel.type_as(input_tensor) - 0.5, 2))
            ret = torch.cat([ret, rr], dim=1)

        return ret


class CoordConv(nn.Module):
    """Conv layer that is position-aware: AddCoords followed by a standard Conv.

    `Conv` is the YOLOv7 Conv block defined elsewhere in common.py.
    """

    def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, with_r=False):
        super().__init__()
        self.addcoords = AddCoords(with_r=with_r)
        # The conv must accept the extra coordinate channels.
        in_channels += 2
        if with_r:
            in_channels += 1
        self.conv = Conv(in_channels, out_channels, k=kernel_size, s=stride)

    def forward(self, x):
        x = self.addcoords(x)
        x = self.conv(x)
        return x

# Then, in the model cfg used by yolo.py:
# yolov7 head
head:
  [[-1, 1, SPPCSPC, [512]], # 51
   [-1, 1, CoordConv, [256, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [37, 1, CoordConv, [256, 1, 1]], # route backbone P4
   [[-1, -2], 1, Concat, [1]],

   [-1, 1, Conv, [256, 1, 1]],
   [-2, 1, Conv, [256, 1, 1]],
   [-1, 1, Conv, [128, 3, 1]],
   [-1, 1, Conv, [128, 3, 1]],
   [-1, 1, Conv, [128, 3, 1]],
   [-1, 1, Conv, [128, 3, 1]],
   [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
   [-1, 1, Conv, [256, 1, 1]], # 63

   [-1, 1, CoordConv, [128, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [24, 1, CoordConv, [128, 1, 1]], # route backbone P3
   [[-1, -2], 1, Concat, [1]],

   [-1, 1, Conv, [128, 1, 1]],
   [-2, 1, Conv, [128, 1, 1]],
   [-1, 1, Conv, [64, 3, 1]],
   [-1, 1, Conv, [64, 3, 1]],
   [-1, 1, Conv, [64, 3, 1]],
   [-1, 1, Conv, [64, 3, 1]],
   [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
   [-1, 1, Conv, [128, 1, 1]], # 75

   [-1, 1, MP, []],
   [-1, 1, Conv, [128, 1, 1]],
   [-3, 1, Conv, [128, 1, 1]],
   [-1, 1, Conv, [128, 3, 2]],
   [[-1, -3, 63], 1, Concat, [1]],

   [-1, 1, Conv, [256, 1, 1]],
   [-2, 1, Conv, [256, 1, 1]],
   [-1, 1, Conv, [128, 3, 1]],
   [-1, 1, Conv, [128, 3, 1]],
   [-1, 1, Conv, [128, 3, 1]],
   [-1, 1, Conv, [128, 3, 1]],
   [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
   [-1, 1, Conv, [256, 1, 1]], # 88

   [-1, 1, MP, []],
   [-1, 1, Conv, [256, 1, 1]],
   [-3, 1, Conv, [256, 1, 1]],
   [-1, 1, Conv, [256, 3, 2]],
   [[-1, -3, 51], 1, Concat, [1]],

   [-1, 1, Conv, [512, 1, 1]],
   [-2, 1, Conv, [512, 1, 1]],
   [-1, 1, Conv, [256, 3, 1]],
   [-1, 1, Conv, [256, 3, 1]],
   [-1, 1, Conv, [256, 3, 1]],
   [-1, 1, Conv, [256, 3, 1]],
   [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
   [-1, 1, Conv, [512, 1, 1]], # 101

   [75, 1, CoordConv, [256, 3, 1]],
   [88, 1, CoordConv, [512, 3, 1]],
   [101, 1, CoordConv, [1024, 3, 1]],

   [[102, 103, 104], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5)
  ]

SAConv
在 common.py 中添加：
class ConvAWS2d(nn.Conv2d):
    """Conv2d with Adaptive Weight Standardization (AWS).

    The convolution weight is standardized (zero mean, unit std per output
    channel) before use, then rescaled by learned-from-checkpoint buffers
    `weight_gamma` / `weight_beta` so that pretrained (non-AWS) checkpoints
    can be loaded without breaking the output statistics.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True):
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias)
        # Buffers (not parameters): filled from checkpoint statistics on load.
        self.register_buffer('weight_gamma', torch.ones(self.out_channels, 1, 1, 1))
        self.register_buffer('weight_beta', torch.zeros(self.out_channels, 1, 1, 1))

    def _get_weight(self, weight):
        """Return the standardized-and-rescaled convolution weight."""
        weight_mean = weight.mean(dim=1, keepdim=True).mean(dim=2, keepdim=True).mean(dim=3, keepdim=True)
        weight = weight - weight_mean
        # 1e-5 guards against division by zero for near-constant filters.
        std = torch.sqrt(weight.view(weight.size(0), -1).var(dim=1) + 1e-5).view(-1, 1, 1, 1)
        weight = weight / std
        weight = self.weight_gamma * weight + self.weight_beta
        return weight

    def forward(self, x):
        weight = self._get_weight(self.weight)
        # bias=None: the parent's bias is applied via _conv_forward only if
        # passed explicitly; AWS applies the raw conv with standardized weight.
        return super()._conv_forward(x, weight, None)

    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                              missing_keys, unexpected_keys, error_msgs):
        # Sentinel: if the checkpoint already contains gamma/beta they will
        # overwrite the -1 fill, leaving a positive mean -> nothing to do.
        self.weight_gamma.data.fill_(-1)
        super()._load_from_state_dict(state_dict, prefix, local_metadata, strict,
                                      missing_keys, unexpected_keys, error_msgs)
        if self.weight_gamma.data.mean() > 0:
            return
        # Otherwise derive gamma/beta from the loaded (non-AWS) weights so the
        # standardized weight reproduces the pretrained filters.
        weight = self.weight.data
        weight_mean = weight.data.mean(dim=1, keepdim=True).mean(dim=2, keepdim=True).mean(dim=3, keepdim=True)
        self.weight_beta.data.copy_(weight_mean)
        std = torch.sqrt(weight.view(weight.size(0), -1).var(dim=1) + 1e-5).view(-1, 1, 1, 1)
        self.weight_gamma.data.copy_(std)


class SAConv2d(ConvAWS2d):
    """Switchable Atrous Convolution (SAC, from DetectoRS) on top of ConvAWS2d.

    Runs the same (standardized) kernel at dilation d and 3*d, and blends the
    two outputs with a per-position learned switch, plus global pre/post
    context. `autopad` is the YOLO padding helper defined in common.py.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 s=1,
                 p=None,
                 g=1,
                 d=1,
                 act=True,
                 bias=True):
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride=s,
            padding=autopad(kernel_size, p),
            dilation=d,
            groups=g,
            bias=bias)
        # Switch starts at sigmoid-free value 1 (weight 0, bias 1): initially
        # favors the small-dilation branch.
        self.switch = torch.nn.Conv2d(
            self.in_channels,
            1,
            kernel_size=1,
            stride=s,
            bias=True)
        self.switch.weight.data.fill_(0)
        self.switch.bias.data.fill_(1)
        # Learned delta between the two dilation branches; starts at zero so
        # both branches are identical at init.
        self.weight_diff = torch.nn.Parameter(torch.Tensor(self.weight.size()))
        self.weight_diff.data.zero_()
        self.pre_context = torch.nn.Conv2d(
            self.in_channels,
            self.in_channels,
            kernel_size=1,
            bias=True)
        self.pre_context.weight.data.fill_(0)
        self.pre_context.bias.data.fill_(0)
        self.post_context = torch.nn.Conv2d(
            self.out_channels,
            self.out_channels,
            kernel_size=1,
            bias=True)
        self.post_context.weight.data.fill_(0)
        self.post_context.bias.data.fill_(0)
        self.bn = nn.BatchNorm2d(out_channels)
        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        # pre-context: add a global (1x1-pooled) context vector to every position
        avg_x = torch.nn.functional.adaptive_avg_pool2d(x, output_size=1)
        avg_x = self.pre_context(avg_x)
        avg_x = avg_x.expand_as(x)
        x = x + avg_x
        # switch: per-position blending weight from a 5x5 average-pooled view
        avg_x = torch.nn.functional.pad(x, pad=(2, 2, 2, 2), mode='reflect')
        avg_x = torch.nn.functional.avg_pool2d(avg_x, kernel_size=5, stride=1, padding=0)
        switch = self.switch(avg_x)
        # sac: small-dilation branch ...
        weight = self._get_weight(self.weight)
        out_s = super()._conv_forward(x, weight, None)
        ori_p = self.padding
        ori_d = self.dilation
        # ... then large-dilation branch (3x padding/dilation keeps output size)
        self.padding = tuple(3 * p for p in self.padding)
        self.dilation = tuple(3 * d for d in self.dilation)
        weight = weight + self.weight_diff
        out_l = super()._conv_forward(x, weight, None)
        out = switch * out_s + (1 - switch) * out_l
        self.padding = ori_p
        self.dilation = ori_d
        # post-context: same global-context trick on the output
        avg_x = torch.nn.functional.adaptive_avg_pool2d(out, output_size=1)
        avg_x = self.post_context(avg_x)
        avg_x = avg_x.expand_as(out)
        out = out + avg_x
        return self.act(self.bn(out))

# Then register SAConv2d in yolo.py the same way as deformable conv.
# NOTE(review): only replace 3x3 convs, and sparingly — SAConv is more
# expensive than a plain conv and slows inference; use a few for accuracy.