pytorch构建简单图片特征提取模型

import torch
from torch import nn

torch.manual_seed(0)    # 设置随机数种子确保实验可重复(其实就是设置pytorch里面的参数按照同一个随机数序列取值)


class Mynet(nn.Module):
    """Simple CNN feature extractor.

    Maps a batch of (B, 3, 640, 640) RGB images to a (B, 50) feature vector.
    """

    def __init__(self):
        super().__init__()
        # Convolutional backbone. All convolutions use stride 1 with
        # padding = (kernel_size - 1) // 2, which keeps H and W unchanged
        # (output_size = input_size + 2*padding - kernel_size + 1 for stride 1).
        # Each pooling layer halves the spatial resolution.
        conv_layers = [
            nn.Conv2d(3, 16, 3, 1, 1),    # -> (B, 16, 640, 640)
            nn.ReLU(True),
            nn.Conv2d(16, 32, 5, 1, 2),   # -> (B, 32, 640, 640)
            nn.ReLU(True),
            nn.Conv2d(32, 16, 1, 1, 0),   # 1x1 conv to mix channels -> (B, 16, 640, 640)
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),           # 2x2 max pool, stride 2 -> (B, 16, 320, 320)
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),           # -> (B, 16, 160, 160)
            nn.ReLU(True),
            nn.Conv2d(16, 8, 1, 1, 0),    # -> (B, 8, 160, 160)
            nn.ReLU(True),
            nn.AvgPool2d(2, 2),           # 2x2 average pool, stride 2 -> (B, 8, 80, 80)
            nn.ReLU(True),
        ]
        self.conv = nn.Sequential(*conv_layers)
        # Fully connected head: flattened 8 * 80 * 80 = 51200 features -> 50.
        self.linear = nn.Sequential(
            nn.Linear(51200, 1600),
            nn.ReLU(True),
            nn.Linear(1600, 50),
        )

    def forward(self, x):
        """Run the backbone, flatten everything but the batch dim, and project
        to the 50-dimensional feature vector."""
        features = self.conv(x)                      # (B, 8, 80, 80)
        flat = features.view(features.size(0), -1)   # (B, 51200)
        return self.linear(flat)


if __name__ == '__main__':
    net = Mynet()
    # Dummy batch of two 640x640 RGB images: (batch, channels, height, width).
    batch = torch.randn(2, 3, 640, 640)
    features = net(batch)
    print(features.shape)  # expected: torch.Size([2, 50])
    print(features)

输出如下

C:\ProgramData\anaconda3\envs\test\python.exe D:\testpy\t3\mynet.py 
torch.Size([2, 50])
tensor([[ 0.0233, -0.0229,  0.0055, -0.0183, -0.0030, -0.0113,  0.0067, -0.0149,
         -0.0029, -0.0306,  0.0057, -0.0313, -0.0044,  0.0331, -0.0396, -0.0164,
         -0.0314, -0.0104,  0.0073, -0.0276,  0.0185, -0.0026,  0.0081, -0.0165,
          0.0010, -0.0154,  0.0317, -0.0376, -0.0406, -0.0342, -0.0437,  0.0161,
          0.0201,  0.0106,  0.0272,  0.0364,  0.0103,  0.0046,  0.0009, -0.0325,
         -0.0076,  0.0132,  0.0452, -0.0433, -0.0031, -0.0044,  0.0375,  0.0582,
         -0.0279, -0.0187],
        [ 0.0304, -0.0280,  0.0054, -0.0156, -0.0057, -0.0115,  0.0080, -0.0108,
         -0.0022, -0.0272,  0.0045, -0.0320, -0.0045,  0.0336, -0.0401, -0.0197,
         -0.0296, -0.0060,  0.0100, -0.0298,  0.0157, -0.0032,  0.0140, -0.0183,
         -0.0010, -0.0147,  0.0301, -0.0385, -0.0417, -0.0319, -0.0417,  0.0161,
          0.0219,  0.0117,  0.0325,  0.0342,  0.0086,  0.0044,  0.0009, -0.0329,
         -0.0074,  0.0090,  0.0483, -0.0435, -0.0047, -0.0055,  0.0369,  0.0575,
         -0.0293, -0.0212]], grad_fn=<AddmmBackward0>)

进程已结束,退出代码为 0

如何归一化详见带选框的图片大小归一化\使用opencv进行图像输入神经网络前的归一尺寸处理

以下是刻晴参演的模型(未训练)输出

import numpy as np
import torch
from torch import nn
import cv2
from img_resize import resize_and_pad_image

torch.manual_seed(0)  # 设置随机数种子确保实验可重复(其实就是设置pytorch里面的参数按照同一个随机数序列取值)


class Mynet(nn.Module):
    """CNN feature extractor producing a 50-dim vector per (3, 640, 640) image."""

    def __init__(self):
        super().__init__()
        # Stride-1 convolutions with padding = (kernel_size - 1) // 2 keep the
        # spatial size fixed; the three pooling layers each halve H and W,
        # taking 640x640 down to 80x80.
        self.conv = nn.Sequential(
            nn.Conv2d(3, 16, 3, 1, 1),   # -> (B, 16, 640, 640)
            nn.ReLU(True),
            nn.Conv2d(16, 32, 5, 1, 2),  # -> (B, 32, 640, 640)
            nn.ReLU(True),
            nn.Conv2d(32, 16, 1, 1, 0),  # 1x1 channel-mixing conv -> (B, 16, 640, 640)
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),          # -> (B, 16, 320, 320)
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),          # -> (B, 16, 160, 160)
            nn.ReLU(True),
            nn.Conv2d(16, 8, 1, 1, 0),   # -> (B, 8, 160, 160)
            nn.ReLU(True),
            nn.AvgPool2d(2, 2),          # -> (B, 8, 80, 80)
            nn.ReLU(True),
        )
        # Head: 8 * 80 * 80 = 51200 flattened features -> 1600 -> 50.
        self.linear = nn.Sequential(
            nn.Linear(51200, 1600),
            nn.ReLU(True),
            nn.Linear(1600, 50),
        )

    def forward(self, x):
        """Return (B, 50) features for a (B, 3, 640, 640) input batch."""
        x = self.conv(x)
        # Collapse all non-batch dims: (B, 8, 80, 80) -> (B, 51200).
        x = x.flatten(1)
        return self.linear(x)


if __name__ == '__main__':
    model = Mynet()
    # Load the test image, resized/letterboxed by the project helper.
    img = resize_and_pad_image('../A.jpg')
    # OpenCV reads images in BGR channel order; convert to RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # uint8 pixels [0, 255] -> float32 [0, 1] to shrink memory use and normalize.
    img_array = np.array(img, dtype=np.float32) / 255.0
    # HWC -> CHW via permute(2, 0, 1), then unsqueeze(0) prepends a batch dim.
    image_tensor = torch.from_numpy(img_array).permute(2, 0, 1).unsqueeze(0)
    print(image_tensor.shape)  # expected: torch.Size([1, 3, 640, 640])
    # Prefer the GPU when CUDA is available; otherwise run on the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    image_tensor = image_tensor.to(device)
    model = model.to(device)
    result = model(image_tensor)
    print(result.shape)
    print(result)

<---这是刻晴orz

运行结果

C:\ProgramData\anaconda3\envs\test\python.exe D:\testpy\t3\mynet.py 
torch.Size([1, 3, 640, 640])
torch.Size([1, 50])
tensor([[ 1.6301e-02, -3.0197e-02,  4.7201e-03, -1.8853e-02, -2.5628e-04,
         -1.2649e-02, -1.3965e-03, -7.5116e-03,  6.9302e-03, -3.1824e-02,
         -4.9574e-05, -3.0658e-02,  9.6092e-05,  3.0587e-02, -4.1966e-02,
         -1.9422e-02, -3.7493e-02, -1.6072e-03,  1.3277e-02, -3.2400e-02,
          1.6708e-02, -1.9345e-03,  6.2375e-03, -2.4938e-02,  6.5929e-03,
         -1.5835e-02,  3.1258e-02, -4.9182e-02, -4.4705e-02, -3.6710e-02,
         -4.1149e-02,  2.9103e-02,  2.8033e-02,  1.6225e-02,  4.1443e-02,
          3.7884e-02,  1.6510e-03,  4.7750e-03,  5.7569e-03, -3.8279e-02,
         -1.5412e-02,  7.7273e-03,  5.8647e-02, -4.7992e-02, -9.9364e-04,
         -4.3572e-03,  4.6278e-02,  5.9638e-02, -2.0950e-02, -2.5759e-02]],
       device='cuda:0', grad_fn=<AddmmBackward0>)

进程已结束,退出代码为 0

才才不会告诉你特征值有别的用处,况且还没有训练,且听下回分解

博猪也在努力学习,大家如果有好的方法可以在评论区评论,欢迎关注博猪一起学习

致力于架构自己的模型(雾),且永远开源

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值