Parameter Initialization for a Sequential Model
import numpy as np
import torch
from torch import nn
# Define a Sequential model
net1 = nn.Sequential(
    nn.Linear(2, 4),  # the weight of nn.Linear(2, 4) has shape (4, 2)
    nn.ReLU(),
    nn.Linear(4, 5),
    nn.ReLU(),
    nn.Linear(5, 2)
)
# Access the parameters of the first layer
w1 = net1[0].weight
b1 = net1[0].bias
print(w1)
# Create a Tensor and assign it to the weight directly
net1[0].weight.data = torch.from_numpy(np.random.uniform(3, 5, size=(4, 2))).float()  # cast to float32 to match the layer's dtype
print(net1[0].weight)
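Note that reassigning .data works but bypasses autograd bookkeeping; a minimal sketch of the in-place alternative, using copy_ inside torch.no_grad():
with torch.no_grad():
    # copy_ writes in place and casts float64 -> float32 automatically
    net1[0].weight.copy_(torch.from_numpy(np.random.uniform(3, 5, size=(4, 2))))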
for layer in net1:
    if isinstance(layer, nn.Linear):  # check whether this is a linear layer
        param_shape = layer.weight.shape
        layer.weight.data = torch.from_numpy(np.random.normal(0, 0.5, size=param_shape)).float()
        # a normal distribution with mean 0 and standard deviation 0.5
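An equivalent pattern is Module.apply, which calls a function on every submodule recursively; a minimal sketch (the helper name init_weights is our own):
def init_weights(m):
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, mean=0, std=0.5)  # same mean-0, std-0.5 normal init as the loop above

net1.apply(init_weights)  # applied recursively to every submodule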
Parameter Initialization for a Module Model
class sim_net(nn.Module):
    def __init__(self):
        super(sim_net, self).__init__()
        self.l1 = nn.Sequential(nn.Linear(2, 4), nn.ReLU())
        self.l1[0].weight.data = torch.randn(4, 2)  # initialize one specific layer directly
        self.l2 = nn.Sequential(nn.Linear(4, 5), nn.ReLU())
        self.l3 = nn.Sequential(nn.Linear(5, 2), nn.ReLU())

    def forward(self, x):
        x = self.l1(x)
        x = self.l2(x)
        x = self.l3(x)
        return x

net2 = sim_net()

# Access children
a = 0
for i in net2.children():
    print(a)
    a += 1
    print(i)
0
Sequential(
(0): Linear(in_features=2, out_features=4, bias=True)
(1): ReLU()
)
1
Sequential(
(0): Linear(in_features=4, out_features=5, bias=True)
(1): ReLU()
)
2
Sequential(
(0): Linear(in_features=5, out_features=2, bias=True)
(1): ReLU()
)
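children can also be paired with submodule names via named_children(); a quick sketch:
for name, child in net2.named_children():
    print(name, '->', child)  # yields 'l1', 'l2', 'l3' with their Sequential blocks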
# Access modules
a = 0
for i in net2.modules():
    print(a)
    a += 1
    print(i)

# children only visits the direct sub-modules of the model definition: since
# the model above defines three Sequential blocks, children only yields those
# three Sequential blocks, whereas modules recursively visits the entire
# structure. In the example above, modules reaches not only each Sequential
# but also the layers inside it, which makes initialization very convenient.
0
sim_net(
(l1): Sequential(
(0): Linear(in_features=2, out_features=4, bias=True)
(1): ReLU()
)
(l2): Sequential(
(0): Linear(in_features=4, out_features=5, bias=True)
(1): ReLU()
)
(l3): Sequential(
(0): Linear(in_features=5, out_features=2, bias=True)
(1): ReLU()
)
)
1
Sequential(
(0): Linear(in_features=2, out_features=4, bias=True)
(1): ReLU()
)
2
Linear(in_features=2, out_features=4, bias=True)
3
ReLU()
4
Sequential(
(0): Linear(in_features=4, out_features=5, bias=True)
(1): ReLU()
)
5
Linear(in_features=4, out_features=5, bias=True)
6
ReLU()
7
Sequential(
(0): Linear(in_features=5, out_features=2, bias=True)
(1): ReLU()
)
8
Linear(in_features=5, out_features=2, bias=True)
9
ReLU()
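Similarly, if the attribute path of every submodule is needed, named_modules() yields (name, module) pairs; a quick sketch:
for name, module in net2.named_modules():
    print(name, '->', type(module).__name__)  # e.g. 'l1.0 -> Linear'; the root module has the empty name ''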
for layer in net2.modules():
    if isinstance(layer, nn.Linear):
        print(layer)
        param_shape = layer.weight.shape
        layer.weight.data = torch.from_numpy(np.random.normal(0, 0.5, size=param_shape)).float()
Linear(in_features=2, out_features=4, bias=True)
Linear(in_features=4, out_features=5, bias=True)
Linear(in_features=5, out_features=2, bias=True)
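The loop above only touches the weights; biases can be handled in exactly the same way. A short sketch that zeroes every Linear bias:
for layer in net2.modules():
    if isinstance(layer, nn.Linear):
        layer.bias.data.zero_()  # in-place zeroing, mirroring the weight pattern above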
torch.nn.init
Thanks to PyTorch's flexibility, we can initialize parameters by operating on Tensors directly. PyTorch also provides helper functions for fast initialization, namely torch.nn.init, which likewise operates at the Tensor level.
from torch.nn import init

w = torch.Tensor(3, 5)
init.xavier_uniform_(w, gain=1)
print("w:", w)
init.xavier_uniform_(layer.weight, gain=1)  # tensor: an n-dimensional torch.Tensor; gain: an optional scaling factor
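Putting the pieces together, modules() combined with torch.nn.init can initialize the whole model in a few lines; a minimal sketch:
for layer in net2.modules():
    if isinstance(layer, nn.Linear):
        init.xavier_uniform_(layer.weight, gain=1)  # Xavier-uniform weights
        init.zeros_(layer.bias)  # zero the biases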
Source: https://blog.csdn.net/qq_35447659/article/details/84030286