Transformer Regression Prediction in Python
### Transformer-Based Regression Prediction
Below is a simple example, built on Python and the PyTorch framework, showing how to use a Transformer model for regression prediction. The code combines the core components of the Transformer architecture with the details of the training loop.
#### Data Preparation
For the sake of demonstration, assume a simple dataset for the regression task: the input is a time-series signal, and the target is the value at the next time step. A minimal sketch of building such a windowed dataset is shown below.
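The helper below is an illustrative sketch, not part of the original code: `make_windowed_dataset`, the sine-wave signal, and the window size are all assumptions chosen for demonstration. With data in this shape, the model's `input_dim` would be 1.
```python
import torch

def make_windowed_dataset(series, window=10):
    """Slice a 1-D signal into (window, 1) inputs and next-step targets.

    Illustrative helper only; any windowed time series with the same
    layout works with the model defined below.
    """
    X, y = [], []
    for i in range(len(series) - window):
        X.append(series[i:i + window])   # the window itself
        y.append(series[i + window])     # the value right after the window
    X = torch.stack(X).unsqueeze(-1)     # (num_samples, window, 1)
    y = torch.stack(y).unsqueeze(-1)     # (num_samples, 1)
    return X, y

# Example: a noisy sine wave as the source signal
t = torch.linspace(0, 20, 500)
signal = torch.sin(t) + 0.1 * torch.randn(500)
X, y = make_windowed_dataset(signal, window=10)
```
#### Model Definition
The model itself consists of a positional-encoding module and a stack of Transformer encoder layers: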
```python
import math

import torch
import torch.nn as nn
from torch.optim import Adam


class PositionalEncoding(nn.Module):
    """Sinusoidal positional encoding module."""

    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * -(math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)  # shape: (1, max_len, d_model)
        self.register_buffer('pe', pe)

    def forward(self, x):
        # x: (batch, seq_len, d_model); add the encoding for the first seq_len positions
        return x + self.pe[:, :x.size(1)]


class TransformerModel(nn.Module):
    """Complete Transformer encoder model for regression."""

    def __init__(self, input_dim, output_dim, d_model=512, nhead=8, num_layers=6, dropout=0.1):
        super(TransformerModel, self).__init__()
        # batch_first=True so all tensors are (batch, seq_len, d_model), matching
        # the positional encoding and the indexing in forward()
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead,
                                                   dropout=dropout, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.pos_encoder = PositionalEncoding(d_model)
        self.fc_in = nn.Linear(input_dim, d_model)
        self.fc_out = nn.Linear(d_model, output_dim)
        self.init_weights()

    def init_weights(self):
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def forward(self, src):
        src = self.fc_in(src)                    # project input features to d_model
        src = self.pos_encoder(src)              # add positional encoding
        output = self.transformer_encoder(src)   # run the encoder stack
        output = self.fc_out(output[:, -1])      # last time step only, mapped to the target dimension
        return output
```
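As a quick sanity check (not part of the original), the model maps a batch of shape `(batch, seq_len, input_dim)` to `(batch, output_dim)`; the sizes here are arbitrary:
```python
# Minimal shape check: 4 sequences of length 10 with 10 features each
model = TransformerModel(input_dim=10, output_dim=1, d_model=64, nhead=4, num_layers=2)
dummy = torch.randn(4, 10, 10)
print(model(dummy).shape)  # expected: torch.Size([4, 1])
```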
#### Model Training and Evaluation
The following functions run the training and evaluation loops for the regression task:
```python
def train(model, data_loader, optimizer, criterion, device='cpu'):
    model.train()
    total_loss = 0
    for inputs, targets in data_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model(inputs.float())              # forward pass: (batch, output_dim)
        loss = criterion(outputs, targets.float())   # shapes match exactly, so no silent broadcasting
        # Manual L2 regularization term
        l2_lambda = 0.001
        l2_norm = sum(p.pow(2.0).sum() for p in model.parameters())
        loss += l2_lambda * l2_norm
        optimizer.zero_grad()   # clear gradients
        loss.backward()         # backpropagation
        optimizer.step()        # update weights
        total_loss += loss.item()
    avg_loss = total_loss / len(data_loader)
    return avg_loss


def evaluate(model, test_loader, criterion, device='cpu'):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs.float())
            loss = criterion(outputs, targets.float())
            total_loss += loss.item()
    avg_loss = total_loss / len(test_loader)
    return avg_loss
```
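Note that the manual L2 term above penalizes every parameter, biases included. A common PyTorch alternative (not taken from the original) is to let the optimizer apply weight decay instead:
```python
# Alternative: built-in weight decay in Adam. This is not mathematically
# identical to an explicit L2 loss term (the decay interacts with Adam's
# adaptive scaling), but it serves the same regularizing purpose and
# avoids recomputing the norm on every step.
optimizer = Adam(model.parameters(), lr=0.001, weight_decay=0.001)
```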
#### Main Program
A simple main loop runs the training stage; a sketch of the evaluation stage follows the block.
```python
from torch.utils.data import DataLoader, TensorDataset

if __name__ == "__main__":
    # Hyperparameters
    input_dim = 10    # number of input features
    output_dim = 1    # output dimension (univariate regression)
    d_model = 64      # model (embedding) dimension
    nhead = 4         # number of attention heads
    num_layers = 3    # number of encoder layers

    # Initialize the model, optimizer, and loss function
    model = TransformerModel(input_dim=input_dim, output_dim=output_dim,
                             d_model=d_model, nhead=nhead, num_layers=num_layers)
    optimizer = Adam(model.parameters(), lr=0.001)
    criterion = nn.MSELoss()  # regression tasks usually use mean squared error

    # Build a dummy data loader: 100 samples, sequence length 10
    X_train = torch.randn((100, 10, input_dim))
    y_train = torch.randn((100, output_dim))
    dataset = TensorDataset(X_train, y_train)
    dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

    epochs = 10
    for epoch in range(epochs):
        train_loss = train(model, dataloader, optimizer, criterion)
        print(f'Epoch {epoch+1}/{epochs}, Training Loss: {train_loss:.4f}')
```
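The main loop above never actually calls `evaluate`. A sketch of closing that loop on a held-out split, plus a single-sequence prediction, might look like this (the validation split and its size are illustrative assumptions, continuing from the variables defined above):
```python
# Held-out data for evaluation (same dummy distribution as training)
X_val = torch.randn((20, 10, input_dim))
y_val = torch.randn((20, output_dim))
val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=16)
val_loss = evaluate(model, val_loader, criterion)
print(f'Validation Loss: {val_loss:.4f}')

# Single prediction on one new sequence
model.eval()
with torch.no_grad():
    new_seq = torch.randn(1, 10, input_dim)
    prediction = model(new_seq)
print('Predicted next value:', prediction.item())
```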
---
### Key Points
1. **Transformer encoder**: The code above implements only the encoder part of the Transformer, which is sufficient for regression tasks[^4].
2. **L2 regularization**: An L2 regularization term is added during training to guard against overfitting[^3].
3. **Custom datasets**: Users can adapt the form of `X_train` and `y_train` to their actual data.
---