C:\Users\Administrator\Desktop\1\.venv\Scripts\python.exe C:\Users\Administrator\Desktop\1\cnn.py 100%|██████████| 169M/169M [00:33<00:00, 4.99MB/s] Traceback (most recent call last): File "<string>", line 1, in <module> File "C:\ProgramData\anaconda3\Lib\multiprocessing\spawn.py", line 122, in spawn_main exitcode = _main(fd, parent_sentinel) ^^^^^^^^^^^^^^^^^^^^^^^^^^ File "C:\ProgramData\anaconda3\Lib\multiprocessing\spawn.py", line 131, in _main prepare(preparation_data) File "C:\ProgramData\anaconda3\Lib\multiprocessing\spawn.py", line 246, in prepare _fixup_main_from_path(data['init_main_from_path']) File "C:\ProgramData\anaconda3\Lib\multiprocessing\spawn.py", line 297, in _fixup_main_from_path main_content = runpy.run_path(main_path, ^^^^^^^^^^^^^^^^^^^^^^^^^ File "<frozen runpy>", line 287, in run_path File "<frozen runpy>", line 98, in _run_module_code File "<frozen runpy>", line 88, in _run_code File "C:\Users\Administrator\Desktop\1\cnn.py", line 136, in <module> trained_model = train_model(model, train_loader, criterion, optimizer, epochs=50) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "C:\Users\Administrator\Desktop\1\cnn.py", line 96, in train_model for i, (inputs, labels) in enumerate(train_loader): ^^^^^^^^^^^^^^^^^^^^^^^ File "C:\Users\Administrator\Desktop\1\.venv\Lib\site-packages\torch\utils\data\dataloader.py", line 493, in __iter__ return self._get_iterator() ^^^^^^^^^^^^^^^^^^^^ File "C:\Users\Administrator\Desktop\1\.venv\Lib\site-packages\torch\utils\data\dataloader.py", line 424, in _get_iterator return _MultiProcessingDataLoaderIter(self) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "C:\Users\Administrator\Desktop\1\.venv\Lib\site-packages\torch\utils\data\dataloader.py", line 1171, in __init__ w.start() File "C:\ProgramData\anaconda3\Lib\multiprocessing\process.py", line 121, in start self._popen = self._Popen(self) ^^^^^^^^^^^^^^^^^ File "C:\ProgramData\anaconda3\Lib\multiprocessing\context.py", line 224, in 
_Popen return _default_context.get_context().Process._Popen(process_obj) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "C:\ProgramData\anaconda3\Lib\multiprocessing\context.py", line 337, in _Popen return Popen(process_obj) ^^^^^^^^^^^^^^^^^^ File "C:\ProgramData\anaconda3\Lib\multiprocessing\popen_spawn_win32.py", line 46, in __init__ prep_data = spawn.get_preparation_data(process_obj._name) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "C:\ProgramData\anaconda3\Lib\multiprocessing\spawn.py", line 164, in get_preparation_data _check_not_importing_main() File "C:\ProgramData\anaconda3\Lib\multiprocessing\spawn.py", line 140, in _check_not_importing_main raise RuntimeError(''' RuntimeError: An attempt has been made to start a new process before the current process has finished its bootstrapping phase. This probably means that you are not using fork to start your child processes and you have forgotten to use the proper idiom in the main module: if __name__ == '__main__': freeze_support() ... The "freeze_support()" line can be omitted if the program is not going to be frozen to produce an executable. To fix this issue, refer to the "Safe importing of main module" section in https://docs.python.org/3/library/multiprocessing.html
时间: 2025-06-23 21:31:31 浏览: 18
<think>根据错误信息，问题在于在 Windows 环境下使用多进程数据加载时，没有将主模块的执行代码放在 `if __name__ == '__main__':` 保护块中。Windows 的 multiprocessing 需要这个保护，以避免无限递归地创建子进程。解决方案：1. 将训练和评估的代码放在 `if __name__ == '__main__':` 块中。2. 在 Windows 环境下，如果使用多进程数据加载（`num_workers > 0`），则必须确保主模块可以被安全导入。注意：在代码的最后部分（即执行训练和评估的部分）需要放入 `if __name__ == '__main__':` 中。另外，错误信息中还提到了 `freeze_support()`，但在非冻结环境（即不是生成可执行文件）中，通常只需要使用 `if __name__ == '__main__':` 即可。修改后的代码示例如下。</think>### 解决Windows环境下PyTorch多进程数据加载错误
您遇到的错误是由于Windows系统下PyTorch多进程数据加载的特殊要求导致的。在Windows上使用多进程时(`num_workers > 0`),必须将主执行代码放在`if __name__ == '__main__':`块中。以下是完整的修正方案:
```python
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
# Training-time preprocessing: light augmentation, then tensor conversion
# and per-channel normalization using CIFAR-100 channel statistics.
_augmentation_steps = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
]
_tensorize_steps = [
    transforms.ToTensor(),
    transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)),
]
transform = transforms.Compose(_augmentation_steps + _tensorize_steps)
# CNN model definition (structure intentionally left as in the original).
class CIFAR100Model(nn.Module):
    """CNN for CIFAR-100: convolutional backbone plus a linear classifier head.

    NOTE(review): the Sequential bodies are placeholders in this snippet —
    the original feature/classifier layers are meant to be dropped in
    unchanged.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor.
        self.features = nn.Sequential(
            # ... keep the original feature-extractor layers here ...
        )
        # Fully connected head producing the class logits.
        self.classifier = nn.Sequential(
            # ... keep the original classifier layers here ...
        )

    def forward(self, x):
        """Extract features, flatten all non-batch dims, then classify."""
        feature_maps = self.features(x)
        flattened = torch.flatten(feature_maps, 1)
        return self.classifier(flattened)
# Training function
def train_model(model, train_loader, criterion, optimizer, epochs=50, scheduler=None):
    """Train `model` on `train_loader` for `epochs` epochs.

    Args:
        model: nn.Module already moved to its target device.
        train_loader: DataLoader yielding (inputs, labels) batches.
        criterion: loss function, e.g. nn.CrossEntropyLoss().
        optimizer: optimizer over model.parameters().
        epochs: number of full passes over the training data.
        scheduler: optional LR scheduler stepped once per epoch with the
            epoch's average loss (ReduceLROnPlateau-style). If omitted,
            falls back to a module-global `scheduler` when one exists,
            matching the original script's behavior.

    Returns:
        The trained model (same object, mutated in place).
    """
    # Infer the device from the model instead of reading a module-level
    # `device` global — the original raised NameError when imported
    # (e.g. by a spawned DataLoader worker re-importing the module).
    device = next(model.parameters()).device
    if scheduler is None:
        # Backward compatibility: the original stepped a global
        # ReduceLROnPlateau scheduler defined in the __main__ block.
        scheduler = globals().get("scheduler")
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            # Progress report every 100 batches.
            if i % 100 == 99:
                print(f'Epoch [{epoch+1}/{epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')
        avg_loss = running_loss / len(train_loader)
        if scheduler is not None:
            scheduler.step(avg_loss)
        print(f'Epoch {epoch+1} complete, Avg Loss: {avg_loss:.4f}')
    print('Training Complete')
    return model
# Evaluation function
def evaluate_model(model, test_loader):
    """Compute and print the model's top-1 accuracy (%) on `test_loader`.

    Args:
        model: trained nn.Module already on its target device.
        test_loader: DataLoader yielding (images, labels) batches.

    Returns:
        Accuracy as a float percentage in [0, 100].
    """
    # Infer the device from the model instead of reading a module-level
    # `device` global — the original raised NameError when imported.
    device = next(model.parameters()).device
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # Predicted class = index of the max logit per row.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')
    return accuracy
# Main entry point (the key fix): on Windows, DataLoader workers are started
# with the "spawn" method, which re-imports this module in each child
# process. Everything that must run only once lives under this guard.
if __name__ == '__main__':
    # 1. Load the datasets (training set uses the augmenting `transform`).
    train_set = torchvision.datasets.CIFAR100(
        root='./data',
        train=True,
        download=True,
        transform=transform
    )
    test_set = torchvision.datasets.CIFAR100(
        root='./data',
        train=False,
        download=True,
        # Test set: no augmentation, but the same normalization statistics.
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))
        ])
    )
    # 2. Create the data loaders (worker count kept low for Windows).
    train_loader = DataLoader(train_set, batch_size=128, shuffle=True, num_workers=2)
    test_loader = DataLoader(test_set, batch_size=100, shuffle=False, num_workers=2)
    # 3. Initialize device and model.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    model = CIFAR100Model().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
    # NOTE(review): `device` and `scheduler` are read as module globals by
    # train_model/evaluate_model — keep these names unchanged.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5)
    # 4. Run training and evaluation.
    trained_model = train_model(model, train_loader, criterion, optimizer, epochs=50)
    accuracy = evaluate_model(trained_model, test_loader)
    # 5. Visualize a few samples.
    classes = train_set.classes
    def imshow(img):
        # Undo normalization (x * std + mean) before displaying.
        img = img * torch.tensor([0.2675, 0.2565, 0.2761]).view(3, 1, 1) + torch.tensor([0.5071, 0.4867, 0.4408]).view(3, 1, 1)
        npimg = img.numpy()
        plt.imshow(np.transpose(npimg, (1, 2, 0)))
        plt.show()
    # Grab one batch from the test set.
    dataiter = iter(test_loader)
    images, labels = next(dataiter)
    # Show the first 4 samples.
    imshow(torchvision.utils.make_grid(images[:4]))
    print('GroundTruth: ', ', '.join(classes[labels[j]] for j in range(4)))
    # Save the trained weights.
    torch.save(trained_model.state_dict(), 'cifar100_model.pth')
    print("Model saved as 'cifar100_model.pth'")
```
### 关键修复点解释:
1. **主程序入口保护**:
```python
if __name__ == '__main__':
```
- 这是Windows系统下多进程的必要条件,防止子进程重复执行主模块代码
- 将所有执行代码(数据加载、模型训练等)放在这个块内
2. **数据加载器优化**:
- 减少`num_workers`数量(通常设置为CPU核心数的1/2)
- 确保测试集使用相同的标准化参数
3. **设备检测**:
```python
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
```
- 明确显示使用的计算设备(CUDA或CPU)
- 帮助确认GPU是否被正确识别
4. **可视化修复**:
- 修正了反归一化操作,使用正确的均值和标准差
- 使用标准化的方式恢复原始图像显示
5. **模型保存**:
```python
torch.save(trained_model.state_dict(), 'cifar100_model.pth')
```
- 训练完成后保存模型权重,便于后续使用
### 替代解决方案(如果问题仍然存在):
如果上述修改后仍然遇到多进程问题,可以使用以下两种替代方案:
**方案1:完全禁用多进程(num_workers=0)**
```python
train_loader = DataLoader(train_set, batch_size=128, shuffle=True, num_workers=0)
test_loader = DataLoader(test_set, batch_size=100, shuffle=False, num_workers=0)
```
**方案2:使用Dataloader的替代库**
```python
# 安装替代库（在命令行中执行，不要放入 Python 脚本）:
#   pip install torchdata
# 在代码中使用
from torchdata.dataloader2 import DataLoader2, MultiProcessingReadingService
train_loader = DataLoader2(
train_set,
reading_service=MultiProcessingReadingService(num_workers=2)
)
```
### Windows系统下的最佳实践:
1. 始终使用`if __name__ == '__main__':`保护主执行代码
2. 将`num_workers`设置为小于CPU核心数的值(推荐2-4)
3. 使用任务管理器监控CPU和内存使用情况
4. 对于大型数据集,考虑使用内存映射文件或更高效的数据格式(如HDF5)
5. 定期保存模型检查点,防止训练中断
阅读全文
相关推荐


















