```python
import os
import torch
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torchvision.datasets import CocoDetection
import torchvision.transforms as T
from transformers import DetrImageProcessor, DetrForObjectDetection
from pycocotools.coco import COCO
from tqdm import tqdm

# === Configuration ===
image_dir = "coco_aug/images"
ann_file = "coco_aug/annotations/instances_train_augmented.json"
save_path = "detr_model_loss_plot"
epochs = 50
batch_size = 8
lr = 5e-6

# === Category metadata ===
coco = COCO(ann_file)
cats = coco.loadCats(coco.getCatIds())
id2label = {cat["id"]: cat["name"] for cat in cats}
label2id = {v: k for k, v in id2label.items()}
num_classes = len(id2label)

# === Load model and processor ===
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained(
    "facebook/detr-resnet-50",
    num_labels=num_classes,
    ignore_mismatched_sizes=True
)
model.config.id2label = id2label
model.config.label2id = label2id

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"🖥️ Using device: {device}")
model.to(device)
model.train()

# === Load dataset ===
transform = T.Compose([
    T.Resize((512, 512)),  # can be lowered to 320 for speed
    T.ToTensor()
])
dataset = CocoDetection(root=image_dir, annFile=ann_file, transform=transform)

def collate_fn(batch):
    return list(zip(*batch))

dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)

# === Loss log ===
loss_list = []

# === Training loop ===
for epoch in range(1, epochs + 1):
    total_loss = 0.0
    for images, targets in tqdm(dataloader, desc=f"Epoch {epoch}"):
        valid_images = []
        valid_targets = []
        for i, anns in enumerate(targets):
            if len(anns) == 0:
                continue
            valid_images.append(images[i])
            valid_targets.append({
                "image_id": anns[0]["image_id"],
                "annotations": [
                    {
                        "bbox": ann["bbox"],
                        "category_id": ann["category_id"],
                        "area": ann["bbox"][2] * ann["bbox"][3],
                        "iscrowd": ann.get("iscrowd", 0)
                    }
                    for ann in anns
                ]
            })
        if not valid_images:
            continue
        inputs = processor(
            images=valid_images,
            annotations=valid_targets,
            return_tensors="pt",
            do_rescale=False  # ✅ avoid rescaling twice (ToTensor already maps to [0, 1])
        )
        inputs = {k: (v.to(device) if isinstance(v, torch.Tensor) else v) for k, v in inputs.items()}
        outputs = model(**inputs)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        total_loss += loss.item()
    avg_loss = total_loss / len(dataloader)
    loss_list.append(avg_loss)
    print(f"[Epoch {epoch}] Loss: {avg_loss:.4f}")

# === Save model ===
model.save_pretrained(save_path)
processor.save_pretrained(save_path)
print(f"✅ Model saved to: {save_path}")

# === Plot loss curve ===
plt.figure(figsize=(10, 5))
plt.plot(loss_list, label="Total Loss", color="blue")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Total Loss Curve")
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.savefig("loss_curve.png")
plt.show()
print("📉 Loss curve saved as loss_curve.png")
```

Running this raises the following error:

```
Traceback (most recent call last):
  File "C:\Users\30634\my_detr_project\train_detr.py", line 91, in <module>
    outputs = model(**inputs)
              ^^^^^^^^^^^^^^^
  File "C:\Users\30634\my_detr_project\.venv\Lib\site-packages\torch\nn\modules\module.py", line 1751, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\30634\my_detr_project\.venv\Lib\site-packages\torch\nn\modules\module.py", line 1762, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\30634\my_detr_project\.venv\Lib\site-packages\transformers\models\detr\modeling_detr.py", line 1417, in forward
    loss, loss_dict, auxiliary_outputs = self.loss_function(
                                         ^^^^^^^^^^^^^^^^^^^
  File "C:\Users\30634\my_detr_project\.venv\Lib\site-packages\transformers\loss\loss_for_object_detection.py", line 552, in ForObjectDetectionLoss
    loss_dict = criterion(outputs_loss, labels)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\30634\my_detr_project\.venv\Lib\site-packages\torch\nn\modules\module.py", line 1751, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\30634\my_detr_project\.venv\Lib\site-packages\torch\nn\modules\module.py", line 1762, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\30634\my_detr_project\.venv\Lib\site-packages\transformers\loss\loss_for_object_detection.py", line 253, in forward
    indices = self.matcher(outputs_without_aux, targets)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\30634\my_detr_project\.venv\Lib\site-packages\torch\nn\modules\module.py", line 1751, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\30634\my_detr_project\.venv\Lib\site-packages\torch\nn\modules\module.py", line 1762, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\30634\my_detr_project\.venv\Lib\site-packages\torch\utils\_contextlib.py", line 116, in decorate_context
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\30634\my_detr_project\.venv\Lib\site-packages\transformers\loss\loss_for_object_detection.py", line 350, in forward
    bbox_cost = torch.cdist(out_bbox, target_bbox, p=1)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\30634\my_detr_project\.venv\Lib\site-packages\torch\functional.py", line 1505, in cdist
    return _VF.cdist(x1, x2, p, None)  # type: ignore[attr-defined]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument x2 in method wrapper_CUDA___cdist_forward)
```
The traceback points at `torch.cdist`. `torch.cdist` requires both of its inputs (`out_bbox` and `target_bbox`) to live on the same device, but here one tensor is on the GPU (`cuda:0`) while the other is still on the CPU: the model's predicted boxes are on the GPU, while the target boxes handed to DETR's Hungarian matcher were never moved there.
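The failing call is easy to reproduce in isolation. A minimal sketch (it assumes a CUDA-capable machine; the variable names simply mirror the traceback):

```python
import torch

out_bbox = torch.rand(100, 4, device="cuda")  # predictions, on the GPU
target_bbox = torch.rand(5, 4)                # targets, accidentally left on the CPU

# RuntimeError: Expected all tensors to be on the same device,
# but found at least two devices, cuda:0 and cpu!
cost = torch.cdist(out_bbox, target_bbox, p=1)
```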
### Problem Analysis
1. **Where the error comes from**:
   - `out_bbox` (the model's predicted boxes) and `target_bbox` (the ground-truth boxes) end up on different devices.
   - The line `inputs = {k: (v.to(device) if isinstance(v, torch.Tensor) else v) for k, v in inputs.items()}` only moves top-level tensors such as `pixel_values` and `pixel_mask`. The processor returns `inputs["labels"]` as a Python *list* of dicts of tensors, so the list fails the `isinstance` check and the targets stay on the CPU.
2. **Fix**:
   - After calling the processor, explicitly move every tensor inside each entry of `inputs["labels"]` to the same device as the model (see the inspection sketch after this list).
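To confirm the diagnosis, you can inspect what the processor returns for one batch. A sketch that reuses `processor`, `valid_images`, and `valid_targets` from the script (the exact key set of each label dict may vary with the `transformers` version):

```python
inputs = processor(
    images=valid_images,
    annotations=valid_targets,
    return_tensors="pt",
    do_rescale=False,
)
print(type(inputs["labels"]))               # <class 'list'>, not a torch.Tensor
print(inputs["labels"][0].keys())           # e.g. 'boxes', 'class_labels', 'area', ...
print(inputs["labels"][0]["boxes"].device)  # cpu, because the one-liner never touched it
```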
---
### Fixed Code
Here is the corrected part of the script (only the training loop changes; everything before and after stays the same):
```python
# === Training loop ===
for epoch in range(1, epochs + 1):
    total_loss = 0.0
    for images, targets in tqdm(dataloader, desc=f"Epoch {epoch}"):
        valid_images = []
        valid_targets = []
        for i, anns in enumerate(targets):
            if len(anns) == 0:
                continue  # skip images without annotations
            valid_images.append(images[i])
            valid_targets.append({
                "image_id": anns[0]["image_id"],
                "annotations": [
                    {
                        # keep bboxes as plain lists; the processor converts
                        # them to tensors itself
                        "bbox": ann["bbox"],
                        "category_id": ann["category_id"],
                        "area": ann["bbox"][2] * ann["bbox"][3],
                        "iscrowd": ann.get("iscrowd", 0)
                    }
                    for ann in anns
                ]
            })
        if not valid_images:
            continue

        inputs = processor(
            images=valid_images,
            annotations=valid_targets,
            return_tensors="pt",
            do_rescale=False  # ✅ avoid rescaling twice (ToTensor already maps to [0, 1])
        )
        # Move the top-level tensors (pixel_values, pixel_mask) to the device ...
        inputs = {k: (v.to(device) if isinstance(v, torch.Tensor) else v) for k, v in inputs.items()}
        # ... and move the targets as well: "labels" is a *list* of dicts of
        # tensors, so the isinstance check above leaves it on the CPU
        inputs["labels"] = [
            {k: (v.to(device) if isinstance(v, torch.Tensor) else v) for k, v in label.items()}
            for label in inputs["labels"]
        ]

        outputs = model(**inputs)
        loss = outputs.loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / len(dataloader)
    loss_list.append(avg_loss)
    print(f"[Epoch {epoch}] Loss: {avg_loss:.4f}")
```
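If you prefer not to special-case `labels`, a small recursive helper can move any nested structure of tensors in one call. This is a sketch, not part of the original script, and the function name `to_device` is mine:

```python
def to_device(obj, device):
    """Recursively move tensors in nested dicts/lists/tuples to `device`."""
    if isinstance(obj, torch.Tensor):
        return obj.to(device)
    if isinstance(obj, dict):
        return {k: to_device(v, device) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return type(obj)(to_device(v, device) for v in obj)
    return obj  # non-tensor leaves (ints, strings, None) pass through unchanged

# The two move statements in the loop then collapse to:
# inputs = to_device(dict(inputs), device)
```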
---
### Explanation
1. **Moving the targets to the GPU**:
   - The processor packs the targets into `inputs["labels"]`, a list of dicts whose values (`boxes`, `class_labels`, `area`, ...) are tensors. Because the list itself is not a tensor, the generic `.to(device)` comprehension skips it; the extra pass moves each of those tensors explicitly.
   - This guarantees that `out_bbox` and `target_bbox` are on the same device when the Hungarian matcher calls `torch.cdist`.
2. **Input handling**:
   - The `processor` converts the images and COCO-style annotations into model-ready tensors; `pixel_values` and `pixel_mask` are moved to `device` by the first comprehension.
3. **Forward and backward pass**:
   - `model(**inputs)` now runs entirely on one device; the loss is computed, backpropagated, and the optimizer steps exactly as before.
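As a cheap guard against this class of bug, you can assert that everything is on the expected device right before the forward pass. A sketch; drop it once training is stable:

```python
# Fail fast if any input tensor was left behind on the CPU.
assert inputs["pixel_values"].device.type == device.type
for label in inputs["labels"]:
    for k, v in label.items():
        if isinstance(v, torch.Tensor):
            assert v.device.type == device.type, f"label field {k!r} is on {v.device}"
```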