class Unet(nn.Module):
    """U-Net style encoder/decoder for semantic segmentation, trained from scratch.

    Four down-sampling stages, a bottleneck, and four up-sampling stages with
    skip connections. Unlike the original paper, the 3x3 convolutions are
    padded and followed by BatchNorm, so the output has the same height and
    width as the input and no cropping of skip features is needed.

    Args:
        num_classes: Number of output channels (one logit map per class).
        in_channels: Number of channels of the input image.
        base_channels: Width of the first stage; every deeper stage doubles
            it. Smaller values reduce memory use and compute.

    Input:  tensor of shape (N, in_channels, H, W).
    Output: raw logits of shape (N, num_classes, H, W).
    """

    def __init__(self, num_classes, in_channels=3, base_channels=64):
        super().__init__()
        # Channel widths for the four encoder stages plus the bottleneck.
        widths = [base_channels * 2 ** depth for depth in range(5)]

        self.encoders = nn.ModuleList()
        previous = in_channels
        for width in widths[:-1]:
            self.encoders.append(self._double_conv(previous, width))
            previous = width
        self.pool = nn.MaxPool2d(kernel_size=2)

        self.bottleneck = self._double_conv(widths[-2], widths[-1])

        self.upsamplers = nn.ModuleList()
        self.decoders = nn.ModuleList()
        previous = widths[-1]
        for width in reversed(widths[:-1]):
            self.upsamplers.append(
                nn.ConvTranspose2d(previous, width, kernel_size=2, stride=2)
            )
            # The decoder sees the upsampled features concatenated with the skip.
            self.decoders.append(self._double_conv(2 * width, width))
            previous = width

        self.head = nn.Conv2d(widths[0], num_classes, kernel_size=1)

    @staticmethod
    def _double_conv(in_ch, out_ch):
        """Return two (3x3 conv -> BatchNorm -> ReLU) layers as one module."""
        return nn.Sequential(
            nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        skips = []
        for encoder in self.encoders:
            x = encoder(x)
            skips.append(x)
            x = self.pool(x)

        x = self.bottleneck(x)

        for upsample, decoder, skip in zip(
            self.upsamplers, self.decoders, reversed(skips)
        ):
            x = upsample(x)
            # Pooling floors odd sizes, so the upsampled map can be one pixel
            # short of the skip; resize it so the two can be concatenated.
            if x.shape[-2:] != skip.shape[-2:]:
                x = F.interpolate(
                    x, size=skip.shape[-2:], mode="bilinear", align_corners=False
                )
            x = decoder(torch.cat([skip, x], dim=1))

        out = self.head(x)
        return out
我需要的输出结果如下:图片按照代码和题目要求输出,包括 Original Image、Ground Truth、Prediction 三部分,每一部分都要有对应的输出,并且所有参与测试的图片都要输出。请补全上述代码,程序输出使用英文:
输出结果:
Starting training...
Epoch 1/20: 100%|██████████| 46/46 [00:15<00:00, 3.04it/s, loss=2.49]Epoch 1/20, Training Loss: 2.8437
Validation Loss: 2.4612
New best model with validation loss: 2.4612
Epoch 2/20: 100%|██████████| 46/46 [00:15<00:00, 3.00it/s, loss=1.59]Epoch 2/20, Training Loss: 2.0684
Validation Loss: 1.5868
New best model with validation loss: 1.5868
Epoch 3/20: 100%|██████████| 46/46 [00:15<00:00, 3.00it/s, loss=1.26]Epoch 3/20, Training Loss: 1.3412
Validation Loss: 1.1896
New best model with validation loss: 1.1896
Epoch 4/20: 100%|██████████| 46/46 [00:15<00:00, 3.02it/s, loss=1.16]Epoch 4/20, Training Loss: 1.0508
Validation Loss: 1.0617
New best model with validation loss: 1.0617
Epoch 5/20: 100%|██████████| 46/46 [00:15<00:00, 2.99it/s, loss=0.812]
Epoch 5/20, Training Loss: 0.9584
Validation Loss: 1.0257
New best model with validation loss: 1.0257
Epoch 6/20: 100%|██████████| 46/46 [00:15<00:00, 2.96it/s, loss=0.841]Epoch 6/20, Training Loss: 0.9038
Validation Loss: 1.0027
New best model with validation loss: 1.0027
Epoch 7/20: 100%|██████████| 46/46 [00:16<00:00, 2.84it/s, loss=0.77]Epoch 7/20, Training Loss: 0.8736
Validation Loss: 0.9764
New best model with validation loss: 0.9764
Epoch 8/20: 100%|██████████| 46/46 [00:16<00:00, 2.87it/s, loss=0.809]Epoch 8/20, Training Loss: 0.8373
Validation Loss: 0.9694
New best model with validation loss: 0.9694
Epoch 9/20: 100%|██████████| 46/46 [00:15<00:00, 2.99it/s, loss=1.04]Epoch 9/20, Training Loss: 0.8129
Validation Loss: 0.9442
New best model with validation loss: 0.9442
Epoch 10/20: 100%|██████████| 46/46 [00:15<00:00, 3.00it/s, loss=0.838]Epoch 10/20, Training Loss: 0.7859
Validation Loss: 0.9309
New best model with validation loss: 0.9309
Epoch 11/20: 100%|██████████| 46/46 [00:15<00:00, 3.01it/s, loss=0.799]Epoch 11/20, Training Loss: 0.7673
Validation Loss: 0.9087
New best model with validation loss: 0.9087
Epoch 12/20: 100%|██████████| 46/46 [00:15<00:00, 3.02it/s, loss=0.673]Epoch 12/20, Training Loss: 0.7386
Validation Loss: 0.9185
Epoch 13/20: 100%|██████████| 46/46 [00:15<00:00, 3.00it/s, loss=0.638]Epoch 13/20, Training Loss: 0.6899
Validation Loss: 0.8576
New best model with validation loss: 0.8576
Epoch 14/20: 100%|██████████| 46/46 [00:15<00:00, 3.01it/s, loss=0.553]Epoch 14/20, Training Loss: 0.6538
Validation Loss: 0.8267
New best model with validation loss: 0.8267
Epoch 15/20: 100%|██████████| 46/46 [00:14<00:00, 3.07it/s, loss=0.765]
Epoch 15/20, Training Loss: 0.6342
Validation Loss: 0.8240
New best model with validation loss: 0.8240
Epoch 16/20: 100%|██████████| 46/46 [00:15<00:00, 2.99it/s, loss=0.688]Epoch 16/20, Training Loss: 0.6203
Validation Loss: 0.8336
Epoch 17/20: 100%|██████████| 46/46 [00:15<00:00, 2.99it/s, loss=0.518]Epoch 17/20, Training Loss: 0.6099
Validation Loss: 0.8014
New best model with validation loss: 0.8014
Epoch 18/20: 100%|██████████| 46/46 [00:15<00:00, 2.93it/s, loss=0.444]Epoch 18/20, Training Loss: 0.6023
Validation Loss: 0.8169
Epoch 19/20: 100%|██████████| 46/46 [00:15<00:00, 2.98it/s, loss=0.822]Epoch 19/20, Training Loss: 0.5885
Validation Loss: 0.8045
Epoch 20/20: 100%|██████████| 46/46 [00:15<00:00, 2.90it/s, loss=0.425]
Epoch 20/20, Training Loss: 0.5659
Validation Loss: 0.7840
New best model with validation loss: 0.7840
Training finished!
<ipython-input-5-1f21aef180ff>:213: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
model.load_state_dict(torch.load("best_segmentation_model.pth"))
Model saved to simple_segmentation_model.pth
Visualizing model predictions:
而且还要满足题目要求:
Task 1. Implement Unet and train it on the PASCAL VOC dataset
The Unet paper is here: https://arxiv.org/pdf/1505.04597
Use any number of tricks that you can
You cannot use pretrained models, though (until we learn about transfer learning)
You must achieve a mean IoU above 15% (the evaluation code is at the end of the notebook)
Grading rubric:
mean IOU > 15, 10 points
mean 12 < IOU <= 15, 8 points
mean 10 <= IOU <= 12, 5 points
mean IOU < 10, 0 points
Important: you need to achieve a mean IoU of 10 or more, computed over all 21 classes of PASCAL VOC
At the end of the notebook you must execute the last cell and pass the tests; otherwise you will receive 0 points.
其中不可修改的代码要保证全部正常输出:
import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torchvision.transforms.functional as TF
import torchvision.models as models
from torchvision.datasets import VOCSegmentation
from tqdm import tqdm
# Fix PyTorch's global RNG seed.
torch.manual_seed(42)

# Prefer the GPU when CUDA is available, otherwise run on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

# --- Data and training hyper-parameters ---
DATA_DIR = "./data"
BATCH_SIZE = 32
NUM_EPOCHS = 20          # raised to get better results
LEARNING_RATE = 0.0001   # lowered to improve stability
IMAGE_SIZE = (224, 224)

# PASCAL VOC has 21 classes (including background).
NUM_CLASSES = 21

# One (label, RGB colour) row per class index, kept side by side so the name
# list and the colour map used for visualization cannot drift apart.
_VOC_PALETTE = (
    ('background', (0, 0, 0)),
    ('aeroplane', (128, 0, 0)),
    ('bicycle', (0, 128, 0)),
    ('bird', (128, 128, 0)),
    ('boat', (0, 0, 128)),
    ('bottle', (128, 0, 128)),
    ('bus', (0, 128, 128)),
    ('car', (128, 128, 128)),
    ('cat', (64, 0, 0)),
    ('chair', (192, 0, 0)),
    ('cow', (64, 128, 0)),
    ('diningtable', (192, 128, 0)),
    ('dog', (64, 0, 128)),
    ('horse', (192, 0, 128)),
    ('motorbike', (64, 128, 128)),
    ('person', (192, 128, 128)),
    ('pottedplant', (0, 64, 0)),
    ('sheep', (128, 64, 0)),
    ('sofa', (0, 192, 0)),
    ('train', (128, 192, 0)),
    ('tvmonitor', (0, 64, 128)),
)

# PASCAL VOC class labels for visualization.
VOC_CLASSES = [label for label, _ in _VOC_PALETTE]

# Color map for visualization (one [R, G, B] list per class index).
VOC_COLORMAP = [list(rgb) for _, rgb in _VOC_PALETTE]
class SegmentationTransform:
    """Joint preprocessing of an image and its segmentation mask.

    Geometric operations are applied to the image and the mask together so the
    two stay aligned; the mask always uses nearest-neighbour interpolation so
    that no new label values are invented.

    Args:
        size: Target size passed to ``TF.resize`` (e.g. ``(224, 224)``).
        is_train: When true, random flip / rotation / colour augmentation is
            applied before resizing; otherwise only resize + normalize.
    """

    def __init__(self, size, is_train=False):
        self.size = size
        self.is_train = is_train

    def __call__(self, image, mask):
        """Return ``(image, mask)`` as a normalized image tensor and a long mask tensor."""
        # torchvision's enum is the documented way to select interpolation;
        # it is reached through the file's existing `transforms` import.
        bilinear = transforms.InterpolationMode.BILINEAR
        nearest = transforms.InterpolationMode.NEAREST

        # Random horizontal flip (about half of the training samples).
        if self.is_train and np.random.random() > 0.5:
            image = TF.hflip(image)
            mask = TF.hflip(mask)

        # Random small rotation (about 30% of the training samples).
        if self.is_train and np.random.random() > 0.7:
            # randint's upper bound is exclusive, so 11 gives a symmetric
            # range of -10..10 degrees.
            angle = int(np.random.randint(-10, 11))
            image = TF.rotate(image, angle, interpolation=bilinear)
            mask = TF.rotate(mask, angle, interpolation=nearest)

        # Random brightness/contrast jitter; photometric, so image only.
        if self.is_train and np.random.random() > 0.7:
            brightness_factor = np.random.uniform(0.8, 1.2)
            contrast_factor = np.random.uniform(0.8, 1.2)
            image = TF.adjust_brightness(image, brightness_factor)
            image = TF.adjust_contrast(image, contrast_factor)

        image = TF.resize(image, self.size, interpolation=bilinear)
        mask = TF.resize(mask, self.size, interpolation=nearest)

        image = TF.to_tensor(image)

        mask_array = np.array(mask)
        mask_array[mask_array == 255] = 0  # Set ignore pixels to background
        mask = torch.from_numpy(mask_array).long()

        # Normalize image with fixed per-channel mean/std.
        image = TF.normalize(image, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        return image, mask
class VOCDatasetWrapper(Dataset):
    """Dataset adapter that applies a joint (image, mask) transform on access.

    Args:
        dataset: Indexable object whose items are ``(image, mask)`` pairs.
        transform: Optional callable ``transform(image, mask)`` returning the
            transformed ``(image, mask)`` pair. ``None`` returns items as-is.
    """

    def __init__(self, dataset, transform=None):
        self.dataset = dataset
        self.transform = transform

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        image, mask = self.dataset[idx]
        # Compare against None explicitly so a transform object that happens
        # to be falsy (e.g. defines __len__ == 0) is still applied.
        if self.transform is not None:
            image, mask = self.transform(image, mask)
        return image, mask
# Raw PASCAL VOC 2012 segmentation splits, rooted at DATA_DIR (train first, then val).
voc_train, voc_val = (
    VOCSegmentation(root=DATA_DIR, year='2012', image_set=split, download=True)
    for split in ('train', 'val')
)

# Augmentation is enabled for the training split only.
train_transform = SegmentationTransform(size=IMAGE_SIZE, is_train=True)
val_transform = SegmentationTransform(size=IMAGE_SIZE, is_train=False)

# Pair each raw split with its transform.
train_dataset = VOCDatasetWrapper(dataset=voc_train, transform=train_transform)
val_dataset = VOCDatasetWrapper(dataset=voc_val, transform=val_transform)

# Batch loaders: the training loader shuffles, the validation loader does not.
# Both use a reduced worker count of 2.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2,
)
# Display some examples from the dataset
def visualize_examples(dataset, num_examples=3):
    """Plot randomly chosen raw images next to their colour-coded masks.

    Args:
        dataset: Wrapper exposing the untransformed data as ``dataset.dataset``,
            whose items are ``(image, mask)`` pairs.
        num_examples: Number of rows (samples) to draw.
    """
    # squeeze=False keeps `axes` two-dimensional, so axes[i, j] indexing also
    # works when num_examples == 1.
    _, axes = plt.subplots(
        num_examples, 2, figsize=(12, 4 * num_examples), squeeze=False
    )

    for i in range(num_examples):
        # Get a sample
        idx = np.random.randint(0, len(dataset))
        image, mask = dataset.dataset[idx]

        # Original image
        axes[i, 0].imshow(image)
        axes[i, 0].set_title(f"Original Image {idx}")
        axes[i, 0].axis('off')

        # Colored mask: paint every known class index with its palette colour;
        # any other label value stays black.
        mask_array = np.array(mask)
        colored_mask = np.zeros(mask_array.shape[:2] + (3,), dtype=np.uint8)
        for class_idx, color in enumerate(VOC_COLORMAP):
            colored_mask[mask_array == class_idx] = color

        axes[i, 1].imshow(colored_mask)
        axes[i, 1].set_title(f"Segmentation Mask {idx}")
        axes[i, 1].axis('off')

    plt.tight_layout()
    plt.show()
# Visualize examples before training: announce the preview, then plot
# randomly picked samples from the training dataset.
print("Displaying dataset examples:")
visualize_examples(train_dataset)
import torch
def evaluate_segmentation(model, val_loader, num_classes, device='cuda'):
    """Compute per-class IoU and mean IoU of ``model`` over ``val_loader``.

    Args:
        model: Module mapping an image batch to logits with the class
            dimension at index 1. It is switched to eval mode and left there.
        val_loader: Iterable of ``(images, masks)`` batches; ``masks`` holds
            integer class labels, with 255 treated as "ignore".
        num_classes: Number of classes (side length of the confusion matrix).
        device: Device on which inference and accumulation run.

    Returns:
        ``(class_iou, mean_iou)``: ``class_iou`` is a list with one float per
        class (``nan`` for a class that appears in neither the ground truth
        nor the predictions); ``mean_iou`` is the mean of the non-nan entries,
        or 0.0 if there are none.
    """
    model.eval()
    # Rows index the ground-truth class, columns the predicted class.
    confusion_matrix = torch.zeros(
        num_classes, num_classes, dtype=torch.long, device=device
    )
    ignore_index = 255

    with torch.no_grad():
        for images, masks in val_loader:
            images = images.to(device)
            masks = masks.to(device)

            outputs = model(images)
            # reshape (not view) so non-contiguous tensors are handled too.
            preds = torch.argmax(outputs, dim=1).reshape(-1)  # [B * H * W]
            gt = masks.reshape(-1)

            # Filter out ignore pixels
            valid_mask = gt != ignore_index
            preds = preds[valid_mask]
            gt = gt[valid_mask]

            # Vectorized confusion matrix update: encode each (gt, pred) pair
            # as one flat index and count occurrences.
            indices = gt * num_classes + preds
            bins = torch.bincount(indices, minlength=num_classes * num_classes)
            confusion_matrix += bins.reshape(num_classes, num_classes)

    # Move confusion matrix back to CPU before converting to Python numbers.
    confusion_matrix = confusion_matrix.cpu()

    # IoU_c = TP / (TP + FP + FN) = TP / (row_sum + col_sum - TP)
    true_pos = confusion_matrix.diagonal()
    union = confusion_matrix.sum(dim=1) + confusion_matrix.sum(dim=0) - true_pos
    class_iou = [
        tp / denom if denom else float('nan')
        for tp, denom in zip(true_pos.tolist(), union.tolist())
    ]

    # mean_iou over the classes that actually occurred
    valid_iou = [x for x in class_iou if not np.isnan(x)]
    mean_iou = float(np.mean(valid_iou)) if valid_iou else 0.0

    return class_iou, mean_iou
# Score the trained model on the validation loader.
class_iou, mean_iou = evaluate_segmentation(
    model=trained_model,
    val_loader=val_loader,
    num_classes=NUM_CLASSES,
    device=device
)

# Print results: one line per class, then the mean once after the loop.
for i, iou_val in enumerate(class_iou):
    print(f"Class {i} IoU = {iou_val:.4f}")
print(f"Mean IoU over {len(class_iou)} classes = {mean_iou:.4f}")
尤其是这部分一定要保证可以正常输出但不能更改代码:
# Grading cell. mean_iou is a fraction here, so 0.10 corresponds to "10" in
# the messages below.
assert mean_iou > 0.10, 'Your IOU must be larger than 10 to get the grade'

if mean_iou > 0.15:
    print('Full grade, 10 points')
elif 0.12 < mean_iou <= 0.15:
    print('Partial grade, 8 points')
elif 0.10 < mean_iou <= 0.12:
    print('Partial grade, 5 points')
else:
    # Only reachable when the assert above is disabled (e.g. python -O).
    print('IOU is less than 10, 0 points')

print('All tests pass!')
最新发布