"""
Train a YOLOv5 model on a custom dataset.
Models and datasets download automatically from the latest YOLOv5 release.
Models: https://github.com/ultralytics/yolov5/tree/master/models
Datasets: https://github.com/ultralytics/yolov5/tree/master/data
Tutorial: https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data
Usage:
$ python path/to/train.py --data coco128.yaml --weights yolov5s.pt --img 640 # from pretrained (RECOMMENDED)
$ python path/to/train.py --data coco128.yaml --weights '' --cfg yolov5s.yaml --img 640 # from scratch
"""
import argparse
import math
import os
import random
import sys
import time
from copy import deepcopy
from datetime import datetime
from pathlib import Path
import numpy as np
import torch
import torch.distributed as dist
import torch.nn as nn
import yaml
from torch.cuda import amp
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.optim import SGD, Adam, AdamW, lr_scheduler
from tqdm import tqdm
FILE = Path(__file__).resolve()
ROOT = FILE.parents[0]
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT))
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))
import val
from models.experimental import attempt_load
from models.yolo import Model
from utils.autoanchor import check_anchors
from utils.autobatch import check_train_batch_size
from utils.callbacks import Callbacks
from utils.datasets import create_dataloader
from utils.downloads import attempt_download
from utils.general import (LOGGER, check_dataset, check_file, check_git_status, check_img_size, check_requirements,
check_suffix, check_yaml, colorstr, get_latest_run, increment_path, init_seeds,
intersect_dicts, labels_to_class_weights, labels_to_image_weights, methods, one_cycle,
print_args, print_mutation, strip_optimizer)
from utils.loggers import Loggers
from utils.loggers.wandb.wandb_utils import check_wandb_resume
from utils.loss import ComputeLoss
from utils.metrics import fitness
from utils.plots import plot_evolve, plot_labels
from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first
LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1))
RANK = int(os.getenv('RANK', -1))
WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))
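"""
DDP environment sketch (values are set by the torch.distributed launcher, e.g.
    $ python -m torch.distributed.run --nproc_per_node 2 train.py --device 0,1):
LOCAL_RANK is the GPU index on this node, RANK the global process index and
WORLD_SIZE the total process count; a plain single-GPU run keeps the defaults
(-1, -1, 1).
"""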
def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictionary
save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze = \
Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \
opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze
w = save_dir / 'weights'
(w.parent if evolve else w).mkdir(parents=True, exist_ok=True)
last, best = w / 'last.pt', w / 'best.pt'
if isinstance(hyp, str):
with open(hyp, errors='ignore') as f:
hyp = yaml.safe_load(f)
LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))
if not evolve:
with open(save_dir / 'hyp.yaml', 'w') as f:
yaml.safe_dump(hyp, f, sort_keys=False)
with open(save_dir / 'opt.yaml', 'w') as f:
yaml.safe_dump(vars(opt), f, sort_keys=False)
data_dict = None
if RANK in [-1, 0]:
loggers = Loggers(save_dir, weights, opt, hyp, LOGGER)
if loggers.wandb:
data_dict = loggers.wandb.data_dict
if resume:
weights, epochs, hyp, batch_size = opt.weights, opt.epochs, opt.hyp, opt.batch_size
for k in methods(loggers):
callbacks.register_action(k, callback=getattr(loggers, k))
plots = not evolve
cuda = device.type != 'cpu'
init_seeds(1 + RANK)
with torch_distributed_zero_first(LOCAL_RANK):
data_dict = data_dict or check_dataset(data)
train_path, val_path = data_dict['train'], data_dict['val']
nc = 1 if single_cls else int(data_dict['nc'])
names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names']
assert len(names) == nc, f'{len(names)} names found for nc={nc} dataset in {data}'
is_coco = isinstance(val_path, str) and val_path.endswith('coco/val2017.txt')
check_suffix(weights, '.pt')
pretrained = weights.endswith('.pt')
if pretrained:
with torch_distributed_zero_first(LOCAL_RANK):
weights = attempt_download(weights)
ckpt = torch.load(weights, map_location='cpu')
"""
两种加载模型的方式: opt.cfg / ckpt['model'].yaml
使用resume-断点训练: 选择ckpt['model']yaml创建模型, 且不加载anchor
使用断点训练时,保存的模型会保存anchor,所以不需要加载
"""
model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)
exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else []
csd = ckpt['model'].float().state_dict()
csd = intersect_dicts(csd, model.state_dict(), exclude=exclude)
model.load_state_dict(csd, strict=False)
LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}')
else:
model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)
freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))]
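    """
    Illustrative expansions of the freeze list above (hypothetical values):
        --freeze 10     -> ['model.0.', 'model.1.', ..., 'model.9.']
        --freeze 0 1 2  -> ['model.0.', 'model.1.', 'model.2.']
    A parameter is frozen when any of these prefixes occurs in its name.
    """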
for k, v in model.named_parameters():
v.requires_grad = True
if any(x in k for x in freeze):
LOGGER.info(f'freezing {k}')
v.requires_grad = False
    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2)
if RANK == -1 and batch_size == -1:
batch_size = check_train_batch_size(model, imgsz)
loggers.on_params_update({"batch_size": batch_size})
"""
nbs = 64
batchsize = 16
accumulate = 64 / 16 = 4
模型梯度累计accumulate次之后就更新一次模型 相当于使用更大batch_size
"""
nbs = 64
accumulate = max(round(nbs / batch_size), 1)
hyp['weight_decay'] *= batch_size * accumulate / nbs
LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}")
g0, g1, g2 = [], [], []
for v in model.modules():
if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
g2.append(v.bias)
if isinstance(v, nn.BatchNorm2d):
g0.append(v.weight)
elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
g1.append(v.weight)
if opt.optimizer == 'Adam':
optimizer = Adam(g0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))
elif opt.optimizer == 'AdamW':
optimizer = AdamW(g0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))
else:
optimizer = SGD(g0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
optimizer.add_param_group({'params': g1, 'weight_decay': hyp['weight_decay']})
optimizer.add_param_group({'params': g2})
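    """
    Grouping rationale: weight decay is applied only to conv/linear weights (g1);
    BatchNorm weights (g0) and all biases (g2) are left undecayed, a common practice
    that tends to help accuracy.
    """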
LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups "
f"{len(g0)} weight (no decay), {len(g1)} weight, {len(g2)} bias")
del g0, g1, g2
if opt.cos_lr:
lf = one_cycle(1, hyp['lrf'], epochs)
else:
lf = lambda x: (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf']
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
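    """
    Schedule sketch (assuming epochs=300, lrf=0.01): the linear lf gives lf(0)=1.0,
    lf(150)=0.505 and lf(300)=0.01, while --cos-lr decays along a one-cycle cosine
    from 1.0 to lrf. Either way the epoch lr is lr0 * lf(epoch).
    """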
ema = ModelEMA(model) if RANK in [-1, 0] else None
start_epoch, best_fitness = 0, 0.0
if pretrained:
if ckpt['optimizer'] is not None:
optimizer.load_state_dict(ckpt['optimizer'])
best_fitness = ckpt['best_fitness']
if ema and ckpt.get('ema'):
ema.ema.load_state_dict(ckpt['ema'].float().state_dict())
ema.updates = ckpt['updates']
start_epoch = ckpt['epoch'] + 1
if resume:
assert start_epoch > 0, f'{weights} training to {epochs} epochs is finished, nothing to resume.'
if epochs < start_epoch:
LOGGER.info(f"{weights} has been trained for {ckpt['epoch']} epochs. Fine-tuning for {epochs} more epochs.")
epochs += ckpt['epoch']
del ckpt, csd
if cuda and RANK == -1 and torch.cuda.device_count() > 1:
LOGGER.warning('WARNING: DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.\n'
'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.')
model = torch.nn.DataParallel(model)
if opt.sync_bn and cuda and RANK != -1:
model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
LOGGER.info('Using SyncBatchNorm()')
train_loader, dataset = create_dataloader(train_path, imgsz, batch_size // WORLD_SIZE, gs, single_cls,
hyp=hyp, augment=True, cache=None if opt.cache == 'val' else opt.cache,
rect=opt.rect, rank=LOCAL_RANK, workers=workers,
image_weights=opt.image_weights, quad=opt.quad,
prefix=colorstr('train: '), shuffle=True)
mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max())
nb = len(train_loader)
assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}'
if RANK in [-1, 0]:
val_loader = create_dataloader(val_path, imgsz, batch_size // WORLD_SIZE * 2, gs, single_cls,
hyp=hyp, cache=None if noval else opt.cache,
rect=True, rank=-1, workers=workers * 2, pad=0.5,
prefix=colorstr('val: '))[0]
if not resume:
labels = np.concatenate(dataset.labels, 0)
if plots:
plot_labels(labels, names, save_dir)
if not opt.noautoanchor:
check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)
model.half().float()
callbacks.run('on_pretrain_routine_end')
if cuda and RANK != -1:
model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK)
    nl = de_parallel(model).model[-1].nl  # number of detection layers (used to scale hyps)
hyp['box'] *= 3 / nl
hyp['cls'] *= nc / 80 * 3 / nl
hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl
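    """
    Gain-scaling sketch (assumed values): with nl=3, nc=80 and imgsz=640 all three
    multipliers are 1.0 (COCO defaults). For nc=20 at imgsz=1280: cls *= 20/80 = 0.25
    and obj *= (1280/640)**2 = 4.0.
    """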
hyp['label_smoothing'] = opt.label_smoothing
model.nc = nc
model.hyp = hyp
model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc
model.names = names
t0 = time.time()
nw = max(round(hyp['warmup_epochs'] * nb), 100)
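    """
    Warmup-length sketch: hyp['warmup_epochs']=3 with nb=500 batches/epoch gives
    nw=1500 warmup iterations; the max(..., 100) floor guarantees at least 100
    iterations on very small datasets.
    """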
last_opt_step = -1
maps = np.zeros(nc)
    results = (0, 0, 0, 0, 0, 0, 0)  # P, R, mAP@0.5, mAP@0.5:0.95, val_loss(box, obj, cls)
scheduler.last_epoch = start_epoch - 1
scaler = amp.GradScaler(enabled=cuda)
stopper = EarlyStopping(patience=opt.patience)
compute_loss = ComputeLoss(model)
LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n'
f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n'
f"Logging results to {colorstr('bold', save_dir)}\n"
f'Starting training for {epochs} epochs...')
for epoch in range(start_epoch, epochs):
model.train()
if opt.image_weights:
"""
如果设置进行图片采样策略,
则根据前面初始化的图片采样权重model.class_weights以及maps配合每张图片包含的类别数
通过random.choices生成图片索引indices从而进行采样
"""
cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc
iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw)
dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n)
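            """
            Sampling-weight sketch (hypothetical numbers): a class at mAP 0.2 is
            scaled by (1 - 0.2)**2 = 0.64, one at mAP 0.8 by (1 - 0.8)**2 = 0.04,
            so images containing poorly-learned classes are drawn roughly 16x
            more often this epoch.
            """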
mloss = torch.zeros(3, device=device)
if RANK != -1:
train_loader.sampler.set_epoch(epoch)
pbar = enumerate(train_loader)
LOGGER.info(('\n' + '%10s' * 7) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'labels', 'img_size'))
if RANK in [-1, 0]:
pbar = tqdm(pbar, total=nb, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}')
optimizer.zero_grad()
for i, (imgs, targets, paths, _) in pbar:
ni = i + nb * epoch
imgs = imgs.to(device, non_blocking=True).float() / 255
"""
热身训练(前nw次迭代)
在前nw次迭代中, 根据以下方式选取accumulate和学习率
"""
if ni <= nw:
xi = [0, nw]
accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
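                """
                e.g. nbs=64, batch_size=16, nw=1500 (assumed): np.interp ramps
                accumulate from 1 at ni=0 to 64/16=4 at ni=nw, so early updates
                happen every batch and then gradually spread out.
                """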
for j, x in enumerate(optimizer.param_groups):
"""
bias的学习率从0.1下降到基准学习率lr*lf(epoch),
其他的参数学习率从0增加到lr*lf(epoch).
lf为上面设置的余弦退火的衰减函数
动量momentum也从0.9慢慢变到hyp['momentum'](default=0.937)
"""
x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
if 'momentum' in x:
x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])
if opt.multi_scale:
"""
Multi-scale 设置多尺度训练,从imgsz * 0.5, imgsz * 1.5 + gs随机选取尺寸
"""
sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs
sf = sz / max(imgs.shape[2:])
if sf != 1:
ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]
imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)
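            """
            e.g. imgsz=640, gs=32 (assumed): sz is drawn from {320, 352, ..., 960};
            for a 640x640 batch and sz=480, sf=0.75 and the batch is resized to
            480x480 by bilinear interpolation.
            """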
with amp.autocast(enabled=cuda):
pred = model(imgs)
loss, loss_items = compute_loss(pred, targets.to(device))
if RANK != -1:
loss *= WORLD_SIZE
if opt.quad:
loss *= 4.
scaler.scale(loss).backward()
if ni - last_opt_step >= accumulate:
scaler.step(optimizer)
scaler.update()
optimizer.zero_grad()
if ema:
ema.update(model)
last_opt_step = ni
if RANK in [-1, 0]:
mloss = (mloss * i + loss_items) / (i + 1)
mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G'
pbar.set_description(('%10s' * 2 + '%10.4g' * 5) % (
f'{epoch}/{epochs - 1}', mem, *mloss, targets.shape[0], imgs.shape[-1]))
callbacks.run('on_train_batch_end', ni, model, imgs, targets, paths, plots, opt.sync_bn)
if callbacks.stop_training:
return
lr = [x['lr'] for x in optimizer.param_groups]
scheduler.step()
if RANK in [-1, 0]:
callbacks.run('on_train_epoch_end', epoch=epoch)
ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights'])
final_epoch = (epoch + 1 == epochs) or stopper.possible_stop
if not noval or final_epoch:
"""
测试使用的是ema(指数移动平均 对模型的参数做平均)的模型
results: [1] Precision 所有类别的平均precision(最大f1时)
[1] Recall 所有类别的平均recall
[1] map@0.5 所有类别的平均mAP@0.5
[1] map@0.5:0.95 所有类别的平均mAP@0.5:0.95
[1] box_loss 验证集回归损失, obj_loss 验证集置信度损失, cls_loss 验证集分类损失
maps: [80] 所有类别的mAP@0.5:0.95
"""
results, maps, _ = val.run(data_dict,
batch_size=batch_size // WORLD_SIZE * 2,
imgsz=imgsz,
model=ema.ema,
single_cls=single_cls,
dataloader=val_loader,
save_dir=save_dir,
plots=False,
callbacks=callbacks,
compute_loss=compute_loss)
fi = fitness(np.array(results).reshape(1, -1))
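            """
            In this repo's utils/metrics.py, fitness() weights (P, R, mAP@0.5,
            mAP@0.5:0.95) by (0.0, 0.0, 0.1, 0.9), so model selection is driven
            almost entirely by mAP@0.5:0.95.
            """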
if fi > best_fitness:
best_fitness = fi
log_vals = list(mloss) + list(results) + lr
callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi)
"""
保存带checkpoint的模型用于inference或resuming training
保存模型, 还保存了epoch, results, optimizer等信息
optimizer将不会在最后一轮完成后保存
model保存的是EMA的模型
"""
if (not nosave) or (final_epoch and not evolve):
ckpt = {'epoch': epoch,
'best_fitness': best_fitness,
'model': deepcopy(de_parallel(model)).half(),
'ema': deepcopy(ema.ema).half(),
'updates': ema.updates,
'optimizer': optimizer.state_dict(),
'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None,
'date': datetime.now().isoformat()}
torch.save(ckpt, last)
if best_fitness == fi:
torch.save(ckpt, best)
if (epoch > 0) and (opt.save_period > 0) and (epoch % opt.save_period == 0):
torch.save(ckpt, w / f'epoch{epoch}.pt')
del ckpt
callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi)
if RANK == -1 and stopper(epoch=epoch, fitness=fi):
break
if RANK in [-1, 0]:
LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.')
for f in last, best:
if f.exists():
strip_optimizer(f)
if f is best:
LOGGER.info(f'\nValidating {f}...')
results, _, _ = val.run(data_dict,
batch_size=batch_size // WORLD_SIZE * 2,
imgsz=imgsz,
model=attempt_load(f, device).half(),
iou_thres=0.65 if is_coco else 0.60,
single_cls=single_cls,
dataloader=val_loader,
save_dir=save_dir,
save_json=is_coco,
verbose=True,
plots=True,
callbacks=callbacks,
compute_loss=compute_loss)
if is_coco:
callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi)
callbacks.run('on_train_end', last, best, plots, epoch, results)
LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}")
torch.cuda.empty_cache()
return results
def parse_opt(known=False):
parser = argparse.ArgumentParser()
parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path')
parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path')
parser.add_argument('--epochs', type=int, default=300)
parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch')
parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
parser.add_argument('--rect', action='store_true', help='rectangular training')
    parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
parser.add_argument('--noval', action='store_true', help='only validate final epoch')
parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor')
parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer')
parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name')
parser.add_argument('--name', default='exp', help='save to project/name')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
parser.add_argument('--quad', action='store_true', help='quad dataloader')
parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler')
parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
parser.add_argument('--entity', default=None, help='W&B: Entity')
parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option')
parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use')
opt = parser.parse_known_args()[0] if known else parser.parse_args()
return opt
def main(opt, callbacks=Callbacks()):
if RANK in [-1, 0]:
print_args(FILE.stem, opt)
check_git_status()
check_requirements(exclude=['thop'])
if opt.resume and not check_wandb_resume(opt) and not opt.evolve:
ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run()
assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist'
with open(Path(ckpt).parent.parent / 'opt.yaml', errors='ignore') as f:
opt = argparse.Namespace(**yaml.safe_load(f))
opt.cfg, opt.weights, opt.resume = '', ckpt, True
LOGGER.info(f'Resuming training from {ckpt}')
else:
opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = \
check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project)
assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified'
if opt.evolve:
if opt.project == str(ROOT / 'runs/train'):
opt.project = str(ROOT / 'runs/evolve')
opt.exist_ok, opt.resume = opt.resume, False
opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))
device = select_device(opt.device, batch_size=opt.batch_size)
if LOCAL_RANK != -1:
msg = 'is not compatible with YOLOv5 Multi-GPU DDP training'
assert not opt.image_weights, f'--image-weights {msg}'
assert not opt.evolve, f'--evolve {msg}'
assert opt.batch_size != -1, f'AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size'
assert opt.batch_size % WORLD_SIZE == 0, f'--batch-size {opt.batch_size} must be multiple of WORLD_SIZE'
assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
torch.cuda.set_device(LOCAL_RANK)
device = torch.device('cuda', LOCAL_RANK)
dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo")
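        """
        Typical DDP launch for this branch (example command):
            $ python -m torch.distributed.run --nproc_per_node 4 train.py --batch-size 64 --device 0,1,2,3
        --batch-size is the total across all GPUs (64 / 4 = 16 per GPU here).
        """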
if not opt.evolve:
train(opt.hyp, opt, device, callbacks)
if WORLD_SIZE > 1 and RANK == 0:
LOGGER.info('Destroying process group... ')
dist.destroy_process_group()
else:
meta = {'lr0': (1, 1e-5, 1e-1),
'lrf': (1, 0.01, 1.0),
'momentum': (0.3, 0.6, 0.98),
'weight_decay': (1, 0.0, 0.001),
'warmup_epochs': (1, 0.0, 5.0),
'warmup_momentum': (1, 0.0, 0.95),
'warmup_bias_lr': (1, 0.0, 0.2),
'box': (1, 0.02, 0.2),
'cls': (1, 0.2, 4.0),
'cls_pw': (1, 0.5, 2.0),
'obj': (1, 0.2, 4.0),
'obj_pw': (1, 0.5, 2.0),
'iou_t': (0, 0.1, 0.7),
'anchor_t': (1, 2.0, 8.0),
'anchors': (2, 2.0, 10.0),
'fl_gamma': (0, 0.0, 2.0),
'hsv_h': (1, 0.0, 0.1),
'hsv_s': (1, 0.0, 0.9),
'hsv_v': (1, 0.0, 0.9),
'degrees': (1, 0.0, 45.0),
'translate': (1, 0.0, 0.9),
'scale': (1, 0.0, 0.9),
'shear': (1, 0.0, 10.0),
'perspective': (0, 0.0, 0.001),
'flipud': (1, 0.0, 1.0),
'fliplr': (0, 0.0, 1.0),
'mosaic': (1, 0.0, 1.0),
'mixup': (1, 0.0, 1.0),
'copy_paste': (1, 0.0, 1.0)}
with open(opt.hyp, errors='ignore') as f:
hyp = yaml.safe_load(f)
if 'anchors' not in hyp:
hyp['anchors'] = 3
opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir)
evolve_yaml, evolve_csv = save_dir / 'hyp_evolve.yaml', save_dir / 'evolve.csv'
if opt.bucket:
os.system(f'gsutil cp gs://{opt.bucket}/evolve.csv {evolve_csv}')
"""
遗传算法调参:遵循适者生存、优胜劣汰的法则,即寻优过程中保留有用的,去除无用的。
遗传算法需要提前设置4个参数: 群体大小/进化代数/交叉概率/变异概率
"""
for _ in range(opt.evolve):
if evolve_csv.exists():
parent = 'single'
x = np.loadtxt(evolve_csv, ndmin=2, delimiter=',', skiprows=1)
n = min(5, len(x))
x = x[np.argsort(-fitness(x))][:n]
w = fitness(x) - fitness(x).min() + 1E-6
if parent == 'single' or len(x) == 1:
x = x[random.choices(range(n), weights=w)[0]]
elif parent == 'weighted':
x = (x * w.reshape(n, 1)).sum(0) / w.sum()
mp, s = 0.8, 0.2
npr = np.random
npr.seed(int(time.time()))
g = np.array([meta[k][0] for k in hyp.keys()])
ng = len(meta)
v = np.ones(ng)
while all(v == 1):
v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
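            """
            Mutation sketch: with mp=0.8 each hyp mutates with ~80% probability; the
            meta gain g[k] (0 disables, 2 doubles the step) scales a Gaussian step of
            sigma s=0.2, and the clip to [0.3, 3.0] means one generation can at most
            triple a value or shrink it to 30%.
            """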
for i, k in enumerate(hyp.keys()):
hyp[k] = float(x[i + 7] * v[i])
for k, v in meta.items():
hyp[k] = max(hyp[k], v[1])
hyp[k] = min(hyp[k], v[2])
hyp[k] = round(hyp[k], 5)
results = train(hyp.copy(), opt, device, callbacks)
callbacks = Callbacks()
print_mutation(results, hyp.copy(), save_dir, opt.bucket)
plot_evolve(evolve_csv)
LOGGER.info(f'Hyperparameter evolution finished {opt.evolve} generations\n'
f"Results saved to {colorstr('bold', save_dir)}\n"
f'Usage example: $ python train.py --hyp {evolve_yaml}')
def run(**kwargs):
opt = parse_opt(True)
for k, v in kwargs.items():
setattr(opt, k, v)
main(opt)
return opt
if __name__ == "__main__":
opt = parse_opt()
main(opt)