yolov8实例分割——制作并训练自己的数据集（保姆级教程）

深海不是海

已于 2025-03-17 15:41:31 修改

阅读量2k

点赞数 7

CC 4.0 BY-SA版权

文章标签： YOLO

于 2024-12-23 15:20:19 首次发布

本文链接：https://blog.csdn.net/weixin_52250546/article/details/144667503

事先需要了解的：

1. 用官方权重文件 yolov8s-seg.pt 自动标注出来的标签是 txt 格式的，但这种格式无法用 labelme 打开查看标签在图片上的对应位置，必须先转化为 json 格式才行。

2.训练分割的数据格式也必须是txt。

综上所述，大致步骤如下：自动标注、txt转化为json，labelme查看修改标签、json转化为txt。

步骤一：官方文件自动标注命令：

yolo segment predict model=C:\Users\23028005\Desktop\yolo11s-seg.pt source='E:\photo' save_txt=True

步骤二：txt转化为json

import os
import glob
import cv2
import json
import numpy as np


def _yolo_seg_line_to_shape(line, txt, class_name, w, h):
    """
    Convert one YOLO-seg label line into a LabelMe polygon shape.

    The line is read as ``<class_id> x1 y1 x2 y2 ...``; every x is multiplied
    by the image width and every y by the image height.

    :param line: one non-blank line of the label file
    :param txt: path of the label file, used only in warning messages
    :param class_name: list of class names, indexed by class ID
    :param w: image width in pixels
    :param h: image height in pixels
    :return: the shape dict, or None (after printing a warning) when the line
             cannot be converted
    """
    # split() without an argument tolerates tabs and repeated spaces.
    content = line.split()
    # At least a class ID plus one coordinate pair is required.
    if len(content) < 3:
        print(f"警告：跳过格式错误的行 -> {line.strip()} in {txt}")
        return None

    try:
        class_id = int(content[0])
        # A negative ID would silently pick a class from the end of the list.
        if class_id < 0:
            raise IndexError(class_id)
        label = class_name[class_id]
    except (ValueError, IndexError):
        print(f"警告：跳过无法解析的类别 ID -> {content[0]} in {txt}")
        return None

    try:
        coords = [float(value) for value in content[1:]]
    except ValueError:
        print(f"警告：跳过格式错误的行 -> {line.strip()} in {txt}")
        return None
    # An odd number of values cannot be paired into (x, y) points.
    if len(coords) % 2:
        print(f"警告：跳过格式错误的行 -> {line.strip()} in {txt}")
        return None

    # Even positions are x values, odd positions are y values.
    points = [[x * w, y * h] for x, y in zip(coords[0::2], coords[1::2])]
    return {
        'label': label,
        'points': points,
        'group_id': None,
        'description': None,
        'shape_type': 'polygon',
        'flags': {},
        'mask': None
    }


def convert_txt_to_labelme_json(txt_path, image_path, output_dir, class_name, image_fmt='.jpg'):
    """
    Convert every YOLO-seg ``*.txt`` label file in a folder to a LabelMe JSON file.

    For each ``name.txt`` the image ``name<image_fmt>`` is read to obtain its
    size, the coordinates are scaled to pixels and ``name.json`` is written
    to ``output_dir``. Lines that cannot be parsed are skipped with a warning.

    :param txt_path: folder containing the ``*.txt`` label files
    :param image_path: folder containing the matching images
    :param output_dir: folder for the generated JSON files (created if missing)
    :param class_name: list of class names, indexed by class ID
    :param image_fmt: image file extension, '.jpg' by default
    :return: None
    :raises FileNotFoundError: when the image for a label file does not exist
    :raises ValueError: when an image file exists but cannot be decoded
    """
    os.makedirs(output_dir, exist_ok=True)

    # Sorted so that files are processed in a reproducible order.
    for txt in sorted(glob.glob(os.path.join(txt_path, "*.txt"))):
        stem = os.path.splitext(os.path.basename(txt))[0]
        image_file = stem + image_fmt
        image_full_path = os.path.join(image_path, image_file)
        if not os.path.exists(image_full_path):
            raise FileNotFoundError(
                'txt 文件={}, 找不到对应的图像={}'.format(txt, image_full_path))

        # The file bytes are read through numpy and decoded in memory
        # (presumably so that non-ASCII paths work on Windows — confirm).
        image = cv2.imdecode(np.fromfile(image_full_path, dtype=np.uint8), cv2.IMREAD_COLOR)
        if image is None:
            raise ValueError('无法解码图像={}'.format(image_full_path))
        h, w = image.shape[:2]

        shapes = []
        with open(txt, 'r', encoding='utf-8') as t:
            for line in t:
                if not line.strip():
                    continue  # blank line
                shape = _yolo_seg_line_to_shape(line, txt, class_name, w, h)
                if shape is not None:
                    shapes.append(shape)

        # imageData stays None: only the image file name is recorded.
        labelme_json = {
            'version': '5.5.0',
            'flags': {},
            'shapes': shapes,
            'imagePath': image_file,
            'imageData': None,
            'imageHeight': h,
            'imageWidth': w,
        }

        json_name_path = os.path.join(output_dir, stem + '.json')
        with open(json_name_path, 'w', encoding='utf-8') as fd:
            json.dump(labelme_json, fd, indent=2)
        print("save json={}".format(json_name_path))


if __name__ == '__main__':
    # Label files, images and generated JSON files all share one folder here.
    predict_dir = r'C:\Users\23028005\runs\segment\predict7'
    # Class names indexed by class ID — remember to replace with your own.
    convert_txt_to_labelme_json(
        txt_path=predict_dir,
        image_path=predict_dir,
        output_dir=predict_dir,
        class_name=['person'],
    )

步骤三：用 labelme 打开图片，自行检查并修改标签（比如补上漏标的目标）。在修改之前，原 json 文件里的字段 "imageData" 为 null，如下所示：

更改标注后，"imageData"可能变为类似于base64之类的格式编码，这个不用管，继续走步骤四

步骤四：json转化为txt，代码如下：

import json
import os
import chardet


def detect_encoding(file_path, sample_size=65536):
    """
    Guess the text encoding of a file with chardet.

    :param file_path: path of the file to inspect
    :param sample_size: number of leading bytes handed to chardet
    :return: the encoding name reported by chardet, 'utf-8' when chardet
             reports plain ASCII, or None when chardet reports no encoding
    """
    with open(file_path, 'rb') as f:
        raw_data = f.read(sample_size)
    encoding = chardet.detect(raw_data)['encoding']
    # An all-ASCII sample says nothing about the bytes after it. UTF-8 decodes
    # ASCII text identically and still works if UTF-8 characters appear later,
    # whereas the 'ascii' codec would fail on them.
    if encoding is not None and encoding.lower() == 'ascii':
        return 'utf-8'
    return encoding


def labelme2yolo_seg(class_name, json_dir, labels_dir):
    """
    Convert LabelMe ``*.json`` annotations to YOLO-seg ``*.txt`` label files.

    Each shape becomes one line ``<class_index> x1 y1 x2 y2 ...`` where the
    coordinates are divided by the image width / height stored in the JSON.
    A file that cannot be converted is reported and skipped; no txt file is
    written for it.

    :param class_name: list of class labels; a label's position in the list
                       is the class index written to the txt file
    :param json_dir: folder containing the LabelMe ``*.json`` files
    :param labels_dir: folder for the generated ``*.txt`` files (created if missing)
    :return: None
    """
    # makedirs also creates missing parent folders and accepts an existing one.
    os.makedirs(labels_dir, exist_ok=True)

    # Sorted so that files are processed in a reproducible order.
    json_files = sorted(
        os.path.join(json_dir, name)
        for name in os.listdir(json_dir)
        if name.endswith('.json')
    )

    for labels in json_files:
        # Best-effort batch conversion: one bad file is reported and must not
        # stop the remaining files from being converted.
        try:
            encoding = detect_encoding(labels)
            print(f"文件 {labels} 的编码为: {encoding}")
            with open(labels, "r", encoding=encoding) as f:
                file_in = json.load(f)

            width = file_in["imageWidth"]
            height = file_in["imageHeight"]

            # Build every line before touching the output file, so an error
            # (e.g. an unknown label) cannot leave a truncated txt behind.
            rows = []
            for shape in file_in["shapes"]:
                points = shape["points"]
                # Fewer than three points cannot describe a polygon.
                if len(points) < 3:
                    print(f"警告：跳过少于 3 个点的形状 -> {shape['label']} in {labels}")
                    continue
                row = [str(class_name.index(shape['label']))]
                for point in points:
                    row.append(str(point[0] / width))
                    row.append(str(point[1] / height))
                rows.append(" ".join(row) + "\n")

            # splitext only swaps the real extension, unlike str.replace.
            txt_filename = os.path.splitext(os.path.basename(labels))[0] + ".txt"
            txt_path = os.path.join(labels_dir, txt_filename)
            with open(txt_path, "w", encoding="utf-8") as file_handle:
                file_handle.writelines(rows)
        except Exception as e:
            print(f"处理文件 {labels} 时出错: {e}")


# Example usage
# Replace with your own class names. A label's position in this list is the
# class index written to the txt files, and YOLOv8 class indices start at 0.
class_name = ['smoke']
# The JSON files are read from, and the txt files written to, the same folder.
json_dir = labels_dir = r'E:\hongwai\photos'
labelme2yolo_seg(class_name=class_name, json_dir=json_dir, labels_dir=labels_dir)

这时候再把数据集分类一下，分为训练集，验证集，然后训练即可

训练过程如下：

1.建一个关于数据的yaml文件（复制粘贴修改自己的内容）

内容如下：



# Dataset configuration: image folders, number of classes and class names.
train: /home/datasets/test1/train # 118287 images (NOTE(review): count may be left over from a template — confirm); replace with your own training path
val: /home/datasets/test1/val  # 5000 images (NOTE(review): count may be left over from a template — confirm); replace with your own validation path


# number of classes
nc: 1     # change to your own number of classes

# class names
# Example of a longer, multi-class list:
# names: ['person', 'ebike', 'face','mask','gas','bike', 'babycar', 'opendoor', 'closedoor', 'halfdoor', 'cat', 'dog', 'occlusion', 'smoking', 'fall']
# Unused alternative spelling of the names entry, kept commented out:
#names:
  #0:smoke

names: ['smoke']      # change to the name(s) of your own classes

2.建一个训练模型的yaml文件，比如你要用n s m l x中的一个，我用的是l大小的模型，内容如下：



# Model definition for YOLOv8 segmentation: scale table, backbone and head.

# Parameters
nc: 1 # number of classes         # change to your own number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-seg.yaml' will call yolov8-seg.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 768]
  l: [1.00, 1.00, 512]
  x: [1.00, 1.25, 512]

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2f, [512]] # 12

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 15 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 12], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2f, [512]] # 18 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 9], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2f, [1024]] # 21 (P5/32-large)

  - [[15, 18, 21], 1, Segment, [nc, 32, 256]] # Segment(P3, P4, P5)

然后去v8官网下一个自己需要的pt文件放在这一级目录下，我下载的是yolov8l-seg.pt。运行下面的训练命令时，会根据模型yaml的文件名自动识别对应的模型大小（见上面 scales 处的注释）：比如文件名是 yolov8l-seg.yaml，就会自动加载yaml中的 l: [1.00, 1.00, 512] 这一行，也就是选择这种深度的模型进行训练。所以yaml文件名里的大小字母要和你下载的pt文件（我的是 yolov8l-seg.pt）保持一致。然后开始训练步骤：打开终端，进入你搭建好的v8深度学习环境。

在你刚刚的pt文件的那一级目录下运行命令行
yolo segment train data=***.yaml model=yolov8l-seg.yaml pretrained=yolov8l-seg.pt epochs=80 imgsz=640 batch=8
（说明：data=***.yaml 改为你刚刚放数据的那一个yaml文件；model=yolov8l-seg.yaml 就是刚刚你新建的那个确定模型大小的yaml文件。这些说明不属于命令本身，不要写进命令行。）

等待训练完成即可！

在训练之前，有个小坑需要注意一下：

大家平时训练yolov5可能用习惯了，在修改yolov8数据集的yaml文件可能会忽略一个问题，导致训练跑不通，一直报错：

所以我又去查看了一下我的源文件：

我这里的names用的格式是：

names:
0:smoke
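哪怕你nc只是1，names也只设置了一个种类，最后还是会报错（注意报错里的 'names' length 7 正好是字符串 "0:smoke" 的字符数：冒号后面没有空格时，YAML 会把 0:smoke 当成一个普通字符串，而不是“0 对应 smoke”的键值对）：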
RuntimeError: Dataset 'huangdaoshihua.yaml' error ❌ huangdaoshihua.yaml 'names' length 7 and 'nc: 1' must match.

所以还是需要你用v8的官方源格式，例如上面数据yaml里的列表写法 names: ['smoke']：

用这样的方式去写！！