Python对Labelme目标检测数据集及其JSON标注文件进行数据增广

FriendshipT

已于 2025-07-13 13:26:47 修改

阅读量263

点赞数 3

CC 4.0 BY-SA版权

分类专栏： YOLO系列、Python日常小操作、人工智能应用　文章标签： python、目标检测、json、人工智能、YOLO

于 2025-07-13 13:18:57 首次发布

本文链接：https://blog.csdn.net/FriendshipTang/article/details/149308614

Python日常小操作同时被 3 个专栏收录

72 篇文章

订阅专栏

YOLO系列

58 篇文章

订阅专栏

人工智能应用

25 篇文章

订阅专栏

在这里插入图片描述

前言

由于本人水平有限，难免出现错漏，敬请批评改正。
更多精彩内容，可点击进入Python日常小操作专栏、OpenCV-Python小应用专栏、YOLO系列专栏、自然语言处理专栏、人工智能混合编程实践专栏或我的个人主页查看
人工智能混合编程实践：C++调用Python ONNX进行YOLOv8推理
人工智能混合编程实践：C++调用封装好的DLL进行YOLOv8实例分割
人工智能混合编程实践：C++调用Python ONNX进行图像超分重建
人工智能混合编程实践：C++调用Python AgentOCR进行文本识别
通过计算实例简单地理解PatchCore异常检测
Python将YOLO格式实例分割数据集转换为COCO格式实例分割数据集
YOLOv8 Ultralytics：使用Ultralytics框架训练RT-DETR实时目标检测模型
基于DETR的人脸伪装检测
YOLOv7训练自己的数据集（口罩检测）
YOLOv8训练自己的数据集（足球检测）
YOLOv5：TensorRT加速YOLOv5模型推理
YOLOv5：IoU、GIoU、DIoU、CIoU、EIoU
玩转Jetson Nano（五）：TensorRT加速YOLOv5目标检测
YOLOv5：添加SE、CBAM、CoordAtt、ECA注意力机制
YOLOv5：yolov5s.yaml配置文件解读、增加小目标检测层
Python将COCO格式实例分割数据集转换为YOLO格式实例分割数据集
YOLOv5：使用7.0版本训练自己的实例分割模型（车辆、行人、路标、车道线等实例分割）
使用Kaggle GPU资源免费体验Stable Diffusion开源项目

前提条件

熟悉Python

实验环境

Python 3.x （面向对象的高级语言）
albumentations==1.3.1
opencv-python==4.12.0.88

对Labelme目标检测数据集及其JSON标注文件进行数据增广

背景：对标注好的数据集进行随机变换（翻转、旋转、缩放、平移、裁剪，以及亮度对比度、噪声、运动模糊、颜色抖动等），并同步更新矩形标注框，以达到数据增强的目的。
目录结构示例

在这里插入图片描述

png：图片数据集。
json：Labelme标注文件。

在这里插入图片描述

{
  "version": "5.5.0",
  "flags": {},
  "shapes": [
    {
      "score": 0.99,
      "label": "2",
      "points": [
        [
          310.379898,
          333.107918
        ],
        [
          429.721062,
          434.07660999999996
        ]
      ],
      "group_id": null,
      "description": null,
      "shape_type": "rectangle",
      "flags": {},
      "mask": null
    },
    {
      "score": 0.99,
      "label": "2",
      "points": [
        [
          68.5886295,
          312.772174
        ],
        [
          229.72838250000004,
          436.130706
        ]
      ],
      "group_id": null,
      "description": null,
      "shape_type": "rectangle",
      "flags": {},
      "mask": null
    },
    {
      "score": 0.99,
      "label": "2",
      "points": [
        [
          208.7640663,
          331.285174
        ],
        [
          250.1050137,
          380.598482
        ]
      ],
      "group_id": null,
      "description": null,
      "shape_type": "rectangle",
      "flags": {},
      "mask": null
    },
    {
      "score": 0.99,
      "label": "7",
      "points": [
        [
          237.97699020000002,
          318.716904
        ],
        [
          286.3156158,
          373.767064
        ]
      ],
      "group_id": null,
      "description": null,
      "shape_type": "rectangle",
      "flags": {},
      "mask": null
    },
    {
      "score": 0.99,
      "label": "2",
      "points": [
        [
          303.30663135000003,
          336.1300624
        ],
        [
          327.16299465,
          362.2645136
        ]
      ],
      "group_id": null,
      "description": null,
      "shape_type": "rectangle",
      "flags": {},
      "mask": null
    },
    {
      "score": 0.99,
      "label": "2",
      "points": [
        [
          284.1491136,
          331.6867246
        ],
        [
          306.15991440000005,
          349.0834754
        ]
      ],
      "group_id": null,
      "description": null,
      "shape_type": "rectangle",
      "flags": {},
      "mask": null
    }
  ],
  "imagePath": "1.png",
  "imageData": null,
  "imageHeight": 484,
  "imageWidth": 579
}

代码实现

import os
import json
import cv2
import numpy as np
import albumentations as A
from pathlib import Path
import random
from tqdm import tqdm

# Image file extensions (lower-case, leading dot included) that the script
# is willing to process.
supported_formats = {
    '.bmp',
    '.jpeg',
    '.jpg',
    '.png',
}

def load_labelme_json(json_path):
    """Load a Labelme JSON file, its image, and the rectangle annotations.

    The image is located relative to the JSON file's directory through the
    ``imagePath`` entry and read with ``cv2.imread``. Only shapes whose
    ``shape_type`` is ``"rectangle"`` are kept; each one becomes an
    ``[x_min, y_min, x_max, y_max]`` list with the two corners sorted so that
    min <= max on both axes.

    Args:
        json_path: Path of the Labelme ``.json`` annotation file.

    Returns:
        A tuple ``(image, bboxes, labels, data)``: the array returned by
        ``cv2.imread``, a list of four-float boxes, the parallel list of
        ``label`` values, and the parsed JSON dict.
        ``(None, None, None, None)`` is returned when the JSON cannot be
        parsed, carries no ``imagePath``, points at an image whose extension
        is not in ``supported_formats``, or the image cannot be read.
    """
    failure = (None, None, None, None)

    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        print(f"⚠️ JSON解析错误: {json_path} - {str(e)}")
        return failure

    # A document that parses but has no image reference cannot be processed;
    # report it like the other load failures instead of raising KeyError.
    image_name = data.get("imagePath") if isinstance(data, dict) else None
    if not image_name:
        print(f"⚠️ JSON缺少imagePath字段: {json_path}")
        return failure

    # The image path is resolved relative to the JSON file's own directory.
    image_path = os.path.join(os.path.dirname(json_path), image_name)

    # Reject images whose extension is not in the supported set.
    if not any(image_path.lower().endswith(fmt) for fmt in supported_formats):
        print(f"⚠️ 跳过不支持的图片格式: {image_path}")
        return failure

    # cv2.imread signals failure by returning None rather than raising.
    image = cv2.imread(image_path)
    if image is None:
        print(f"⚠️ 图片加载失败: {image_path}")
        return failure

    bboxes = []
    labels = []
    for shape in data.get("shapes") or []:
        # Only rectangle annotations are used.
        if shape.get("shape_type", "") != "rectangle":
            continue

        # Coordinates stay plain Python floats: converting through float32
        # would silently round the values stored in the annotation file.
        points = shape.get("points") or []
        try:
            (x1, y1), (x2, y2) = points[0], points[1]
            x1, y1, x2, y2 = float(x1), float(y1), float(x2), float(y2)
        except (IndexError, KeyError, TypeError, ValueError):
            # One malformed rectangle should not discard the whole file.
            print(f"⚠️ 跳过无效的矩形标注: {json_path}")
            continue

        # The two corners may be stored in any order; normalise to min/max.
        bboxes.append([min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2)])
        labels.append(shape["label"])

    return image, bboxes, labels, data

def get_augmentation_pipeline(transform_type, img_shape):
    """Return the albumentations pipeline for one named augmentation.

    Every pipeline is an ``A.Compose`` holding at most one transform created
    with ``p=1``, configured for ``pascal_voc`` boxes whose labels are passed
    in the ``class_labels`` field.

    Args:
        transform_type: Name of the augmentation: ``horizontal_flip``,
            ``vertical_flip``, ``rotate_90``, ``rotate``, ``scale``,
            ``shift``, ``crop``, ``brightness``, ``noise``, ``motion_blur``
            or ``color_jitter``. Any other value yields a pipeline with no
            transforms.
        img_shape: Image shape; its first two entries are read as height and
            width and are only used by ``crop``.

    Returns:
        The configured ``A.Compose`` instance.
    """
    img_h, img_w = img_shape[:2]

    def build_crop():
        # Random crop of 80%-95% of the region: one fraction is drawn here and
        # used as both ends of the ``scale`` range.
        fraction = random.uniform(0.8, 0.95)
        return A.RandomResizedCrop(
            height=img_h,
            width=img_w,
            scale=(fraction, fraction),
            ratio=(1.0, 1.0),
            p=1,
        )

    # Builders are called lazily so only the requested transform is created.
    builders = {
        "horizontal_flip": lambda: A.HorizontalFlip(p=1),
        "vertical_flip": lambda: A.VerticalFlip(p=1),
        "rotate_90": lambda: A.RandomRotate90(p=1),
        # Random rotation angle, -15 to 15 degrees.
        "rotate": lambda: A.Rotate(limit=15, border_mode=cv2.BORDER_CONSTANT, value=0, p=1),
        # Random scaling, 80%-120%.
        "scale": lambda: A.Affine(scale=(0.8, 1.2), mode=cv2.BORDER_CONSTANT, cval=0, p=1),
        # Random translation, +/-10%.
        "shift": lambda: A.Affine(translate_percent=(-0.1, 0.1), mode=cv2.BORDER_CONSTANT, cval=0, p=1),
        "crop": build_crop,
        # Random brightness/contrast adjustment.
        "brightness": lambda: A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=1),
        # Additive Gaussian noise.
        "noise": lambda: A.GaussNoise(var_limit=(10.0, 50.0), p=1),
        # Motion blur.
        "motion_blur": lambda: A.MotionBlur(blur_limit=7, p=1),
        # Colour jitter.
        "color_jitter": lambda: A.HueSaturationValue(
            hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=1
        ),
    }

    # Compare with ``==`` (as an if/elif chain would) to pick the builder.
    chosen = next(
        (build for name, build in builders.items() if transform_type == name),
        None,
    )
    steps = [] if chosen is None else [chosen()]

    return A.Compose(
        steps,
        bbox_params=A.BboxParams(format='pascal_voc', label_fields=['class_labels']),
    )

def apply_augmentation(transform_type, image, bboxes, labels):
    """Run one named augmentation on an image and its bounding boxes.

    Args:
        transform_type: Augmentation name forwarded to
            ``get_augmentation_pipeline``.
        image: Image array; its ``shape`` is used to build the pipeline.
        bboxes: List of ``[x_min, y_min, x_max, y_max]`` boxes.
        labels: Labels parallel to ``bboxes``.

    Returns:
        ``(image, bboxes, labels)`` after the transform, keeping only boxes
        wider and taller than 2 with non-negative, correctly ordered corners.
        When there are no boxes or labels, or the transform raises, an
        unmodified copy of the input image is returned with two empty lists.
    """
    # Without annotations there is nothing to transform.
    if not (bboxes and labels):
        return image.copy(), [], []

    # The boxes are handed to the pipeline as a float32 array.
    box_array = np.array(bboxes, dtype=np.float32)

    try:
        pipeline = get_augmentation_pipeline(transform_type, image.shape)
        result = pipeline(image=image, bboxes=box_array, class_labels=labels)
    except Exception as e:
        print(f"⚠️ 应用增强时出错 ({transform_type}): {str(e)}")
        return image.copy(), [], []

    new_image = result['image']
    new_boxes = result['bboxes']
    new_labels = result['class_labels']

    kept_boxes, kept_labels = [], []
    for box, tag in zip(new_boxes, new_labels):
        left, top, right, bottom = box
        # Plain Python floats keep the result JSON-serialisable.
        left, top = float(left), float(top)
        right, bottom = float(right), float(bottom)

        # Drop boxes that are degenerate or start at negative coordinates.
        big_enough = right - left > 2 and bottom - top > 2
        well_ordered = 0 <= left < right and 0 <= top < bottom
        if not (big_enough and well_ordered):
            continue

        kept_boxes.append([left, top, right, bottom])
        kept_labels.append(tag)

    return new_image, kept_boxes, kept_labels

def save_augmented_data(image, bboxes, labels, original_data, output_dir, base_name, index, original_format):
    """Write one sample (image plus Labelme JSON) into ``output_dir``.

    Both files are named ``{base_name}_{index}``: the image keeps
    ``original_format`` as its extension and the annotation uses ``.json``.
    The JSON is rebuilt rather than copied: ``version`` and ``flags`` come
    from ``original_data``, ``imageData`` is always null, and every box
    becomes a rectangle shape carrying only ``label``, ``points``,
    ``shape_type`` and ``flags``.

    Args:
        image: Image array to write with ``cv2.imwrite``.
        bboxes: List of ``[x_min, y_min, x_max, y_max]`` boxes.
        labels: Labels parallel to ``bboxes``.
        original_data: Parsed source JSON, read for ``version`` and ``flags``.
        output_dir: Existing directory that receives both files.
        base_name: File-name stem shared by the image and the JSON.
        index: Sequence number appended to ``base_name``.
        original_format: Image extension including the dot, e.g. ``".png"``.

    Returns:
        None. If the image cannot be written no JSON is produced; if the
        JSON cannot be written both files are removed. Either case prints a
        warning.
    """
    filename = f"{base_name}_{index}"
    img_path = os.path.join(output_dir, filename + original_format)
    json_path = os.path.join(output_dir, filename + ".json")

    # cv2.imwrite reports failure through its return value; writing the
    # annotation anyway would leave a JSON that points at a missing image.
    if not cv2.imwrite(img_path, image):
        print(f"⚠️ 保存图片失败: {img_path}")
        return

    # Each box is stored as the two opposite corners of a Labelme rectangle,
    # using plain Python floats so the JSON encoder accepts them.
    new_shapes = [
        {
            "label": label,
            "points": [
                [float(x_min), float(y_min)],
                [float(x_max), float(y_max)],
            ],
            "shape_type": "rectangle",
            "flags": {},
        }
        for (x_min, y_min, x_max, y_max), label in zip(bboxes, labels)
    ]

    # The recorded size is taken from the image actually written.
    height, width = image.shape[:2]

    new_data = {
        "version": original_data.get("version", "5.0.1"),
        "flags": original_data.get("flags", {}),
        "shapes": new_shapes,
        "imagePath": filename + original_format,
        "imageData": None,
        "imageHeight": int(height),
        "imageWidth": int(width),
    }

    try:
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(new_data, f, indent=2, ensure_ascii=False, default=json_float_default)
    except Exception as e:
        print(f"⚠️ 保存JSON失败: {json_path} - {str(e)}")
        # Remove the image and any partial JSON so no unpaired file remains.
        if os.path.exists(img_path):
            os.remove(img_path)
        if os.path.exists(json_path):
            os.remove(json_path)

def json_float_default(obj):
    """``default=`` hook for ``json.dump`` that converts NumPy values.

    Args:
        obj: A value the JSON encoder could not serialise on its own.

    Returns:
        ``bool`` for NumPy booleans, ``int`` for NumPy integers, ``float``
        for NumPy floating scalars, a nested list for ``np.ndarray``, and
        ``str(obj)`` for anything else.
    """
    # np.bool_ is not an np.integer; without this branch it would fall
    # through to str() and be written as the string "True"/"False".
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    # Last-resort fallback: keep the dump from failing on unknown types.
    return str(obj)

def detect_augmentation(input_dir, output_dir, iterations_per_image=5):
    """Augment every Labelme sample found in ``input_dir``.

    For each ``.json`` file the image and rectangle boxes are loaded, the
    unmodified sample is saved with index 0, and then up to
    ``iterations_per_image`` augmented samples are saved with indices
    starting at 1. Each augmentation type is picked at random. An attempt
    that leaves no valid box is reported and not saved, so indices may have
    gaps.

    Args:
        input_dir: Directory containing the Labelme ``.json`` files.
        output_dir: Directory that receives images and JSON files; created
            if it does not exist.
        iterations_per_image: Number of augmentation attempts per image, not
            counting the index-0 copy.

    Returns:
        None. Problems with individual files are printed and skipped.
    """
    transform_types = [
        "horizontal_flip",
        "vertical_flip",
        "rotate_90",
        "rotate",
        "scale",
        "shift",
        "crop",
        "brightness",
        "noise",
        "motion_blur",
        "color_jitter",
    ]

    os.makedirs(output_dir, exist_ok=True)

    # os.listdir order is platform-dependent; sorting gives a stable
    # processing order. The list is taken before anything is written, so
    # files created during this run are not picked up.
    json_files = sorted(f for f in os.listdir(input_dir) if f.endswith(".json"))

    for json_file in tqdm(json_files, desc="Processing images"):
        json_path = os.path.join(input_dir, json_file)

        try:
            image, bboxes, labels, data = load_labelme_json(json_path)
            if image is None:
                print(f"⚠️ 图片加载失败: {json_file}")
                continue
            if not bboxes:
                print(f"⚠️ 无有效标注: {json_file}")

            # Output names reuse the stem and extension of the source image.
            image_path = data["imagePath"]
            original_format = Path(image_path).suffix
            if original_format.lower() not in supported_formats:
                print(f"⚠️ 不支持的图片格式: {original_format}")
                continue

            base_name = Path(image_path).stem

            # The untouched sample is always written as index 0.
            save_augmented_data(
                image.copy(),
                bboxes,
                labels,
                data,
                output_dir,
                base_name,
                0,
                original_format,
            )

            # apply_augmentation returns no boxes for an image without
            # annotations, so every attempt would be discarded with a
            # warning; skip the loop instead.
            if not bboxes:
                continue

            for i in range(1, iterations_per_image + 1):
                trans_type = random.choice(transform_types)

                try:
                    # A copy is passed so the loaded image is never modified.
                    augmented_image, augmented_bboxes, augmented_labels = apply_augmentation(
                        trans_type,
                        image.copy(),
                        bboxes,
                        labels,
                    )

                    # Skip results where every box was filtered out.
                    if not augmented_bboxes:
                        print(f"⚠️ 增强后无有效边界框: {base_name}_{i}{original_format} ({trans_type})")
                        continue

                    save_augmented_data(
                        augmented_image,
                        augmented_bboxes,
                        augmented_labels,
                        data,
                        output_dir,
                        base_name,
                        i,
                        original_format,
                    )
                except Exception as e:
                    # One failed attempt must not stop the remaining ones.
                    print(f"⚠️ 增强失败 ({trans_type}): {str(e)}")

        except Exception as e:
            # One unreadable or malformed file must not abort the batch.
            print(f"⚠️ 处理文件 {json_file} 时出错: {str(e)}")

    print(f"✅ 数据增强完成! 增强后的数据保存在: {output_dir}")


if __name__ == "__main__":
    # Source dataset directory and destination directory. Both name the same
    # folder here, so generated files are written next to the originals.
    input_dir, output_dir = r"detect", r"detect"

    # Augmentation attempts per image (the unmodified copy is not counted).
    iterations_per_image = 5

    # Run the augmentation over the whole directory.
    detect_augmentation(
        input_dir=input_dir,
        output_dir=output_dir,
        iterations_per_image=iterations_per_image,
    )

输出结果

在这里插入图片描述

png：图片数据集。
json：Labelme标注文件。

在这里插入图片描述

由于本人水平有限，难免出现错漏，敬请批评改正。
更多精彩内容，可点击进入Python日常小操作专栏、OpenCV-Python小应用专栏、YOLO系列专栏、自然语言处理专栏、人工智能混合编程实践专栏或我的个人主页查看
人工智能混合编程实践：C++调用Python ONNX进行YOLOv8推理
人工智能混合编程实践：C++调用封装好的DLL进行YOLOv8实例分割
人工智能混合编程实践：C++调用Python ONNX进行图像超分重建
人工智能混合编程实践：C++调用Python AgentOCR进行文本识别
通过计算实例简单地理解PatchCore异常检测
Python将YOLO格式实例分割数据集转换为COCO格式实例分割数据集
YOLOv8 Ultralytics：使用Ultralytics框架训练RT-DETR实时目标检测模型
基于DETR的人脸伪装检测
YOLOv7训练自己的数据集（口罩检测）
YOLOv8训练自己的数据集（足球检测）
YOLOv5：TensorRT加速YOLOv5模型推理
YOLOv5：IoU、GIoU、DIoU、CIoU、EIoU
玩转Jetson Nano（五）：TensorRT加速YOLOv5目标检测
YOLOv5：添加SE、CBAM、CoordAtt、ECA注意力机制
YOLOv5：yolov5s.yaml配置文件解读、增加小目标检测层
Python将COCO格式实例分割数据集转换为YOLO格式实例分割数据集
YOLOv5：使用7.0版本训练自己的实例分割模型（车辆、行人、路标、车道线等实例分割）
使用Kaggle GPU资源免费体验Stable Diffusion开源项目