labelme类别编辑小工具（类别合并、删除和保留）-CSDN博客

为了方便编辑已经勾绘好的标注类别（合并、删除和保留），写了以下代码：

import os
import json
import shutil
from collections import defaultdict

# ======================= Configuration ======================= #
# Folder that is scanned for LabelMe JSON files (other files in it are copied).
input_dir = r"D:\plant_seg_datasets\20250524\image"
# Results are written to a sibling folder carrying an "_edited" suffix.
output_dir = f"{input_dir}_edited"

# Merge rules: each key is an old label, each value the label replacing it.
merge_dict = {
    # "old_label1": "new_label",
    # "old_label2": "new_label",
}

# Labels whose shapes are removed, e.g. {"trash", "ignore"}.
labels_to_delete = set()

# Labels to keep; shapes with any other label are filtered out.
# Set to None to keep every label.
labels_to_keep = {"hei_shui_yin_lian_hua"}  # keep only this label


# ============================================================= #

# Sentinel meaning "argument omitted": needed because an explicit None is a
# meaningful value for keep_labels (keep every label).
_USE_MODULE_CONFIG = object()


def process_labelme_files(input_dir, output_dir, *,
                          merge_rules=_USE_MODULE_CONFIG,
                          delete_labels=_USE_MODULE_CONFIG,
                          keep_labels=_USE_MODULE_CONFIG):
    """Apply delete / merge / keep label rules to the LabelMe JSONs in a folder.

    Every regular file directly inside ``input_dir`` is handled:

    * non-JSON files (e.g. images) are copied to ``output_dir`` unless a file
      of that name already exists there;
    * JSON files have their ``shapes`` list rewritten: shapes whose label is
      in ``delete_labels`` are dropped, labels found in ``merge_rules`` are
      renamed, and, when ``keep_labels`` is not None, shapes whose (possibly
      renamed) label is not in ``keep_labels`` are dropped.

    Subdirectories are skipped. Files that cannot be parsed as a JSON object
    are reported and skipped; they count towards the JSON total but towards
    neither the modified nor the unmodified counter.

    Args:
        input_dir: Folder to read from.
        output_dir: Folder to write to; created when missing.
        merge_rules: Mapping of old label to new label. Defaults to the
            module-level ``merge_dict``; None means no merging.
        delete_labels: Collection of labels to delete. Defaults to the
            module-level ``labels_to_delete``; None means delete nothing.
        keep_labels: Collection of labels to keep, or None to keep all.
            Defaults to the module-level ``labels_to_keep``.

    Returns:
        A tuple ``(processed_files, skipped_files, total_json_files, stats)``
        where ``stats`` maps a file name to its per-operation counts under
        the keys ``"deleted"``, ``"merged"`` and ``"filtered"``. Only files
        with at least one operation appear in ``stats``.
    """
    # Module-level configuration is looked up only for omitted arguments, so
    # the function can also be driven entirely by explicit arguments.
    if merge_rules is _USE_MODULE_CONFIG:
        merge_rules = merge_dict
    if delete_labels is _USE_MODULE_CONFIG:
        delete_labels = labels_to_delete
    if keep_labels is _USE_MODULE_CONFIG:
        keep_labels = labels_to_keep
    merge_rules = merge_rules or {}
    delete_labels = delete_labels or ()

    os.makedirs(output_dir, exist_ok=True)

    # Per-file operation counters.
    stats = defaultdict(lambda: defaultdict(int))
    processed_files = 0
    total_json_files = 0
    skipped_files = 0

    # Sorted so that the log and the processing order are reproducible.
    for filename in sorted(os.listdir(input_dir)):
        src = os.path.join(input_dir, filename)
        dst = os.path.join(output_dir, filename)

        # Directories cannot be copied with copy2 or opened as JSON.
        if not os.path.isfile(src):
            print(f"⏭️ 跳过非文件条目: {filename}")
            continue

        if not filename.lower().endswith(".json"):
            # Copy non-JSON files (such as images) once.
            if not os.path.exists(dst):
                shutil.copy2(src, dst)
            continue

        total_json_files += 1

        try:
            with open(src, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError):
            print(f"⚠️ 错误: {filename} 不是有效的JSON文件，已跳过")
            continue
        if not isinstance(data, dict):
            # Valid JSON, but not an object, so it has no "shapes" entry.
            print(f"⚠️ 错误: {filename} 不是LabelMe格式的JSON文件，已跳过")
            continue

        new_shapes = []
        image_modified = False
        found_labels = set()

        print(f"\n📄 处理文件: {filename}")

        # "or []" also covers an explicit null value for "shapes".
        for shape in data.get("shapes") or []:
            # A shape without a "label" key is treated as having label None
            # instead of aborting the whole run with a KeyError.
            original_label = shape.get("label")
            current_label = original_label
            found_labels.add(original_label)

            # 1. Delete (checked against the original label).
            if original_label in delete_labels:
                print(f"  - 删除标签: {original_label}")
                stats[filename]["deleted"] += 1
                image_modified = True
                continue

            # 2. Merge (rename).
            if original_label in merge_rules:
                new_label = merge_rules[original_label]
                print(f"  - 合并标签: {original_label} → {new_label}")
                current_label = new_label
                stats[filename]["merged"] += 1
                image_modified = True

            # 3. Keep filter (checked against the label after merging).
            if keep_labels is not None:
                if current_label not in keep_labels:
                    print(f"  - 过滤标签: {current_label} (不在保留列表中)")
                    stats[filename]["filtered"] += 1
                    image_modified = True
                    continue
                print(f"  - 保留标签: {current_label}")

            if current_label != original_label:
                shape["label"] = current_label
            new_shapes.append(shape)

        # str() keeps the join safe for non-string labels; sorting makes the
        # listing deterministic.
        label_listing = ', '.join(sorted(str(label) for label in found_labels))
        print(f"  🔍 文件中存在的标签: {label_listing}")

        if image_modified:
            data["shapes"] = new_shapes
            with open(dst, "w", encoding="utf-8") as f:
                json.dump(data, f, ensure_ascii=False, indent=2)
            processed_files += 1
            print("  ✅ 文件已修改")
        else:
            # Always refresh the copy: an edited version left behind by an
            # earlier run with different rules must not survive.
            try:
                shutil.copy2(src, dst)
            except shutil.SameFileError:
                # In-place run (output_dir is input_dir): nothing to copy.
                pass
            skipped_files += 1
            print("  ⏩ 文件未修改（已复制）")

    return processed_files, skipped_files, total_json_files, stats


def print_summary(processed_files, skipped_files, total_json_files, stats):
    """Print the run report: file counts, per-file operations and totals.

    Args:
        processed_files: Number of JSON files that were modified.
        skipped_files: Number of JSON files that were left unmodified.
        total_json_files: Number of JSON files encountered.
        stats: Mapping of file name to a mapping of operation key
            ("deleted", "merged", "filtered") to its count.

    The output directory printed on the last line is read from the
    module-level ``output_dir``.
    """
    # Display names of the operation keys; unknown keys are shown unchanged.
    op_display = {"deleted": "删除", "merged": "合并", "filtered": "过滤"}
    rule = "=" * 50

    print("\n" + rule)
    print(f"✅ 处理完成! 共处理 {total_json_files} 个JSON文件")
    print(f"  - 修改文件: {processed_files}")
    print(f"  - 未修改文件: {skipped_files}")
    print(rule)

    # Totals per operation, in order of first appearance.
    grand_totals = {}
    for fname, per_file in stats.items():
        print(f"\n📄 文件: {fname}")
        for action, n in per_file.items():
            print(f"  - {op_display.get(action, action)}: {n}个标签")
            grand_totals[action] = grand_totals.get(action, 0) + n

    if not grand_totals:
        print("\nℹ️ 没有标签被修改")
    else:
        print("\n📊 总计操作:")
        for action, n in grand_totals.items():
            print(f"  - {op_display.get(action, action)}: {n}个标签")

    print(f"\n📁 输出目录: {output_dir}")


if __name__ == "__main__":
    # Script entry point: show the banner and the active configuration, then
    # run the processing and print its summary.
    banner = "=" * 50
    print(banner)
    print("🛠️ 开始处理LabelMe数据集")
    print(f"📂 输入目录: {input_dir}")
    print(banner + "\n")

    print("🔧 配置信息:")
    # (caption, configured value, text shown when the value is empty or None)
    config_rows = (
        ("合并规则", merge_dict, "无"),
        ("删除标签", labels_to_delete, "无"),
        ("保留标签", labels_to_keep, "所有标签"),
    )
    for caption, value, fallback in config_rows:
        print(f"  - {caption}: {value or fallback}")
    print()

    print_summary(*process_labelme_files(input_dir, output_dir))