为了方便对已勾绘的标注类别进行编辑,编写了以下代码:
import os
import json
import shutil
from collections import defaultdict
# ====================== Configuration ====================== #
# Directory that is scanned for LabelMe JSON files (and the files next to them).
input_dir = r"D:\plant_seg_datasets\20250524\image"
# Edited copies go to a sibling directory carrying an "_edited" suffix.
output_dir = f"{input_dir}_edited"

# Label merge rules, written as {old_label: new_label}, for example:
#   "old_label1": "new_label",
#   "old_label2": "new_label",
merge_dict = {}

# Labels whose shapes are dropped, e.g. {"trash", "ignore"}.
labels_to_delete = set()

# Labels to keep (set to None to keep every label); only this one is kept.
labels_to_keep = {"hei_shui_yin_lian_hua"}
# ============================================================ #
# Sentinel meaning "argument not supplied, fall back to the module-level
# configuration". A dedicated object is required because None is itself a
# meaningful value for the keep-list (it means "keep every label").
_FROM_MODULE_CONFIG = object()


def process_labelme_files(
    input_dir,
    output_dir,
    *,
    merge_rules=_FROM_MODULE_CONFIG,
    delete_labels=_FROM_MODULE_CONFIG,
    keep_labels=_FROM_MODULE_CONFIG,
):
    """Apply delete / merge / keep rules to the LabelMe JSON files of a directory.

    Every regular file directly inside ``input_dir`` is handled:

    * Files without a ``.json`` extension (e.g. images) are copied to
      ``output_dir`` unless a file of that name already exists there.
    * JSON files have each entry of their ``"shapes"`` list checked, in this
      order: delete, merge (rename), keep-filter. A file whose shapes changed
      is re-written to ``output_dir``; an unchanged file is copied verbatim.

    Subdirectories are ignored. JSON files that cannot be decoded are
    reported and skipped; they are counted in the total only.

    Args:
        input_dir: Directory to read from.
        output_dir: Directory to write to; created if missing.
        merge_rules: Mapping ``{old_label: new_label}``. Defaults to the
            module-level ``merge_dict``.
        delete_labels: Collection of labels whose shapes are removed.
            Defaults to the module-level ``labels_to_delete``.
        keep_labels: Collection of labels (after merging) that survive, or
            ``None`` to keep everything. Defaults to the module-level
            ``labels_to_keep``.

    Returns:
        A tuple ``(processed_files, skipped_files, total_json_files, stats)``
        where ``stats[filename][operation]`` counts the shapes affected by
        ``"deleted"``, ``"merged"`` and ``"filtered"`` operations.
    """
    if merge_rules is _FROM_MODULE_CONFIG:
        merge_rules = merge_dict
    if delete_labels is _FROM_MODULE_CONFIG:
        delete_labels = labels_to_delete
    if keep_labels is _FROM_MODULE_CONFIG:
        keep_labels = labels_to_keep

    os.makedirs(output_dir, exist_ok=True)
    # Sorted so that the log and the statistics come out in a stable order.
    entries = sorted(os.listdir(input_dir))
    # When editing in place, copying a file onto itself must be avoided.
    in_place = os.path.samefile(input_dir, output_dir)

    # Per-file operation counters.
    stats = defaultdict(lambda: defaultdict(int))
    processed_files = 0
    total_json_files = 0
    skipped_files = 0

    for filename in entries:
        src = os.path.join(input_dir, filename)
        dst = os.path.join(output_dir, filename)

        # os.listdir also yields subdirectories, which copy2 cannot handle.
        if not os.path.isfile(src):
            continue

        if not filename.lower().endswith(".json"):
            # Copy non-JSON files (such as images) once.
            if not os.path.exists(dst):
                shutil.copy2(src, dst)
            continue

        total_json_files += 1
        try:
            with open(src, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError):
            print(f"⚠️ 错误: {filename} 不是有效的JSON文件,已跳过")
            continue

        new_shapes = []
        image_modified = False
        found_labels = set()
        print(f"\n📄 处理文件: {filename}")

        # `or []` also covers an explicit `"shapes": null`.
        for shape in data.get("shapes") or []:
            original_label = shape["label"]
            found_labels.add(original_label)

            # 1. Delete.
            if original_label in delete_labels:
                print(f" - 删除标签: {original_label}")
                stats[filename]["deleted"] += 1
                image_modified = True
                continue

            # 2. Merge (rename).
            current_label = original_label
            if original_label in merge_rules:
                current_label = merge_rules[original_label]
                print(f" - 合并标签: {original_label} → {current_label}")
                stats[filename]["merged"] += 1
                image_modified = True

            # 3. Keep-filter, applied to the label after merging.
            if keep_labels is not None:
                if current_label not in keep_labels:
                    print(f" - 过滤标签: {current_label} (不在保留列表中)")
                    stats[filename]["filtered"] += 1
                    image_modified = True
                    continue
                print(f" - 保留标签: {current_label}")

            shape["label"] = current_label
            new_shapes.append(shape)

        # Labels seen in this file, before any rule was applied.
        print(f" 🔍 文件中存在的标签: {', '.join(sorted(found_labels))}")

        if image_modified:
            data["shapes"] = new_shapes
            with open(dst, "w", encoding="utf-8") as f:
                json.dump(data, f, ensure_ascii=False, indent=2)
            processed_files += 1
            print(" ✅ 文件已修改")
        else:
            # Always overwrite: an edited copy left by an earlier run with
            # different rules would otherwise survive as stale output.
            if not in_place:
                shutil.copy2(src, dst)
            skipped_files += 1
            print(" ⏩ 文件未修改(已复制)")

    return processed_files, skipped_files, total_json_files, stats
def print_summary(processed_files, skipped_files, total_json_files, stats):
    """Print the final report of a processing run.

    The report lists the file counts, then the per-file operation counts from
    ``stats`` (a mapping ``{filename: {operation: count}}``), then the totals
    per operation, and finally the module-level ``output_dir``.
    """
    # Display names for the known operations; unknown keys are shown as-is.
    display_names = {"deleted": "删除", "merged": "合并", "filtered": "过滤"}
    divider = "=" * 50

    print("\n" + divider)
    print(f"✅ 处理完成! 共处理 {total_json_files} 个JSON文件")
    print(f" - 修改文件: {processed_files}")
    print(f" - 未修改文件: {skipped_files}")
    print(divider)

    # Per-file breakdown, accumulating the grand totals along the way.
    grand_totals = defaultdict(int)
    for json_name, per_file in stats.items():
        print(f"\n📄 文件: {json_name}")
        for operation, amount in per_file.items():
            label = display_names.get(operation, operation)
            print(f" - {label}: {amount}个标签")
            grand_totals[operation] += amount

    if not grand_totals:
        print("\nℹ️ 没有标签被修改")
    else:
        print("\n📊 总计操作:")
        for operation, amount in grand_totals.items():
            label = display_names.get(operation, operation)
            print(f" - {label}: {amount}个标签")

    print(f"\n📁 输出目录: {output_dir}")
# Script entry point: show the active configuration, process the dataset and
# print the summary report.
if __name__ == "__main__":
    print("=" * 50)
    print("🛠️ 开始处理LabelMe数据集")
    print(f"📂 输入目录: {input_dir}")
    print("=" * 50 + "\n")

    print("🔧 配置信息:")
    print(f" - 合并规则: {merge_dict or '无'}")
    print(f" - 删除标签: {labels_to_delete or '无'}")
    # Only None means "keep all labels"; an empty set filters out every
    # shape, so it must not be reported as "all labels".
    keep_display = "所有标签" if labels_to_keep is None else labels_to_keep
    print(f" - 保留标签: {keep_display}")
    print()

    processed_files, skipped_files, total_json_files, stats = process_labelme_files(
        input_dir, output_dir
    )
    print_summary(processed_files, skipped_files, total_json_files, stats)