VisDrone2019 转化为COCO格式

这篇博客介绍了如何使用Python将VisDrone2019数据集转换为COCO格式,以适应目标检测任务。作者提供了详细的代码,包括读取标注文件、处理边界框坐标、创建COCO格式的类别信息等步骤,并且分别处理训练集和验证集。转换后的数据集可用于训练和评估深度学习目标检测模型。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

VisDrone2019 转化为COCO格式

 

为了实验需要,需要将VisDrone2019数据集转化为COCO格式,将自己的转化代码公布,希望帮到相同研究方向的人。

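你的VisDrone2019解压后的目录应该如下所示(训练集和验证集目录下都需要有 `annotations` 和 `images` 两个文件夹):

```
<数据集根目录>/
├── VisDrone2019-DET-train/
│   ├── annotations/
│   └── images/
└── VisDrone2019-DET-val/
    ├── annotations/
    └── images/
```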

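调用 `convert_to_cocodetection(dir, output_dir)` 时,第一个参数 `dir` 填写上述数据集根目录的路径,第二个参数 `output_dir` 填写输出目录的路径;转换结果会保存为该目录下的 `VisDrone2019-DET_train_coco.json` 和 `VisDrone2019-DET_val_coco.json`。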

只添加了检测训练必要的数据,COCO格式多余的数据都设为空

import os
import cv2
from tqdm import tqdm
import json


def convert_to_cocodetection(dir, output_dir):
    """Convert the VisDrone2019-DET train and val splits to COCO-style JSON.

    For each split, every ``.txt`` file under
    ``<dir>/VisDrone2019-DET-<split>/annotations`` is paired with the ``.jpg``
    of the same stem under ``.../images``, and the result is written to
    ``<output_dir>/VisDrone2019-DET_<split>_coco.json``.

    Only the fields needed for detection training are filled in:
    ``segmentation`` is always empty and ``iscrowd`` is always 0.

    Args:
        dir: Root directory containing ``VisDrone2019-DET-train`` and
            ``VisDrone2019-DET-val``.
        output_dir: Directory that receives the two JSON files. It is created
            if it does not exist.

    Raises:
        OSError: If an image belonging to an annotation file cannot be read.
        ValueError: If an annotation line has a non-integer field or fewer
            than six fields.
    """
    categories = [
        {"id": 0, "name": "ignored regions"},
        {"id": 1, "name": "pedestrian"},
        {"id": 2, "name": "people"},
        {"id": 3, "name": "bicycle"},
        {"id": 4, "name": "car"},
        {"id": 5, "name": "van"},
        {"id": 6, "name": "truck"},
        {"id": 7, "name": "tricycle"},
        {"id": 8, "name": "awning-tricycle"},
        {"id": 9, "name": "bus"},
        {"id": 10, "name": "motor"},
        {"id": 11, "name": "others"},
    ]
    split_dirs = (
        ("train", os.path.join(dir, "VisDrone2019-DET-train")),
        ("val", os.path.join(dir, "VisDrone2019-DET-val")),
    )

    # Create the destination up front so a bad path fails before the long
    # pass over the dataset instead of after it. An empty string is skipped
    # because os.makedirs("") raises.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    id_num = 0  # annotation ids keep counting across both splits
    for mode, split_dir in split_dirs:
        annotations_path = os.path.join(split_dir, "annotations")
        images_path = os.path.join(split_dir, "images")
        images = []
        annotations = []
        print(f"start loading {mode} data...")

        # os.listdir() order is arbitrary; sorting makes the assigned
        # annotation ids reproducible between runs. Non-.txt entries are
        # ignored because they cannot be parsed as annotation files.
        ann_files = sorted(
            entry for entry in os.listdir(annotations_path)
            if entry.endswith(".txt")
        )
        for ann_file in tqdm(ann_files):
            name = os.path.splitext(ann_file)[0]
            file_name = name + ".jpg"
            image_path = os.path.join(images_path, file_name)

            img = cv2.imread(image_path)
            if img is None:
                # cv2.imread reports failure by returning None, not raising.
                raise OSError(f"cannot read image: {image_path}")
            height, width = img.shape[:2]

            # NOTE: the image id is the file-name stem (a string), exactly as
            # this converter has always written it. The COCO format itself
            # declares ids as integers, so consumers that insist on ints need
            # a remapping step.
            images.append({
                "file_name": file_name,
                "height": height,
                "width": width,
                "id": name,
            })

            with open(os.path.join(annotations_path, ann_file), "r") as f:
                for raw_line in f:
                    # Some annotation lines carry a trailing comma.
                    line = raw_line.strip().rstrip(",")
                    if not line:
                        continue
                    fields = [int(value) for value in line.split(",")]
                    if len(fields) < 6:
                        raise ValueError(
                            f"malformed annotation line in {ann_file}: "
                            f"{raw_line!r}"
                        )
                    bbox_xywh = fields[:4]
                    annotations.append({
                        "image_id": name,
                        "score": fields[4],
                        "bbox": bbox_xywh,
                        "category_id": fields[5],
                        "id": id_num,
                        "iscrowd": 0,
                        "segmentation": [],
                        "area": bbox_xywh[2] * bbox_xywh[3],
                    })
                    id_num += 1

        dataset_dict = {
            "images": images,
            "annotations": annotations,
            "categories": categories,
        }
        with open(f'{output_dir}/VisDrone2019-DET_{mode}_coco.json', 'w') as json_file:
            json.dump(dataset_dict, json_file)
    print("json file write done...")


def get_test_namelist(dir, out_dir):
    """Write the name of every entry in *dir* to ``<out_dir>/test.txt``.

    One name is written per line, with every occurrence of ``".txt"`` removed
    from it. Entries are written in sorted order so the output is the same on
    every run.

    Args:
        dir: Directory whose entries are listed.
        out_dir: Existing directory in which ``test.txt`` is created or
            overwritten.

    Returns:
        None
    """
    full_path = out_dir + "/" + "test.txt"
    # The context manager closes the file even if listing or writing fails.
    with open(full_path, 'w') as file:
        for name in tqdm(sorted(os.listdir(dir))):
            file.write(name.replace(".txt", "") + "\n")
    return None


def centerxywh_to_xyxy(boxes):
    """Convert a centre-format box to corner format.

    Args:
        boxes: Sequence holding center_x, center_y, width, height at
            indices 0-3.

    Returns:
        A list ``[x1, y1, x2, y2]`` with the top-left and bottom-right
        corners of the box.
    """
    center_x, center_y = boxes[0], boxes[1]
    half_w = boxes[2] / 2
    half_h = boxes[3] / 2
    return [
        center_x - half_w,
        center_y - half_h,
        center_x + half_w,
        center_y + half_h,
    ]


def centerxywh_to_topleftxywh(boxes):
    """Convert a centre-format box to top-left-format ``[x, y, w, h]``.

    Args:
        boxes: Sequence holding center_x, center_y, width, height at
            indices 0-3.

    Returns:
        A list ``[x, y, width, height]`` where ``(x, y)`` is the top-left
        corner; width and height are passed through unchanged.
    """
    box_w, box_h = boxes[2], boxes[3]
    left = boxes[0] - box_w / 2
    top = boxes[1] - box_h / 2
    return [left, top, box_w, box_h]


def clamp(coord, width, height):
    """Clip a 4-element box in place and return it.

    The first two entries are raised to 0 when negative; the third is lowered
    to *width* and the fourth to *height* when they exceed those bounds.
    Entries already inside their bound are not reassigned.

    Args:
        coord: Mutable sequence with at least four numeric entries.
        width: Upper bound for ``coord[2]``.
        height: Upper bound for ``coord[3]``.

    Returns:
        The same ``coord`` object that was passed in.
    """
    for idx in (0, 1):
        if coord[idx] < 0:
            coord[idx] = 0
    for idx, upper in ((2, width), (3, height)):
        if coord[idx] > upper:
            coord[idx] = upper
    return coord


if __name__ == '__main__':
    # Fill in both paths before running: the dataset root that holds the
    # VisDrone2019-DET-train / VisDrone2019-DET-val folders, and the directory
    # that should receive the generated JSON files.
    dataset_root = r""
    json_output_dir = r""
    convert_to_cocodetection(dataset_root, json_output_dir)

### 将VisDrone2019数据集转换为COCO格式

#### 数据集概述

VisDrone2019 是一个广泛应用于无人机视角下的目标检测和视频分析的数据集。将其转换成 COCO 格式有助于利用现有的工具和技术来处理这些图像和视频。

#### 转换流程详解

对于 VisDrone2019 的 DET 和 VID 部分,均需遵循特定的步骤来进行格式转换:

- **理解原始标签结构** 原始 VisDrone2019 数据集中每张图片对应多个标注框的信息被记录在一个文本文件里,每一行代表一个物体实例及其属性[^2]。
- **创建 JSON 文件模板** 创建符合 COCO API 规范的新 JSON 文件作为输出容器,此文件应包含 images, annotations 及 categories 字段定义[^3]。
- **编写映射逻辑** 编写 Python 或其他编程语言脚本来遍历原生 annotation files 并填充至上述构建好的 JSON 结构内;注意不同类别 ID 映射关系以及 bbox 参数调整[^4]。

```python
import json
from collections import defaultdict

def convert_visdrone_to_coco(visdrone_ann_file, coco_output_path):
    """
    Converts a single VisDrone annotation file to the COCO format.

    Args:
        visdrone_ann_file (str): Path of input VisDrone annotation txt file.
        coco_output_path (str): Output path for saving converted COCO formatted data as .json file.

    Returns:
        None
    """
    # Initialize dictionaries/lists required by COCO structure
    info = {"description": "Converted from VisDrone dataset"}
    licenses = []
    images = []
    annotations = []

    category_map = {
        'pedestrian': 1,
        'people': 2,
        ...
    }

    image_id_counter = 0
    ann_id_counter = 0

    with open(visdrone_ann_file) as f:
        lines = f.readlines()

        img_info = {}  # Placeholder for current processing image information

        for line in lines:
            parts = list(map(int, filter(None, line.strip().split(','))))

            if not any(parts[:8]):
                continue

            width, height = parts[-2:]
            filename = str(image_id_counter).zfill(7)+'.jpg'

            new_image_entry = dict(
                id=image_id_counter,
                width=width,
                height=height,
                file_name=f'images/{filename}'
            )
            images.append(new_image_entry)

            x_min, y_min, box_width, box_height, score, obj_category, truncation, occlusion = parts[:-2]

            area = float(box_width * box_height)
            segmentation = [[x_min, y_min, x_min + box_width, y_min, x_min + box_width, y_min + box_height, x_min, y_min + box_height]]

            anno_dict = dict(
                id=ann_id_counter,
                image_id=image_id_counter,
                category_id=obj_category,
                bbox=[float(x_min), float(y_min), float(box_width), float(box_height)],
                area=float(area),
                iscrowd=int(truncation>0 or occlusion>0),
                ignore=0,
                segmentations=segmentation
            )
            annotations.append(anno_dict)

            ann_id_counter += 1
            image_id_counter += 1

    final_json_structure = {'info': info,'licenses': licenses,'categories':[{'id':v,'name':k}for k,v in category_map.items()],'images':images,'annotations':annotations}

    with open(coco_output_path,"w")as outfile:
        json.dump(final_json_structure,outfile)

if __name__ == '__main__':
    convert_visdrone_to_coco('path/to/your/visdrone_annotation.txt', './output/coco_format.json')
```

这段代码展示了如何读取单个 VisDrone 注解文件并将之存储为 COCO 格式的 JSON 文件[^5]。
评论 13
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值