Grasp-Anything 2D平面抓取自己的数据集复现

蜀中廖化
已于 2025-07-04 19:20:44 修改
阅读量513
点赞数 5
CC 4.0 BY-SA版权
文章标签：平面机器人
于 2025-06-03 10:08:38 首次发布
本文链接：https://blog.csdn.net/qq_40725313/article/details/148392501
数据收集标注

使用 roLabelImg 工具进行标注。
标注格式仿照 Cornell 数据集（不需要标注负向标签，只需要标注正向标签即可）。
（原文此处为标注示例截图，图片未随文本保留。）
标注内容为矩形的抓取框。

数据增强代码如下：
# -*- coding: utf-8 -*-
import torch
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
from torchvision import transforms
import numpy as np
import matplotlib.pyplot as plt
import os
import random
import cv2
from PIL import Image, ImageEnhance, ImageFilter
# Seed every RNG the augmentations actually draw from (numpy and torch, not
# only the stdlib `random` module) so that repeated runs produce the same data.
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)


class DataAugmentationOnDetection:
    """Augmentations for images annotated with grasp rectangles.

    Conventions shared by the methods below:

    * PIL-based methods take the object returned by ``Image.open(path)``.
    * ``boxes`` is a tensor with one row per rectangle; each row is a flat
      ``x, y, x, y, ...`` list of corner points in absolute pixel
      coordinates (rows are handled through ``reshape(-1, 2)``).
    * Tensor-based methods take a float tensor with the channel on the first
      axis and values in [0, 1] (what ``transforms.ToTensor()`` produces).

    No method modifies its arguments; results are always returned.
    """

    def __init__(self):
        super().__init__()

    # ------------------------------------------------------
    # Box helpers
    # ------------------------------------------------------

    @staticmethod
    def _scale_boxes(boxes, scale_x, scale_y):
        """Return a copy of ``boxes`` with x multiplied by ``scale_x`` and y by ``scale_y``."""
        if len(boxes) == 0:
            return boxes
        points = boxes.reshape(len(boxes), -1, 2).clone()
        if not points.is_floating_point():
            # Integer coordinates cannot hold the scaled values.
            points = points.to(torch.float32)
        points[..., 0] *= scale_x
        points[..., 1] *= scale_y
        return points.reshape(boxes.shape)

    @staticmethod
    def _mirror_boxes(boxes, axis, extent):
        """Return a copy of ``boxes`` with coordinate ``axis`` (0 = x, 1 = y) replaced by ``extent - value``."""
        if len(boxes) == 0:
            return boxes
        points = boxes.reshape(len(boxes), -1, 2).clone()
        points[..., axis] = extent - points[..., axis]
        return points.reshape(boxes.shape)

    def _resize_by_ratio(self, image, boxes, ratio):
        """Resize ``image`` by ``ratio`` (both sides) and scale ``boxes`` by the factor actually applied."""
        old_w, old_h = image.size[0:2]
        new_w, new_h = int(old_w * ratio), int(old_h * ratio)
        resized = image.resize((new_w, new_h), Image.BILINEAR)
        # int() truncation makes the real factor differ slightly from `ratio`,
        # so derive the box scale from the final pixel size.
        return resized, self._scale_boxes(boxes, new_w / old_w, new_h / old_h)

    # ------------------------------------------------------
    # Geometric transforms: img is a PIL image, boxes a Tensor
    # ------------------------------------------------------

    def resize_keep_ratio(self, image, boxes, target_size):
        """Resize so the longest side equals ``target_size``, keeping the aspect ratio.

        Args:
            image: PIL image.
            boxes: Tensor of pixel-coordinate rectangles.
            target_size: int, length of the longest side after resizing.

        Returns:
            ``(resized_image, scaled_boxes)``.
        """
        old_size = image.size[0:2]
        # The smaller of the two per-axis ratios keeps both sides within target_size.
        ratio = min(float(target_size) / side for side in old_size)
        return self._resize_by_ratio(image, boxes, ratio)

    def resizeDown_keep_ratio(self, image, boxes, target_size):
        """Like ``resize_keep_ratio`` but never enlarges the image."""
        old_size = image.size[0:2]
        ratio = min(float(target_size) / side for side in old_size)
        ratio = min(ratio, 1)
        return self._resize_by_ratio(image, boxes, ratio)

    def resize(self, img, boxes, size):
        """Stretch the image to ``size`` x ``size`` and scale the boxes per axis.

        Args:
            img: PIL image.
            boxes: Tensor of pixel-coordinate rectangles.
            size: int, width and height of the result.

        Returns:
            ``(resized_image, scaled_boxes)``.
        """
        old_w, old_h = img.size
        resized = img.resize((size, size), Image.BILINEAR)
        return resized, self._scale_boxes(boxes, size / old_w, size / old_h)

    def random_flip_horizon(self, img, boxes, h_rate=1):
        """Flip horizontally with probability ``h_rate``; x becomes ``width - x``."""
        if np.random.random() >= h_rate:
            return img, boxes
        img = transforms.RandomHorizontalFlip(p=1)(img)
        width, _ = img.size
        return img, self._mirror_boxes(boxes, 0, width)

    def random_flip_vertical(self, img, boxes, v_rate=1):
        """Flip vertically with probability ``v_rate``; y becomes ``height - y``."""
        if np.random.random() >= v_rate:
            return img, boxes
        img = transforms.RandomVerticalFlip(p=1)(img)
        _, height = img.size
        return img, self._mirror_boxes(boxes, 1, height)

    def center_crop(self, img, boxes, target_size=None):
        """Crop the central ``min(w, h)`` square and optionally resize it.

        A rectangle is kept only if every corner lies inside the crop;
        kept rectangles are shifted into crop coordinates and, when
        ``target_size`` is given, scaled together with the image.

        Args:
            img: PIL image.
            boxes: Tensor of pixel-coordinate rectangles.
            target_size: optional int; side length to resize the crop to.

        Returns:
            ``(cropped_image, boxes)``; ``boxes`` is ``torch.tensor([])``
            when no rectangle survives the crop.
        """
        w, h = img.size
        size = min(w, h)
        # The same floored offsets are used for the boxes and for the image so
        # the two cannot drift apart by a pixel on odd size differences.
        left = (w - size) // 2
        top = (h - size) // 2

        if len(boxes) > 0:
            offset = torch.tensor([left, top])
            kept = []
            for box in boxes:
                points = box.reshape(-1, 2) - offset
                inside = (points >= 0) & (points < size)
                if points.numel() > 0 and bool(inside.all()):
                    kept.append(points.reshape(-1))
            boxes = torch.stack(kept) if kept else torch.tensor([])

        img = img.crop((left, top, left + size, top + size))
        if target_size:
            img = img.resize((target_size, target_size), Image.BILINEAR)
            factor = target_size / size
            boxes = self._scale_boxes(boxes, factor, factor)
        return img, boxes

    # ------------------------------------------------------
    # Photometric transforms: img is a Tensor
    # ------------------------------------------------------

    def random_bright(self, img, u=120, p=1):
        """With probability ``p`` add a uniform offset in ``[-u, u] / 255`` and clamp to [0, 1]."""
        if np.random.random() < p:
            alpha = np.random.uniform(-u, u) / 255
            img = (img + alpha).clamp(min=0.0, max=1.0)
        return img

    def random_contrast(self, img, lower=0.5, upper=1.5, p=1):
        """With probability ``p`` multiply by a factor in ``[lower, upper]`` and clamp to [0, 1]."""
        if np.random.random() < p:
            alpha = np.random.uniform(lower, upper)
            img = (img * alpha).clamp(min=0, max=1.0)
        return img

    def random_saturation(self, img, lower=0.5, upper=1.5, p=1):
        """With probability ``p`` scale colour saturation by a factor in ``[lower, upper]``.

        The first three channels are treated as RGB and blended with their
        luminance: factor 0 gives a gray image, 1 leaves it unchanged.
        Inputs with fewer than three channels are returned as they are.
        """
        if np.random.random() < p:
            alpha = np.random.uniform(lower, upper)
            if img.shape[0] < 3:
                return img
            weight_shape = (3,) + (1,) * (img.dim() - 1)
            weights = torch.tensor(
                [0.299, 0.587, 0.114], dtype=img.dtype, device=img.device
            ).reshape(weight_shape)
            rgb = img[:3]
            gray = (rgb * weights).sum(dim=0, keepdim=True)
            out = img.clone()
            out[:3] = (gray + alpha * (rgb - gray)).clamp(min=0, max=1.0)
            img = out
        return img

    def add_gasuss_noise(self, img, mean=0, std=0.1):
        """Add Gaussian noise with the given ``mean`` and ``std`` and clamp to [0, 1]."""
        noise = torch.normal(mean, std, img.shape)
        return (img + noise).clamp(min=0, max=1.0)

    def add_salt_noise(self, img):
        """Set a random subset of elements to 1.0.

        Each element (per channel, not per pixel) is replaced when its
        uniform draw exceeds a threshold picked in [0.7, 0.9).
        """
        noise = torch.rand(img.shape)
        alpha = np.random.random() / 5 + 0.7
        out = img.clone()
        out[noise > alpha] = 1.0
        return out

    def add_pepper_noise(self, img):
        """Set a random subset of elements to 0; selection works as in ``add_salt_noise``."""
        noise = torch.rand(img.shape)
        alpha = np.random.random() / 5 + 0.7
        out = img.clone()
        out[noise > alpha] = 0
        return out

    # ------------------------------------------------------
    # Photometric transforms: img is a PIL image
    # ------------------------------------------------------

    def random_hsv(self, img, hgain=0.5, sgain=0.5, vgain=0.5):
        """Randomly shift hue and scale saturation and value.

        Args:
            img: PIL RGB image.
            hgain: maximum hue shift as a fraction of the full hue circle.
            sgain: saturation is multiplied by ``1 + U(-sgain, sgain)``.
            vgain: value is multiplied by ``1 + U(-vgain, vgain)``.

        Returns:
            A new PIL image.
        """
        img = np.array(img).astype(np.float32) / 255.0

        # For float32 input OpenCV returns H in [0, 360) and S, V in [0, 1],
        # so the hue arithmetic must wrap on 360, not on the 8-bit range 180.
        h, s, v = cv2.split(cv2.cvtColor(img, cv2.COLOR_RGB2HSV))
        h = (h + np.random.uniform(-hgain, hgain) * 360) % 360
        s = s * (1 + np.random.uniform(-sgain, sgain))
        v = v * (1 + np.random.uniform(-vgain, vgain))

        s = np.clip(s, 0, 1)
        v = np.clip(v, 0, 1)

        img = cv2.cvtColor(cv2.merge([h, s, v]), cv2.COLOR_HSV2RGB)
        img = (img * 255).astype(np.uint8)
        return Image.fromarray(img)

    def random_blur(self, img):
        """Apply one randomly chosen blur: Gaussian, horizontal motion or box."""
        blur_type = np.random.choice(['gaussian', 'motion', 'average'])

        if blur_type == 'gaussian':
            radius = np.random.uniform(0.5, 2.0)
            return img.filter(ImageFilter.GaussianBlur(radius=radius))

        if blur_type == 'motion':
            # Only 3x3 and 5x5 kernels are offered here.
            size = int(np.random.choice([3, 5]))

            # A single row of equal weights through the kernel centre blurs
            # along the horizontal direction; the weights sum to 1.
            kernel = np.zeros((size, size), dtype=np.float32)
            kernel[size // 2, :] = 1.0
            kernel /= size

            print(f"应用运动模糊，核大小: {size}x{size}")
            return img.filter(ImageFilter.Kernel((size, size), kernel.flatten().tolist()))

        # Box (average) blur with a radius of 2, 3 or 4.
        radius = int(np.random.randint(2, 5))
        return img.filter(ImageFilter.BoxBlur(radius=radius))

    def add_fog(self, img, density_range=(0.2, 0.7)):
        """Blend per-pixel random fog of brightness 0.8 into the image.

        Args:
            img: PIL image.
            density_range: ``(low, high)`` bounds for the maximum fog opacity.

        Returns:
            A new PIL image.
        """
        img = np.array(img).astype(np.float32) / 255.0
        height, width = img.shape[:2]

        density = np.random.uniform(*density_range)
        fog = np.random.rand(height, width) * density
        if img.ndim == 3:
            # Add a channel axis only for multi-channel images; a 2-D image
            # would otherwise be broadcast into the wrong shape.
            fog = fog[:, :, np.newaxis]

        img = img * (1 - fog) + fog * 0.8  # 0.8 is the base brightness of the fog
        img = np.clip(img, 0, 1) * 255
        return Image.fromarray(img.astype(np.uint8))

    def add_rain(self, img, intensity='light'):
        """Draw vertical gray streaks; ``intensity`` is ``'light'`` or anything else for heavy."""
        img = np.array(img)
        height, width = img.shape[:2]

        if intensity == 'light':
            rain_drops = np.random.randint(100, 300)
            drop_length = np.random.randint(5, 10)
            drop_width = 1
            color = (200, 200, 200)
        else:  # heavy
            rain_drops = np.random.randint(300, 800)
            drop_length = np.random.randint(10, 20)
            drop_width = 2
            color = (180, 180, 180)

        for _ in range(rain_drops):
            x = np.random.randint(0, width)
            y = np.random.randint(0, height)
            cv2.line(img, (x, y), (x, y + drop_length), color, drop_width)

        return Image.fromarray(img)

    def random_cutout(self, img, max_cuts=5, max_size=0.1):
        """Cover 1..``max_cuts`` random rectangles with flat gray or random noise.

        Args:
            img: PIL image.
            max_cuts: maximum number of rectangles.
            max_size: upper bound of a rectangle side as a fraction of the
                longer image side.

        Returns:
            A new PIL image.
        """
        img = np.array(img)
        height, width = img.shape[:2]
        # Sides are drawn from [5, bound); keep the bound above 5 so the draw
        # stays valid for small images.
        max_pixel_size = max(int(max(width, height) * max_size), 6)

        for _ in range(np.random.randint(1, max_cuts + 1)):
            x = np.random.randint(0, width)
            y = np.random.randint(0, height)
            w = np.random.randint(5, max_pixel_size)
            h = np.random.randint(5, max_pixel_size)

            # Clip the rectangle, centred on (x, y), to the image.
            x1 = max(0, x - w // 2)
            y1 = max(0, y - h // 2)
            x2 = min(width, x + w // 2)
            y2 = min(height, y + h // 2)

            if np.random.random() < 0.7:
                img[y1:y2, x1:x2] = np.random.randint(64, 192)
            else:
                # Match the patch shape so any channel count works.
                patch_shape = img[y1:y2, x1:x2].shape
                img[y1:y2, x1:x2] = np.random.randint(0, 256, patch_shape, dtype=np.uint8)

        return Image.fromarray(img)


def plot_pics(img, boxes):
    """Show ``img`` with every rectangle in ``boxes`` drawn as a closed red outline.

    ``img`` is whatever ``plt.imshow`` accepts (the callers pass a PIL image);
    ``boxes`` is a tensor whose rows are flat ``x, y`` corner lists.
    """
    plt.imshow(img)
    for box in boxes:
        corners = box.reshape(-1, 2)
        xs = corners[:, 0].tolist()
        ys = corners[:, 1].tolist()
        # Repeat the first corner so the outline is closed.
        plt.plot(xs + [xs[0]], ys + [ys[0]], 'r-', linewidth=2)
    plt.show()

def get_image_list(image_path):
    """Return the sorted names of the PNG files directly inside ``image_path``.

    Only bare file names are returned and callers join them onto
    ``image_path``, so sub-folders are deliberately not searched: a name
    found deeper in the tree could not be opened from the root folder.
    The extension check is case-insensitive. A missing folder yields ``[]``.
    """
    if not os.path.isdir(image_path):
        return []
    with os.scandir(image_path) as entries:
        return sorted(
            entry.name
            for entry in entries
            if entry.is_file() and entry.name.lower().endswith('.png')
        )

def _read_grasp_rectangles(label_file):
    """Parse one label file into a list of rectangles, each 8 floats (4 ``x y`` lines)."""
    if not os.path.exists(label_file) or os.path.getsize(label_file) == 0:
        return []
    with open(label_file, 'r', encoding='utf-8') as f:
        # Blank lines are dropped first so they cannot shift the 4-line grouping.
        rows = [line.split() for line in f if line.strip()]

    rectangles = []
    # Step through complete groups of four lines; an incomplete tail is ignored.
    for start in range(0, len(rows) - 3, 4):
        rectangle = []
        for x, y in rows[start:start + 4]:
            rectangle.extend((float(x), float(y)))
        rectangles.append(rectangle)
    return rectangles


def get_label_files(label_path, image_name):
    """Load the positive and negative grasp rectangles that belong to an image.

    The label files are looked up in ``label_path`` under the image name with
    its last five characters (e.g. ``r.png``) replaced by ``cpos.txt`` and
    ``cneg.txt``.

    Args:
        label_path: folder that contains the label files.
        image_name: image file name, e.g. ``pcd20250527_150433r.png``.

    Returns:
        ``(pos_boxes, neg_boxes)``: tensors with one row of 8 values per
        rectangle, or ``torch.tensor([])`` when a file is missing or empty.

    Raises:
        ValueError: if a non-blank line does not hold exactly two numbers.
    """
    stem = image_name[:-5]
    pos_data = _read_grasp_rectangles(os.path.join(label_path, f"{stem}cpos.txt"))
    neg_data = _read_grasp_rectangles(os.path.join(label_path, f"{stem}cneg.txt"))

    return torch.tensor(pos_data) if pos_data else torch.tensor([]), \
           torch.tensor(neg_data) if neg_data else torch.tensor([])

def modify_filename(filename, prefix):
    """Insert ``prefix`` right after the part of ``filename`` before its first ``_``.

    When the name has no underscore, ``prefix`` is simply appended.
    """
    head, separator, tail = filename.partition('_')
    return head + prefix + separator + tail

def save_labels(boxes, save_path, image_name, prefix, label_type):
    """Write grasp rectangles to a label file, one ``x y`` corner per line.

    The file name is the prefixed image name (see ``modify_filename``) with
    its last five characters replaced by ``c<label_type>.txt``. An empty
    ``boxes`` still produces an (empty) file.

    Args:
        boxes: Tensor with one row of flat ``x, y`` corners per rectangle.
        save_path: output folder; created when missing.
        image_name: name of the source image.
        prefix: augmentation tag inserted into the file name.
        label_type: ``'pos'`` or ``'neg'``.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_path, exist_ok=True)

    new_image_name = modify_filename(image_name, prefix)
    label_path = os.path.join(save_path, new_image_name[:-5] + f"c{label_type}.txt")

    lines = [
        f"{x:.6f} {y:.6f}\n"
        for box in boxes
        for x, y in box.reshape(-1, 2).tolist()
    ]
    with open(label_path, 'w', encoding='utf-8') as f:
        f.writelines(lines)

def save_depth(image_name, save_path, prefix, depth_folder=r"F:\XionganRoboticArm\demo\Grasp-Anything-main\data\dataset\dataset1\dataset\\1"):
    """Copy the depth map that belongs to a colour image into ``<save_path>/depth_images``.

    The depth file name is derived from ``image_name`` by dropping its last
    five characters (e.g. ``r.png``) and appending ``d.tiff``; if the
    remaining stem itself ends in ``r`` that ``r`` is replaced by ``d``.
    The copy is stored under the prefixed name (see ``modify_filename``).
    The pixels are written unchanged: no augmentation is applied here.

    Args:
        image_name: colour image file name, e.g. ``pcd20250527_150433r.png``.
        save_path: output root folder.
        prefix: augmentation tag inserted into the saved file name.
        depth_folder: folder holding the original depth maps. NOTE: the
            default is an absolute path on the original author's machine;
            pass the real folder explicitly. A falsy value falls back to
            ``os.path.dirname(image_name)``, which is the current working
            directory when ``image_name`` is a bare file name.

    Returns:
        ``(ok, message)``: ``ok`` is False when the depth map is missing or
        cannot be saved; ``message`` describes the outcome.
    """
    depth_images_path = os.path.join(save_path, "depth_images")
    os.makedirs(depth_images_path, exist_ok=True)

    try:
        base_name = image_name[:-5]
        if base_name.endswith('r'):
            depth_base_name = base_name[:-1] + 'd'
        else:
            depth_base_name = base_name + 'd'
        depth_original_name = f"{depth_base_name}.tiff"

        source_folder = depth_folder if depth_folder else os.path.dirname(image_name)
        original_depth_path = os.path.join(source_folder, depth_original_name)

        if not os.path.exists(original_depth_path):
            return False, f"警告: 未找到深度图 {original_depth_path}"

        new_depth_name = modify_filename(depth_original_name, prefix)
        # The context manager releases the source file handle after saving.
        with Image.open(original_depth_path) as depth_img:
            depth_img.save(os.path.join(depth_images_path, new_depth_name))
        return True, f"已保存深度图: {new_depth_name}"
    except Exception as e:
        # Best effort: a depth failure is reported to the caller, not raised.
        return False, f"保存深度图失败: {e}"


def save_result(img, pos_boxes, neg_boxes, save_path, prefix, image_name):
    """Store one augmented sample: image, positive/negative labels and depth map.

    The image goes to ``<save_path>/images`` and the labels to
    ``<save_path>/labels``; the depth map is handled by ``save_depth``.
    Any failure while saving is printed instead of raised.

    NOTE(review): ``save_depth`` copies the original depth map as it is, so
    for geometric augmentations the saved depth does not match ``img``.

    Args:
        img: PIL image to save.
        pos_boxes: Tensor of positive rectangles.
        neg_boxes: Tensor of negative rectangles.
        save_path: output root folder.
        prefix: augmentation tag inserted into every output file name.
        image_name: name of the source image.
    """
    images_path = os.path.join(save_path, "images")
    labels_path = os.path.join(save_path, "labels")
    for folder in (images_path, labels_path):
        if not os.path.exists(folder):
            os.makedirs(folder)

    try:
        img.save(os.path.join(images_path, modify_filename(image_name, prefix)))

        for boxes, label_type in ((pos_boxes, 'pos'), (neg_boxes, 'neg')):
            save_labels(boxes, labels_path, image_name, prefix, label_type)

        _, depth_msg = save_depth(image_name, save_path, prefix)
        print(f"INFO: {depth_msg}")

    except Exception as e:
        print(f"ERROR: 保存 {image_name} 失败: {e}")

def runAugumentation(image_path, save_path):
    """Generate every augmented variant of each PNG in ``image_path``.

    Images and their label files are read from the same folder. For each
    image the following variants are written through ``save_result``, each
    tagged with its own file-name prefix: horizontal/vertical flip,
    brightness, contrast, saturation, Gaussian/salt/pepper noise, HSV
    jitter, blur, fog, light and heavy rain, and cutout.
    ``DataAugmentationOnDetection.center_crop`` exists but is not applied.

    NOTE(review): ``save_result`` copies the unmodified depth map for every
    variant, including the two flips, so flipped colour images end up paired
    with unflipped depth — confirm whether depth is used downstream.

    Args:
        image_path: folder with the source images and label files.
        save_path: output root folder.
    """
    # These objects hold no per-image state, so build them once.
    augmenter = DataAugmentationOnDetection()
    to_tensor = transforms.ToTensor()
    to_image = transforms.ToPILImage()

    # Geometric steps: change the image and the boxes.
    flip_steps = (
        ("fh_", augmenter.random_flip_horizon),
        ("fv_", augmenter.random_flip_vertical),
    )
    # Photometric steps that work on a tensor; boxes stay as they are.
    tensor_steps = (
        ("rb_", augmenter.random_bright),
        ("rc_", augmenter.random_contrast),
        ("rs_", augmenter.random_saturation),
        ("gn_", augmenter.add_gasuss_noise),
        ("sn_", augmenter.add_salt_noise),
        ("pn_", augmenter.add_pepper_noise),
    )
    # Photometric steps that work on a PIL image; boxes stay as they are.
    image_steps = (
        ("rhsv_", augmenter.random_hsv),
        ("rblur_", augmenter.random_blur),
        ("rfog_", augmenter.add_fog),
        ("rrain_light_", lambda im: augmenter.add_rain(im, intensity='light')),
        ("rrain_heavy_", lambda im: augmenter.add_rain(im, intensity='heavy')),
        ("rcutout_", augmenter.random_cutout),
    )

    for image_name in get_image_list(image_path):
        print("处理中: " + image_name)
        # Copy the pixels out so the source file handle is closed right away.
        with Image.open(os.path.join(image_path, image_name)) as source:
            img = source.copy()
        pos_boxes, neg_boxes = get_label_files(image_path, image_name)

        for prefix, flip in flip_steps:
            # With the default rate of 1 the flip always happens, so the two
            # calls stay consistent for the positive and negative boxes.
            t_img, t_pos_boxes = flip(img.copy(), pos_boxes.clone())
            _, t_neg_boxes = flip(img.copy(), neg_boxes.clone())
            save_result(t_img, t_pos_boxes, t_neg_boxes, save_path, prefix, image_name)

        img_tensor = to_tensor(img)
        for prefix, step in tensor_steps:
            t_img = to_image(step(img_tensor.clone()))
            save_result(t_img, pos_boxes, neg_boxes, save_path, prefix, image_name)

        for prefix, step in image_steps:
            t_img = step(img.copy())
            save_result(t_img, pos_boxes, neg_boxes, save_path, prefix, image_name)

        print("完成:     " + image_name)


if __name__ == '__main__':
    # image_path holds the source images together with their label files;
    # save_path is the root folder that receives the augmented output.
    runAugumentation(
        image_path=r"F:\dataset\1",
        save_path=r"F:\dataset\aug",
    )