数据收集 标注
roLabelImg
仿照cornell数据集(不需要标注负向标签,只需要标注正向标签即可)
标注矩形的抓取框
数据增强代码如下:
# -*- coding: utf-8 -*-
import torch
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
from torchvision import transforms
import numpy as np
import matplotlib.pyplot as plt
import os
import random
import cv2
from PIL import Image, ImageEnhance, ImageFilter
# Seed every random source the augmentations draw from. The augmentation
# code samples from np.random and torch, so seeding only the stdlib
# ``random`` module would leave the output non-reproducible.
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)
class DataAugmentationOnDetection:
    """Image augmentations for grasp-rectangle detection data.

    Geometric methods take a PIL image plus a ``boxes`` tensor and return
    both. ``boxes`` holds one rectangle per row as flattened (x, y) pixel
    pairs, i.e. x values at even positions and y values at odd positions.

    Photometric methods come in two flavours: the "tensor" group works on
    image tensors whose values are clamped to [0, 1] (presumably CHW floats
    as produced by ``transforms.ToTensor`` -- confirm against the caller),
    and the "PIL" group takes and returns PIL images.
    """

    def __init__(self):
        super().__init__()

    # ------------------------------------------------------
    # Helpers
    # ------------------------------------------------------
    @staticmethod
    def _scale_boxes(boxes, scale_x, scale_y):
        """Return a copy of ``boxes`` with x scaled by scale_x and y by scale_y."""
        if len(boxes) == 0:
            return boxes
        scaled = boxes.clone() if boxes.is_floating_point() else boxes.float()
        scaled[..., 0::2] *= scale_x
        scaled[..., 1::2] *= scale_y
        return scaled

    def _resize_with_boxes(self, image, boxes, new_size):
        """Resize ``image`` to ``new_size`` (w, h) and scale ``boxes`` to match."""
        old_w, old_h = image.size[0:2]
        new_w, new_h = new_size
        resized = image.resize((new_w, new_h), Image.BILINEAR)
        return resized, self._scale_boxes(boxes, new_w / old_w, new_h / old_h)

    # ------------------------------------------------------
    # Geometric transforms: img is a PIL image (Image.open(path))
    # ------------------------------------------------------
    def resize_keep_ratio(self, image, boxes, target_size):
        """Resize so the longest side equals ``target_size``, keeping aspect ratio.

        Args:
            image: PIL image.
            boxes: Tensor of rectangles in pixel coordinates.
            target_size: int, length of the longest side after resizing.

        Returns:
            (resized image, boxes scaled by the same factors). The boxes are
            pixel coordinates, so they must follow the image scale.
        """
        old_size = image.size[0:2]
        # The smaller of the two per-axis ratios keeps both sides <= target_size.
        ratio = min(float(target_size) / side for side in old_size)
        new_size = tuple(int(side * ratio) for side in old_size)
        return self._resize_with_boxes(image, boxes, new_size)

    def resizeDown_keep_ratio(self, image, boxes, target_size):
        """Like ``resize_keep_ratio`` but never enlarges the image."""
        old_size = image.size[0:2]
        ratio = min(float(target_size) / side for side in old_size)
        ratio = min(ratio, 1)  # cap at 1 so small images keep their size
        new_size = tuple(int(side * ratio) for side in old_size)
        return self._resize_with_boxes(image, boxes, new_size)

    def resize(self, img, boxes, size):
        """Resize to ``size`` x ``size`` (aspect ratio not kept) and scale boxes."""
        return self._resize_with_boxes(img, boxes, (size, size))

    def random_flip_horizon(self, img, boxes, h_rate=1):
        """Flip horizontally with probability ``h_rate``.

        ``boxes`` is updated in place (x -> width - x) and also returned.
        """
        if np.random.random() < h_rate:
            img = transforms.RandomHorizontalFlip(p=1)(img)
            if len(boxes) > 0:
                width, _height = img.size
                # Only the x coordinates (even positions) are mirrored.
                boxes[..., 0::2] = width - boxes[..., 0::2]
        return img, boxes

    def random_flip_vertical(self, img, boxes, v_rate=1):
        """Flip vertically with probability ``v_rate``.

        ``boxes`` is updated in place (y -> height - y) and also returned.
        """
        if np.random.random() < v_rate:
            img = transforms.RandomVerticalFlip(p=1)(img)
            if len(boxes) > 0:
                _width, height = img.size
                # Only the y coordinates (odd positions) are mirrored.
                boxes[..., 1::2] = height - boxes[..., 1::2]
        return img, boxes

    def center_crop(self, img, boxes, target_size=None):
        """Crop the central square and optionally resize it.

        Args:
            img: PIL image.
            boxes: Tensor of rectangles in pixel coordinates.
            target_size: if truthy, the square crop is resized to
                ``target_size`` x ``target_size``.

        Returns:
            (cropped image, boxes). Rectangles with any corner outside the
            crop are dropped; the rest are shifted into crop coordinates and
            scaled together with the image. An empty ``torch.tensor([])`` is
            returned when no rectangle survives.
        """
        w, h = img.size
        size = min(w, h)
        left = (w - size) // 2
        top = (h - size) // 2

        if len(boxes) > 0:
            shifted = boxes.clone() if boxes.is_floating_point() else boxes.float()
            shifted[..., 0::2] -= left
            shifted[..., 1::2] -= top
            # A rectangle is kept only if every coordinate lies inside the crop.
            inside = ((shifted >= 0) & (shifted < size)).all(dim=-1)
            kept = shifted[inside]
            boxes = kept if len(kept) > 0 else torch.tensor([])

        # Crop with the same offsets used for the boxes so image and labels
        # cannot drift apart by a pixel when the margin is odd.
        img = img.crop((left, top, left + size, top + size))

        if target_size:
            scale = target_size / size
            img = img.resize((target_size, target_size), Image.BILINEAR)
            boxes = self._scale_boxes(boxes, scale, scale)
        return img, boxes

    # ------------------------------------------------------
    # Photometric transforms on tensors (values clamped to [0, 1]).
    # None of these modify the tensor that was passed in.
    # ------------------------------------------------------
    def random_bright(self, img, u=120, p=1):
        """With probability ``p`` add a random offset in [-u/255, u/255]."""
        if np.random.random() < p:
            alpha = np.random.uniform(-u, u) / 255
            img = (img + alpha).clamp(min=0.0, max=1.0)
        return img

    def random_contrast(self, img, lower=0.5, upper=1.5, p=1):
        """With probability ``p`` multiply by a random gain in [lower, upper]."""
        if np.random.random() < p:
            alpha = np.random.uniform(lower, upper)
            img = (img * alpha).clamp(min=0, max=1.0)
        return img

    def random_saturation(self, img, lower=0.5, upper=1.5, p=1):
        """With probability ``p`` scale colour saturation by a random factor.

        Each pixel is blended between its grey value and its original
        colour, so a factor of 1 leaves the image unchanged and grey pixels
        stay grey. Scaling channel 1 directly would only be a saturation
        change for HSV data; on an RGB tensor it tints the image green.
        Tensors with fewer than three channels are returned untouched; any
        channel past the third (e.g. alpha) is left as is.
        """
        if np.random.random() < p:
            alpha = np.random.uniform(lower, upper)
            if img.dim() == 3 and img.shape[0] >= 3:
                weights = torch.tensor(
                    [0.299, 0.587, 0.114], dtype=img.dtype, device=img.device
                ).view(3, 1, 1)
                out = img.clone()
                rgb = out[:3]
                gray = (rgb * weights).sum(dim=0, keepdim=True)
                out[:3] = (gray + alpha * (rgb - gray)).clamp(min=0, max=1.0)
                img = out
        return img

    def add_gasuss_noise(self, img, mean=0, std=0.1):
        """Add Gaussian noise N(mean, std) to every element."""
        noise = torch.normal(mean, std, img.shape)
        return (img + noise).clamp(min=0, max=1.0)

    def add_salt_noise(self, img):
        """Set a random 10%-30% of the elements to 1.0."""
        noise = torch.rand(img.shape)
        alpha = np.random.random() / 5 + 0.7  # threshold in [0.7, 0.9)
        out = img.clone()
        out[noise > alpha] = 1.0
        return out

    def add_pepper_noise(self, img):
        """Set a random 10%-30% of the elements to 0."""
        noise = torch.rand(img.shape)
        alpha = np.random.random() / 5 + 0.7  # threshold in [0.7, 0.9)
        out = img.clone()
        out[noise > alpha] = 0
        return out

    # ------------------------------------------------------
    # Photometric transforms on PIL images
    # ------------------------------------------------------
    def random_hsv(self, img, hgain=0.5, sgain=0.5, vgain=0.5):
        """Randomly jitter hue, saturation and value of an RGB image.

        The image is converted with ``cv2.COLOR_RGB2HSV`` as float32, where
        hue is expressed in degrees on a 0-360 circle, so the hue must wrap
        at 360 (wrapping at 180 would fold half of the colour circle onto
        the other half).
        """
        img = np.array(img).astype(np.float32) / 255.0
        h, s, v = cv2.split(cv2.cvtColor(img, cv2.COLOR_RGB2HSV))
        h = (h + np.random.uniform(-hgain, hgain) * 180) % 360
        s = s * (1 + np.random.uniform(-sgain, sgain))
        v = v * (1 + np.random.uniform(-vgain, vgain))
        # Saturation and value must stay inside [0, 1] before converting back.
        s = np.clip(s, 0, 1)
        v = np.clip(v, 0, 1)
        img = cv2.cvtColor(cv2.merge([h, s, v]), cv2.COLOR_HSV2RGB)
        img = (img * 255).astype(np.uint8)
        return Image.fromarray(img)

    def random_blur(self, img):
        """Apply one randomly chosen blur: Gaussian, horizontal motion or box."""
        blur_type = np.random.choice(['gaussian', 'motion', 'average'])
        if blur_type == 'gaussian':
            radius = np.random.uniform(0.5, 2.0)
            return img.filter(ImageFilter.GaussianBlur(radius=radius))
        elif blur_type == 'motion':
            # Only 3x3 and 5x5 kernels are offered here.
            valid_sizes = [3, 5]
            size = int(np.random.choice(valid_sizes))
            # A normalised row of ones in the middle smears horizontally.
            kernel = np.zeros((size, size), dtype=np.float32)
            kernel[size // 2, :] = np.ones(size)
            kernel /= size
            print(f"应用运动模糊,核大小: {size}x{size}")
            return img.filter(ImageFilter.Kernel((size, size), kernel.flatten().tolist()))
        else:  # average blur
            size = int(np.random.randint(2, 5))
            return img.filter(ImageFilter.BoxBlur(radius=size))

    def add_fog(self, img, density_range=(0.2, 0.7)):
        """Blend per-pixel random fog of brightness 0.8 into the image."""
        img = np.array(img).astype(np.float32) / 255.0
        height, width = img.shape[:2]
        density = np.random.uniform(*density_range)
        fog = np.random.rand(height, width) * density
        if img.ndim == 3:
            # Add a channel axis so the fog broadcasts over colour channels.
            fog = np.expand_dims(fog, axis=2)
        img = img * (1 - fog) + fog * 0.8  # 0.8 is the fog base brightness
        img = np.clip(img, 0, 1) * 255
        return Image.fromarray(img.astype(np.uint8))

    def add_rain(self, img, intensity='light'):
        """Draw vertical rain streaks; any intensity other than 'light' is heavy."""
        img = np.array(img)
        height, width = img.shape[:2]
        if intensity == 'light':
            rain_drops = np.random.randint(100, 300)
            drop_length = np.random.randint(5, 10)
            drop_width = 1
            color = (200, 200, 200)
        else:  # heavy
            rain_drops = np.random.randint(300, 800)
            drop_length = np.random.randint(10, 20)
            drop_width = 2
            color = (180, 180, 180)
        for _ in range(rain_drops):
            x = int(np.random.randint(0, width))
            y = int(np.random.randint(0, height))
            cv2.line(img, (x, y), (x, y + int(drop_length)), color, drop_width)
        return Image.fromarray(img)

    def random_cutout(self, img, max_cuts=5, max_size=0.1):
        """Cover 1..max_cuts random rectangles with flat grey or noise.

        ``max_size`` is the largest rectangle side as a fraction of the
        longer image side.
        """
        img = np.array(img)
        height, width = img.shape[:2]
        # randint(5, n) needs n > 5; small images would otherwise raise.
        max_pixel_size = max(6, int(max(width, height) * max_size))
        for _ in range(np.random.randint(1, max_cuts + 1)):
            x = np.random.randint(0, width)
            y = np.random.randint(0, height)
            w = np.random.randint(5, max_pixel_size)
            h = np.random.randint(5, max_pixel_size)
            # Clip the rectangle to the image bounds.
            x1 = max(0, x - w // 2)
            y1 = max(0, y - h // 2)
            x2 = min(width, x + w // 2)
            y2 = min(height, y + h // 2)
            if np.random.random() < 0.7:
                img[y1:y2, x1:x2] = np.random.randint(64, 192)
            else:
                # Match the patch shape so any channel count works.
                patch_shape = img[y1:y2, x1:x2].shape
                img[y1:y2, x1:x2] = np.random.randint(0, 256, patch_shape, dtype=np.uint8)
        return Image.fromarray(img)
def plot_pics(img, boxes):
    """Show ``img`` with every rectangle in ``boxes`` outlined in red.

    ``img`` is a PIL image; ``boxes`` is a tensor with one rectangle per
    row, stored as flattened (x, y) corner pairs.
    """
    plt.imshow(img)
    for rect in boxes:
        corners = rect.reshape(-1, 2)
        xs = corners[:, 0].tolist()
        ys = corners[:, 1].tolist()
        # Repeat the first corner so the outline is closed.
        plt.plot(xs + xs[:1], ys + ys[:1], 'r-', linewidth=2)
    plt.show()
def get_image_list(image_path):
    """Return the file names of all PNG images found under ``image_path``.

    The directory tree is walked recursively and the extension check is
    case-insensitive. Only bare file names are returned, without the
    directory they were found in.
    """
    return [
        file_name
        for _root, _sub_dirs, file_names in os.walk(image_path)
        for file_name in file_names
        if file_name.lower().endswith('.png')
    ]
def _read_grasp_rectangles(label_file):
    """Parse one label file into a list of rectangles (8 floats each).

    Every rectangle is four consecutive "x y" lines. Blank lines are
    ignored and an incomplete trailing group is dropped. A missing or
    empty file yields an empty list.
    """
    if not os.path.exists(label_file) or os.path.getsize(label_file) == 0:
        return []
    with open(label_file, 'r', encoding='utf-8') as f:
        lines = [line.strip() for line in f if line.strip()]

    rectangles = []
    # Stop early enough that every group has all four corner lines.
    for start in range(0, len(lines) - 3, 4):
        coords = []
        for line in lines[start:start + 4]:
            x, y = line.split()
            coords.extend((float(x), float(y)))
        rectangles.append(coords)
    return rectangles


def get_label_files(label_path, image_name):
    """Load the positive and negative grasp rectangles for one image.

    Args:
        label_path: directory containing the label files.
        image_name: image file name. Its last five characters are dropped
            to form the label stem (e.g. the "r.png" of
            "pcd20250527_150433r.png"), so "<stem>cpos.txt" and
            "<stem>cneg.txt" are looked up.

    Returns:
        (pos_boxes, neg_boxes): tensors with one rectangle per row as
        [x1, y1, x2, y2, x3, y3, x4, y4]; ``torch.tensor([])`` when the
        corresponding file is missing, empty or has no complete rectangle.
    """
    # Slicing the suffix directly also works for names without an underscore.
    stem = image_name[:-5]
    pos_data = _read_grasp_rectangles(os.path.join(label_path, f"{stem}cpos.txt"))
    neg_data = _read_grasp_rectangles(os.path.join(label_path, f"{stem}cneg.txt"))
    return torch.tensor(pos_data) if pos_data else torch.tensor([]), \
        torch.tensor(neg_data) if neg_data else torch.tensor([])
def modify_filename(filename, prefix):
    """Insert ``prefix`` right after the first underscore-delimited segment.

    The text before the first "_" (or the whole name when there is no
    underscore) gets ``prefix`` appended; the rest of the name is kept.
    """
    head, separator, remainder = filename.partition('_')
    return head + prefix + separator + remainder
def save_labels(boxes, save_path, image_name, prefix, label_type):
    """Write ``boxes`` to a label file under ``save_path``.

    The file name is the prefixed image name with its last five characters
    replaced by "c<label_type>.txt" (``label_type`` is 'pos' or 'neg').
    Each rectangle corner is written as one "x y" line with six decimals;
    an empty ``boxes`` produces an empty file.
    """
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    renamed = modify_filename(image_name, prefix)
    target = os.path.join(save_path, renamed[:-5] + f"c{label_type}.txt")

    with open(target, 'w', encoding='utf-8') as f:
        if len(boxes) > 0:
            f.writelines(
                f"{x:.6f} {y:.6f}\n"
                for rect in boxes
                for x, y in rect.reshape(-1, 2)
            )
def save_depth(image_name, save_path, prefix, depth_folder=r"F:\XionganRoboticArm\demo\Grasp-Anything-main\data\dataset\dataset1\dataset\\1"):
    """Copy the depth image that belongs to a colour image into the output tree.

    Args:
        image_name: colour image file name (e.g. pcd20250527_150433r.png).
        save_path: output root; the copy goes to "<save_path>/depth_images".
        prefix: augmentation prefix applied to the copied file name
            (e.g. "fh_").
        depth_folder: directory holding the depth images; when falsy the
            directory part of ``image_name`` is used instead.

    Returns:
        (success, message). Failures are reported through the message and
        never raised.

    NOTE(review): the depth image is copied as is; no augmentation is
    applied to it here.
    """
    depth_images_path = os.path.join(save_path, "depth_images")
    os.makedirs(depth_images_path, exist_ok=True)
    try:
        # Strip the real extension, then swap a trailing 'r' for 'd'
        # (or append 'd' when the name carries no 'r' marker).
        stem, _ext = os.path.splitext(image_name)
        if stem.endswith('r'):
            depth_stem = stem[:-1] + 'd'
        else:
            depth_stem = stem + 'd'
        depth_original_name = f"{depth_stem}.tiff"

        source_dir = depth_folder if depth_folder else os.path.dirname(image_name)
        original_depth_path = os.path.join(source_dir, depth_original_name)
        if not os.path.exists(original_depth_path):
            return False, f"警告: 未找到深度图 {original_depth_path}"

        new_depth_name = modify_filename(depth_original_name, prefix)
        # The context manager releases the file handle once the copy is saved.
        with Image.open(original_depth_path) as depth_img:
            depth_img.save(os.path.join(depth_images_path, new_depth_name))
        return True, f"已保存深度图: {new_depth_name}"
    except Exception as e:
        return False, f"保存深度图失败: {e}"
def save_result(img, pos_boxes, neg_boxes, save_path, prefix, image_name):
    """Save one augmented sample: image, both label files and the depth image.

    ``img`` must be a PIL image. Output goes to "<save_path>/images" and
    "<save_path>/labels"; the depth image is handled by ``save_depth``.
    Any failure is printed and swallowed.
    """
    images_path = os.path.join(save_path, "images")
    labels_path = os.path.join(save_path, "labels")
    for folder in (images_path, labels_path):
        if not os.path.exists(folder):
            os.makedirs(folder)

    try:
        # The image is stored under its prefixed name.
        img.save(os.path.join(images_path, modify_filename(image_name, prefix)))
        # Positive labels first, then negative labels.
        for label_boxes, label_type in ((pos_boxes, 'pos'), (neg_boxes, 'neg')):
            save_labels(label_boxes, labels_path, image_name, prefix, label_type)
        _depth_ok, depth_msg = save_depth(image_name, save_path, prefix)
        print(f"INFO: {depth_msg}")
    except Exception as e:
        print(f"ERROR: 保存 {image_name} 失败: {e}")
def runAugumentation(image_path, save_path):
    """Run every augmentation on each PNG under ``image_path``.

    Images and their label files are read from the same directory. For
    each image one augmented sample per augmentation is written through
    ``save_result`` with a distinguishing file-name prefix.

    NOTE(review): ``save_result`` stores the original depth image for every
    sample, including the flipped ones -- confirm that is intended.
    """
    # Built once; none of these depend on the image being processed.
    augmenter = DataAugmentationOnDetection()
    to_tensor = transforms.ToTensor()
    to_image = transforms.ToPILImage()

    # Geometric augmentations: image and boxes change together.
    # center_crop (prefix "cc_") is available on the augmenter but not run.
    flip_ops = [
        ("fh_", augmenter.random_flip_horizon),
        ("fv_", augmenter.random_flip_vertical),
    ]
    # Photometric augmentations on tensors: boxes are unchanged.
    tensor_ops = [
        ("rb_", augmenter.random_bright),
        ("rc_", augmenter.random_contrast),
        ("rs_", augmenter.random_saturation),
        ("gn_", augmenter.add_gasuss_noise),
        ("sn_", augmenter.add_salt_noise),
        ("pn_", augmenter.add_pepper_noise),
    ]
    # Photometric augmentations on PIL images: boxes are unchanged.
    image_ops = [
        ("rhsv_", augmenter.random_hsv),
        ("rblur_", augmenter.random_blur),
        ("rfog_", augmenter.add_fog),
        ("rrain_light_", lambda im: augmenter.add_rain(im, intensity='light')),
        ("rrain_heavy_", lambda im: augmenter.add_rain(im, intensity='heavy')),
        ("rcutout_", augmenter.random_cutout),
    ]

    for image_name in get_image_list(image_path):
        print("处理中: " + image_name)
        # Load fully and close the file right away. Converting to RGB keeps
        # the 3-channel augmentations working on RGBA, palette and grey PNGs.
        with Image.open(os.path.join(image_path, image_name)) as source:
            img = source.convert('RGB')
        pos_boxes, neg_boxes = get_label_files(image_path, image_name)

        for prefix, flip in flip_ops:
            # The flips edit boxes in place, so they get clones.
            t_img, t_pos_boxes = flip(img.copy(), pos_boxes.clone())
            _, t_neg_boxes = flip(img.copy(), neg_boxes.clone())
            save_result(t_img, t_pos_boxes, t_neg_boxes, save_path, prefix, image_name)

        img_tensor = to_tensor(img)
        for prefix, op in tensor_ops:
            t_img = to_image(op(img_tensor.clone()))
            save_result(t_img, pos_boxes, neg_boxes, save_path, prefix, image_name)

        for prefix, op in image_ops:
            t_img = op(img.copy())
            save_result(t_img, pos_boxes, neg_boxes, save_path, prefix, image_name)

        print("完成: " + image_name)
if __name__ == '__main__':
    # Directory holding the source images and their label files
    data_path = r"F:\dataset\1"
    # Directory the augmented dataset is written to
    save_path = r"F:\dataset\aug"
    # Run the augmentation pipeline
    runAugumentation(data_path, save_path)
注意:数据增强完成后,要检查生成的数据格式是否与 Cornell 数据集的格式一致;如果不一致,训练效果会变差。
train
code
git clone https://github.com/Fsoft-AIC/Grasp-Anything 到本地
参数配置
运行train_network.py
result
![]() |
![]() |
![]() |