引言
- 之前写过一篇对目标检测数据集的增强方法(目标检测数据增强之旋转),只能旋转90和270度
- 这次无意中看到大佬写的旋转任意角度,检测框也能旋转任意角度的代码,特地在此记录一下
- 整理思路:首先计算旋转图像所用的旋转矩阵,然后将同一个旋转矩阵应用于检测框的四个坐标点。旋转后的四个点构成的仍然是紧贴物体的多边形,而不再是水平矩形框,因此这种方式适用于能够检测物体完整轮廓(多边形)的检测算法的数据增强,当然也可以用于文本检测的图像增强。
- PS:opencv 对旋转后四个角空白区域的填充,可以通过 `cv2.warpAffine()` 的 `borderMode` 和 `borderValue` 两个参数来准确控制填充方式和填充颜色,下面代码中用的是 `borderMode=cv2.BORDER_REFLECT`
opencv实现代码如下
import copy
import math
import urllib
import urllib.request

import cv2
import numpy as np
from matplotlib import pyplot as plt
def rotate_img_bbox(img, bboxes, angle=45, scale=1.):
    '''
    Rotate an image by an arbitrary angle and rotate its polygons with it.

    The output canvas is enlarged to the axis-aligned rectangle that encloses
    the rotated (and scaled) image, so no image content is cropped. The empty
    corners are filled with cv2.BORDER_REFLECT.

    Args:
        img (ndarray): image of shape (h, w, c) or (h, w).
        bboxes (list): polygons in pixel coordinates, one entry per box:
            [[[x0, y0], [x1, y1], [x2, y2], [x3, y3]], ...].
            Coordinates may be ints or floats; every vertex given is
            transformed (only the first two values of a vertex are used).
        angle: rotation angle in degrees, counter-clockwise.
        scale: isotropic scale factor, default 1.

    Returns:
        rot_img (ndarray): the rotated image.
        rot_bboxes (list): one polygon per input box, same vertex order as
            the input, as float coordinates in the rotated image:
            [[[x0, y0], [x1, y1], [x2, y2], [x3, y3]], ...].
    '''
    h, w = img.shape[:2]
    rangle = np.deg2rad(angle)

    # Width/height of the axis-aligned canvas enclosing the rotated image.
    nw = (abs(np.sin(rangle) * h) + abs(np.cos(rangle) * w)) * scale
    nh = (abs(np.cos(rangle) * h) + abs(np.sin(rangle) * w)) * scale

    # Rotation about the centre of the new canvas ...
    rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, scale)
    # ... then adjust the translation so that the centre of the source image
    # lands on the centre of the new canvas. The trailing 0 makes np.dot apply
    # only the linear (rotation/scale) part of the matrix to the offset.
    rot_move = np.dot(rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
    rot_mat[0, 2] += rot_move[0]
    rot_mat[1, 2] += rot_move[1]

    rot_img = cv2.warpAffine(
        img,
        rot_mat,
        (int(math.ceil(nw)), int(math.ceil(nh))),
        flags=cv2.INTER_LANCZOS4,
        borderMode=cv2.BORDER_REFLECT,
    )

    rot_bboxes = []
    for bbox in bboxes:
        # Keep coordinates as floats: casting to int before the transform
        # would silently truncate sub-pixel input coordinates.
        pts = np.asarray(bbox, dtype=np.float64)[:, :2]
        # Homogeneous coordinates (x, y, 1) so the 2x3 affine matrix applies
        # to all vertices in a single matrix product.
        homogeneous = np.hstack([pts, np.ones((pts.shape[0], 1))])
        rot_pts = homogeneous.dot(rot_mat.T)
        rot_bboxes.append(rot_pts.tolist())
    return rot_img, rot_bboxes
# Demo: rotate a sample image together with one quadrilateral box and save
# the images before and after rotation.
# The URL points directly at Wikimedia Commons (no intermediate proxy host).
url = 'https://upload.wikimedia.org/wikipedia/commons/8/8e/Yellow-headed_caracara_%28Milvago_chimachima%29_on_capybara_%28Hydrochoeris_hydrochaeris%29.JPG'

# Download the encoded image bytes; the context manager closes the response.
with urllib.request.urlopen(url) as resp:
    im = np.asarray(bytearray(resp.read()), dtype="uint8")

# cv2.imdecode returns None when the buffer cannot be decoded.
im = cv2.imdecode(im, cv2.IMREAD_COLOR)
if im is None:
    raise ValueError('failed to decode image downloaded from ' + url)
im = cv2.resize(im, (447, 298))

# Axis-aligned box written as a 4-point polygon, clockwise from top-left.
x_min, y_min = 90, 90
x_max, y_max = 380, 284
boxes = [[x_min, y_min], [x_max, y_min], [x_max, y_max], [x_min, y_max]]

# Draw on a copy so the image passed to rotate_img_bbox stays clean.
img = copy.deepcopy(im)
raw_plot_im = cv2.polylines(img, np.array([boxes]).astype(np.int32), True, (255, 0, 0), 2)
# cv2.imwrite expects BGR channel order, which is what imdecode produced,
# so the image is written as-is (no BGR -> RGB conversion before saving).
cv2.imwrite('raw_plot_box.jpg', raw_plot_im)

rot_img, rot_boxes = rotate_img_bbox(im, [boxes], angle=30, scale=1)
# Polyline vertices must be integer pixel coordinates.
plot_im = cv2.polylines(rot_img, np.array(rot_boxes).astype(np.int32), True, (255, 0, 0), 2)
cv2.imwrite('rotated_plot_im.jpg', plot_im)
结果对比图

imgaug中相关的代码实现
import imageio
import imgaug as ia
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage
from imgaug import augmenters as iaa
# Demo: the same kind of rotation done with imgaug on an axis-aligned box.
# Seed imgaug's global random state before building the augmenter.
ia.seed(1)

# Read the sample image and resize it to 298 x 447 (height x width).
image = ia.imresize_single_image(imageio.imread(r'1.jpg'), (298, 447))

# A single bounding box given as fractions of the image width and height.
box = BoundingBox(x1=0.2 * 447, x2=0.85 * 447, y1=0.3 * 298, y2=0.95 * 298)
bbs = BoundingBoxesOnImage([box], shape=image.shape)

# Show the image with the box before augmentation.
ia.imshow(bbs.draw_on_image(image, size=1))

# Augmentation pipeline with a single step: rotate by -30 degrees.
seq = iaa.Sequential([iaa.Rotate(-30)])

# Augment the image and its boxes together, then show and print the result.
image_aug, bbs_aug = seq(image=image, bounding_boxes=bbs)
ia.imshow(bbs_aug.draw_on_image(image_aug, size=1))
print(bbs_aug)
结果对比图

总结
- 两种增强方式的选择,拿文本检测来说,
- 如果采用基于分割的算法(例如:DBNet),就可以考虑用opencv这种多边形的增强方式;
- 如果只需要检测矩形框,还是采用imgaug的方便一些