本文不生产技术,只做技术的搬运工!!!
前言
最近公司甲方提了新需求,要求可以用少量的样例图像进行类别扩充,加快迭代速度,刚好最近在倒腾以图搜图,就以向量数据库的形式进行了测试实验,效果还不错,这里做下记录。
背景描述
当前分类模型已经有8个类别,且经过了大量数据的训练。甲方要求后续新增类别时不需要大批量采集数据,只需提供一定数量的样例图像即可完成分类。
技术路线
我们项目使用的是yolov8-cls进行的分类任务,这里我们需要对onnx模型进行一些改动,获取其中间输出作为特征向量,然后存入数据库中
源码
模型修改
import onnx
import numpy as np
from onnx import helper, checker
# Expose an intermediate tensor of the classifier as an additional graph
# output, so it can be fetched as a feature vector at inference time.

# Load the existing ONNX model.
model = onnx.load("/home/workspace/temp/last.onnx")

# Name of the intermediate tensor to expose; replace it with the output name
# of the target layer in your own model.
target_layer_name = "/model.9/Flatten_output_0"

# Fail early with a clear message when the name does not match any tensor
# produced inside the graph (e.g. a typo or a different export).
produced_tensors = {name for node in model.graph.node for name in node.output}
if target_layer_name not in produced_tensors:
    raise ValueError(
        f"Tensor {target_layer_name!r} is not produced by any node in the graph"
    )

# Describe the new output. Dynamic axes are declared with symbolic names
# (stored as dim_param) rather than dim_value = -1, which ONNX does not use
# to mark an unknown dimension.
# NOTE(review): the element type is assumed to be float32 and the tensor is
# assumed to be 2-D (batch, features) — confirm against the actual model.
new_output_info = helper.make_tensor_value_info(
    target_layer_name,
    onnx.TensorProto.FLOAT,
    ["batch_size", "num_features"],
)

# Append the new output to the graph's output list.
model.graph.output.append(new_output_info)

# Validate the modified model before writing it out.
checker.check_model(model)

# Save the modified model.
onnx.save(model, "/home/workspace/temp/last_add.onnx")
修改后的模型输出头如下图所示
数据入库
from PIL import Image
import onnxruntime
import torch
import os
import numpy as np
import cv2
from torchvision import transforms
from pymilvus import MilvusClient
# Open the local "val.db" database file. The same constructor call is used
# whether or not the file already exists, so no branching is needed here.
client = MilvusClient("val.db")

# Create the collection only when it is missing. Re-running this script
# against an existing database then appends to the stored vectors instead of
# recreating the collection.
if not client.has_collection(collection_name="text_image"):
    client.create_collection(
        collection_name="text_image",
        dimension=1280,  # must equal the length of the vectors inserted below
        metric_type="COSINE",
    )
def read_image(image_path):
    """
    Read an image from disk and preprocess it for the classifier.

    The file is loaded with ``np.fromfile`` + ``cv2.imdecode`` instead of
    ``cv2.imread`` so that paths containing non-ASCII characters work.

    :param image_path: Path of the image file.
    :return: Tuple ``(batch, src)`` where ``batch`` is the preprocessed image
        as a NumPy array with a leading batch axis of size 1, and ``src`` is
        the decoded image as returned by OpenCV (BGR channel order).
    :raises ValueError: If the file is empty or cannot be decoded as an image.
    """
    raw_bytes = np.fromfile(image_path, dtype=np.uint8)
    # imdecode signals failure by returning None; check explicitly so the
    # caller gets a message naming the file instead of an opaque cvtColor error.
    src = cv2.imdecode(raw_bytes, cv2.IMREAD_COLOR) if raw_bytes.size else None
    if src is None:
        raise ValueError(f"Failed to decode image: {image_path}")

    # OpenCV decodes to BGR; the PIL/torchvision pipeline below expects RGB.
    img = cv2.cvtColor(src, cv2.COLOR_BGR2RGB)

    transform = transforms.Compose([
        # Resize with bilinear interpolation; with an int size the aspect
        # ratio is kept and the shorter side becomes 224.
        transforms.Resize(size=224, interpolation=transforms.InterpolationMode.BILINEAR, max_size=None, antialias=True),
        # Cut a 224x224 window from the centre of the resized image.
        transforms.CenterCrop(size=(224, 224)),
        # Convert the PIL image to a PyTorch tensor.
        transforms.ToTensor(),
        # mean=0 / std=1 leaves the values unchanged; kept so the pipeline
        # has an explicit normalisation step.
        transforms.Normalize(mean=[0., 0., 0.], std=[1., 1., 1.])
    ])

    pil_image = Image.fromarray(img)
    normalized_image = transform(pil_image)
    # Add the batch axis expected by the model input.
    return np.expand_dims(normalized_image.numpy(), axis=0), src
def getFileList(dir, Filelist, ext=None):
    """
    Recursively collect file paths under ``dir`` into ``Filelist``.

    :param dir: Root directory to walk, or the path of a single file.
    :param Filelist: List that matching paths are appended to (mutated in place).
    :param ext: Optional file-name suffix such as ``'.jpg'``. When given, only
        paths ending with it (compared case-insensitively) are collected;
        when ``None`` every file is collected.
    :return: The same ``Filelist`` object that was passed in.
    """
    if os.path.isfile(dir):
        # Match on the end of the path only: a substring test would also
        # accept files living under a directory whose name contains ``ext``.
        if ext is None or dir.lower().endswith(ext.lower()):
            Filelist.append(dir)
    elif os.path.isdir(dir):
        # os.listdir returns entries in arbitrary order; sort them so the
        # resulting list is deterministic across runs.
        for entry in sorted(os.listdir(dir)):
            getFileList(os.path.join(dir, entry), Filelist, ext)
    return Filelist
def load_onnx_model(model_path):
    """
    Create an ONNX Runtime inference session for the given model file.

    CUDA is preferred when this onnxruntime build offers it; the CPU provider
    is always listed as an explicit fallback so the script also runs on
    machines without a usable GPU.

    :param model_path: Path of the ``.onnx`` model file.
    :return: The created ``onnxruntime.InferenceSession``.
    """
    preferred = ['CUDAExecutionProvider', 'CPUExecutionProvider']
    available = onnxruntime.get_available_providers()
    # Keep the preference order but request only providers this build has.
    providers = [name for name in preferred if name in available]
    session = onnxruntime.InferenceSession(model_path, providers=providers)
    print("ONNX模型已成功加载。")
    return session
def image_tensor(image_path, session, output_name="/model.9/Flatten_output_0"):
    """
    Run the model on one image and return the requested output as a feature vector.

    :param image_path: Path of the image file.
    :param session: ONNX Runtime inference session used to run the model.
    :param output_name: Name of the graph output to fetch. The default is the
        intermediate tensor used throughout this script.
    :return: The fetched output with all size-1 axes removed.
    """
    image, _ = read_image(image_path)
    input_name = session.get_inputs()[0].name
    pred = session.run([output_name], {input_name: image})[0]
    # Drop size-1 axes (the batch axis of the single-image input).
    return np.squeeze(pred)
def main():
    """
    Extract a feature vector for every ``.jpg`` under ``img_dir`` and insert
    the vectors into the ``text_image`` collection.

    Each record stores the image path, its feature vector and a label taken
    from the name of the directory that directly contains the image.
    """
    model_path = "/home/project_python/onnx_image-image/last_add.onnx"
    img_dir = r"/home/project_python/onnx_image-image/test_image/add"
    session = load_onnx_model(model_path)
    image_path_list = getFileList(img_dir, [], '.jpg')

    data = []
    # NOTE(review): ids restart at 0 on every run, so running this against a
    # collection that already holds data reuses ids stored earlier — confirm
    # whether an offset is needed when appending new classes.
    for i, image_path in enumerate(image_path_list):
        image_features = image_tensor(image_path, session)
        data.append({
            'id': i,
            'image_path': image_path,
            'vector': image_features,
            # Label = name of the parent directory; os.path handles whatever
            # separator os.path.join produced while walking the tree.
            'image_labels': os.path.basename(os.path.dirname(image_path)),
        })
        # Progress: number of images processed so far.
        print(i + 1)

    if not data:
        # Nothing was found; skip the insert instead of sending an empty batch.
        print(f"No .jpg images found under {img_dir}")
        return
    client.insert(collection_name="text_image", data=data)


if __name__ == "__main__":
    main()
初始化数据库和新增数据都可以使用该代码。需要注意的是,数据库中存入的类别信息由图像所在文件夹的名称决定,因此需要提前修改好文件夹名称。另外,新增的类别最好与分类模型的原始类别属于同一类型,比如原来的分类模型是猫狗分类,可以新增鸭子类别,这样效果会比较好。
数据查询
import onnxruntime
import torch
import numpy as np
import cv2
from torchvision import transforms
from pymilvus import MilvusClient
from PIL import Image, ImageDraw, ImageFont
# Open the local "val.db" database file that the search below runs against.
# NOTE(review): presumably the file populated by the ingestion script — verify.
client = MilvusClient("val.db")
def display_images_in_grid(image_paths, scores, labels, images_per_row=3, tile_size=200):
    """
    Show images in a grid, each annotated with its score and label.

    :param image_paths: Paths of the images to show.
    :param scores: One numeric score per image (formatted with two decimals).
    :param labels: One label per image.
    :param images_per_row: Number of tiles per grid row; must be at least 1.
    :param tile_size: Edge length in pixels of each square tile.
    :raises ValueError: If the three sequences differ in length or
        ``images_per_row`` is smaller than 1.
    """
    # The three sequences are indexed in lockstep below.
    if not (len(image_paths) == len(scores) == len(labels)):
        raise ValueError("The lengths of image_paths, scores, and labels must be the same.")
    if images_per_row < 1:
        raise ValueError("images_per_row must be at least 1.")
    if not image_paths:
        # Nothing to draw; avoid building a zero-height canvas.
        return

    # Ceiling division: a partially filled last row still needs a full row.
    num_rows = (len(image_paths) + images_per_row - 1) // images_per_row

    # Load every image and scale it to the tile size.
    tiles = []
    for path in image_paths:
        with Image.open(path) as img:
            tiles.append(img.resize((tile_size, tile_size)))

    # Blank white canvas large enough for the whole grid.
    canvas = Image.new('RGB', (images_per_row * tile_size, num_rows * tile_size), (255, 255, 255))
    draw = ImageDraw.Draw(canvas)

    # Prefer a TrueType font; fall back to the built-in one when the font
    # file is not available. The vertical distance between the two text lines
    # depends on which font is in use, otherwise the 40px text overlaps.
    font_size = 40
    try:
        font = ImageFont.truetype("arial.ttf", font_size)
        line_gap = int(font_size * 1.2)
    except IOError:
        font = ImageFont.load_default()
        line_gap = 10  # spacing suited to the small built-in font

    margin = 10
    text_color = (255, 0, 0)
    for idx, tile in enumerate(tiles):
        row, col = divmod(idx, images_per_row)
        left, top = col * tile_size, row * tile_size
        canvas.paste(tile, (left, top))

        # Score on the first text line, label on the line below it.
        score_text = f"Score: {scores[idx]:.2f}"
        label_text = f"Label: {labels[idx]}"
        draw.text((left + margin, top + margin), score_text, font=font, fill=text_color)
        draw.text((left + margin, top + margin + line_gap), label_text, font=font, fill=text_color)

    # Open the composed canvas in the default image viewer.
    canvas.show()
def read_image(image_path):
    """
    Read an image from disk and preprocess it for the classifier.

    The file is loaded with ``np.fromfile`` + ``cv2.imdecode`` instead of
    ``cv2.imread`` so that paths containing non-ASCII characters work.

    :param image_path: Path of the image file.
    :return: Tuple ``(batch, src)`` where ``batch`` is the preprocessed image
        as a NumPy array with a leading batch axis of size 1, and ``src`` is
        the decoded image as returned by OpenCV (BGR channel order).
    :raises ValueError: If the file is empty or cannot be decoded as an image.
    """
    raw_bytes = np.fromfile(image_path, dtype=np.uint8)
    # imdecode signals failure by returning None; check explicitly so the
    # caller gets a message naming the file instead of an opaque cvtColor error.
    src = cv2.imdecode(raw_bytes, cv2.IMREAD_COLOR) if raw_bytes.size else None
    if src is None:
        raise ValueError(f"Failed to decode image: {image_path}")

    # OpenCV decodes to BGR; the PIL/torchvision pipeline below expects RGB.
    img = cv2.cvtColor(src, cv2.COLOR_BGR2RGB)

    transform = transforms.Compose([
        # Resize with bilinear interpolation; with an int size the aspect
        # ratio is kept and the shorter side becomes 224.
        transforms.Resize(size=224, interpolation=transforms.InterpolationMode.BILINEAR, max_size=None, antialias=True),
        # Cut a 224x224 window from the centre of the resized image.
        transforms.CenterCrop(size=(224, 224)),
        # Convert the PIL image to a PyTorch tensor.
        transforms.ToTensor(),
        # mean=0 / std=1 leaves the values unchanged; kept so the pipeline
        # has an explicit normalisation step.
        transforms.Normalize(mean=[0., 0., 0.], std=[1., 1., 1.])
    ])

    pil_image = Image.fromarray(img)
    normalized_image = transform(pil_image)
    # Add the batch axis expected by the model input.
    return np.expand_dims(normalized_image.numpy(), axis=0), src
def load_onnx_model(model_path):
    """
    Create an ONNX Runtime inference session for the given model file.

    CUDA is preferred when this onnxruntime build offers it; the CPU provider
    is always listed as an explicit fallback so the script also runs on
    machines without a usable GPU.

    :param model_path: Path of the ``.onnx`` model file.
    :return: The created ``onnxruntime.InferenceSession``.
    """
    preferred = ['CUDAExecutionProvider', 'CPUExecutionProvider']
    available = onnxruntime.get_available_providers()
    # Keep the preference order but request only providers this build has.
    providers = [name for name in preferred if name in available]
    session = onnxruntime.InferenceSession(model_path, providers=providers)
    print("ONNX模型已成功加载。")
    return session
def image_tensor(image_path, session, output_name="/model.9/Flatten_output_0"):
    """
    Run the model on one image and return the requested output unchanged.

    The batch axis is kept on purpose: the result is passed directly as the
    ``data`` argument of the vector search in this script.

    :param image_path: Path of the image file.
    :param session: ONNX Runtime inference session used to run the model.
    :param output_name: Name of the graph output to fetch. The default is the
        intermediate tensor used throughout this script.
    :return: The fetched output array exactly as returned by the session.
    """
    image, _ = read_image(image_path)
    input_name = session.get_inputs()[0].name
    return session.run([output_name], {input_name: image})[0]
def main():
    """
    Search the ``text_image`` collection for the images most similar to one
    query image and show the hits in a grid with their scores and labels.
    """
    # Imported locally because this script does not import os at the top.
    import os

    model_path = "/home/project_python/onnx_image-image/last_add.onnx"
    img_path = r"/home/project_python/onnx_image-image/test_image/box_11.jpg"
    session = load_onnx_model(model_path)
    image_features = image_tensor(img_path, session)

    results = client.search(
        "text_image",
        data=image_features,
        output_fields=["image_path"],
        search_params={"metric_type": "COSINE"},
        limit=7
    )

    # One hit list is returned per query vector; a single image was queried.
    hits = results[0]
    if len(hits) == 0:
        print("No matching images found.")
        return

    image_list = [hit["entity"]["image_path"] for hit in hits]
    # The value reported under "distance" for each hit is shown as the score.
    score_list = [hit["distance"] for hit in hits]
    # Label = name of the directory that directly contains the stored image.
    label_list = [os.path.basename(os.path.dirname(path)) for path in image_list]
    display_images_in_grid(image_list, score_list, label_list, 3)


if __name__ == "__main__":
    main()