测试Qwen2-VL-2B-Instruct

方小汪

已于 2025-01-24 09:39:22 修改

阅读量480

点赞数 2

CC 4.0 BY-SA版权

文章标签： python

于 2025-01-21 14:41:40 首次发布

本文链接：https://blog.csdn.net/weixin_42828571/article/details/145283233

1 图片推理

# 导入所需的库
from PIL import Image 
import requests 
import torch 
from torchvision import io  # PyTorch的计算机视觉工具包
from typing import Dict  # 用于类型注解
from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor  # Hugging Face的transformers库，用于加载和使用预训练模型

# Local directory holding the downloaded Qwen2-VL-2B-Instruct checkpoint.
# Change this single value to your own download location; both the model and
# the processor are loaded from it.
MODEL_PATH = "/home/fyo/.cache/modelscope/hub/qwen/Qwen2-VL-2B-Instruct"

# Alternative: load straight from the Hugging Face Hub instead of a local
# directory by using a hub id for both from_pretrained() calls, e.g.
# MODEL_PATH = "Qwen/Qwen2-VL-7B-Instruct"

# Load the model weights. torch_dtype="auto" and device_map="auto" leave the
# choice of dtype and device placement to the library.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    MODEL_PATH,
    torch_dtype="auto",
    device_map="auto",
)
# Load the matching processor, used further down to build the chat prompt and
# to preprocess the text and image inputs.
processor = AutoProcessor.from_pretrained(MODEL_PATH)

# Alternative: fetch the demo image over HTTP instead of reading a local file.
# url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"
# image = Image.open(requests.get(url, stream=True).raw)

# Open the local picture that the model will be asked to describe.
image_path = "/home/fyo/Pictures/earthquake.jpg"
image = Image.open(image_path)


# Single-turn chat: one user message whose content is an image placeholder
# followed by a text instruction. Only the placeholder lives here; the actual
# picture is handed to the processor separately.
conversation = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "描述这张图."},
        ],
    },
]

# Render the conversation through the processor's chat template to obtain the
# text prompt. add_generation_prompt=True asks the template to append the
# marker that opens the assistant's reply.
text_prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

# Preprocess the prompt and the image together into PyTorch tensors
# (return_tensors="pt"); padding=True pads the batch to a common length.
inputs = processor(
    text=[text_prompt],
    images=[image],
    padding=True,
    return_tensors="pt",
)
# Move the input tensors onto the device the model was placed on. The model is
# loaded with device_map="auto", so it may live on a GPU while the processor
# output starts on the CPU; the previous `inputs = inputs` was a no-op that
# left them mismatched. On a CPU-only machine this call changes nothing.
inputs = inputs.to(model.device)