# Batch inference with LLaMA-Factory's vLLM runner (scripts/vllm_infer.py).
# DISABLE_VERSION_CHECK / TORCHRUN_DISABLED are LLaMA-Factory env switches.

# --- Multi-GPU run: expose four GPUs and shard the model across them. ---
# NOTE: tensor-parallel-size must not exceed the number of visible devices.
# The original used 8 with only 4 GPUs exposed, which vLLM rejects at startup;
# fixed to 4 to match CUDA_VISIBLE_DEVICES.
export CUDA_VISIBLE_DEVICES=0,1,2,3
DISABLE_VERSION_CHECK=1 TORCHRUN_DISABLED=1 \
  python3 scripts/vllm_infer.py \
    --model_name_or_path qwen2_5vl_lora_sft_v2 \
    --dataset car_item \
    --template qwen2_vl \
    --tensor-parallel-size 4

# --- Single-GPU debug run on device 7. ---
# CUDA_LAUNCH_BLOCKING=1 makes kernel launches synchronous so CUDA errors
# surface at the faulting call (debugging aid; slows execution).
# gpu-memory-utilization/max-model-len are reduced to fit a single GPU.
CUDA_VISIBLE_DEVICES=7 CUDA_LAUNCH_BLOCKING=1 \
  DISABLE_VERSION_CHECK=1 TORCHRUN_DISABLED=1 \
  python3 scripts/vllm_infer.py \
    --model_name_or_path qwen2_5vl_lora_sft_v2 \
    --dataset car_item \
    --template qwen2_vl \
    --tensor-parallel-size 1 \
    --gpu-memory-utilization 0.5 \
    --max-model-len 5000 \
    --pipeline_parallel_size 1
# TODO: something goes wrong here for reasons not yet understood — recorded
# for later investigation.
# [Model] LLaMA-Factory + vLLM inference notes
# First published 2025-02-18 19:29:33