-
Notifications
You must be signed in to change notification settings - Fork 4.1k
Expand file tree
/
Copy pathDockerfile.stable.vllm
More file actions
155 lines (127 loc) · 6.22 KB
/
Copy pathDockerfile.stable.vllm
File metadata and controls
155 lines (127 loc) · 6.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# vllm: x86_64=0.20.2, aarch64=0.20.2
# CUDA release of the base image; declared before FROM so it can select the image tag.
ARG CUDA_VERSION=13.0.2
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu24.04
# Re-declare inside the stage: an ARG defined before FROM is only visible to FROM
# lines, and later RUN steps in this stage read CUDA_VERSION.
ARG CUDA_VERSION
# Python interpreter version used for the apt packages and the python/python3 symlinks.
ARG PYTHON_VERSION=3.12
# PyTorch stack pins.
ARG TORCH_VERSION=2.11.0
ARG TORCH_VISION_VERSION=0.26.0
ARG TORCH_AUDIO_VERSION=2.11.0
# Pinned releases / git refs of the packages installed further down.
ARG TRANSFORMERS_VERSION=5.3.0
ARG VLLM_VERSION=0.20.2
ARG TRL_VERSION=0.27.0
ARG TRANSFORMER_ENGINE_VERSION=v2.15
ARG FLASH_ATTENTION_VERSION=2.8.3
ARG NSIGHT_VERSION=2025.6.1
ARG MCORE_VERSION=core_v0.16.1
ARG VERL_VERSION=v0.7.1
# Build-time only (ARG rather than ENV): keeps apt-get non-interactive during the
# build without persisting the setting into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive
# Build-time only: pip reads PIP_* variables as option defaults, so this disables its cache.
ARG PIP_NO_CACHE_DIR=1
# Optional replacement for the Ubuntu archive host; empty keeps the stock sources.
ARG APT_MIRROR=""
# PEP 668: Ubuntu 24.04 blocks system-wide pip installs; override for Docker
ENV PIP_BREAK_SYSTEM_PACKAGES=1
# Optional: rewrite the Ubuntu archive host to APT_MIRROR. An empty APT_MIRROR
# short-circuits and leaves the sources file untouched.
RUN [ -z "${APT_MIRROR}" ] || \
    sed -i "s@http://.*archive.ubuntu.com@${APT_MIRROR}@g" /etc/apt/sources.list.d/ubuntu.sources
# Base OS packages: build toolchain, download/VCS tools, libibverbs/librdmacm/NUMA
# development libraries, and the requested Python interpreter with its headers.
# The package list is kept sorted; the apt index is removed in the same layer.
RUN apt-get update \
 && apt-get install --yes \
        build-essential \
        cmake \
        curl \
        git \
        libibverbs-dev \
        libnuma-dev \
        librdmacm-dev \
        numactl \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-dev \
        software-properties-common \
        vim \
        wget \
 && rm -rf /var/lib/apt/lists/*
# Bootstrap pip for the selected interpreter with the official get-pip.py script.
# The script is fetched directly from bootstrap.pypa.io (not through an intermediary
# host) because it is executed as root; it is deleted in the same layer.
RUN wget https://bootstrap.pypa.io/get-pip.py && \
    python${PYTHON_VERSION} get-pip.py && \
    rm get-pip.py
# Make both `python3` and `python` resolve to the selected interpreter.
# `|| exit 1` stops at the first failed link, like an `&&` chain would.
RUN for link_name in python3 python; do \
        ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/${link_name} || exit 1; \
    done
# PyTorch, torchvision and torchaudio at their pinned versions, taken directly from
# the official PyTorch wheel index for CUDA 13.0 (cu130).
RUN pip install \
    torch==${TORCH_VERSION} \
    torchvision==${TORCH_VISION_VERSION} \
    torchaudio==${TORCH_AUDIO_VERSION} \
    --index-url https://download.pytorch.org/whl/cu130
# Build helpers installed ahead of the source builds in later steps.
RUN pip install pybind11 wheel
# =========================
# Install cuDNN (network repo)
# =========================
# Steps:
#   1. Map the machine architecture to NVIDIA's repo directory name (sbsa for aarch64).
#   2. Download the cuda-keyring package directly from developer.download.nvidia.com.
#   3. Delete any existing apt source lines for the NVIDIA CUDA repo before the keyring
#      package is installed.
#   4. Install the cuDNN 9 packages matching the CUDA major version.
# The keyring .deb and the apt index are removed in the same layer.
RUN ARCH=$(if [ "$(uname -m)" = "aarch64" ]; then echo "sbsa"; else echo "x86_64"; fi) && \
    CUDA_VERSION_MAJOR=$(echo ${CUDA_VERSION} | cut -d '.' -f 1) && \
    wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/${ARCH}/cuda-keyring_1.1-1_all.deb && \
    sed -i '/developer\.download\.nvidia\.com\/compute\/cuda\/repos/d' /etc/apt/sources.list.d/* && \
    dpkg -i cuda-keyring_1.1-1_all.deb && \
    apt-get update && \
    apt-get -y --allow-downgrades --allow-change-held-packages install \
        cudnn9-cuda-${CUDA_VERSION_MAJOR} \
        libcudnn9-cuda-${CUDA_VERSION_MAJOR} \
        libcudnn9-dev-cuda-${CUDA_VERSION_MAJOR} \
        libcudnn9-headers-cuda-${CUDA_VERSION_MAJOR} && \
    rm -f cuda-keyring_1.1-1_all.deb && \
    rm -rf /var/lib/apt/lists/*
# Build prerequisites installed before the source builds that follow.
RUN pip install nvidia-mathdx ninja
# NVIDIA Apex built from source with its C++ and CUDA extensions enabled.
# --no-build-isolation makes the build use the torch already installed in the image.
# The repository is cloned directly from github.com.
RUN MAX_JOBS=256 pip install -v --disable-pip-version-check --no-build-isolation \
    --config-settings "--build-option=--cpp_ext" \
    --config-settings "--build-option=--cuda_ext" \
    git+https://github.com/NVIDIA/apex.git
# TransformerEngine built from source at the pinned ref, cloned directly from github.com.
# NVTE_FRAMEWORK, MAX_JOBS and NVTE_BUILD_THREADS_PER_JOB are passed to the build through
# the environment; --no-build-isolation makes it use the torch already installed.
RUN export NVTE_FRAMEWORK=pytorch && \
    MAX_JOBS=256 NVTE_BUILD_THREADS_PER_JOB=4 \
    pip3 install --resume-retries 999 --no-build-isolation \
    git+https://github.com/NVIDIA/TransformerEngine.git@${TRANSFORMER_ENGINE_VERSION}
# Assorted utility packages (unpinned).
RUN pip install \
    codetiming \
    mathruler \
    pylatexenc \
    cachetools \
    pytest-asyncio
# flash_attn at the pinned version, built against the installed torch (no build isolation).
# NOTE(review): FLASH_ATTENTION_FORCE_BUILD presumably forces a local source build instead
# of fetching a prebuilt wheel — confirm against flash-attn's setup.py.
RUN FLASH_ATTENTION_FORCE_BUILD="TRUE" MAX_JOBS=32 \
    pip install --no-build-isolation flash_attn==${FLASH_ATTENTION_VERSION}
# Install Nsight Systems at NSIGHT_VERSION and repoint the CUDA toolkit's nsys / nsys-ui
# entries at that installation.
#   - The release is taken from the NSIGHT_VERSION build argument instead of being
#     hard-coded, so overriding the argument actually takes effect.
#   - The package build suffix (.190-1) belongs to the default 2025.6.1 release; update
#     it together with NSIGHT_VERSION.
#   - The .deb is downloaded directly from developer.nvidia.com and removed, together
#     with the apt index, in the same layer.
RUN NSIGHT_ARCH=$(if [ "$(uname -m)" = "aarch64" ]; then echo "arm64"; else echo "amd64"; fi) && \
    NSIGHT_SERIES=$(echo "${NSIGHT_VERSION}" | cut -d '.' -f 1,2 | tr '.' '_') && \
    NSIGHT_DEB="nsight-systems-${NSIGHT_VERSION}_${NSIGHT_VERSION}.190-1_${NSIGHT_ARCH}.deb" && \
    wget "https://developer.nvidia.com/downloads/assets/tools/secure/nsight-systems/${NSIGHT_SERIES}/${NSIGHT_DEB}" && \
    apt-get update && apt-get install -y libxcb-cursor0 && \
    apt-get install -y "./${NSIGHT_DEB}" && \
    rm -rf /usr/local/cuda/bin/nsys && \
    ln -s "/opt/nvidia/nsight-systems/${NSIGHT_VERSION}/nsys" /usr/local/cuda/bin/nsys && \
    rm -rf /usr/local/cuda/bin/nsys-ui && \
    ln -s "/opt/nvidia/nsight-systems/${NSIGHT_VERSION}/nsys-ui" /usr/local/cuda/bin/nsys-ui && \
    rm "${NSIGHT_DEB}" && \
    rm -rf /var/lib/apt/lists/*
# =========================
# Install DeepEP
# =========================
# Steps, all under /home/dpsk_a2a:
#   1. Clone gdrcopy v2.5.1, install its library under /usr/local, drop the checkout.
#   2. Clone DeepEP (hybrid-ep branch) and check out a pinned commit for reproducibility.
#   3. Point the build at the NVSHMEM directory inside the Python dist-packages tree and
#      create the unversioned libnvshmem_host.so link next to the .so.3 file.
#   4. Add the CUDA toolkit's cccl include directory to CPATH and build for
#      compute capabilities 9.0 and 10.0.
# The dist-packages path follows PYTHON_VERSION rather than a hard-coded 3.12, and both
# repositories are cloned directly from github.com.
RUN cd /home && mkdir -p dpsk_a2a && cd dpsk_a2a && \
    git clone -b v2.5.1 https://github.com/NVIDIA/gdrcopy.git && \
    cd gdrcopy && \
    make prefix=/usr/local lib_install && \
    cd .. && rm -rf gdrcopy && \
    git clone -b hybrid-ep https://github.com/deepseek-ai/DeepEP.git && \
    export NVSHMEM_DIR=/usr/local/lib/python${PYTHON_VERSION}/dist-packages/nvidia/nvshmem && \
    export LD_LIBRARY_PATH="${NVSHMEM_DIR}/lib:$LD_LIBRARY_PATH" && \
    export PATH="${NVSHMEM_DIR}/bin:$PATH" && \
    cd ${NVSHMEM_DIR}/lib && \
    ln -sf libnvshmem_host.so.3 libnvshmem_host.so && \
    cd /home/dpsk_a2a/DeepEP && \
    git checkout 3f601f7ac1c062c46502646ff04c535013bfca00 && \
    CUDA_TARGET=$(uname -m | sed 's/aarch64/sbsa-linux/;s/x86_64/x86_64-linux/') && \
    export CPATH=/usr/local/cuda/targets/${CUDA_TARGET}/include/cccl:$CPATH && \
    TORCH_CUDA_ARCH_LIST="9.0;10.0" python setup.py install
# The Debian-packaged python3-jwt ships without a pip RECORD file, so pip cannot
# uninstall it when vllm needs a newer PyJWT. Purging it via apt is best-effort
# (the braces scope `|| true` to the apt commands only); PyJWT is then installed
# with pip regardless of what is already present.
RUN { apt-get update && \
      apt-get remove -y --purge python3-jwt 2>/dev/null || true; } && \
    rm -rf /var/lib/apt/lists/* && \
    pip install --ignore-installed PyJWT
# vLLM at the pinned VLLM_VERSION, with its dependencies resolved by pip.
RUN pip install vllm==${VLLM_VERSION}
# --no-deps: install TRL itself without letting pip add or change any dependency.
RUN pip3 install --no-deps trl==${TRL_VERSION}
# Additional unpinned packages.
RUN pip3 install nvtx matplotlib liger_kernel
# transformers is pinned after the installs above, so TRANSFORMERS_VERSION is the
# version left in place at this point in the build.
RUN pip install transformers==${TRANSFORMERS_VERSION}
# mbridge from the tip of its main branch (unpinned; -U upgrades an existing install).
# Cloned directly from github.com.
RUN pip install -U git+https://github.com/ISEEKYAN/mbridge.git@main
# Megatron-LM at the pinned MCORE_VERSION ref; --no-deps keeps pip from adding or
# changing any dependency. Cloned directly from github.com.
RUN pip install --no-deps git+https://github.com/NVIDIA/Megatron-LM.git@${MCORE_VERSION}
# torchcodec from the official PyTorch wheel index for CUDA 13.0 (cu130).
RUN pip install torchcodec --index-url=https://download.pytorch.org/whl/cu130
# ffmpeg from apt. The grep doubles as a build-time check: the step fails unless
# `ffmpeg -decoders` lists at least one line mentioning NVIDIA.
RUN apt-get update && \
    apt-get install -y ffmpeg && \
    ffmpeg -decoders | grep -i nvidia && \
    rm -rf /var/lib/apt/lists/*
RUN pip install qwen-vl-utils==0.0.14
# Install verl at VERL_VERSION and immediately uninstall it: the dependencies pip
# resolved for it stay in the image, the verl package itself does not.
# Cloned directly from github.com.
RUN pip install git+https://github.com/verl-project/verl.git@${VERL_VERSION} && \
    pip uninstall -y verl
# Require the nvidia-cudnn pip wheel to be at least the system cuDNN version.
# The version comes from the installed libcudnn9 Debian package (Debian revision
# suffix stripped); if dpkg reports nothing, it is rebuilt as MAJOR.MINOR.PATCHLEVEL
# from the first cudnn_version.h found under /usr/include.
RUN cuda_major=$(echo ${CUDA_VERSION} | cut -d. -f1) && \
    cudnn_ver=$(dpkg-query -W -f='${Version}' "libcudnn9-cuda-${cuda_major}" 2>/dev/null | sed 's/-[0-9]*$//') && \
    if [ -z "${cudnn_ver}" ]; then \
        version_header=$(find /usr/include -name cudnn_version.h | head -n 1) && \
        cudnn_ver=$(grep -E '^#define CUDNN_(MAJOR|MINOR|PATCHLEVEL) ' "${version_header}" | awk '{print $3}' | paste -sd. -); \
    fi && \
    pip install "nvidia-cudnn-cu${cuda_major}>=${cudnn_ver}"