mirror of
https://github.com/QwenLM/Qwen.git
synced 2026-05-20 16:35:47 +08:00
add 72B and 1.8B Qwen models, add Ascend 910 and Hygon DCU support, add docker support
This commit is contained in:
109
docker/Dockerfile
Normal file
109
docker/Dockerfile
Normal file
@@ -0,0 +1,109 @@
|
||||
# Base image selection. Override CUDA_VERSION, or `from` as a whole, with --build-arg.
ARG CUDA_VERSION=11.7.1
ARG from=nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu20.04

FROM ${from} AS base

# Re-declare the global build argument so that it is visible inside this stage.
ARG from

# System packages shared by every later stage.
# `set -e` aborts the step on the first failing command, and
# DEBIAN_FRONTEND=noninteractive keeps package configuration from waiting on a
# prompt that nobody can answer during a build.
RUN <<EOF
set -e
export DEBIAN_FRONTEND=noninteractive
apt-get update -y
apt-get upgrade -y
apt-get install -y --no-install-recommends \
    git \
    git-lfs \
    python3 \
    python3-pip \
    python3-dev \
    wget \
    vim
rm -rf /var/lib/apt/lists/*
EOF

# Provide a `python` command next to `python3`; -f keeps the step from failing
# when the base image already ships /usr/bin/python.
RUN ln -sf /usr/bin/python3 /usr/bin/python

RUN git lfs install
|
||||
|
||||
FROM base AS dev

# All Qwen files live under /data/shared/Qwen. WORKDIR creates the directory
# when it is missing, so no separate mkdir step is required.
# Users can also mount '/data/shared/Qwen/' to keep the data
WORKDIR /data/shared/Qwen/

# COPY sources are resolved inside the build context and cannot reach above it,
# so the files are named relative to the context root instead of with "../".
# NOTE(review): assumes the repository root is the build context
# (e.g. `docker build -f docker/Dockerfile .`) — confirm.
COPY requirements.txt requirements_web_demo.txt ./
|
||||
|
||||
FROM dev AS bundle_req

# Set to anything other than "true" to skip installing the Python requirements.
ARG BUNDLE_REQUIREMENTS=true

# `set -e` makes the build fail as soon as any install fails; without it only
# the exit status of the last pip3 command would decide the outcome of the step.
RUN <<EOF
set -e
if [ "$BUNDLE_REQUIREMENTS" = "true" ]; then
    cd /data/shared/Qwen
    pip3 install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2
    pip3 install -r requirements.txt
    pip3 install -r requirements_web_demo.txt
fi
EOF
|
||||
|
||||
FROM bundle_req AS bundle_flash_attention
# Set to anything other than "true" to leave flash-attention out of the image.
ARG BUNDLE_FLASH_ATTENTION=true

RUN <<EOF
# Nothing to do unless flash-attention was requested.
[ "$BUNDLE_FLASH_ATTENTION" = "true" ] || exit 0

cd /data/shared/Qwen
# Reuse an existing checkout if one is present, otherwise fetch tag v2.3.3.
[ -d flash-attention ] || git clone -b v2.3.3 https://github.com/Dao-AILab/flash-attention
# Install the main package first, then the layer_norm extension.
cd /data/shared/Qwen/flash-attention \
    && pip3 install . \
    && pip3 install csrc/layer_norm
EOF
|
||||
|
||||
FROM bundle_flash_attention AS bundle_finetune
# Set to anything other than "true" to skip the fine-tuning dependencies.
ARG BUNDLE_FINETUNE=true

# `set -e` stops the step at the first failure; previously a failed
# deepspeed/peft or apt install was hidden by the exit status of the last pip3.
RUN <<EOF
set -e
if [ "$BUNDLE_FINETUNE" = "true" ]; then
    cd /data/shared/Qwen

    # Full-finetune / LoRA.
    pip3 install deepspeed peft

    # Q-LoRA.
    apt-get update -y
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        libopenmpi-dev openmpi-bin
    rm -rf /var/lib/apt/lists/*
    pip3 install optimum auto-gptq mpi4py
fi
EOF
|
||||
|
||||
FROM bundle_finetune AS bundle_openai_api
# Set to anything other than "true" to skip the OpenAI-API server dependencies.
ARG BUNDLE_OPENAI_API=true

RUN <<EOF
# Nothing to do unless the OpenAI-API bundle was requested.
[ "$BUNDLE_OPENAI_API" = "true" ] || exit 0

cd /data/shared/Qwen
pip3 install fastapi uvicorn "openai<1.0.0" sse_starlette "pydantic<=1.10.13"
EOF
|
||||
|
||||
FROM bundle_openai_api AS final
# Re-declares the global build argument in this stage; no instruction below reads it.
ARG from

# COPY sources are resolved inside the build context, so they are named relative
# to the context root rather than with a "../" prefix.
# NOTE(review): assumes the repository root is the build context — confirm.
COPY requirements.txt requirements_web_demo.txt \
     cli_demo.py web_demo.py openai_api.py finetune.py utils.py ./

# Copy each directory as a whole: a "dir/*" wildcard would unpack any matched
# subdirectory straight into the destination and lose the directory level.
COPY examples/ ./examples/
COPY eval/ ./eval/
COPY finetune/ ./finetune/

EXPOSE 80

WORKDIR /data/shared/Qwen/

# Default command: serve the web demo on port 80 using the checkpoint directory
# /data/shared/Qwen/Qwen-Chat/.
CMD ["python3", "web_demo.py", "--server-port", "80", "--server-name", "0.0.0.0", "-c", "/data/shared/Qwen/Qwen-Chat/"]
|
||||
105
docker/Dockerfile-cu114
Normal file
105
docker/Dockerfile-cu114
Normal file
@@ -0,0 +1,105 @@
|
||||
# Base image selection. Override CUDA_VERSION, or `from` as a whole, with --build-arg.
ARG CUDA_VERSION=11.4.3
ARG from=nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu20.04

FROM ${from} AS base

# Re-declare the global build argument so that it is visible inside this stage.
ARG from

# System packages shared by every later stage.
# `set -e` aborts the step on the first failing command, and
# DEBIAN_FRONTEND=noninteractive keeps package configuration from waiting on a
# prompt that nobody can answer during a build.
RUN <<EOF
set -e
export DEBIAN_FRONTEND=noninteractive
apt-get update -y
apt-get upgrade -y
apt-get install -y --no-install-recommends \
    git \
    git-lfs \
    python3 \
    python3-pip \
    python3-dev \
    wget \
    vim
rm -rf /var/lib/apt/lists/*
EOF

# Provide a `python` command next to `python3`; -f keeps the step from failing
# when the base image already ships /usr/bin/python.
RUN ln -sf /usr/bin/python3 /usr/bin/python

RUN git lfs install
|
||||
|
||||
FROM base AS dev

# All Qwen files live under /data/shared/Qwen. WORKDIR creates the directory
# when it is missing, so no separate mkdir step is required.
# Users can also mount '/data/shared/Qwen/' to keep the data
WORKDIR /data/shared/Qwen/

# COPY sources are resolved inside the build context and cannot reach above it,
# so the files are named relative to the context root instead of with "../".
# NOTE(review): assumes the repository root is the build context
# (e.g. `docker build -f docker/Dockerfile-cu114 .`) — confirm.
COPY requirements.txt requirements_web_demo.txt ./
|
||||
|
||||
FROM dev AS bundle_req

# Set to anything other than "true" to skip installing the Python requirements.
ARG BUNDLE_REQUIREMENTS=true

# `set -e` makes the build fail as soon as any install fails; without it only
# the exit status of the last pip3 command would decide the outcome of the step.
RUN <<EOF
set -e
if [ "$BUNDLE_REQUIREMENTS" = "true" ]; then
    cd /data/shared/Qwen
    # The cu113 wheels are fetched from the PyTorch wheel index.
    pip3 install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113
    pip3 install -r requirements.txt
    pip3 install -r requirements_web_demo.txt
fi
EOF
|
||||
|
||||
FROM bundle_req AS bundle_flash_attention
# Kept so this file accepts the same build arguments as the main Dockerfile.
ARG BUNDLE_FLASH_ATTENTION=true

# This stage installs nothing; it only prints a notice when flash-attention is requested.
RUN <<EOF
[ "$BUNDLE_FLASH_ATTENTION" != "true" ] || echo "CUDA 11.4 does not support flash-attention, please try other images."
EOF
|
||||
|
||||
FROM bundle_flash_attention AS bundle_finetune
# Set to anything other than "true" to skip the fine-tuning dependencies.
ARG BUNDLE_FINETUNE=true

# `set -e` stops the step at the first failure; previously a failed
# deepspeed/peft or apt install was hidden by the exit status of the last pip3.
RUN <<EOF
set -e
if [ "$BUNDLE_FINETUNE" = "true" ]; then
    cd /data/shared/Qwen

    # Full-finetune / LoRA.
    pip3 install deepspeed peft

    # Q-LoRA.
    apt-get update -y
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        libopenmpi-dev openmpi-bin
    rm -rf /var/lib/apt/lists/*
    pip3 install optimum auto-gptq mpi4py
fi
EOF
|
||||
|
||||
FROM bundle_finetune AS bundle_openai_api
# Set to anything other than "true" to skip the OpenAI-API server dependencies.
ARG BUNDLE_OPENAI_API=true

RUN <<EOF
# Nothing to do unless the OpenAI-API bundle was requested.
[ "$BUNDLE_OPENAI_API" = "true" ] || exit 0

cd /data/shared/Qwen
pip3 install fastapi uvicorn "openai<1.0.0" sse_starlette "pydantic<=1.10.13"
EOF
|
||||
|
||||
FROM bundle_openai_api AS final
# Re-declares the global build argument in this stage; no instruction below reads it.
ARG from

# COPY sources are resolved inside the build context, so they are named relative
# to the context root rather than with a "../" prefix.
# NOTE(review): assumes the repository root is the build context — confirm.
COPY requirements.txt requirements_web_demo.txt \
     cli_demo.py web_demo.py openai_api.py finetune.py utils.py ./

# Copy each directory as a whole: a "dir/*" wildcard would unpack any matched
# subdirectory straight into the destination and lose the directory level.
COPY examples/ ./examples/
COPY eval/ ./eval/
COPY finetune/ ./finetune/

EXPOSE 80

WORKDIR /data/shared/Qwen/

# Default command: serve the web demo on port 80 using the checkpoint directory
# /data/shared/Qwen/Qwen-Chat/.
CMD ["python3", "web_demo.py", "--server-port", "80", "--server-name", "0.0.0.0", "-c", "/data/shared/Qwen/Qwen-Chat/"]
|
||||
54
docker/docker_cli_demo.sh
Normal file
54
docker/docker_cli_demo.sh
Normal file
@@ -0,0 +1,54 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# This script will automatically pull docker image from DockerHub, and start a container to run the Qwen-Chat cli-demo.
|
||||
|
||||
# Default settings; each one can be overridden from the command line.
IMAGE_NAME=qwenllm/qwen:cu117            # image that is pulled and run
QWEN_CHECKPOINT_PATH=/path/to/Qwen-Chat  # placeholder; pass the real directory with -c
CONTAINER_NAME=qwen                      # name given to the started container

#######################################
# Print the command-line synopsis, framed by blank lines, to stdout.
# -c takes a mandatory value, so its argument is shown without brackets.
#######################################
usage() {
  printf '%s\n' \
    '' \
    'Usage: bash docker/docker_cli_demo.sh [-i IMAGE_NAME] -c /path/to/Qwen-Chat [-n CONTAINER_NAME] [-h]' \
    ''
}
|
||||
|
||||
#######################################
# Exit with status 1 unless the option in $1 is followed by a value.
# Arguments: the remaining command line, option first.
#######################################
require_value() {
  if [[ $# -lt 2 ]]; then
    echo "Option ${1} requires a value" >&2
    exit 1
  fi
}

#######################################
# Parse the command line into the global settings.
# Globals:   IMAGE_NAME, QWEN_CHECKPOINT_PATH, CONTAINER_NAME (written)
# Arguments: the script's command-line arguments
# Exits:     0 after printing the usage text for -h/--help,
#            1 on an unknown argument or an option that lacks its value
#######################################
parse_args() {
  # Loop on the argument count rather than on "$1" being non-empty, so that an
  # empty-string argument cannot silently end parsing.
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -i | --image-name)
        require_value "$@"
        IMAGE_NAME=$2
        shift
        ;;
      -c | --checkpoint)
        require_value "$@"
        QWEN_CHECKPOINT_PATH=$2
        shift
        ;;
      -n | --container-name)
        require_value "$@"
        CONTAINER_NAME=$2
        shift
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        echo "Unknown argument ${1}" >&2
        exit 1
        ;;
    esac
    shift
  done
}

parse_args "$@"
|
||||
|
||||
# The checkpoint directory has to contain config.json. All expansions are
# quoted so that paths containing spaces survive word splitting.
if [[ ! -e "${QWEN_CHECKPOINT_PATH}/config.json" ]]; then
  echo "Checkpoint config.json file not found in ${QWEN_CHECKPOINT_PATH}, exit." >&2
  exit 1
fi

# Hand docker an absolute host path, so that a relative -c argument also works
# as a bind-mount source.
checkpoint_dir=$(CDPATH='' cd -- "${QWEN_CHECKPOINT_PATH}" && pwd) || {
  echo "Cannot resolve checkpoint directory ${QWEN_CHECKPOINT_PATH}, exit." >&2
  exit 1
}

sudo docker pull "${IMAGE_NAME}" || {
  echo "Pulling image ${IMAGE_NAME} failed, exit." >&2
  exit 1
}

# Interactive container that is removed on exit (--rm); the checkpoint is
# bind-mounted at the path handed to cli_demo.py.
sudo docker run --gpus all --rm --name "${CONTAINER_NAME}" \
  --mount "type=bind,source=${checkpoint_dir},target=/data/shared/Qwen/Qwen-Chat" \
  -it "${IMAGE_NAME}" \
  python cli_demo.py -c /data/shared/Qwen/Qwen-Chat/
|
||||
64
docker/docker_openai_api.sh
Normal file
64
docker/docker_openai_api.sh
Normal file
@@ -0,0 +1,64 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# This script will automatically pull docker image from DockerHub, and start a daemon container to run the Qwen-Chat OpenAI API.
|
||||
|
||||
# Default settings; each one can be overridden from the command line.
IMAGE_NAME=qwenllm/qwen:cu117            # image that is pulled and run
QWEN_CHECKPOINT_PATH=/path/to/Qwen-Chat  # placeholder; pass the real directory with -c
PORT=8000                                # host port published to container port 80
CONTAINER_NAME=qwen                      # name given to the started container

#######################################
# Print the command-line synopsis, framed by blank lines, to stdout.
# -c takes a mandatory value, so its argument is shown without brackets.
#######################################
usage() {
  printf '%s\n' \
    '' \
    'Usage: bash docker/docker_openai_api.sh [-i IMAGE_NAME] -c /path/to/Qwen-Chat [-n CONTAINER_NAME] [--port PORT] [-h]' \
    ''
}
|
||||
|
||||
#######################################
# Exit with status 1 unless the option in $1 is followed by a value.
# Arguments: the remaining command line, option first.
#######################################
require_value() {
  if [[ $# -lt 2 ]]; then
    echo "Option ${1} requires a value" >&2
    exit 1
  fi
}

#######################################
# Parse the command line into the global settings.
# Globals:   IMAGE_NAME, QWEN_CHECKPOINT_PATH, CONTAINER_NAME, PORT (written)
# Arguments: the script's command-line arguments
# Exits:     0 after printing the usage text for -h/--help,
#            1 on an unknown argument or an option that lacks its value
#######################################
parse_args() {
  # Loop on the argument count rather than on "$1" being non-empty, so that an
  # empty-string argument cannot silently end parsing.
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -i | --image-name)
        require_value "$@"
        IMAGE_NAME=$2
        shift
        ;;
      -c | --checkpoint)
        require_value "$@"
        QWEN_CHECKPOINT_PATH=$2
        shift
        ;;
      -n | --container-name)
        require_value "$@"
        CONTAINER_NAME=$2
        shift
        ;;
      --port)
        require_value "$@"
        PORT=$2
        shift
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        echo "Unknown argument ${1}" >&2
        exit 1
        ;;
    esac
    shift
  done
}

parse_args "$@"
|
||||
|
||||
# The checkpoint directory has to contain config.json. All expansions are
# quoted so that paths containing spaces survive word splitting.
if [[ ! -e "${QWEN_CHECKPOINT_PATH}/config.json" ]]; then
  echo "Checkpoint config.json file not found in ${QWEN_CHECKPOINT_PATH}, exit." >&2
  exit 1
fi

# Hand docker an absolute host path, so that a relative -c argument also works
# as a bind-mount source.
checkpoint_dir=$(CDPATH='' cd -- "${QWEN_CHECKPOINT_PATH}" && pwd) || {
  echo "Cannot resolve checkpoint directory ${QWEN_CHECKPOINT_PATH}, exit." >&2
  exit 1
}

sudo docker pull "${IMAGE_NAME}" || {
  echo "Pulling image ${IMAGE_NAME} failed, exit." >&2
  exit 1
}

# Arguments for a detached, auto-restarting container; built as an array so
# every value stays a single word.
run_args=(
  --gpus all -d --restart always --name "${CONTAINER_NAME}"
  # NOTE(review): mounting the Docker socket lets the container control the
  # host's Docker daemon; nothing in this script uses it — confirm it is
  # needed, otherwise drop it.
  -v /var/run/docker.sock:/var/run/docker.sock
  -p "${PORT}:80"
  --mount "type=bind,source=${checkpoint_dir},target=/data/shared/Qwen/Qwen-Chat"
  -it "${IMAGE_NAME}"
  python openai_api.py --server-port 80 --server-name 0.0.0.0 -c /data/shared/Qwen/Qwen-Chat/
)

# The hint is printed only when `docker run` succeeds; otherwise its exit
# status becomes the script's exit status.
sudo docker run "${run_args[@]}" && {
  echo "Successfully started OpenAI API server. Access 'http://localhost:${PORT}/v1' to try!
Run \`docker logs ${CONTAINER_NAME}\` to check server status.
Run \`docker rm -f ${CONTAINER_NAME}\` to stop and remove the server."
}
|
||||
64
docker/docker_web_demo.sh
Normal file
64
docker/docker_web_demo.sh
Normal file
@@ -0,0 +1,64 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# This script will automatically pull docker image from DockerHub, and start a daemon container to run the Qwen-Chat web-demo.
|
||||
|
||||
# Default settings; each one can be overridden from the command line.
IMAGE_NAME=qwenllm/qwen:cu117               # image that is pulled and run
QWEN_CHECKPOINT_PATH=/path/to/Qwen-7B-Chat  # placeholder; pass the real directory with -c
PORT=8901                                   # host port published to container port 80
CONTAINER_NAME=qwen                         # name given to the started container

#######################################
# Print the command-line synopsis, framed by blank lines, to stdout.
# -c takes a mandatory value, so its argument is shown without brackets.
#######################################
usage() {
  printf '%s\n' \
    '' \
    'Usage: bash docker/docker_web_demo.sh [-i IMAGE_NAME] -c /path/to/Qwen-Chat [-n CONTAINER_NAME] [--port PORT] [-h]' \
    ''
}
|
||||
|
||||
#######################################
# Exit with status 1 unless the option in $1 is followed by a value.
# Arguments: the remaining command line, option first.
#######################################
require_value() {
  if [[ $# -lt 2 ]]; then
    echo "Option ${1} requires a value" >&2
    exit 1
  fi
}

#######################################
# Parse the command line into the global settings.
# Globals:   IMAGE_NAME, QWEN_CHECKPOINT_PATH, CONTAINER_NAME, PORT (written)
# Arguments: the script's command-line arguments
# Exits:     0 after printing the usage text for -h/--help,
#            1 on an unknown argument or an option that lacks its value
#######################################
parse_args() {
  # Loop on the argument count rather than on "$1" being non-empty, so that an
  # empty-string argument cannot silently end parsing.
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -i | --image-name)
        require_value "$@"
        IMAGE_NAME=$2
        shift
        ;;
      -c | --checkpoint)
        require_value "$@"
        QWEN_CHECKPOINT_PATH=$2
        shift
        ;;
      -n | --container-name)
        require_value "$@"
        CONTAINER_NAME=$2
        shift
        ;;
      --port)
        require_value "$@"
        PORT=$2
        shift
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        echo "Unknown argument ${1}" >&2
        exit 1
        ;;
    esac
    shift
  done
}

parse_args "$@"
|
||||
|
||||
# The checkpoint directory has to contain config.json. All expansions are
# quoted so that paths containing spaces survive word splitting.
if [[ ! -e "${QWEN_CHECKPOINT_PATH}/config.json" ]]; then
  echo "Checkpoint config.json file not found in ${QWEN_CHECKPOINT_PATH}, exit." >&2
  exit 1
fi

# Hand docker an absolute host path, so that a relative -c argument also works
# as a bind-mount source.
checkpoint_dir=$(CDPATH='' cd -- "${QWEN_CHECKPOINT_PATH}" && pwd) || {
  echo "Cannot resolve checkpoint directory ${QWEN_CHECKPOINT_PATH}, exit." >&2
  exit 1
}

sudo docker pull "${IMAGE_NAME}" || {
  echo "Pulling image ${IMAGE_NAME} failed, exit." >&2
  exit 1
}

# Arguments for a detached, auto-restarting container; built as an array so
# every value stays a single word.
run_args=(
  --gpus all -d --restart always --name "${CONTAINER_NAME}"
  # NOTE(review): mounting the Docker socket lets the container control the
  # host's Docker daemon; nothing in this script uses it — confirm it is
  # needed, otherwise drop it.
  -v /var/run/docker.sock:/var/run/docker.sock
  -p "${PORT}:80"
  --mount "type=bind,source=${checkpoint_dir},target=/data/shared/Qwen/Qwen-Chat"
  -it "${IMAGE_NAME}"
  python web_demo.py --server-port 80 --server-name 0.0.0.0 -c /data/shared/Qwen/Qwen-Chat/
)

# The hint is printed only when `docker run` succeeds; otherwise its exit
# status becomes the script's exit status.
sudo docker run "${run_args[@]}" && {
  echo "Successfully started web demo. Open 'http://localhost:${PORT}' to try!
Run \`docker logs ${CONTAINER_NAME}\` to check demo status.
Run \`docker rm -f ${CONTAINER_NAME}\` to stop and remove the demo."
}
|
||||
Reference in New Issue
Block a user