add 72B and 1.8B Qwen models, add Ascend 910 and Hygon DCU support, add docker support

2026-05-20 16:35:47 +08:00 · 2023-11-30 15:29:13 +08:00
parent 981c89b2a9
commit e8e15962d8
52 changed files with 6139 additions and 1435 deletions
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -0,0 +1,109 @@
+ARG CUDA_VERSION=11.7.1
+ARG from=nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu20.04
+
+FROM ${from} as base
+
+ARG from
+
+RUN <<EOF
+apt update -y && apt upgrade -y && apt install -y --no-install-recommends  \
+    git \
+    git-lfs \
+    python3 \
+    python3-pip \
+    python3-dev \
+    wget \
+    vim \
+&& rm -rf /var/lib/apt/lists/*
+EOF
+
+RUN ln -s /usr/bin/python3 /usr/bin/python
+
+RUN git lfs install
+
+FROM base as dev
+
+WORKDIR /
+
+RUN mkdir -p /data/shared/Qwen
+
+WORKDIR /data/shared/Qwen/
+
+# Users can also mount '/data/shared/Qwen/' to keep the data
+COPY ../requirements.txt ./
+COPY ../requirements_web_demo.txt ./
+
+FROM dev as bundle_req
+
+ARG BUNDLE_REQUIREMENTS=true
+
+RUN <<EOF
+if [ "$BUNDLE_REQUIREMENTS" = "true" ]; then 
+    cd /data/shared/Qwen
+    pip3 install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2
+    pip3 install -r requirements.txt
+    pip3 install -r requirements_web_demo.txt
+fi
+EOF
+
+FROM bundle_req as bundle_flash_attention
+ARG BUNDLE_FLASH_ATTENTION=true
+
+RUN <<EOF 
+if [ "$BUNDLE_FLASH_ATTENTION" = "true" ]; then
+    cd /data/shared/Qwen 
+    test -d flash-attention || git clone -b v2.3.3 https://github.com/Dao-AILab/flash-attention
+    cd /data/shared/Qwen/flash-attention &&
+        pip3 install . &&
+        pip3 install csrc/layer_norm
+fi
+EOF
+
+FROM bundle_flash_attention as bundle_finetune
+ARG BUNDLE_FINETUNE=true
+
+RUN <<EOF
+if [ "$BUNDLE_FINETUNE" = "true" ]; then
+    cd /data/shared/Qwen
+
+    # Full-finetune / LoRA.
+    pip3 install deepspeed peft
+
+    # Q-LoRA.
+    apt update -y && DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends \
+        libopenmpi-dev openmpi-bin \
+        && rm -rf /var/lib/apt/lists/*
+    pip3 install optimum auto-gptq mpi4py
+fi
+EOF
+
+FROM bundle_finetune as bundle_openai_api
+ARG BUNDLE_OPENAI_API=true
+
+RUN <<EOF
+if [ "$BUNDLE_OPENAI_API" = "true" ]; then
+    cd /data/shared/Qwen
+
+    pip3 install fastapi uvicorn "openai<1.0.0" sse_starlette "pydantic<=1.10.13"
+fi
+EOF
+
+FROM bundle_openai_api as final
+ARG from
+
+COPY ../requirements.txt ./
+COPY ../requirements_web_demo.txt ./
+COPY ../cli_demo.py ./
+COPY ../web_demo.py ./
+COPY ../openai_api.py ./
+COPY ../finetune.py ./
+COPY ../utils.py ./
+COPY ./examples/* ./examples/
+COPY ./eval/* ./eval/
+COPY ./finetune/* ./finetune/
+
+EXPOSE 80
+
+WORKDIR /data/shared/Qwen/
+
+CMD ["python3", "web_demo.py", "--server-port", "80", "--server-name", "0.0.0.0", "-c", "/data/shared/Qwen/Qwen-Chat/"]
--- a/docker/Dockerfile-cu114
+++ b/docker/Dockerfile-cu114
@@ -0,0 +1,105 @@
+ARG CUDA_VERSION=11.4.3
+ARG from=nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu20.04
+
+FROM ${from} as base
+
+ARG from
+
+RUN <<EOF
+apt update -y && apt upgrade -y && apt install -y --no-install-recommends  \
+    git \
+    git-lfs \
+    python3 \
+    python3-pip \
+    python3-dev \
+    wget \
+    vim \
+&& rm -rf /var/lib/apt/lists/*
+EOF
+
+RUN ln -s /usr/bin/python3 /usr/bin/python
+
+RUN git lfs install
+
+FROM base as dev
+
+WORKDIR /
+
+RUN mkdir -p /data/shared/Qwen
+
+WORKDIR /data/shared/Qwen/
+
+# Users can also mount '/data/shared/Qwen/' to keep the data
+COPY ../requirements.txt ./
+COPY ../requirements_web_demo.txt ./
+
+FROM dev as bundle_req
+
+ARG BUNDLE_REQUIREMENTS=true
+
+RUN <<EOF
+if [ "$BUNDLE_REQUIREMENTS" = "true" ]; then 
+    cd /data/shared/Qwen
+    pip3 install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113
+    pip3 install -r requirements.txt
+    pip3 install -r requirements_web_demo.txt
+fi
+EOF
+
+FROM bundle_req as bundle_flash_attention
+ARG BUNDLE_FLASH_ATTENTION=true
+
+RUN <<EOF 
+if [ "$BUNDLE_FLASH_ATTENTION" = "true" ]; then
+    echo "CUDA 11.4 does not support flash-attention, please try other images."
+fi
+EOF
+
+FROM bundle_flash_attention as bundle_finetune
+ARG BUNDLE_FINETUNE=true
+
+RUN <<EOF
+if [ "$BUNDLE_FINETUNE" = "true" ]; then
+    cd /data/shared/Qwen
+
+    # Full-finetune / LoRA.
+    pip3 install deepspeed peft
+
+    # Q-LoRA.
+    apt update -y && DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends \
+        libopenmpi-dev openmpi-bin \
+        && rm -rf /var/lib/apt/lists/*
+    pip3 install optimum auto-gptq mpi4py
+fi
+EOF
+
+FROM bundle_finetune as bundle_openai_api
+ARG BUNDLE_OPENAI_API=true
+
+RUN <<EOF
+if [ "$BUNDLE_OPENAI_API" = "true" ]; then
+    cd /data/shared/Qwen
+
+    pip3 install fastapi uvicorn "openai<1.0.0" sse_starlette "pydantic<=1.10.13"
+fi
+EOF
+
+FROM bundle_openai_api as final
+ARG from
+
+COPY ../requirements.txt ./
+COPY ../requirements_web_demo.txt ./
+COPY ../cli_demo.py ./
+COPY ../web_demo.py ./
+COPY ../openai_api.py ./
+COPY ../finetune.py ./
+COPY ../utils.py ./
+COPY ./examples/* ./examples/
+COPY ./eval/* ./eval/
+COPY ./finetune/* ./finetune/
+
+EXPOSE 80
+
+WORKDIR /data/shared/Qwen/
+
+CMD ["python3", "web_demo.py", "--server-port", "80", "--server-name", "0.0.0.0", "-c", "/data/shared/Qwen/Qwen-Chat/"]
--- a/docker/docker_cli_demo.sh
+++ b/docker/docker_cli_demo.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+#
+# This script will automatically pull docker image from DockerHub, and start a container to run the Qwen-Chat cli-demo.
+
+IMAGE_NAME=qwenllm/qwen:cu117
+QWEN_CHECKPOINT_PATH=/path/to/Qwen-Chat
+CONTAINER_NAME=qwen
+
+function usage() {
+    echo '
+Usage: bash docker/docker_cli_demo.sh [-i IMAGE_NAME] -c [/path/to/Qwen-Chat] [-n CONTAINER_NAME]
+'
+}
+
+while [[ "$1" != "" ]]; do
+    case $1 in
+        -i | --image-name )
+            shift
+            IMAGE_NAME=$1
+            ;;
+        -c | --checkpoint )
+            shift
+            QWEN_CHECKPOINT_PATH=$1
+            ;;
+        -n | --container-name )
+            shift
+            CONTAINER_NAME=$1
+            ;;
+        -h | --help )
+            usage
+            exit 0
+            ;;
+        * )
+            echo "Unknown argument ${1}"
+            exit 1
+            ;;
+    esac
+    shift
+done
+
+if [ ! -e ${QWEN_CHECKPOINT_PATH}/config.json ]; then
+    echo "Checkpoint config.json file not found in ${QWEN_CHECKPOINT_PATH}, exit."
+    exit 1
+fi
+
+sudo docker pull ${IMAGE_NAME} || {
+    echo "Pulling image ${IMAGE_NAME} failed, exit."
+    exit 1
+}
+
+sudo docker run --gpus all --rm --name ${CONTAINER_NAME} \
+    --mount type=bind,source=${QWEN_CHECKPOINT_PATH},target=/data/shared/Qwen/Qwen-Chat \
+    -it ${IMAGE_NAME} \
+    python cli_demo.py -c /data/shared/Qwen/Qwen-Chat/
--- a/docker/docker_openai_api.sh
+++ b/docker/docker_openai_api.sh
@@ -0,0 +1,64 @@
+#!/usr/bin/env bash
+#
+# This script will automatically pull docker image from DockerHub, and start a daemon container to run the Qwen-Chat OpenAI API.
+
+IMAGE_NAME=qwenllm/qwen:cu117
+QWEN_CHECKPOINT_PATH=/path/to/Qwen-Chat
+PORT=8000
+CONTAINER_NAME=qwen
+
+function usage() {
+    echo '
+Usage: bash docker/docker_openai_api.sh [-i IMAGE_NAME] -c [/path/to/Qwen-Chat] [-n CONTAINER_NAME] [--port PORT]
+'
+}
+
+while [[ "$1" != "" ]]; do
+    case $1 in
+        -i | --image-name )
+            shift
+            IMAGE_NAME=$1
+            ;;
+        -c | --checkpoint )
+            shift
+            QWEN_CHECKPOINT_PATH=$1
+            ;;
+        -n | --container-name )
+            shift
+            CONTAINER_NAME=$1
+            ;;
+        --port )
+            shift
+            PORT=$1
+            ;;
+        -h | --help )
+            usage
+            exit 0
+            ;;
+        * )
+            echo "Unknown argument ${1}"
+            exit 1
+            ;;
+    esac
+    shift
+done
+
+if [ ! -e ${QWEN_CHECKPOINT_PATH}/config.json ]; then
+    echo "Checkpoint config.json file not found in ${QWEN_CHECKPOINT_PATH}, exit."
+    exit 1
+fi
+
+sudo docker pull ${IMAGE_NAME} || {
+    echo "Pulling image ${IMAGE_NAME} failed, exit."
+    exit 1
+}
+
+sudo docker run --gpus all -d --restart always --name ${CONTAINER_NAME} \
+    -v /var/run/docker.sock:/var/run/docker.sock -p ${PORT}:80 \
+    --mount type=bind,source=${QWEN_CHECKPOINT_PATH},target=/data/shared/Qwen/Qwen-Chat \
+    -it ${IMAGE_NAME} \
+    python openai_api.py --server-port 80 --server-name 0.0.0.0 -c /data/shared/Qwen/Qwen-Chat/ && {
+    echo "Successfully started OpenAI API server. Access 'http://localhost:${PORT}/v1' to try!
+Run \`docker logs ${CONTAINER_NAME}\` to check server status.
+Run \`docker rm -f ${CONTAINER_NAME}\` to stop and remove the server."
+}
--- a/docker/docker_web_demo.sh
+++ b/docker/docker_web_demo.sh
@@ -0,0 +1,64 @@
+#!/usr/bin/env bash
+#
+# This script will automatically pull docker image from DockerHub, and start a daemon container to run the Qwen-Chat web-demo.
+
+IMAGE_NAME=qwenllm/qwen:cu117
+QWEN_CHECKPOINT_PATH=/path/to/Qwen-7B-Chat
+PORT=8901
+CONTAINER_NAME=qwen
+
+function usage() {
+    echo '
+Usage: bash docker/docker_web_demo.sh [-i IMAGE_NAME] -c [/path/to/Qwen-Chat] [-n CONTAINER_NAME] [--port PORT]
+'
+}
+
+while [[ "$1" != "" ]]; do
+    case $1 in
+        -i | --image-name )
+            shift
+            IMAGE_NAME=$1
+            ;;
+        -c | --checkpoint )
+            shift
+            QWEN_CHECKPOINT_PATH=$1
+            ;;
+        -n | --container-name )
+            shift
+            CONTAINER_NAME=$1
+            ;;
+        --port )
+            shift
+            PORT=$1
+            ;;
+        -h | --help )
+            usage
+            exit 0
+            ;;
+        * )
+            echo "Unknown argument ${1}"
+            exit 1
+            ;;
+    esac
+    shift
+done
+
+if [ ! -e ${QWEN_CHECKPOINT_PATH}/config.json ]; then
+    echo "Checkpoint config.json file not found in ${QWEN_CHECKPOINT_PATH}, exit."
+    exit 1
+fi
+
+sudo docker pull ${IMAGE_NAME} || {
+    echo "Pulling image ${IMAGE_NAME} failed, exit."
+    exit 1
+}
+
+sudo docker run --gpus all -d --restart always --name ${CONTAINER_NAME} \
+    -v /var/run/docker.sock:/var/run/docker.sock -p ${PORT}:80 \
+    --mount type=bind,source=${QWEN_CHECKPOINT_PATH},target=/data/shared/Qwen/Qwen-Chat \
+    -it ${IMAGE_NAME} \
+    python web_demo.py --server-port 80 --server-name 0.0.0.0 -c /data/shared/Qwen/Qwen-Chat/ && {
+    echo "Successfully started web demo. Open 'http://localhost:${PORT}' to try!
+Run \`docker logs ${CONTAINER_NAME}\` to check demo status.
+Run \`docker rm -f ${CONTAINER_NAME}\` to stop and remove the demo."
+}