add 72B and 1.8B Qwen models, add Ascend 910 and Hygon DCU support, add docker support

This commit is contained in:
yangapku
2023-11-30 15:29:13 +08:00
parent 981c89b2a9
commit e8e15962d8
52 changed files with 6139 additions and 1435 deletions

View File

@@ -2,7 +2,7 @@
export CUDA_DEVICE_MAX_CONNECTIONS=1
DIR=`pwd`
GPUS_PER_NODE=8
GPUS_PER_NODE=$(python -c 'import torch; print(torch.cuda.device_count())')
NNODES=1
NODE_RANK=0
MASTER_ADDR=localhost
@@ -13,6 +13,34 @@ MODEL="Qwen/Qwen-7B" # Set the path if you do not want to load from huggingface
# See the section for finetuning in README for more information.
DATA="path_to_data"
function usage() {
echo '
Usage: bash finetune/finetune_ds.sh [-m MODEL_PATH] [-d DATA_PATH]
'
}
while [[ "$1" != "" ]]; do
case $1 in
-m | --model )
shift
MODEL=$1
;;
-d | --data )
shift
DATA=$1
;;
-h | --help )
usage
exit 0
;;
* )
echo "Unknown argument ${1}"
exit 1
;;
esac
shift
done
DISTRIBUTED_ARGS="
--nproc_per_node $GPUS_PER_NODE \
--nnodes $NNODES \
@@ -44,4 +72,4 @@ torchrun $DISTRIBUTED_ARGS finetune.py \
--model_max_length 512 \
--gradient_checkpointing True \
--lazy_preprocess True \
--deepspeed finetune/ds_config_zero3.json
--deepspeed finetune/ds_config_zero3.json

View File

@@ -2,7 +2,7 @@
export CUDA_DEVICE_MAX_CONNECTIONS=1
DIR=`pwd`
GPUS_PER_NODE=8
GPUS_PER_NODE=$(python -c 'import torch; print(torch.cuda.device_count())')
NNODES=1
NODE_RANK=0
MASTER_ADDR=localhost
@@ -12,6 +12,39 @@ MODEL="Qwen/Qwen-7B" # Set the path if you do not want to load from huggingface
# ATTENTION: specify the path to your training data, which should be a json file consisting of a list of conversations.
# See the section for finetuning in README for more information.
DATA="path_to_data"
DS_CONFIG_PATH="finetune/ds_config_zero2.json"
function usage() {
echo '
Usage: bash finetune/finetune_lora_ds.sh [-m MODEL_PATH] [-d DATA_PATH] [--deepspeed DS_CONFIG_PATH]
'
}
while [[ "$1" != "" ]]; do
case $1 in
-m | --model )
shift
MODEL=$1
;;
-d | --data )
shift
DATA=$1
;;
--deepspeed )
shift
DS_CONFIG_PATH=$1
;;
-h | --help )
usage
exit 0
;;
* )
echo "Unknown argument ${1}"
exit 1
;;
esac
shift
done
DISTRIBUTED_ARGS="
--nproc_per_node $GPUS_PER_NODE \
@@ -45,4 +78,4 @@ torchrun $DISTRIBUTED_ARGS finetune.py \
--lazy_preprocess True \
--use_lora \
--gradient_checkpointing \
--deepspeed finetune/ds_config_zero2.json
--deepspeed ${DS_CONFIG_PATH}

View File

@@ -1,13 +1,39 @@
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
DIR=`pwd`
MODEL="Qwen/Qwen-7B" # Set the path if you do not want to load from huggingface directly
# ATTENTION: specify the path to your training data, which should be a json file consisting of a list of conversations.
# See the section for finetuning in README for more information.
DATA="path_to_data"
function usage() {
echo '
Usage: bash finetune/finetune_lora_single_gpu.sh [-m MODEL_PATH] [-d DATA_PATH]
'
}
while [[ "$1" != "" ]]; do
case $1 in
-m | --model )
shift
MODEL=$1
;;
-d | --data )
shift
DATA=$1
;;
-h | --help )
usage
exit 0
;;
* )
echo "Unknown argument ${1}"
exit 1
;;
esac
shift
done
export CUDA_VISIBLE_DEVICES=0
python finetune.py \

View File

@@ -2,7 +2,7 @@
export CUDA_DEVICE_MAX_CONNECTIONS=1
DIR=`pwd`
GPUS_PER_NODE=8
GPUS_PER_NODE=$(python -c 'import torch; print(torch.cuda.device_count())')
NNODES=1
NODE_RANK=0
MASTER_ADDR=localhost
@@ -13,6 +13,34 @@ MODEL="Qwen/Qwen-7B-Chat-Int4" # Set the path if you do not want to load from hu
# See the section for finetuning in README for more information.
DATA="path_to_data"
function usage() {
echo '
Usage: bash finetune/finetune_qlora_ds.sh [-m MODEL_PATH] [-d DATA_PATH]
'
}
while [[ "$1" != "" ]]; do
case $1 in
-m | --model )
shift
MODEL=$1
;;
-d | --data )
shift
DATA=$1
;;
-h | --help )
usage
exit 0
;;
* )
echo "Unknown argument ${1}"
exit 1
;;
esac
shift
done
DISTRIBUTED_ARGS="
--nproc_per_node $GPUS_PER_NODE \
--nnodes $NNODES \

View File

@@ -7,6 +7,34 @@ MODEL="Qwen/Qwen-7B-Chat-Int4" # Set the path if you do not want to load from hu
# See the section for finetuning in README for more information.
DATA="path_to_data"
function usage() {
echo '
Usage: bash finetune/finetune_qlora_single_gpu.sh [-m MODEL_PATH] [-d DATA_PATH]
'
}
while [[ "$1" != "" ]]; do
case $1 in
-m | --model )
shift
MODEL=$1
;;
-d | --data )
shift
DATA=$1
;;
-h | --help )
usage
exit 0
;;
* )
echo "Unknown argument ${1}"
exit 1
;;
esac
shift
done
export CUDA_VISIBLE_DEVICES=0
# Remember to use --fp16 instead of --bf16 due to autogptq