add 72B and 1.8B Qwen models, add Ascend 910 and Hygon DCU support, add docker support

This commit is contained in:
yangapku
2023-11-30 15:29:13 +08:00
parent 981c89b2a9
commit e8e15962d8
52 changed files with 6139 additions and 1435 deletions

View File

@@ -2,7 +2,7 @@
# Exported so that processes started by this script inherit the setting.
export CUDA_DEVICE_MAX_CONNECTIONS=1
# Directory the script was invoked from.
DIR=`pwd`
# NOTE(review): the next two lines assign the same variable; in this diff view
# they appear to be the old (hard-coded) and new (auto-detected) versions of
# one line — confirm only the second is present in the actual script.
GPUS_PER_NODE=8
# Ask PyTorch how many CUDA devices it can see; the result is later passed to
# the launcher as --nproc_per_node.
GPUS_PER_NODE=$(python -c 'import torch; print(torch.cuda.device_count())')
# Number of nodes, later passed to the launcher as --nnodes.
NNODES=1
# Rank of this node and address of the master node; presumably consumed by the
# distributed launcher arguments further down — TODO confirm.
NODE_RANK=0
MASTER_ADDR=localhost
@@ -13,6 +13,34 @@ MODEL="Qwen/Qwen-7B" # Set the path if you do not want to load from huggingface
# See the section for finetuning in README for more information.
DATA="path_to_data"
# Print the command-line synopsis of this script to stdout.
# The synopsis line is preceded by one blank line and followed by one blank
# line. Takes no arguments.
usage() {
  printf '\n%s\n\n' 'Usage: bash finetune/finetune_ds.sh [-m MODEL_PATH] [-d DATA_PATH]'
}
# Parse the script's command-line options, overriding the MODEL and DATA
# defaults assigned earlier in the script.
#
# Options:
#   -m, --model PATH   store PATH in MODEL
#   -d, --data  PATH   store PATH in DATA
#   -h, --help         print the usage text and exit with status 0
#
# Any other argument is reported on stderr and the script exits with status 1.
# An option that is missing its value (e.g. a trailing "-m") is rejected the
# same way instead of silently leaving MODEL/DATA empty.
#
# Globals:   MODEL, DATA (written)
# Arguments: the script's positional parameters
parse_args() {
  # Loop on the argument count rather than on "$1" being non-empty, so an
  # empty-string argument cannot silently end parsing and hide later options.
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -m | --model )
        if [[ $# -lt 2 ]]; then
          echo "Missing value for argument ${1}" >&2
          exit 1
        fi
        MODEL="$2"
        shift 2
        ;;
      -d | --data )
        if [[ $# -lt 2 ]]; then
          echo "Missing value for argument ${1}" >&2
          exit 1
        fi
        DATA="$2"
        shift 2
        ;;
      -h | --help )
        usage
        exit 0
        ;;
      * )
        echo "Unknown argument ${1}" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
DISTRIBUTED_ARGS="
--nproc_per_node $GPUS_PER_NODE \
--nnodes $NNODES \
@@ -44,4 +72,4 @@ torchrun $DISTRIBUTED_ARGS finetune.py \
--model_max_length 512 \
--gradient_checkpointing True \
--lazy_preprocess True \
--deepspeed finetune/ds_config_zero3.json
--deepspeed finetune/ds_config_zero3.json