Merge pull request #296 from QwenLM/features/add_finetuning

Enable gradient checkpointing (--gradient_checkpointing) in the LoRA and Q-LoRA fine-tuning scripts
2026-05-20 16:35:47 +08:00 · 2023-09-12 23:11:47 +08:00
parent e9f75c0ce5 73f4040abb
commit fcb36b7a94
4 changed files with 4 additions and 0 deletions
--- a/finetune/finetune_lora_ds.sh
+++ b/finetune/finetune_lora_ds.sh
@@ -44,4 +44,5 @@ torchrun $DISTRIBUTED_ARGS finetune.py \
    --model_max_length 2048 \
    --lazy_preprocess True \
    --use_lora \
+    --gradient_checkpointing \
    --deepspeed finetune/ds_config_zero2.json
--- a/finetune/finetune_lora_single_gpu.sh
+++ b/finetune/finetune_lora_single_gpu.sh
@@ -32,4 +32,5 @@ python finetune.py \
  --report_to "none" \
  --model_max_length 2048 \
  --lazy_preprocess True \
+  --gradient_checkpointing \
  --use_lora
--- a/finetune/finetune_qlora_ds.sh
+++ b/finetune/finetune_qlora_ds.sh
@@ -46,4 +46,5 @@ torchrun $DISTRIBUTED_ARGS finetune.py \
    --lazy_preprocess True \
    --use_lora \
    --q_lora \
+    --gradient_checkpointing \
    --deepspeed finetune/ds_config_zero2.json
--- a/finetune/finetune_qlora_single_gpu.sh
+++ b/finetune/finetune_qlora_single_gpu.sh
@@ -32,5 +32,6 @@ python finetune.py \
  --report_to "none" \
  --model_max_length 2048 \
  --lazy_preprocess True \
+  --gradient_checkpointing \
  --use_lora \
  --q_lora