feat: 更新训练脚本以支持MOELORA,调整梯度累积步数,优化配置文件

This commit is contained in:
2025-05-30 16:33:36 +08:00
parent 70c446e548
commit baccca420a
8 changed files with 50 additions and 7 deletions
+1 -1
View File
@@ -13,7 +13,7 @@
// ],
"python.analysis.languageServerMode": "default",
"python.analysis.typeCheckingMode": "basic",
"python.analysis.userFileIndexingLimit": -1,
"python.analysis.userFileIndexingLimit": 10000,
"python.analysis.usePullDiagnostics": false,
"python.analysis.importFormat": "relative"
}
@@ -2,7 +2,7 @@ compute_environment: LOCAL_MACHINE
debug: false
deepspeed_config:
deepspeed_multinode_launcher: standard
gradient_accumulation_steps: 4
gradient_accumulation_steps: 2
zero3_init_flag: false
zero_stage: 1
distributed_type: DEEPSPEED
+24
View File
@@ -0,0 +1,24 @@
#!/bin/bash
# Fine-tune Qwen/Qwen2.5-Omni-3B on the textvqa dataset with the MOELORA PEFT
# adapter. The job is launched through `accelerate` using
# configs/accelerate_configs/deepspeed_zero1.yaml and resumes from a
# previously saved checkpoint.
#
# --lora_target_modules is a regular expression, so it is single-quoted here.
# Left unquoted, the shell removes the backslash from `\.` (so the literal dot
# degrades to "any character") and treats `*` as a glob that may expand
# against files in the working directory before train.py ever sees it.
#
# NOTE(review): --resume_from_checkpoint is a machine-specific absolute path;
# adjust it when running on another host.
accelerate launch --config_file configs/accelerate_configs/deepspeed_zero1.yaml train.py \
--dataset_name textvqa \
--use_peft \
--peft_type MOELORA \
--model_name_or_path Qwen/Qwen2.5-Omni-3B \
--lora_target_modules '.*model\.layers.*proj' \
--lora_r 8 \
--lora_alpha 32 \
--per_device_train_batch_size 3 \
--per_device_eval_batch_size 1 \
--gradient_accumulation_steps 2 \
--num_train_epochs 1 \
--output_dir checkpoint/qwen2_alllinear/ \
--learning_rate 2e-4 \
--warmup_ratio 0.03 \
--lr_scheduler_type cosine \
--bf16 \
--torch_dtype bfloat16 \
--logging_steps 10 \
--gradient_checkpointing \
--weight_decay 0.1 \
--resume_from_checkpoint /root/autodl-tmp/zhouyunyao/projects/CL-LMM/src/checkpoint/qwen2_alllinear/checkpoint-1000
+13 -1
View File
@@ -27,4 +27,16 @@
[2025.05.16]
- [ ] 处理不同的持续学习框架,使得整体框架能够兼容
- [ ] 处理不同的持续学习框架,使得整体框架能够兼容
[2025.05.28]
- [x] MoELoRA
- [ ] CoIN Benchmark
- [x] 确定保存什么,便于后期测试
- [ ] O-LoRA
- [ ] HiDe-LLaVA
[2025.05.30]
- [ ] 评价指标
+7
View File
@@ -65,6 +65,13 @@ if __name__ == "__main__":
model.add_adapter(peft_config)
elif model_args.peft_type == "MOELORA":
from peft.tuners import MOELoraConfig
peft_config = MOELoraConfig(target_modules=model_args.lora_target_modules)
model.add_adapter(peft_config)
elif model_args.peft_type == "LORA":
from peft.tuners.lora import LoraConfig
+2 -2
View File
@@ -3,14 +3,14 @@
accelerate launch --config_file configs/accelerate_configs/deepspeed_zero1.yaml train.py \
--dataset_name textvqa \
--use_peft \
--peft_type LORA \
--peft_type MOELORA \
--model_name_or_path Qwen/Qwen2.5-Omni-3B \
--lora_target_modules .\*proj.\*\|.\*fc.\*\|.\*mlp\.0\|.\*mlp\.2 \
--lora_r 8 \
--lora_alpha 32 \
--per_device_train_batch_size 3 \
--per_device_eval_batch_size 1 \
--gradient_accumulation_steps 4 \
--per_device_eval_batch_size 1 \
--num_train_epochs 1 \
--output_dir checkpoint/qwen2_alllinear/ \
--learning_rate 5e-5 \