feat✨: Update training scripts to support MOELORA, adjust gradient accumulation steps, and optimize config files
parent 70c446e548
commit baccca420a
.vscode/settings.json (vendored, 2 changes)
@@ -13,7 +13,7 @@
     // ],
     "python.analysis.languageServerMode": "default",
     "python.analysis.typeCheckingMode": "basic",
-    "python.analysis.userFileIndexingLimit": -1,
+    "python.analysis.userFileIndexingLimit": 10000,
     "python.analysis.usePullDiagnostics": false,
     "python.analysis.importFormat": "relative"
 }
configs/accelerate_configs/deepspeed_zero1.yaml (2 changes)
@@ -2,7 +2,7 @@ compute_environment: LOCAL_MACHINE
 debug: false
 deepspeed_config:
   deepspeed_multinode_launcher: standard
-  gradient_accumulation_steps: 4
+  gradient_accumulation_steps: 2
   zero3_init_flag: false
   zero_stage: 1
 distributed_type: DEEPSPEED
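For context on the change above: under DeepSpeed, the batch size per optimizer step is the per-device batch multiplied by the accumulation steps and the number of processes. A minimal sketch of that arithmetic in Python, using the batch size from the training scripts in this commit and a hypothetical 2-GPU launch:

per_device_train_batch_size = 3  # from the training scripts in this commit
gradient_accumulation_steps = 2  # the new value in this config
num_processes = 2                # assumption: depends on the accelerate launch

effective_batch = per_device_train_batch_size * gradient_accumulation_steps * num_processes
print(effective_batch)  # 12 with these numbers; the old setting of 4 gave 24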
@@ -1 +1 @@
-Subproject commit 83111347f3df66f04bd6759b1a3dcce719380628
+Subproject commit 317d957cc101c4cb064066a1b228526a55f6e927
src/scripts/train_omni.sh (new executable file, 24 lines)
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+accelerate launch --config_file configs/accelerate_configs/deepspeed_zero1.yaml train.py \
+--dataset_name textvqa \
+--use_peft \
+--peft_type MOELORA \
+--model_name_or_path Qwen/Qwen2.5-Omni-3B \
+--lora_target_modules .*model\.layers.*proj \
+--lora_r 8 \
+--lora_alpha 32 \
+--per_device_train_batch_size 3 \
+--per_device_eval_batch_size 1 \
+--gradient_accumulation_steps 2 \
+--num_train_epochs 1 \
+--output_dir checkpoint/qwen2_alllinear/ \
+--learning_rate 2e-4 \
+--warmup_ratio 0.03 \
+--lr_scheduler_type cosine \
+--bf16 \
+--torch_dtype bfloat16 \
+--logging_steps 10 \
+--gradient_checkpointing \
+--weight_decay 0.1 \
+--resume_from_checkpoint /root/autodl-tmp/zhouyunyao/projects/CL-LMM/src/checkpoint/qwen2_alllinear/checkpoint-1000
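A note on the --lora_target_modules value above: PEFT treats a string target_modules as a regular expression and full-matches it against each module's dotted name, so .*model\.layers.*proj restricts the adapter to projection layers under model.layers. A minimal sketch of the matching; the module names are illustrative of a Qwen-style decoder, not read from the real Qwen2.5-Omni module tree:

import re

pattern = r".*model\.layers.*proj"

for name in [
    "model.layers.0.self_attn.q_proj",  # selected: attention projection
    "model.layers.0.mlp.gate_proj",     # selected: MLP projection
    "model.embed_tokens",               # skipped: no trailing "proj"
    "visual.blocks.0.attn.qkv",         # skipped: outside model.layers
]:
    print(name, bool(re.fullmatch(pattern, name)))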
src/todo.md (14 changes)
@@ -27,4 +27,16 @@
 
 [2025.05.16]
 
 - [ ] Handle the different continual-learning frameworks so the overall framework stays compatible with all of them
+
+[2025.05.28]
+
+- [x] MoeLora
+- [ ] Coin Benchmark
+- [x] Decide what gets saved, to make later testing easier
+- [ ] Olora
+- [ ] Hide-Llava
+
+[2025.05.30]
+
+- [ ] Evaluation metrics
train.py
@@ -65,6 +65,13 @@ if __name__ == "__main__":
         model.add_adapter(peft_config)
 
+    elif model_args.peft_type == "MOELORA":
+        from peft.tuners import MOELoraConfig
+
+        peft_config = MOELoraConfig(target_modules=model_args.lora_target_modules)
+
+        model.add_adapter(peft_config)
+
     elif model_args.peft_type == "LORA":
         from peft.tuners.lora import LoraConfig
 
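Upstream peft does not export MOELoraConfig, so the import above presumably resolves against the patched peft submodule updated in this commit. After model.add_adapter(peft_config), PEFT normally leaves only the adapter weights trainable; a quick sanity check one could drop in right after that call (model is the variable from the surrounding script):

# Count trainable vs. total parameters after injecting the adapter.
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())
print(f"trainable: {trainable:,} / {total:,} ({100 * trainable / total:.2f}%)")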
@@ -3,14 +3,14 @@
 accelerate launch --config_file configs/accelerate_configs/deepspeed_zero1.yaml train.py \
 --dataset_name textvqa \
 --use_peft \
---peft_type LORA \
+--peft_type MOELORA \
 --model_name_or_path Qwen/Qwen2.5-Omni-3B \
 --lora_target_modules .\*proj.\*\|.\*fc.\*\|.\*mlp\.0\|.\*mlp\.2 \
 --lora_r 8 \
 --lora_alpha 32 \
 --per_device_train_batch_size 3 \
---per_device_eval_batch_size 1 \
 --gradient_accumulation_steps 4 \
+--per_device_eval_batch_size 1 \
 --num_train_epochs 1 \
 --output_dir checkpoint/qwen2_alllinear/ \
 --learning_rate 5e-5 \
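One caveat on this script's --lora_target_modules value: the backslashes are shell quoting, so bash strips them before exec and train.py actually receives .*proj.*|.*fc.*|.*mlp.0|.*mlp.2, with the dots unescaped. That is harmless here, since "." also matches a literal dot. A small sketch of what the delivered regex selects (the module names are hypothetical):

import re

received = r".*proj.*|.*fc.*|.*mlp.0|.*mlp.2"  # the argument after bash removes the \ quoting

for name in [
    "model.layers.0.self_attn.q_proj",  # matches the proj alternative
    "audio_tower.layers.0.fc1",         # matches the fc alternative
    "visual.merger.mlp.0",              # matches the mlp.0 alternative
]:
    print(name, bool(re.fullmatch(received, name)))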
@@ -1 +1 @@
-Subproject commit c8a4ee5b9daf9865b372a483fd04a984f0b265dc
+Subproject commit 42a8639e1e827d6f0ab07d87078ff048b20dab19