diff --git a/.vscode/settings.json b/.vscode/settings.json
index a746a11..e957b23 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -13,7 +13,7 @@
   // ],
   "python.analysis.languageServerMode": "default",
   "python.analysis.typeCheckingMode": "basic",
-  "python.analysis.userFileIndexingLimit": -1,
+  "python.analysis.userFileIndexingLimit": 10000,
   "python.analysis.usePullDiagnostics": false,
   "python.analysis.importFormat": "relative"
 }
\ No newline at end of file
diff --git a/src/configs/accelerate_configs/deepspeed_zero1.yaml b/src/configs/accelerate_configs/deepspeed_zero1.yaml
index a369839..d53b443 100644
--- a/src/configs/accelerate_configs/deepspeed_zero1.yaml
+++ b/src/configs/accelerate_configs/deepspeed_zero1.yaml
@@ -2,7 +2,7 @@ compute_environment: LOCAL_MACHINE
 debug: false
 deepspeed_config:
   deepspeed_multinode_launcher: standard
-  gradient_accumulation_steps: 4
+  gradient_accumulation_steps: 2
   zero3_init_flag: false
   zero_stage: 1
 distributed_type: DEEPSPEED
diff --git a/src/peft_repo b/src/peft_repo
index 8311134..317d957 160000
--- a/src/peft_repo
+++ b/src/peft_repo
@@ -1 +1 @@
-Subproject commit 83111347f3df66f04bd6759b1a3dcce719380628
+Subproject commit 317d957cc101c4cb064066a1b228526a55f6e927
diff --git a/src/scripts/train_omni.sh b/src/scripts/train_omni.sh
new file mode 100755
index 0000000..99b1975
--- /dev/null
+++ b/src/scripts/train_omni.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+accelerate launch --config_file configs/accelerate_configs/deepspeed_zero1.yaml train.py \
+    --dataset_name textvqa \
+    --use_peft \
+    --peft_type MOELORA \
+    --model_name_or_path Qwen/Qwen2.5-Omni-3B \
+    --lora_target_modules .*model\.layers.*proj \
+    --lora_r 8 \
+    --lora_alpha 32 \
+    --per_device_train_batch_size 3 \
+    --per_device_eval_batch_size 1 \
+    --gradient_accumulation_steps 2 \
+    --num_train_epochs 1 \
+    --output_dir checkpoint/qwen2_alllinear/ \
+    --learning_rate 2e-4 \
+    --warmup_ratio 0.03 \
+    --lr_scheduler_type cosine \
+    --bf16 \
+    --torch_dtype bfloat16 \
+    --logging_steps 10 \
+    --gradient_checkpointing \
+    --weight_decay 0.1 \
+    --resume_from_checkpoint /root/autodl-tmp/zhouyunyao/projects/CL-LMM/src/checkpoint/qwen2_alllinear/checkpoint-1000
\ No newline at end of file
diff --git a/src/todo.md b/src/todo.md
index 5a018c2..45af0af 100644
--- a/src/todo.md
+++ b/src/todo.md
@@ -27,4 +27,16 @@
 
 [2025.05.16]
 
-- [ ] Unify the different continual learning frameworks so the overall framework is compatible with all of them
\ No newline at end of file
+- [ ] Unify the different continual learning frameworks so the overall framework is compatible with all of them
+
+[2025.05.28]
+
+- [x] MoeLora
+- [ ] Coin Benchmark
+- [x] Decide what to save at checkpoints, to simplify later evaluation
+- [ ] Olora
+- [ ] Hide-Llava
+
+[2025.05.30]
+
+- [ ] Evaluation metrics
diff --git a/src/train.py b/src/train.py
index aee47d1..79e262c 100644
--- a/src/train.py
+++ b/src/train.py
@@ -65,6 +65,13 @@ if __name__ == "__main__":
 
         model.add_adapter(peft_config)
 
+    elif model_args.peft_type == "MOELORA":
+        from peft.tuners import MOELoraConfig
+
+        peft_config = MOELoraConfig(target_modules=model_args.lora_target_modules)
+
+        model.add_adapter(peft_config)
+
     elif model_args.peft_type == "LORA":
         from peft.tuners.lora import LoraConfig
 
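Note on the MOELORA branch added to train.py above: `MOELoraConfig` comes from the project's patched peft fork (the `src/peft_repo` submodule bumped earlier in this diff), not from upstream PEFT. A minimal sketch of what the branch does at runtime, assuming that fork is on the import path; the `AutoModel` loader here is only a stand-in for the model construction train.py performs elsewhere:

```python
# Sketch only: MOELoraConfig is provided by the src/peft_repo fork,
# not by upstream PEFT.
from transformers import AutoModel
from peft.tuners import MOELoraConfig

# Stand-in for train.py's actual model construction.
model = AutoModel.from_pretrained("Qwen/Qwen2.5-Omni-3B")

# Same pattern train_omni.sh passes via --lora_target_modules.
peft_config = MOELoraConfig(target_modules=r".*model\.layers.*proj")

# transformers' PeftAdapterMixin.add_adapter injects the tuner layers
# in place, exactly as the new train.py branch does.
model.add_adapter(peft_config)
```

Any MoE-specific knobs (expert count, routing) are fork-specific and not visible in this diff, so they are left at their defaults here.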
diff --git a/src/train.sh b/src/train.sh
index ac0fd5d..25a5cb1 100755
--- a/src/train.sh
+++ b/src/train.sh
@@ -3,14 +3,14 @@
 accelerate launch --config_file configs/accelerate_configs/deepspeed_zero1.yaml train.py \
     --dataset_name textvqa \
     --use_peft \
-    --peft_type LORA \
+    --peft_type MOELORA \
     --model_name_or_path Qwen/Qwen2.5-Omni-3B \
     --lora_target_modules .\*proj.\*\|.\*fc.\*\|.\*mlp\.0\|.\*mlp\.2 \
     --lora_r 8 \
     --lora_alpha 32 \
     --per_device_train_batch_size 3 \
-    --per_device_eval_batch_size 1 \
     --gradient_accumulation_steps 4 \
+    --per_device_eval_batch_size 1 \
     --num_train_epochs 1 \
     --output_dir checkpoint/qwen2_alllinear/ \
     --learning_rate 5e-5 \
diff --git a/src/transformers_repo b/src/transformers_repo
index c8a4ee5..42a8639 160000
--- a/src/transformers_repo
+++ b/src/transformers_repo
@@ -1 +1 @@
-Subproject commit c8a4ee5b9daf9865b372a483fd04a984f0b265dc
+Subproject commit 42a8639e1e827d6f0ab07d87078ff048b20dab19
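For reference, the two launch scripts now target different module sets. Assuming the fork keeps upstream PEFT's behavior of `re.fullmatch`-ing a string `target_modules` pattern against each module name, the patterns resolve as sketched below; the module names are hypothetical and for illustration only:

```python
import re

# train.sh, after the shell strips the escaping backslashes:
train_sh = r".*proj.*|.*fc.*|.*mlp\.0|.*mlp\.2"
# train_omni.sh:
train_omni = r".*model\.layers.*proj"

for name in [
    "model.layers.0.self_attn.q_proj",  # hypothetical module names,
    "model.layers.0.mlp.2",             # for illustration only
    "visual.blocks.0.attn.proj",
]:
    print(f"{name}: train.sh={bool(re.fullmatch(train_sh, name))} "
          f"train_omni.sh={bool(re.fullmatch(train_omni, name))}")
```

If that assumption holds, train_omni.sh narrows MoE-LoRA to the projection layers under `model.layers`, leaving modules outside the language model (e.g. a `visual.blocks.*` tower) untouched, while train.sh still wraps every proj/fc/mlp linear layer.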