delete debug information

YunyaoZhou 2025-01-02 21:52:32 +08:00
parent f230886c3f
commit ee8cc6f81d
Signed by: shujakuin
GPG Key ID: 418C3CA28E350CCF


@@ -56,9 +56,7 @@ class MMOELoraModel(LoraModel):
         self.peft_config = config
         # self.add_adapter(adapter_name, self.peft_config[adapter_name])
-        import sys; print(__file__, sys._getframe().f_lineno)
         self.add_adapter(adapter_name, config=self.peft_config[adapter_name])
-        import sys; print(__file__, sys._getframe().f_lineno)
 
     def add_adapter(self, adapter_name, config=None):
         if config is not None: # get the lora config
@@ -71,18 +69,19 @@ class MMOELoraModel(LoraModel):
             self.peft_config[adapter_name] = config # subsititue the original config
         self._find_and_replace(adapter_name)
         if len(self.peft_config) > 1 and self.peft_config[adapter_name].bias != "none":
             raise ValueError(
                 "MMOELoraModel supports only 1 adapter with bias. When using multiple adapters, set bias to 'none' for all adapters."
             )
-        print(self.peft_config)
-        self.mark_only_lora_as_trainable(self.model, self.peft_config[adapter_name].bias)
+        self.mark_only_lora_as_trainable(
+            self.model, self.peft_config[adapter_name].bias
+        )
         if self.peft_config[adapter_name].inference_mode:
             _freeze_adapter(self.model, adapter_name)
 
-    def mark_only_lora_as_trainable(self,model: nn.Module, bias: str = "none") -> None:
+    def mark_only_lora_as_trainable(self, model: nn.Module, bias: str = "none") -> None:
         """Only activate the LoRA layer as trainable"""
         for n, p in model.named_parameters():
             if "lora_" not in n:
@@ -95,7 +94,11 @@ class MMOELoraModel(LoraModel):
                     p.requires_grad = True
         elif bias == "lora_only":
             for m in model.modules():
-                if isinstance(m, LoraLayer) and hasattr(m, "bias") and m.bias is not None:
+                if (
+                    isinstance(m, LoraLayer)
+                    and hasattr(m, "bias")
+                    and m.bias is not None
+                ):
                     m.bias.requires_grad = True
         else:
             raise NotImplementedError
@@ -378,7 +381,7 @@ class MMOELoraLinear(nn.Module, MMOELoraLayer):
 
     def forward(self, x: torch.Tensor, **kwargs):
         # task_id = kwargs["task_id"]
-        for k,v in kwargs.items():
+        for k, v in kwargs.items():
             print(k, v.shape)
         task_id = torch.tensor([0] * len(x), dtype=torch.long).to(x.device)
         previous_dtype = x.dtype
@@ -405,7 +408,11 @@ class MMOELoraLinear(nn.Module, MMOELoraLayer):
                 )
                 for i in range(self.expert_num):
                     result += (
-                        self.lora_B[self._active_adapter].loraB[i](self.lora_A[self._active_adapter].loraA[i](self.lora_dropout[self._active_adapter](x)))
+                        self.lora_B[self._active_adapter].loraB[i](
+                            self.lora_A[self._active_adapter].loraA[i](
+                                self.lora_dropout[self._active_adapter](x)
+                            )
+                        )
                         * self.scaling[self._active_adapter]
                         * expert_weight[..., i].view(-1, 1, 1)
                     )
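
The expression reformatted in the last hunk is the mixture-of-experts LoRA update: each expert i applies its own LoRA pair B_i(A_i(dropout(x))), the result is scaled by the adapter's scaling factor and by a per-sample expert weight drawn from a task embedding, and the weighted updates are summed onto the frozen linear output. Below is a minimal, self-contained sketch of that pattern, not the repository's MMOELoraLinear: the class name ToyMoELoraLinear, the plain nn.ModuleList experts, and the softmax gate over an nn.Embedding are assumptions made for illustration, and task_id is defaulted to zeros just as the diff does with torch.tensor([0] * len(x)).

import torch
import torch.nn as nn


class ToyMoELoraLinear(nn.Module):
    """Minimal sketch of a mixture-of-experts LoRA linear layer (illustrative only)."""

    def __init__(self, in_features, out_features, r=4, expert_num=2, task_num=3, scaling=1.0):
        super().__init__()
        self.base = nn.Linear(in_features, out_features)
        for p in self.base.parameters():
            p.requires_grad_(False)  # the base projection stays frozen, as in LoRA
        # one low-rank A/B pair per expert (simplified stand-in for loraA/loraB)
        self.lora_A = nn.ModuleList(nn.Linear(in_features, r, bias=False) for _ in range(expert_num))
        self.lora_B = nn.ModuleList(nn.Linear(r, out_features, bias=False) for _ in range(expert_num))
        self.gate = nn.Embedding(task_num, expert_num)  # assumed per-task expert weights
        self.dropout = nn.Dropout(0.1)
        self.scaling = scaling
        self.expert_num = expert_num

    def forward(self, x, task_id):
        result = self.base(x)                                  # frozen base output
        expert_weight = torch.softmax(self.gate(task_id), -1)  # (batch, expert_num)
        for i in range(self.expert_num):
            # weighted sum of per-expert LoRA updates, mirroring the reformatted hunk
            result = result + (
                self.lora_B[i](self.lora_A[i](self.dropout(x)))
                * self.scaling
                * expert_weight[..., i].view(-1, 1, 1)
            )
        return result


x = torch.randn(2, 5, 16)                          # (batch, seq_len, in_features)
task_id = torch.zeros(2, dtype=torch.long)         # the diff defaults every sample to task 0
print(ToyMoELoraLinear(16, 16)(x, task_id).shape)  # torch.Size([2, 5, 16])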