functional.py
import math
import copy

import torch
import torch.nn as nn
import torch.nn.init
import torch.nn.functional as F

# from megatron import print_rank_0
# from megatron.relora import ReLoRaLinear
from relora import ReLoRaLinear


def merge_and_reinit_functional(module):
    if not isinstance(module, ReLoRaLinear):
        return

    # Fold the current low-rank update into the dense weight, then reset the
    # LoRA factors so the next cycle starts from a zero delta.
    _delta = module.lora_B.weight @ module.lora_A.weight
    module.weight.data += _delta * module._post_lora_scale()
    nn.init.kaiming_uniform_(module.lora_A.weight, a=math.sqrt(5))
    nn.init.zeros_(module.lora_B.weight)
    if module.trainable_scaling:
        nn.init.zeros_(module.scaling)
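

# Illustrative note (not in the original file): the merge above applies
#     W <- W + post_lora_scale * (lora_B.weight @ lora_A.weight)
# and then re-draws lora_A (Kaiming uniform) while zeroing lora_B, so the
# effective delta is zero again at the start of the next ReLoRA cycle.
# A hedged single-layer sketch, assuming ReLoRaLinear takes the same
# constructor arguments used in wrap_with_ReLoRa below:
#
#     layer = ReLoRaLinear(512, 512, r=128, lora_alpha=32, lora_dropout=0.1)
#     merge_and_reinit_functional(layer)  # merge delta, then reset A and B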


def wrap_with_ReLoRa(model, r=128, lora_alpha=32, lora_dropout=0.1, trainable_scaling=False):
    if r <= 0:
        raise ValueError("r must be positive. If you want r == 0, use the original model.")

    new_model = model
    wrapped_one = False

    for module_name, module in model.named_modules():
        if not isinstance(module, nn.Linear):
            continue

        if isinstance(module, ReLoRaLinear):
            # print_rank_0("WARNING: Trying to wrap ReLoRA into ReLoRA. Are you sure this is what you want?")
            print("WARNING: Trying to wrap ReLoRA into ReLoRA. Are you sure this is what you want?")
            continue

        wrapped_one = True

        new_module = ReLoRaLinear(
            module.in_features,
            module.out_features,
            bias=module.bias is not None,
            r=r,
            lora_alpha=lora_alpha,
            lora_dropout=lora_dropout,
            trainable_scaling=trainable_scaling,
        )
        # Copy the pretrained weights, then zero lora_A so the wrapped layer
        # initially computes the same function as the original nn.Linear.
        new_module.weight.data = module.weight.data
        if module.bias is not None:
            new_module.bias.data = module.bias.data
        nn.init.zeros_(new_module.lora_A.weight)

        parent = _get_parent(module_name, new_model)
        module_suffix = module_name.split(".")[-1]
        setattr(parent, module_suffix, new_module)

    if not wrapped_one:
        raise ValueError("No nn.Linear found in the model. Cannot wrap with ReLoRa.")

    return new_model
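

# Hedged usage sketch (not in the original file): wrap every nn.Linear in a
# pretrained model before ReLoRA training, e.g.
#
#     model = wrap_with_ReLoRa(model, r=128, lora_alpha=32, lora_dropout=0.1)
#
# Because lora_A is zero-initialized above, the wrapped model should match the
# original model's outputs until the LoRA factors receive gradient updates.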


def _get_parent(module_name, model):
    module_names_list = module_name.split(".")
    parent_name = ".".join(module_names_list[:-1])
    parent = model.get_submodule(parent_name)
    return parent


def merge_and_reinit(model):
    for module in model.modules():
        if isinstance(module, ReLoRaLinear):
            merge_and_reinit_functional(module)


def merge_and_unwrap(model) -> nn.Module:
    unwrapped_model = copy.deepcopy(model)  # Create a deep copy of the model

    for module_name, module in model.named_modules():
        if isinstance(module, ReLoRaLinear):
            new_module = nn.Linear(module.in_features, module.out_features, bias=(module.bias is not None))
            # Merge the low-rank update into module.weight before copying it
            # into the plain nn.Linear replacement.
            merge_and_reinit_functional(module)
            new_module.weight.data = module.weight.data
            if module.bias is not None:
                new_module.bias.data = module.bias.data
            parent = _get_parent(module_name, unwrapped_model)
            module_suffix = module_name.split(".")[-1]
            setattr(parent, module_suffix, new_module)

    return unwrapped_model
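

# Minimal end-to-end sketch (not part of the original module), assuming
# ReLoRaLinear forwards like nn.Linear: wrap a toy model, simulate one ReLoRA
# cycle boundary, and unwrap back to plain nn.Linear layers.
if __name__ == "__main__":
    toy = nn.Sequential(
        nn.Linear(16, 32),
        nn.ReLU(),
        nn.Linear(32, 4),
    )

    toy = wrap_with_ReLoRa(toy, r=4, lora_alpha=32, lora_dropout=0.0)

    with torch.no_grad():
        _ = toy(torch.randn(8, 16))  # forward pass through the wrapped layers

    # End of a ReLoRA cycle: fold the low-rank updates into the dense weights
    # and reset the LoRA factors in place.
    merge_and_reinit(toy)

    # After training, recover a model made only of plain nn.Linear layers.
    plain = merge_and_unwrap(toy)
    print(plain)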