-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain_model.py
154 lines (125 loc) · 4.78 KB
/
train_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# Build and train models (using PyTorch Lightning)
# Training code
print("importing Training libraries ................")
import pydantic
from datetime import datetime
import pytorch_lightning as pl
import torch
import torch.nn as nn
from pytorch_lightning import Trainer
from torch.utils.data import Dataset
import json
from reverse_dictionary.embeddings import TrainEmbeddings, TestEmbeddings
# import Train and Test embeddings
class RDTrainer(pydantic.BaseModel):
    """Train a model that maps gloss embeddings to target vectors and dump test predictions.

    If ``ready_model`` is provided it is trained as-is; otherwise a default
    MLP is built from the training embeddings by :meth:`model_maker`.
    After :meth:`train`, :meth:`eval` runs the trained model on the test
    embeddings and writes the predictions to a JSON file.
    """

    # Training data: ``.embeds`` are the model inputs, ``.outputs`` the targets.
    train_embds: TrainEmbeddings
    # Test data: ``.embeds`` are the model inputs, ``.ids`` label each row.
    test_embds: TestEmbeddings
    lr: float
    epochs: int
    # Used in log messages and as the prefix of the predictions file name.
    trained_model_name: str
    save_checkpoint: bool = False
    batch_size: int = 128
    # Holds the model once train() has run. Declared as an explicit private
    # attribute so that assigning ``self._model`` is accepted regardless of
    # how the installed pydantic version treats underscore-prefixed names.
    _model: pl.LightningModule = pydantic.PrivateAttr(default=None)
    ready_model: pl.LightningModule = None  # put your specific model here
    # Timestamp taken when the instance is created (a default factory, so it
    # is not frozen at import time and shared by every instance).
    now: str = pydantic.Field(
        default_factory=lambda: datetime.now().strftime("%Y-%m-%d|%H:%M:%S")
    )

    class Config:
        # pl.LightningModule and the embedding containers are not pydantic types.
        arbitrary_types_allowed = True

    def _make_trainer(self):
        """Return a Trainer configured from this instance's settings."""
        return Trainer(
            max_epochs=self.epochs,
            devices=[0],
            enable_checkpointing=self.save_checkpoint,
        )

    def model_maker(self,):
        """Build the default MLP and a matching Trainer.

        The input width is the length of the first training embedding and the
        output width is the length of the first training target.

        Returns:
            A ``(trainer, model)`` tuple ready for ``trainer.fit(model)``.
        """
        lr = self.lr
        batch_size = self.batch_size
        input_size = len(self.train_embds.embeds[0])
        output_size = len(self.train_embds.outputs[0])
        loss_fn = torch.nn.MSELoss()
        inputs = torch.tensor(self.train_embds.embeds)
        targets = torch.tensor(self.train_embds.outputs)

        class CustomDataset(Dataset):
            """Pairs each input vector with the target vector at the same index."""

            def __init__(self, input_vectors, output_vectors):
                # Store the arguments themselves rather than reaching into
                # the enclosing scope, so the dataset is self-contained.
                self.input_vectors = input_vectors
                self.output_vectors = output_vectors

            def __len__(self):
                return len(self.input_vectors)

            def __getitem__(self, idx):
                return self.input_vectors[idx], self.output_vectors[idx]

        class MLP(pl.LightningModule):
            """Four hidden linear layers followed by a linear output layer."""

            def __init__(self, input_size, h1, h2, h3, h4, output_size):
                super().__init__()
                # Attribute names are kept stable: they define the
                # state_dict keys of any saved checkpoint.
                self.fc1 = nn.Linear(input_size, h1)
                self.fc2 = nn.Linear(h1, h2)
                self.fc3 = nn.Linear(h2, h3)
                self.fc4 = nn.Linear(h3, h4)
                self.relu = nn.GELU()  # the activation is GELU despite the name
                self.dropout = nn.Dropout(p=0.2)
                self.out = nn.Linear(h4, output_size)

            def forward(self, x):
                # The first three layers share the same
                # linear -> activation -> dropout pattern.
                for layer in (self.fc1, self.fc2, self.fc3):
                    x = self.dropout(self.relu(layer(x)))
                # NOTE(review): fc4 feeds the output layer with no activation
                # in between -- confirm this is intentional.
                return self.out(self.fc4(x))

            def configure_optimizers(self):
                return torch.optim.AdamW(self.parameters(), lr=lr)

            def training_step(self, batch, batch_idx):
                x, y = batch
                return {"loss": loss_fn(self(x), y)}

            def train_dataloader(self):
                return torch.utils.data.DataLoader(
                    dataset=CustomDataset(inputs, targets),
                    batch_size=batch_size,
                    shuffle=False,
                )

        trainer = self._make_trainer()
        model = MLP(input_size, 512 * 8, 512 * 4, 512 * 2, 512, output_size)
        return trainer, model

    def train(self):
        """Fit ``ready_model`` if one was supplied, else the default MLP.

        The fitted model is kept on the instance for later use by eval().
        """
        print(f"creating the model {self.trained_model_name} ...")
        # Compare against None explicitly: module truthiness can be
        # misleading (container modules define __len__).
        if self.ready_model is not None:
            model = self.ready_model
            trainer = self._make_trainer()
        else:
            trainer, model = self.model_maker()
        print(f"Training the model ... {self.trained_model_name} time {self.now}")
        trainer.fit(model)
        self._model = model
        # No explicit save happens here; checkpoint writing is left to the
        # Trainer and controlled by ``save_checkpoint``.
        print(f"Saving the model ... time {self.now}")

    def eval(self):
        """Run the trained model on the test embeddings and save predictions.

        Writes a JSON list of ``{"id": ..., "output": [...]}`` records to
        ``<trained_model_name>_test_preds_<timestamp>.json`` in the current
        working directory. Prints a message and returns without writing
        anything if train() has not been run yet.
        """
        if self._model is None:
            print("No trained model yet")
            return

        # Evaluation mode disables dropout.
        model = self._model
        model.eval()

        test_ids = self.test_embds.ids
        # Put the inputs on whatever device the model's parameters live on.
        test_embeds = torch.tensor(self.test_embds.embeds).to(model.device)

        with torch.no_grad():
            outputs = model(test_embeds)

        # Convert once to nested Python lists so the rows are JSON-serializable.
        rows = outputs.cpu().tolist()
        preds = [
            {"id": sample_id, "output": rows[i]}
            for i, sample_id in enumerate(test_ids)
        ]

        model_name = self.trained_model_name
        dt_string = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        output_file = f"{model_name}_test_preds_{dt_string}.json"

        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(preds, f, indent=4)
        print(f"Predictions saved to {output_file}")