HemanM committed on
Commit
9e69980
·
verified ·
1 Parent(s): 676eb03

Create retrain.py

Browse files
Files changed (1) hide show
  1. retrain.py +52 -0
retrain.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import pandas as pd
3
+ from torch.utils.data import Dataset, DataLoader
4
+ from transformers import AutoTokenizer
5
+ from evo_model import EvoTransformer
6
+ import torch.nn as nn
7
+ import torch.optim as optim
8
+
9
class FeedbackDataset(Dataset):
    """Map-style dataset over a feedback CSV.

    The CSV must provide ``prompt``, ``context`` and ``label`` columns. Each
    item is the token-id tensor of ``"<prompt> <context>"`` padded/truncated
    to ``max_length`` together with the integer label as a ``torch.long``
    scalar tensor.

    Args:
        csv_file: Path (or file-like object) handed to ``pandas.read_csv``.
        tokenizer_name: Checkpoint name passed to
            ``AutoTokenizer.from_pretrained``. Defaults to the tokenizer the
            original script used.
        max_length: Fixed sequence length every example is padded or
            truncated to.
    """

    def __init__(self, csv_file, tokenizer_name="bert-base-uncased", max_length=128):
        # NOTE: dropna() without a subset discards a row when ANY column is
        # missing a value, not only prompt/context/label.
        self.data = pd.read_csv(csv_file).dropna()
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
        self.max_length = max_length

    def __len__(self):
        """Return the number of rows that survived ``dropna``."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(input_ids, label)`` for the row at position ``idx``.

        ``input_ids`` has shape ``(max_length,)``; the attention mask produced
        by the tokenizer is not returned.
        """
        # Positional lookup: dropna() leaves gaps in the index labels, so
        # .iloc (not .loc) is required here.
        row = self.data.iloc[idx]
        label = int(row["label"])
        text = f"{row['prompt']} {row['context']}"
        encoded = self.tokenizer(
            text,
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
            return_tensors="pt",
        )
        # return_tensors="pt" yields a leading batch dim of 1; drop it so the
        # DataLoader can stack examples into (batch, max_length).
        input_ids = encoded["input_ids"].squeeze(0)
        # CrossEntropyLoss expects class indices as int64.
        return input_ids, torch.tensor(label, dtype=torch.long)
25
+
26
def fine_tune_on_feedback(
    csv_file="feedback_log.csv",
    model_path="evo_hellaswag.pt",
    epochs=2,
    batch_size=8,
    lr=2e-5,
):
    """Fine-tune the saved EvoTransformer on logged feedback and save it back.

    Loads the weights stored at ``model_path``, trains with Adam and
    cross-entropy loss on the examples in ``csv_file``, then overwrites
    ``model_path`` with the updated state dict. Called with no arguments it
    behaves like the original script (same files, 2 epochs, batch size 8,
    learning rate 2e-5).

    Args:
        csv_file: Feedback CSV consumed by ``FeedbackDataset``.
        model_path: Checkpoint file that is both read and overwritten.
        epochs: Number of passes over the feedback data.
        batch_size: Examples per optimisation step.
        lr: Adam learning rate.

    Returns:
        None. If the CSV yields no usable rows, nothing is trained or saved.
    """
    dataset = FeedbackDataset(csv_file)
    if len(dataset) == 0:
        # Nothing to learn from: avoid a sampler error on some torch versions
        # and avoid reporting a retrain that never happened.
        print("⚠️ No usable feedback rows found; Evo left unchanged.")
        return

    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = EvoTransformer().to(device)
    # NOTE(review): torch.load unpickles the file; only load checkpoints from
    # a trusted source (consider weights_only=True where the installed torch
    # version supports it).
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.train()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for _ in range(epochs):
        for input_ids, labels in dataloader:
            input_ids = input_ids.to(device)
            labels = labels.to(device)

            outputs = model(input_ids)
            loss = criterion(outputs, labels)

            # Clear gradients left over from the previous step before
            # back-propagating the current loss.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    torch.save(model.state_dict(), model_path)
    print("✅ Evo retrained and saved.")