import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer

from evo_model import EvoTransformerV22

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")


class FeedbackDataset(Dataset):
    """Wraps the feedback CSV (columns: query, context, label) for training."""

    def __init__(self, csv_file):
        self.data = pd.read_csv(csv_file).dropna()

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        combined = row['query'] + " " + row['context']
        enc = tokenizer(
            combined,
            padding="max_length",
            truncation=True,
            max_length=128,
            return_tensors="pt",
        )
        # Single logit target, shaped to match BCEWithLogitsLoss
        label = torch.tensor(float(row['label'])).unsqueeze(0)
        return enc['input_ids'].squeeze(0), label


def fine_tune_on_feedback(model_path="trained_model_evo_hellaswag.pt",
                          feedback_file="feedback_log.csv"):
    # Load the previously trained weights; map_location keeps this working
    # on CPU-only machines even if the checkpoint was saved from a GPU.
    model = EvoTransformerV22()
    model.load_state_dict(torch.load(model_path, map_location="cpu"))
    model.to("cpu")
    model.train()

    dataset = FeedbackDataset(feedback_file)
    dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

    optimizer = optim.Adam(model.parameters(), lr=1e-5)
    loss_fn = nn.BCEWithLogitsLoss()

    for epoch in range(2):  # Light touch-up: two short passes over the feedback
        total_loss = 0.0
        for input_ids, labels in dataloader:
            optimizer.zero_grad()
            outputs = model(input_ids)
            loss = loss_fn(outputs.view(-1), labels.view(-1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch + 1} Loss: {total_loss:.4f}")

    # Overwrite the checkpoint in place with the updated weights
    torch.save(model.state_dict(), model_path)
    print("✅ Evo updated from feedback.")
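
# Usage sketch (not part of the original script, shown here as an assumption):
# run this file directly to fine-tune on the logged feedback. Assumes
# feedback_log.csv sits next to this script with columns query, context,
# and label (label a 0/1 value), matching what FeedbackDataset expects.
if __name__ == "__main__":
    fine_tune_on_feedback()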