import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim
from transformers import PreTrainedModel, PretrainedConfig, AutoTokenizer

from model import EvoTransformer  # assumes your core model is in model.py
class EvoTransformerConfig(PretrainedConfig):
    model_type = "evo-transformer"

    def __init__(
        self,
        vocab_size=30522,
        d_model=256,
        nhead=4,
        dim_feedforward=512,
        num_hidden_layers=4,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.vocab_size = vocab_size
        self.d_model = d_model
        self.nhead = nhead
        self.dim_feedforward = dim_feedforward
        self.num_hidden_layers = num_hidden_layers
class EvoTransformerForClassification(PreTrainedModel):
    config_class = EvoTransformerConfig

    def __init__(self, config):
        super().__init__(config)
        self.model = EvoTransformer(
            vocab_size=config.vocab_size,
            d_model=config.d_model,
            nhead=config.nhead,
            dim_feedforward=config.dim_feedforward,
            num_layers=config.num_hidden_layers,
        )
        self.classifier = nn.Linear(config.d_model, 2)  # binary head: Solution 1 vs. Solution 2

    def forward(self, input_ids, attention_mask=None):
        x = self.model(input_ids)       # (batch_size, seq_len, d_model)
        pooled = x[:, 0, :]             # [CLS]-like pooling: take the first token
        logits = self.classifier(pooled)
        return logits

    def save_pretrained(self, save_directory):
        torch.save(self.state_dict(), f"{save_directory}/pytorch_model.bin")
        self.config.save_pretrained(save_directory)

    @classmethod
    def from_pretrained(cls, load_directory):
        config = EvoTransformerConfig.from_pretrained(load_directory)
        model = cls(config)
        model.load_state_dict(torch.load(f"{load_directory}/pytorch_model.bin"))
        return model
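

# --- Optional: inference sketch (not part of the original file) ---
# A minimal helper showing how the classifier head above could be used at
# inference time, under the same input format as the training code below.
# The function name `predict_best_solution` is an assumption for illustration.
def predict_best_solution(model, tokenizer, goal, sol1, sol2, device="cpu"):
    """Return 'Solution 1' or 'Solution 2' for a single example."""
    model.eval()
    text = f"{goal} [SEP] {sol1} [SEP] {sol2}"
    encoding = tokenizer(
        text,
        truncation=True,
        padding="max_length",
        max_length=64,
        return_tensors="pt",
    )
    with torch.no_grad():
        logits = model(
            encoding["input_ids"].to(device),
            encoding["attention_mask"].to(device),
        )
    return "Solution 1" if logits.argmax(dim=-1).item() == 0 else "Solution 2"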
# ✅ Add this retraining logic
def train_evo_transformer(df, epochs=1):
    class EvoDataset(Dataset):
        def __init__(self, dataframe, tokenizer):
            self.df = dataframe
            self.tokenizer = tokenizer

        def __len__(self):
            return len(self.df)

        def __getitem__(self, idx):
            row = self.df.iloc[idx]
            text = f"{row['goal']} [SEP] {row['sol1']} [SEP] {row['sol2']}"
            encoding = self.tokenizer(
                text,
                truncation=True,
                padding="max_length",
                max_length=64,
                return_tensors="pt",
            )
            input_ids = encoding["input_ids"].squeeze(0)
            attention_mask = encoding["attention_mask"].squeeze(0)
            label = torch.tensor(0 if row["correct"] == "Solution 1" else 1)
            return input_ids, attention_mask, label

    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    config = EvoTransformerConfig()
    model = EvoTransformerForClassification(config)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.train()

    dataset = EvoDataset(df, tokenizer)
    loader = DataLoader(dataset, batch_size=8, shuffle=True)
    optimizer = optim.Adam(model.parameters(), lr=2e-5)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        for input_ids, attention_mask, labels in loader:
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            labels = labels.to(device)

            logits = model(input_ids, attention_mask)
            loss = criterion(logits, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    torch.save(model.state_dict(), "trained_model.pt")
    return True
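

# --- Optional: usage sketch (not part of the original file) ---
# Illustrates the DataFrame layout train_evo_transformer() expects:
# columns 'goal', 'sol1', 'sol2', and 'correct' (either "Solution 1" or
# "Solution 2"). The toy row below is a placeholder, not real data.
if __name__ == "__main__":
    import pandas as pd

    demo_df = pd.DataFrame(
        {
            "goal": ["Boil water quickly"],
            "sol1": ["Use an electric kettle."],
            "sol2": ["Leave a pot of water in the sun."],
            "correct": ["Solution 1"],
        }
    )
    train_evo_transformer(demo_df, epochs=1)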