# train.py — EvoDecoderModel training loop

import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer

from evo_model import EvoDecoderModel

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")


class TextDataset(Dataset):
    """Tokenizes raw texts and serves (input, target) pairs for next-token prediction."""

    def __init__(self, texts, tokenizer, max_len=512):
        self.tokenizer = tokenizer
        self.inputs = [
            tokenizer.encode(t, truncation=True, max_length=max_len, padding="max_length")
            for t in texts
        ]

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        # Shift by one token: the model predicts token i+1 from tokens 0..i.
        x = torch.tensor(self.inputs[idx][:-1])
        y = torch.tensor(self.inputs[idx][1:])
        return x, y


# Example data (replace with your own)
texts = [
    "User: How are you?\nAssistant: I'm doing well, thank you.",
    "User: What is AI?\nAssistant: AI stands for artificial intelligence.",
    # Add more...
]

dataset = TextDataset(texts, tokenizer)
loader = DataLoader(dataset, batch_size=2, shuffle=True)

# Initialize model
model = EvoDecoderModel(vocab_size=tokenizer.vocab_size, d_model=512).to(device)
optimizer = optim.AdamW(model.parameters(), lr=5e-5)
# ignore_index keeps padding tokens out of the loss; with padding="max_length"
# above, the pad positions would otherwise dominate the gradient signal.
criterion = nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)

# Training loop
epochs = 5
for epoch in range(epochs):
    total_loss = 0.0
    model.train()
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        logits = model(x)  # expected shape: (batch, seq_len, vocab_size)
        loss = criterion(logits.view(-1, logits.size(-1)), y.view(-1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch + 1} Loss: {total_loss / len(loader):.4f}")

torch.save(model.state_dict(), "evo_decoder_model.pt")
print("✅ Model saved to evo_decoder_model.pt")
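
# --- Optional sanity check after training: greedy generation (a minimal sketch) ---
# This assumes EvoDecoderModel(input_ids) returns logits of shape
# (batch, seq_len, vocab_size), matching the training loop above. The greedy
# decoding here is purely illustrative, not the project's own sampling code.
model.eval()
with torch.no_grad():
    prompt = tokenizer.encode(
        "User: How are you?\nAssistant:", return_tensors="pt"
    ).to(device)
    for _ in range(20):
        logits = model(prompt)
        # Take the most likely next token and append it to the prompt.
        next_id = logits[:, -1, :].argmax(dim=-1, keepdim=True)
        prompt = torch.cat([prompt, next_id], dim=1)

print(tokenizer.decode(prompt[0], skip_special_tokens=True))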