# train.py — EvoDecoderModel training loop
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer
from evo_model import EvoDecoderModel
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
class TextDataset(Dataset):
    """Tokenizes texts once and serves fixed-length (input, target) pairs."""

    def __init__(self, texts, tokenizer, max_len=512):
        self.tokenizer = tokenizer
        self.inputs = [tokenizer.encode(t, truncation=True, max_length=max_len, padding='max_length') for t in texts]

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        # Shift by one token: the model predicts token t+1 from tokens up to t.
        x = torch.tensor(self.inputs[idx][:-1])
        y = torch.tensor(self.inputs[idx][1:])
        return x, y
# Example data (replace with your own)
texts = [
"User: How are you?\nAssistant: I'm doing well, thank you.",
"User: What is AI?\nAssistant: AI stands for artificial intelligence.",
# Add more...
]
dataset = TextDataset(texts, tokenizer)
loader = DataLoader(dataset, batch_size=2, shuffle=True)
# Initialize model
model = EvoDecoderModel(vocab_size=tokenizer.vocab_size, d_model=512).to(device)
optimizer = optim.AdamW(model.parameters(), lr=5e-5)
# Ignore padded positions so the loss reflects only real tokens.
criterion = nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)
# Training loop
epochs = 5
for epoch in range(epochs):
    total_loss = 0.0
    model.train()
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        logits = model(x)  # (batch, seq_len, vocab_size)
        # Flatten batch and sequence dims so the loss compares (N, vocab) logits with (N,) targets.
        loss = criterion(logits.view(-1, logits.size(-1)), y.view(-1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1} Loss: {total_loss/len(loader):.4f}")
torch.save(model.state_dict(), "evo_decoder_model.pt")
print("✅ Model saved to evo_decoder_model.pt")