import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer

from evo_model import EvoDecoderModel

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
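# Note: bert-base-uncased supplies only its WordPiece vocabulary here; encode()
# adds [CLS]/[SEP] and pads with [PAD] (id 0), which the training loss below
# masks out via ignore_index.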

class TextDataset(Dataset):
    """Tokenizes raw strings into fixed-length sequences and serves
    (input, target) pairs for next-token prediction."""

    def __init__(self, texts, tokenizer, max_len=512):
        self.tokenizer = tokenizer
        self.inputs = [
            tokenizer.encode(t, truncation=True, max_length=max_len, padding="max_length")
            for t in texts
        ]

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        # Shift by one token: the model sees tokens 0..n-1 and predicts 1..n.
        x = torch.tensor(self.inputs[idx][:-1])
        y = torch.tensor(self.inputs[idx][1:])
        return x, y

texts = [
    "User: How are you?\nAssistant: I'm doing well, thank you.",
    "User: What is AI?\nAssistant: AI stands for artificial intelligence.",
]

dataset = TextDataset(texts, tokenizer)
loader = DataLoader(dataset, batch_size=2, shuffle=True)
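
# Quick sanity check: with max_len=512, the one-token shift in __getitem__
# makes x and y both (batch_size, 511).
xb, yb = next(iter(loader))
print(xb.shape, yb.shape)  # torch.Size([2, 511]) torch.Size([2, 511])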

model = EvoDecoderModel(vocab_size=tokenizer.vocab_size, d_model=512).to(device)
optimizer = optim.AdamW(model.parameters(), lr=5e-5)
# Ignore [PAD] positions so padding does not contribute to the loss.
criterion = nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)

epochs = 5
for epoch in range(epochs):
    total_loss = 0.0
    model.train()
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        logits = model(x)  # (batch, seq_len, vocab_size)
        # Flatten to (batch * seq_len, vocab_size) vs. (batch * seq_len,).
        loss = criterion(logits.view(-1, logits.size(-1)), y.view(-1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1} Loss: {total_loss/len(loader):.4f}")

torch.save(model.state_dict(), "evo_decoder_model.pt")
print("✅ Model saved to evo_decoder_model.pt")
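
# --- Optional: reload the checkpoint and greedily decode a few tokens ---
# A minimal sketch, assuming EvoDecoderModel's forward returns logits of shape
# (batch, seq_len, vocab_size), as the training loop above relies on.
model.load_state_dict(torch.load("evo_decoder_model.pt", map_location=device))
model.eval()

prompt = "User: How are you?\nAssistant:"
ids = torch.tensor([tokenizer.encode(prompt)], device=device)
with torch.no_grad():
    for _ in range(20):
        logits = model(ids)
        next_id = logits[:, -1, :].argmax(dim=-1, keepdim=True)  # greedy pick
        ids = torch.cat([ids, next_id], dim=1)
print(tokenizer.decode(ids[0], skip_special_tokens=True))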