# app.py -- last updated by HemanM ("Update app.py", commit 62adefb).
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from datasets import load_dataset
from transformers import AutoTokenizer, get_scheduler
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import io
from PIL import Image
import openai
import time
# ✅ Read the OpenAI API key from the environment instead of hard-coding it
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Run on the GPU whenever CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load PIQA; trust_remote_code=True opts in to running the dataset's own
# loading code, so only keep it for sources you trust.
dataset = load_dataset(path="piqa", trust_remote_code=True)

# Tokenizer shared by every tokenization call below.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
def tokenize_choices(example):
    """Tokenize both candidate solutions of one PIQA example.

    Each candidate is encoded as "<goal> <solution>", truncated or padded to
    exactly 128 tokens with the module-level ``tokenizer``.

    Args:
        example: Mapping with the keys ``"goal"``, ``"sol1"``, ``"sol2"`` and
            ``"label"``.

    Returns:
        A dict holding the token ids of the first candidate
        (``"input_ids_0"``), of the second candidate (``"input_ids_1"``) and
        the untouched ``"label"``.
    """
    ids_per_choice = [
        tokenizer(
            " ".join((example["goal"], example[solution_key])),
            truncation=True,
            padding="max_length",
            max_length=128,
        )["input_ids"]
        for solution_key in ("sol1", "sol2")
    ]
    first_ids, second_ids = ids_per_choice
    return {
        "input_ids_0": first_ids,
        "input_ids_1": second_ids,
        "label": example["label"],
    }
# Add the tokenized columns produced by tokenize_choices to every split.
dataset = dataset.map(function=tokenize_choices)

# Fixed evaluation slice: the first 200 validation rows, in "torch" format.
val_dataset = (
    dataset["validation"]
    .select(range(200))
    .with_format("torch")
)
# ✅ EvoTransformer model definition
class EvoTransformer(nn.Module):
    """Transformer encoder that maps one token-id sequence to a scalar score.

    The token ids are embedded, run through a stack of
    ``nn.TransformerEncoderLayer`` blocks, and the representation at
    position 0 is fed to a two-layer MLP that emits a single logit.

    Padding positions are excluded from attention so that the score does not
    depend on how much padding follows the real tokens.

    NOTE(review): no positional information is added to the embeddings, so
    the encoder cannot distinguish token order. Left as-is to keep the
    parameter layout unchanged -- confirm whether this is intentional.

    Args:
        vocab_size: Number of rows in the embedding table (default matches
            the 30522-entry vocabulary used by the rest of this file).
        d_model: Embedding / hidden width of the encoder.
        nhead: Number of attention heads per encoder layer.
        dim_feedforward: Width of each encoder layer's feed-forward network.
        num_layers: Number of stacked encoder layers.
        hidden_dim: Width of the classifier's hidden layer.
        pad_token_id: Token id treated as padding and masked out of
            attention. Pass ``None`` to disable masking entirely.
    """

    def __init__(
        self,
        vocab_size=30522,
        d_model=384,
        nhead=6,
        dim_feedforward=1024,
        num_layers=6,
        hidden_dim=128,
        pad_token_id=0,
    ):
        super().__init__()
        self.pad_token_id = pad_token_id
        self.embedding = nn.Embedding(vocab_size, d_model)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            batch_first=True,
        )
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.classifier = nn.Sequential(
            nn.Linear(d_model, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
        )

    def forward(self, input_ids):
        """Score a batch of token-id sequences.

        Args:
            input_ids: Integer tensor of shape ``(batch, seq_len)``.

        Returns:
            Tensor of shape ``(batch,)`` with one logit per sequence.
        """
        x = self.embedding(input_ids)

        pad_mask = None
        if self.pad_token_id is not None:
            # True marks key positions that attention must ignore.
            pad_mask = input_ids == self.pad_token_id
            # Always keep position 0 visible: it is the position that gets
            # classified, and a fully masked row would turn the attention
            # softmax into NaNs.
            pad_mask[:, 0] = False

        x = self.encoder(x, src_key_padding_mask=pad_mask)
        # Position 0 acts as the sequence summary (the [CLS] token when the
        # ids come from a BERT-style tokenizer).
        return self.classifier(x[:, 0, :]).squeeze(-1)
# ✅ GPT-3.5 response helper
def gpt35_answer(prompt):
    """Ask gpt-3.5-turbo a single-turn question and return its short reply.

    Works with both generations of the ``openai`` package: the client-based
    API introduced in 1.0 and the legacy ``openai.ChatCompletion`` API that
    1.0 removed.

    This is a best-effort helper for the demo: it never raises. Any failure
    (missing key, network error, unexpected response) is returned as a
    string of the form ``"[Error: ...]"``.

    Args:
        prompt: The user message to send.

    Returns:
        The stripped reply text, or an ``"[Error: ...]"`` string on failure.
    """
    request = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 20,
        "temperature": 0,
    }
    try:
        if hasattr(openai, "OpenAI"):
            # openai>=1.0: requests go through an explicit client object.
            # Reuse the key this module stored on ``openai.api_key``.
            client = openai.OpenAI(api_key=openai.api_key)
            response = client.chat.completions.create(**request)
            content = response.choices[0].message.content
        else:
            # openai<1.0: legacy module-level endpoint, dict-style response.
            response = openai.ChatCompletion.create(**request)
            content = response["choices"][0]["message"]["content"]
    except Exception as e:
        # Deliberately broad: the demo must keep running without GPT access.
        return f"[Error: {e}]"

    if content is None:
        return "[Error: response contained no message content]"
    return content.strip()
# ✅ Training and evaluation function
_NUM_EPOCHS = 3  # upper bound on training epochs per demo run
_PATIENCE = 2  # consecutive non-improving epochs tolerated before stopping
_CHOICE_LETTERS = ("A", "B")  # display letter for label 0 / label 1


def _pair_logits(model, batch):
    """Score both candidate solutions of a batch; returns (batch, 2) logits."""
    scores_0 = model(batch["input_ids_0"].to(device))
    scores_1 = model(batch["input_ids_1"].to(device))
    return torch.stack([scores_0, scores_1], dim=1)


def _parse_choice(answer):
    """Return "A" or "B" if the reply starts with that letter, else None."""
    first = answer.strip()[:1].upper()
    return first if first in _CHOICE_LETTERS else None


def _accuracy_plot(accs, few_shot_size):
    """Render the per-epoch validation accuracies as a PIL image."""
    buf = io.BytesIO()
    fig, ax = plt.subplots()
    try:
        ax.plot(accs, marker="o")
        ax.set_title(f"Validation Accuracy ({few_shot_size} examples)")
        ax.set_xlabel("Epoch")
        ax.set_ylabel("Accuracy")
        fig.savefig(buf, format="png")
    finally:
        # Without this every Gradio request would leave a figure alive.
        plt.close(fig)
    buf.seek(0)
    return Image.open(buf)


def train_and_demo(few_shot_size):
    """Train a fresh EvoTransformer on a PIQA subset and compare it to GPT-3.5.

    The model is trained for up to three epochs on the first
    ``few_shot_size`` training examples, evaluated after every epoch on the
    module-level ``val_dataset``, and then run side by side with GPT-3.5 on
    the first two validation examples.

    Args:
        few_shot_size: Number of training examples to use. Non-integer
            numeric values (as a UI slider may deliver) are truncated to int.

    Returns:
        A 3-tuple ``(image, best_accuracy_text, report_text)``: the accuracy
        plot as a PIL image, a "Best Accuracy: ..." string, and a text report
        with the per-example predictions followed by the model configuration.
    """
    start_time = time.time()
    # range() below needs a true int; a slider value may arrive as a float.
    few_shot_size = int(few_shot_size)

    model = EvoTransformer().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=5e-5)

    train_set = dataset["train"].select(range(few_shot_size)).with_format("torch")
    train_loader = DataLoader(train_set, batch_size=8, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32)
    scheduler = get_scheduler(
        "linear",
        optimizer=optimizer,
        num_warmup_steps=0,
        num_training_steps=_NUM_EPOCHS * len(train_loader),
    )

    best_val = 0
    accs = []
    stale_epochs = 0
    for _ in range(_NUM_EPOCHS):
        model.train()
        for batch in train_loader:
            optimizer.zero_grad()
            # The two per-choice scores form a 2-way classification problem.
            loss = criterion(_pair_logits(model, batch), batch["label"].to(device))
            loss.backward()
            optimizer.step()
            scheduler.step()

        model.eval()
        correct = 0
        with torch.no_grad():
            for batch in val_loader:
                preds = torch.argmax(_pair_logits(model, batch), dim=1)
                correct += (preds == batch["label"].to(device)).sum().item()
        acc = correct / len(val_dataset)
        accs.append(acc)

        if acc > best_val:
            best_val = acc
            stale_epochs = 0
        else:
            stale_epochs += 1
            if stale_epochs >= _PATIENCE:
                break

    # ✅ Accuracy plot
    img = _accuracy_plot(accs, few_shot_size)

    # ✅ GPT vs Evo predictions on the first two validation examples
    entries = []
    model.eval()
    with torch.no_grad():  # inference only: no autograd graph needed
        for i in range(2):
            ex = dataset["validation"][i]
            goal = ex["goal"]
            sol1 = ex["sol1"]
            sol2 = ex["sol2"]

            x0 = torch.tensor([ex["input_ids_0"]]).to(device)
            x1 = torch.tensor([ex["input_ids_1"]]).to(device)
            pred_evo = 0 if model(x0).item() > model(x1).item() else 1
            evo_mark = "✅" if pred_evo == ex["label"] else "❌"

            gpt_prompt = f"Q: {goal}\nA) {sol1}\nB) {sol2}\nWhich is more appropriate? Answer with A or B only."
            gpt_out = gpt35_answer(gpt_prompt)
            gpt_choice = _parse_choice(gpt_out)
            expected = {0: "A", 1: "B"}.get(ex["label"])
            gpt_mark = "✅" if gpt_choice is not None and gpt_choice == expected else "❌"
            # When the reply is not a clean A/B (e.g. an error string), show
            # the raw reply so the user can see what went wrong.
            gpt_shown = gpt_choice if gpt_choice is not None else (gpt_out.strip() or "[no answer]")

            entries.append(
                f"Q: {goal}\nA) {sol1}\nB) {sol2}\n\n"
                f"EvoTransformer: {_CHOICE_LETTERS[pred_evo]} {evo_mark}\n"
                f"GPT-3.5: {gpt_shown} {gpt_mark}"
            )
    output = "\n\n".join(entries).strip()

    # Report the real parameter count instead of a hard-coded estimate.
    n_params = sum(p.numel() for p in model.parameters())
    architecture_info = "\n".join(
        [
            "EvoTransformer v2.1 Configuration:",
            "- Embedding Dim: 384",
            "- Transformer Layers: 6",
            "- Attention Heads: 6",
            "- Feedforward Size: 1024",
            f"- Parameters: ~{n_params / 1e6:.0f}M",
            f"- Training Time: {time.time() - start_time:.2f}s",
        ]
    )
    return img, f"Best Accuracy: {best_val:.4f}", output + "\n\n" + architecture_info
# ✅ Gradio interface
# Wire train_and_demo to a one-slider UI and start serving it.
demo = gr.Interface(
    fn=train_and_demo,
    inputs=gr.Slider(
        minimum=10,
        maximum=500,
        step=10,
        value=50,
        label="Number of Training Examples",
    ),
    outputs=[
        gr.Image(label="Accuracy Plot"),
        gr.Textbox(label="Best Accuracy"),
        gr.Textbox(label="Evo vs GPT-3.5 Output"),
    ],
    title="🧬 EvoTransformer v2.1 Benchmark",
    description="Train EvoTransformer live on PIQA and compare with GPT-3.5.",
)
demo.launch()