Spaces:

HemanM
/

EvoTransformer-v2.1

Running

App Files Files Community

EvoTransformer-v2.1 / app.py

HemanM

Update app.py

62adefb verified 2 days ago

raw

history blame

6.18 kB

	import os
	import torch
	import torch.nn as nn
	import torch.optim as optim
	from torch.utils.data import DataLoader
	from datasets import load_dataset
	from transformers import AutoTokenizer, get_scheduler
	import gradio as gr
	import matplotlib.pyplot as plt
	import numpy as np
	import io
	from PIL import Image
	import openai
	import time

	# The OpenAI key comes from the environment so it is never stored in the source.
	openai.api_key = os.environ.get("OPENAI_API_KEY")

	# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
	if torch.cuda.is_available():
	    device = torch.device("cuda")
	else:
	    device = torch.device("cpu")

	# PIQA is loaded with trust_remote_code=True, which allows the dataset's own
	# loading code to execute on this machine.
	dataset = load_dataset("piqa", trust_remote_code=True)

	# BERT's uncased WordPiece tokenizer turns text into token ids for the model.
	tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

	def tokenize_choices(example):
	    """Tokenize both candidate solutions of one PIQA example.

	    Each candidate is encoded as the goal text, a single space, and the
	    solution text, truncated/padded to exactly 128 token ids.

	    Args:
	        example: Mapping with the keys "goal", "sol1", "sol2" and "label".

	    Returns:
	        Dict with "input_ids_0" (goal + sol1), "input_ids_1" (goal + sol2)
	        and the unchanged "label".
	    """

	    def encode(solution_key):
	        # Only the token ids are kept; the rest of the tokenizer output is dropped.
	        text = example["goal"] + " " + example[solution_key]
	        encoded = tokenizer(text, truncation=True, padding="max_length", max_length=128)
	        return encoded["input_ids"]

	    first_ids = encode("sol1")
	    second_ids = encode("sol2")
	    return {
	        "input_ids_0": first_ids,
	        "input_ids_1": second_ids,
	        "label": example["label"],
	    }

	# Add the "input_ids_0" / "input_ids_1" columns to every split.
	dataset = dataset.map(tokenize_choices)

	# Validation uses only the first 200 examples, returned as torch tensors.
	val_dataset = (
	    dataset["validation"]
	    .select(range(200))
	    .with_format("torch")
	)

	# ✅ EvoTransformer definition
	class EvoTransformer(nn.Module):
	    """Transformer-encoder scorer that maps a token-id sequence to one scalar.

	    Pipeline: token embedding -> stack of ``nn.TransformerEncoderLayer`` ->
	    two-layer MLP applied to the encoder output at sequence position 0.

	    The defaults reproduce the original fixed configuration (vocabulary of
	    30522 ids, model width 384, 6 heads, feed-forward width 1024, 6 layers,
	    128 hidden units in the head), so ``EvoTransformer()`` is unchanged.

	    NOTE: the forward pass adds no positional encoding and passes no padding
	    mask to the encoder, so padded positions take part in self-attention.

	    Args:
	        vocab_size: Number of rows in the embedding table.
	        d_model: Embedding / encoder width.
	        nhead: Attention heads per encoder layer (must divide ``d_model``).
	        dim_feedforward: Hidden width of each encoder layer's feed-forward net.
	        num_layers: Number of stacked encoder layers.
	        hidden_dim: Hidden width of the classification head.
	    """

	    def __init__(
	        self,
	        vocab_size=30522,
	        d_model=384,
	        nhead=6,
	        dim_feedforward=1024,
	        num_layers=6,
	        hidden_dim=128,
	    ):
	        super().__init__()
	        self.embedding = nn.Embedding(vocab_size, d_model)
	        encoder_layer = nn.TransformerEncoderLayer(
	            d_model=d_model,
	            nhead=nhead,
	            dim_feedforward=dim_feedforward,
	            batch_first=True,
	        )
	        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
	        self.classifier = nn.Sequential(
	            nn.Linear(d_model, hidden_dim),
	            nn.ReLU(),
	            nn.Linear(hidden_dim, 1),
	        )

	    def forward(self, input_ids: torch.Tensor) -> torch.Tensor:
	        """Score each sequence in the batch.

	        Args:
	            input_ids: Integer token ids laid out as (batch, seq_len); the
	                encoder is built with ``batch_first=True``.

	        Returns:
	            One raw (unnormalized) score per sequence, shape (batch,).
	        """
	        x = self.embedding(input_ids)
	        x = self.encoder(x)
	        # Position 0 acts as the pooled representation of the whole sequence.
	        return self.classifier(x[:, 0, :]).squeeze(-1)

	# ✅ GPT-3.5 response
	def gpt35_answer(prompt):
	    """Ask gpt-3.5-turbo a single-turn question and return its short reply.

	    Works with both generations of the ``openai`` package: the client API
	    (``openai.OpenAI``, openai>=1.0) is used when present, otherwise the
	    legacy ``openai.ChatCompletion`` call is used. The legacy entry point
	    was removed in openai 1.0, so calling it unconditionally fails there.

	    Args:
	        prompt: Text sent as the only user message.

	    Returns:
	        The stripped reply text (at most 20 completion tokens, temperature 0),
	        or ``"[Error: ...]"`` if anything goes wrong. This function never
	        raises, so a failed API call cannot break the surrounding demo.
	    """
	    messages = [{"role": "user", "content": prompt}]
	    try:
	        if hasattr(openai, "OpenAI"):
	            # openai>=1.0: requests go through an explicit client object.
	            client = openai.OpenAI(api_key=openai.api_key)
	            response = client.chat.completions.create(
	                model="gpt-3.5-turbo",
	                messages=messages,
	                max_tokens=20,
	                temperature=0,
	            )
	            content = response.choices[0].message.content
	        else:
	            # openai<1.0: legacy module-level API.
	            response = openai.ChatCompletion.create(
	                model="gpt-3.5-turbo",
	                messages=messages,
	                max_tokens=20,
	                temperature=0,
	            )
	            content = response['choices'][0]['message']['content']
	        # The API may return no content; treat that as an empty reply.
	        return (content or "").strip()
	    except Exception as e:
	        # Deliberate best-effort boundary: report the failure as text.
	        return f"[Error: {e}]"

	# ✅ Training and evaluation function
	def _pair_logits(model, ids_0, ids_1):
	    """Score both candidate encodings and stack the scores into (batch, 2) logits."""
	    return torch.stack([model(ids_0), model(ids_1)], dim=1)


	def _accuracy_plot(accs, few_shot_size):
	    """Render the per-epoch validation accuracies as a PNG and return a PIL image."""
	    fig, ax = plt.subplots()
	    try:
	        ax.plot(accs, marker='o')
	        ax.set_title(f"Validation Accuracy ({few_shot_size} examples)")
	        ax.set_xlabel("Epoch")
	        ax.set_ylabel("Accuracy")
	        buf = io.BytesIO()
	        fig.savefig(buf, format='png')
	    finally:
	        # Without this every call would leave one more open figure in pyplot.
	        plt.close(fig)
	    buf.seek(0)
	    img = Image.open(buf)
	    # Decode immediately so the image no longer relies on lazy reads from buf.
	    img.load()
	    return img


	def _evo_vs_gpt_report(model, num_examples=2):
	    """Compare the model with GPT-3.5 on the first validation examples, as text."""
	    sections = []
	    model.eval()
	    for i in range(num_examples):
	        ex = dataset["validation"][i]
	        goal = ex["goal"]
	        sol1 = ex["sol1"]
	        sol2 = ex["sol2"]

	        x0 = torch.tensor([ex["input_ids_0"]]).to(device)
	        x1 = torch.tensor([ex["input_ids_1"]]).to(device)
	        # Inference only: no autograd graph is needed here.
	        with torch.no_grad():
	            score_0 = model(x0).item()
	            score_1 = model(x1).item()
	        pred_evo = 0 if score_0 > score_1 else 1
	        correct_evo = "✅" if pred_evo == ex["label"] else "❌"

	        gpt_prompt = f"Q: {goal}\nA) {sol1}\nB) {sol2}\nWhich is more appropriate? Answer with A or B only."
	        gpt_out = gpt35_answer(gpt_prompt)
	        # Slicing tolerates an empty reply, where gpt_out[0] would raise IndexError.
	        pred_gpt = gpt_out[:1].upper() or "?"
	        correct_gpt = "✅" if (pred_gpt == 'A' and ex["label"] == 0) or (pred_gpt == 'B' and ex["label"] == 1) else "❌"

	        sections.append(
	            f"Q: {goal}\nA) {sol1}\nB) {sol2}\n\nEvoTransformer: {'A' if pred_evo==0 else 'B'} {correct_evo}\nGPT-3.5: {pred_gpt} {correct_gpt}\n\n"
	        )
	    return "".join(sections).strip()


	def train_and_demo(few_shot_size):
	    """Train a fresh EvoTransformer on a PIQA subset and compare it with GPT-3.5.

	    A new model is trained from scratch on every call for up to 3 epochs on
	    the first ``few_shot_size`` training examples, evaluated after each epoch
	    on the fixed validation subset, and then compared with GPT-3.5 on the
	    first two validation examples.

	    Args:
	        few_shot_size: Number of training examples to use. Coerced to ``int``
	            because UI sliders may deliver the value as a float.

	    Returns:
	        Tuple ``(plot_image, best_accuracy_text, report_text)``: a PIL image of
	        validation accuracy per epoch, a "Best Accuracy: x.xxxx" string, and
	        the Evo-vs-GPT comparison followed by a configuration summary.
	    """
	    few_shot_size = int(few_shot_size)
	    num_epochs = 3
	    patience = 2

	    start_time = time.time()
	    model = EvoTransformer().to(device)
	    criterion = nn.CrossEntropyLoss()
	    optimizer = optim.AdamW(model.parameters(), lr=5e-5)

	    train_set = dataset["train"].select(range(few_shot_size)).with_format("torch")
	    train_loader = DataLoader(train_set, batch_size=8, shuffle=True)
	    val_loader = DataLoader(val_dataset, batch_size=32)

	    # Linear decay to zero over the whole planned run, with no warm-up.
	    scheduler = get_scheduler(
	        "linear",
	        optimizer=optimizer,
	        num_warmup_steps=0,
	        num_training_steps=num_epochs * len(train_loader),
	    )

	    best_val = 0
	    accs = []
	    early_stop = 0

	    for _ in range(num_epochs):
	        model.train()
	        for batch in train_loader:
	            optimizer.zero_grad()
	            logits = _pair_logits(
	                model,
	                batch["input_ids_0"].to(device),
	                batch["input_ids_1"].to(device),
	            )
	            # The two scores are treated as a 2-way classification over solutions.
	            loss = criterion(logits, batch["label"].to(device))
	            loss.backward()
	            optimizer.step()
	            scheduler.step()

	        model.eval()
	        correct = 0
	        with torch.no_grad():
	            for batch in val_loader:
	                logits = _pair_logits(
	                    model,
	                    batch["input_ids_0"].to(device),
	                    batch["input_ids_1"].to(device),
	                )
	                preds = torch.argmax(logits, dim=1)
	                correct += (preds == batch["label"].to(device)).sum().item()
	        acc = correct / len(val_dataset)
	        accs.append(acc)

	        # Stop once accuracy has failed to improve `patience` epochs in a row.
	        if acc > best_val:
	            best_val = acc
	            early_stop = 0
	        else:
	            early_stop += 1
	            if early_stop >= patience:
	                break

	    # Measured here so plotting and GPT network calls are not counted as training.
	    train_seconds = time.time() - start_time

	    img = _accuracy_plot(accs, few_shot_size)
	    output = _evo_vs_gpt_report(model)

	    # Counted from the model itself rather than quoted as a fixed estimate.
	    num_params = sum(p.numel() for p in model.parameters())
	    # Built line by line so the text does not pick up source-code indentation.
	    architecture_info = "\n".join([
	        "EvoTransformer v2.1 Configuration:",
	        "- Embedding Dim: 384",
	        "- Transformer Layers: 6",
	        "- Attention Heads: 6",
	        "- Feedforward Size: 1024",
	        f"- Parameters: ~{num_params / 1e6:.1f}M",
	        f"- Training Time: {train_seconds:.2f}s",
	    ])

	    return img, f"Best Accuracy: {best_val:.4f}", output + "\n\n" + architecture_info

	# ✅ Gradio interface
	# One slider in (number of training examples); three outputs back, in the
	# same order as the tuple returned by train_and_demo.
	gr.Interface(
	    fn=train_and_demo,
	    inputs=gr.Slider(
	        minimum=10,
	        maximum=500,
	        value=50,
	        step=10,
	        label="Number of Training Examples",
	    ),
	    outputs=[
	        gr.Image(label="Accuracy Plot"),
	        gr.Textbox(label="Best Accuracy"),
	        gr.Textbox(label="Evo vs GPT-3.5 Output"),
	    ],
	    title="🧬 EvoTransformer v2.1 Benchmark",
	    description="Train EvoTransformer live on PIQA and compare with GPT-3.5.",
	).launch()