Spaces:

HemanM
/

EvoTransformer-v2.1

Running

App Files Files Community

EvoTransformer-v2.1 / app.py

HemanM

Update app.py

488dba6 verified 1 day ago

raw

history blame

3.88 kB

	import gradio as gr
	from inference import predict
	from logger import log_feedback_to_firebase
	import openai
	import os
	from watchdog import manual_retrain
	from dashboard import render_dashboard

	# Read the OpenAI key from the environment at import time; os.environ[...]
	# raises KeyError here if OPENAI_API_KEY is not set.
	openai.api_key = os.environ["OPENAI_API_KEY"] # Add this secret in your Hugging Face Space

	def gpt_predict(goal, sol1, sol2):
	    """Ask GPT-3.5 which of two candidate solutions better achieves a goal.

	    Args:
	        goal: Text of the goal to achieve.
	        sol1: Text of the first candidate solution.
	        sol2: Text of the second candidate solution.

	    Returns:
	        "Solution 1" or "Solution 2" depending on the model's reply, or a
	        string starting with "GPT Error: " if the request or the reply
	        handling raised an exception (errors are reported, not propagated).
	    """
	    import re  # function-scope import so the block stands on its own

	    prompt = (
	        "You're solving a commonsense reasoning problem. "
	        "Choose the better solution to achieve the goal.\n\n"
	        f"Goal: {goal}\n\n"
	        f"Option 1: {sol1}\nOption 2: {sol2}\n\n"
	        "Which option is better? Reply with only 'Solution 1' or 'Solution 2'."
	    )
	    try:
	        # NOTE(review): openai.ChatCompletion looks like the pre-1.0 SDK
	        # interface -- confirm the pinned openai version still provides it.
	        response = openai.ChatCompletion.create(
	            model="gpt-3.5-turbo",
	            messages=[{"role": "user", "content": prompt}],
	            temperature=0,
	        )
	        reply = response.choices[0].message["content"]
	        # Take the FIRST standalone 1 or 2 in the reply. A plain
	        # `"1" in reply` test misreads answers such as
	        # "Solution 2 is better than option 1" or "10 steps".
	        choice = re.search(r"(?<!\d)([12])(?!\d)", reply)
	        if choice is not None and choice.group(1) == "1":
	            return "Solution 1"
	        # Same fallback as before: anything that is not a clear "1"
	        # is reported as "Solution 2".
	        return "Solution 2"
	    except Exception as e:
	        # Best-effort by design: the UI shows the error text in place of
	        # an answer rather than failing the whole request.
	        return f"GPT Error: {str(e)}"

	def compare(goal, sol1, sol2, correct):
	    """Run Evo and GPT-3.5 on one example and summarise who was right.

	    Args:
	        goal: Text of the goal.
	        sol1: Text of the first candidate solution.
	        sol2: Text of the second candidate solution.
	        correct: The answer to score both models against.

	    Returns:
	        A tuple ``(evo_answer, gpt_answer, verdict)`` where ``verdict`` is
	        a human-readable message saying which model(s) matched ``correct``.
	    """
	    evo_choice = predict(goal, sol1, sol2)
	    gpt_choice = gpt_predict(goal, sol1, sol2)

	    # Score each model against the supplied answer and pick the message.
	    if evo_choice == correct:
	        if gpt_choice == correct:
	            verdict = "✅ Both Evo and GPT-3.5 are correct!"
	        else:
	            verdict = "🧠 Evo got it right. GPT-3.5 missed it."
	    elif gpt_choice == correct:
	        verdict = "🤖 GPT-3.5 got it right. Evo missed it."
	    else:
	        verdict = "❌ Both models got it wrong."

	    # Hand the example and both answers to the feedback logger.
	    log_feedback_to_firebase(
	        goal, sol1, sol2, evo_choice, gpt_choice, correct, "from app.py"
	    )

	    return evo_choice, gpt_choice, verdict

	def trigger_retrain():
	    """Call ``manual_retrain()`` and turn its result into a status message.

	    Returns:
	        The success message when ``manual_retrain()`` returns a truthy
	        value, otherwise the failure message.
	    """
	    if manual_retrain():
	        return "✅ Evo retrained successfully!"
	    return "⚠️ Retraining failed."

	# Build the Gradio UI: one tab for the Evo vs GPT-3.5 comparison and one
	# tab for the dashboard, then launch the app.
	# NOTE(review): the nesting of the `with` bodies below was lost in this copy
	# of the file -- restore the intended indentation before running.
	with gr.Blocks() as demo:
	with gr.Tab("⚔️ Evo vs GPT-3.5 Showdown"):
	# Static header and model description.
	gr.Markdown("## ⚔️ Evo vs GPT-3.5 – Real-Time Commonsense Showdown")
	gr.Markdown(
	"> 🧠 EvoTransformer v2.1 – PIQA Accuracy: 69.7% (vs GPT-3.5 ≈ 81%)\n"
	"> 13M Parameters • Fully Scratch-Trained • Leans Smart\n"
	"> 🧪 Note: EvoTransformer is a scratch-built model trained on 1K PIQA examples. It may occasionally misinterpret context or idioms. That’s part of its evolution."
	)
	gr.Markdown(
	"> 🔄 EvoTransformer is not static. Every time you provide feedback, Evo learns and evolves. Welcome to real-time neural evolution."
	)

	# Input widgets: the goal, the two candidate solutions, and the answer
	# the user marks as correct (defaults to "Solution 1").
	with gr.Row():
	goal = gr.Text(label="Goal")
	with gr.Row():
	sol1 = gr.Text(label="Solution 1")
	sol2 = gr.Text(label="Solution 2")
	correct = gr.Radio(choices=["Solution 1", "Solution 2"], label="✅ Which is actually correct?", value="Solution 1")

	# Submit button and the three output boxes filled by compare().
	btn = gr.Button("Submit")
	evo_out = gr.Text(label="🧠 EvoTransformer Response")
	gpt_out = gr.Text(label="🤖 GPT-3.5 Response")
	verdict_out = gr.Text(label="⚖️ Verdict")

	# compare() returns (evo, gpt, verdict), mapped in order onto the outputs.
	btn.click(fn=compare, inputs=[goal, sol1, sol2, correct], outputs=[evo_out, gpt_out, verdict_out])

	# Example rows in the same order as the inputs: goal, sol1, sol2, correct.
	gr.Markdown("#### 🔍 Try These Examples:")
	examples = [
	["Start a fire", "Use a match", "Pour water", "Solution 1"],
	["Warm up food", "Use microwave", "Put it in fridge", "Solution 1"],
	["Charge a phone", "Plug it in", "Put it on grass", "Solution 1"],
	["Get rid of bad smell", "Open window", "Close door", "Solution 1"],
	]
	gr.Examples(examples=examples, inputs=[goal, sol1, sol2, correct])

	# Manual retraining: the message returned by trigger_retrain() is shown
	# in the status box.
	retrain_btn = gr.Button("🔁 Retrain EvoTransformer")
	retrain_status = gr.Text(label="📢 Retrain Status")
	retrain_btn.click(fn=trigger_retrain, outputs=[retrain_status])

	gr.Markdown("Made with ❤️ by Dr. Heman Mohabeer — EvoTransformer is not just code. It's evolution.")

	# Second tab: its content is produced by render_dashboard(), imported
	# from the dashboard module.
	with gr.Tab("📊 Evo Dashboard"):
	render_dashboard()

	# Start serving the UI.
	demo.launch()