# Source: Hugging Face Space HemanM/EvoTransformer-v2.1 (status: Sleeping)
# File size: 2,285 Bytes

# ✅ Evo Showcase Mode: Full Gradio App with GPT-3.5 Comparison

import os
import re

import gradio as gr
import openai

from inference import predict as evo_predict

# 🔐 The OpenAI API key is read from the OPENAI_API_KEY environment variable
# (on Hugging Face Spaces, configure it as a secret) instead of being
# hard-coded here, where it would be visible to anyone who can read this file.
# os.getenv returns None when the variable is unset.
openai.api_key = os.getenv("OPENAI_API_KEY")

def gpt_predict(prompt):
    """Ask GPT-3.5 which of the two solutions described in *prompt* is better.

    A fixed system instruction and the user prompt are sent to the
    ``gpt-3.5-turbo`` chat model; the text of the first returned choice is
    handed back with surrounding whitespace stripped.

    Any exception raised while making the request or reading the reply is
    caught and reported as a string of the form ``"GPT Error: <message>"``
    instead of being propagated to the caller.
    """
    instructions = "You're a commonsense reasoning assistant. Given a goal and two options, pick the better one. Only say: Solution 1 or Solution 2."
    conversation = [
        {"role": "system", "content": instructions},
        {"role": "user", "content": prompt},
    ]
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=conversation,
        )
        first_choice = response["choices"][0]
        reply_text = first_choice["message"]["content"]
        return reply_text.strip()
    except Exception as err:
        # Surface the failure as text so the UI can still display something.
        return "GPT Error: " + str(err)

# Matches "Solution 1" / "solution 2" (any case, optional whitespace) in a reply.
_CHOICE_PATTERN = re.compile(r"solution\s*([12])", re.IGNORECASE)


def _extract_choice(answer):
    """Return "1" or "2" if *answer* names a solution, otherwise None."""
    found = _CHOICE_PATTERN.search(str(answer))
    return found.group(1) if found else None


def compare(goal, sol1, sol2):
    """Run Evo and GPT-3.5 on the same task and report whether they agree.

    Args:
        goal: Text describing the goal.
        sol1: Text of the first candidate solution.
        sol2: Text of the second candidate solution.

    Returns:
        A 3-tuple of strings ``(evo_line, gpt_line, verdict)``. When any input
        is missing or blank, the first element is a warning message and the
        other two are empty strings; neither model is called in that case.
    """
    # A missing field (None) is treated like an empty one instead of crashing
    # on ``None.strip()``.
    if not all((text or "").strip() for text in (goal, sol1, sol2)):
        return "⚠️ Please provide all inputs.", "", ""

    prompt = f"Goal: {goal}\nSolution 1: {sol1}\nSolution 2: {sol2}\nWhich is better?"
    evo = evo_predict(goal, sol1, sol2)
    gpt = gpt_predict(prompt)

    evo_choice = _extract_choice(evo)
    gpt_choice = _extract_choice(gpt)

    if isinstance(gpt, str) and gpt.startswith("GPT Error:"):
        # gpt_predict reports failures as "GPT Error: ..."; a failed request
        # is not a disagreement, so say so explicitly.
        verdict = "⚠️ GPT-3.5 request failed — no comparison available."
    elif evo_choice is not None and gpt_choice is not None:
        # Compare the chosen solution number, so punctuation or casing
        # differences ("Solution 1." vs "Solution 1") do not count as
        # disagreement.
        agree = evo_choice == gpt_choice
        verdict = (
            "✅ Evo agrees with GPT-3.5"
            if agree
            else "⚖️ Evo disagrees with GPT-3.5 — explore why."
        )
    elif evo == gpt:
        # NOTE(review): evo_predict's output format is not visible here; when
        # a choice cannot be parsed from both answers, fall back to the
        # original exact comparison.
        verdict = "✅ Evo agrees with GPT-3.5"
    else:
        verdict = "⚖️ Evo disagrees with GPT-3.5 — explore why."

    return f"🧠 Evo: {evo}", f"🤖 GPT-3.5: {gpt}", verdict

# Sample rows shown in the UI; each row is [goal, solution 1, solution 2],
# in the same order as the three arguments of compare().
examples = [
    [goal_text, first_option, second_option]
    for goal_text, first_option, second_option in (
        ("Start a fire", "Use a match", "Pour water"),
        ("Warm up food", "Use microwave", "Put it in fridge"),
        ("Charge a phone", "Plug it in", "Put it on grass"),
        ("Get rid of bad smell", "Open window", "Close door"),
    )
]

# Build the Gradio interface: three text inputs are passed to compare(), and
# the three strings it returns are shown in the three output boxes, in order.
demo = gr.Interface(
    fn=compare,
    inputs=[
        gr.Textbox(label=caption)
        for caption in ("Goal", "Solution 1", "Solution 2")
    ],
    outputs=[
        gr.Textbox(label=caption)
        for caption in ("EvoTransformer Response", "GPT-3.5 Response", "Verdict")
    ],
    title="⚔️ Evo vs GPT-3.5 – Real-Time Commonsense Showdown",
    # Leading and trailing newlines are kept so the text matches the
    # triple-quoted form exactly.
    description=(
        "\n"
        "🧠 EvoTransformer v2.1 – PIQA Accuracy: 69.7% (vs GPT-3.5 ≈ 81%)\n"
        "13M Parameters • Fully Scratch-Trained • Leans Smart\n"
        "\n"
        "This live app shows Evo's answer side-by-side with GPT-3.5. Try it and witness evolution.\n"
    ),
    examples=examples,
    theme="default",
)

# Start the web server only when this file is executed as a script, so that
# importing the module (for example to reuse `demo` or `compare`) does not
# launch the app as a side effect.
if __name__ == "__main__":
    demo.launch()