import gradio as gr
from inference import predict
from logger import log_feedback_to_firebase
import openai
import os
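
# The local helpers imported above come from inference.py / logger.py (not shown here);
# based on how they are called below, their assumed signatures are:
#   predict(goal, sol1, sol2) -> "Solution 1" or "Solution 2"
#   log_feedback_to_firebase(goal, sol1, sol2, evo, gpt, correct, source) -> None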

openai.api_key = os.environ["OPENAI_API_KEY"]  # Add this secret in your Hugging Face Space
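# Note: openai.ChatCompletion (used below) is the pre-1.0 OpenAI SDK interface;
# pin openai<1.0 (e.g. openai==0.28, typically in the Space's requirements.txt) for this call style.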

def gpt_predict(goal, sol1, sol2):
    prompt = f"You're solving a commonsense reasoning problem. Choose the better solution to achieve the goal.\n\nGoal: {goal}\n\nOption 1: {sol1}\nOption 2: {sol2}\n\nWhich option is better? Reply with only 'Solution 1' or 'Solution 2'."
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )
        reply = response.choices[0].message["content"]
        return "Solution 1" if "1" in reply else "Solution 2"
    except Exception as e:
        return f"GPT Error: {str(e)}"

def compare(goal, sol1, sol2, correct):
    evo = predict(goal, sol1, sol2)
    gpt = gpt_predict(goal, sol1, sol2)

    # Determine outcome
    if evo == gpt == correct:
        verdict = "βœ… Both Evo and GPT-3.5 are correct!"
    elif evo == correct and gpt != correct:
        verdict = "🧠 Evo got it right. GPT-3.5 missed it."
    elif gpt == correct and evo != correct:
        verdict = "πŸ€– GPT-3.5 got it right. Evo missed it."
    else:
        verdict = "❌ Both models got it wrong."

    # Log to Firebase
    log_feedback_to_firebase(goal, sol1, sol2, evo, gpt, correct, "from app.py")

    return evo, gpt, verdict

with gr.Blocks() as demo:
    gr.Markdown("## βš”οΈ Evo vs GPT-3.5 – Real-Time Commonsense Showdown")
    gr.Markdown(
        "> 🧠 EvoTransformer v2.1 – PIQA Accuracy: 69.7% (vs GPT-3.5 β‰ˆ 81%)\n"
        "> 13M Parameters β€’ Fully Scratch-Trained β€’ Leans Smart\n"
        "> πŸ§ͺ *Note: EvoTransformer is a scratch-built model trained on 1K PIQA examples. It may occasionally misinterpret context or idioms. That’s part of its evolution.*"
    )
    gr.Markdown(
        "> πŸ”„ *EvoTransformer is not static. Every time you provide feedback, Evo learns and evolves. Welcome to real-time neural evolution.*"
    )

    with gr.Row():
        goal = gr.Text(label="Goal")
    with gr.Row():
        sol1 = gr.Text(label="Solution 1")
        sol2 = gr.Text(label="Solution 2")
    correct = gr.Radio(choices=["Solution 1", "Solution 2"], label="✅ Which is actually correct?", value="Solution 1")

    btn = gr.Button("Submit")
    evo_out = gr.Text(label="🧠 EvoTransformer Response")
    gpt_out = gr.Text(label="🤖 GPT-3.5 Response")
    verdict_out = gr.Text(label="⚖️ Verdict")

    btn.click(fn=compare, inputs=[goal, sol1, sol2, correct], outputs=[evo_out, gpt_out, verdict_out])

    gr.Markdown("#### πŸ” Try These Examples:")
    examples = [
        ["Start a fire", "Use a match", "Pour water", "Solution 1"],
        ["Warm up food", "Use microwave", "Put it in fridge", "Solution 1"],
        ["Charge a phone", "Plug it in", "Put it on grass", "Solution 1"],
        ["Get rid of bad smell", "Open window", "Close door", "Solution 1"],
    ]
    gr.Examples(examples=examples, inputs=[goal, sol1, sol2, correct])

    gr.Markdown("Made with ❀️ by Dr. Heman Mohabeer β€” EvoTransformer is not just code. It's evolution.")

demo.launch()