File size: 3,251 Bytes
312dbba
568d79a
 
2eed809
568d79a
 
 
 
312dbba
568d79a
 
 
2eed809
568d79a
 
 
23012c7
568d79a
 
2eed809
568d79a
2eed809
568d79a
 
2eed809
568d79a
 
2eed809
568d79a
e256998
568d79a
 
 
7c4d437
568d79a
 
e256998
568d79a
 
2eed809
568d79a
 
 
 
 
 
 
e6e2360
568d79a
e6e2360
568d79a
 
 
 
e6e2360
 
 
 
 
 
568d79a
 
 
 
 
 
 
e6e2360
568d79a
e6e2360
568d79a
 
 
 
 
 
 
e6e2360
568d79a
 
 
 
 
e6e2360
568d79a
e6e2360
568d79a
e6e2360
568d79a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import gradio as gr
from inference import predict
from logger import log_interaction
import openai
import os

# --- OpenAI credentials: read exclusively from the environment.
# Never hard-code a key (even a placeholder) in source — it invites accidental
# commits of real secrets. If OPENAI_API_KEY is unset, api_key stays None and
# the API call in gpt3_predict will fail with a clear authentication error,
# which that function already catches and reports in the UI.
openai.api_key = os.getenv("OPENAI_API_KEY")

def gpt3_predict(goal, sol1, sol2):
    """Ask GPT-3.5-turbo which of two candidate solutions better fits *goal*.

    Returns the model's trimmed reply text (expected to be "Solution 1" or
    "Solution 2"), or an error string of the form "GPT Error: ..." when the
    API call raises for any reason.
    """
    query = f"""You are solving a commonsense reasoning task.
Given a goal and two possible solutions, choose which solution makes more sense.

Goal: {goal}
Option A: {sol1}
Option B: {sol2}

Which option is better? Reply only with "Solution 1" or "Solution 2"."""

    try:
        reply = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": query}],
            max_tokens=10
        )
    except Exception as err:  # broad on purpose: surface any API failure in the UI
        return f"GPT Error: {err}"
    return reply.choices[0].message.content.strip()

def compare(goal, sol1, sol2, correct_answer):
    """Run EvoTransformer and GPT-3.5 on one item, log the result, and judge.

    Returns a (evo_answer, gpt_answer, verdict_text) triple for the Gradio
    output widgets. When *correct_answer* is falsy (unknown ground truth),
    the verdict is a neutral comparison note.
    """
    evo_answer = predict(goal, sol1, sol2)        # EvoTransformer prediction
    gpt_answer = gpt3_predict(goal, sol1, sol2)   # GPT-3.5 prediction

    # Persist the interaction (with optional ground truth) for later analysis.
    log_interaction(goal, sol1, sol2, evo_answer, gpt_answer, correct_answer)

    if not correct_answer:
        return evo_answer, gpt_answer, "βš–οΈ Evo and GPT-3.5 predictions compared."

    verdict_lines = [
        "βœ… Evo was RIGHT βœ…" if evo_answer == correct_answer else "❌ Evo was WRONG ❌",
        "βœ… GPT-3.5 was RIGHT βœ…" if gpt_answer == correct_answer else "❌ GPT-3.5 was WRONG ❌",
    ]
    return evo_answer, gpt_answer, "\n".join(verdict_lines)

# --- Gradio UI: side-by-side EvoTransformer vs GPT-3.5 comparison demo.
with gr.Blocks() as demo:
    gr.Markdown("## βš”οΈ Evo vs GPT-3.5 – Real-Time Commonsense Showdown")
    gr.Markdown("> 🧠 EvoTransformer v2.1 – PIQA Accuracy: 69.7% (vs GPT-3.5 β‰ˆ 81%) Β· 13M Parameters Β· Fully Scratch-Trained Β· Leans Smart")
    gr.Markdown("> πŸ§ͺ *Note: EvoTransformer is a scratch-built model trained on 1K PIQA examples. It may occasionally misinterpret context or idioms. That’s part of its evolution.*")

    # Inputs: one goal and two candidate solutions (PIQA-style item).
    with gr.Row():
        goal = gr.Textbox(label="Goal")
    with gr.Row():
        sol1 = gr.Textbox(label="Solution 1")
        sol2 = gr.Textbox(label="Solution 2")
    # Optional ground-truth label; defaults to None (unknown).
    # NOTE(review): including None in `choices` is unusual — verify Gradio
    # renders it as intended (a "no answer" option) rather than the text "None".
    correct = gr.Radio(choices=["Solution 1", "Solution 2", None], label="βœ… Correct Answer (if known)", value=None)

    btn = gr.Button("Submit")

    # Outputs: raw answers from each model, plus a verdict summary.
    with gr.Row():
        evo_out = gr.Textbox(label="🧠 EvoTransformer Response")
        gpt_out = gr.Textbox(label="πŸ€– GPT-3.5 Response")

    verdict = gr.Textbox(label="Verdict", interactive=False)

    # Preloaded example rows matching [goal, sol1, sol2, correct].
    examples = [
        ["Start a fire", "Use a match", "Pour water", "Solution 1"],
        ["Warm up food", "Use microwave", "Put it in fridge", "Solution 1"],
        ["Charge a phone", "Plug it in", "Put it on grass", "Solution 1"],
        ["Get rid of bad smell", "Open window", "Close door", "Solution 1"],
        ["Find your way", "Use a map", "Close your eyes", "Solution 1"]
    ]

    gr.Examples(
        examples=examples,
        inputs=[goal, sol1, sol2, correct],
        label="πŸ” Try These Examples"
    )

    # Wire the button to the comparison routine defined above.
    btn.click(fn=compare, inputs=[goal, sol1, sol2, correct], outputs=[evo_out, gpt_out, verdict])

    gr.Markdown("Made with ❀️ by Dr. Heman Mohabeer β€” EvoTransformer is not just code. It's evolution.")

# Start the web server (blocking call).
demo.launch()