Spaces:

HemanM
/

EvoTransformer-v2.1

Running

App Files Files Community

HemanM committed about 23 hours ago

Commit

568d79a

verified ·

1 Parent(s): e42fcaf

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -67

app.py CHANGED Viewed

@@ -1,94 +1,89 @@
-# ✅ Evo Showcase Mode: Full Evaluation with Correct Answer Comparison
 import gradio as gr
 import openai
-from inference import predict as evo_predict
-# 🔐 SET YOUR GPT-3.5 API KEY HERE
-openai.api_key = "sk-..."  # Replace with your actual key
-client = openai.OpenAI()
-def gpt_predict(prompt):
     try:
-        response = client.chat.completions.create(
             model="gpt-3.5-turbo",
-            messages=[
-                {"role": "system", "content": "You're a commonsense reasoning assistant. Only say: Solution 1 or Solution 2."},
-                {"role": "user", "content": prompt}
-            ]
         )
-        return response.choices[0].message.content.strip()
     except Exception as e:
-        return f"GPT Error: {str(e)}"
-def compare(goal, sol1, sol2, correct):
-    if not goal.strip() or not sol1.strip() or not sol2.strip():
-        return "⚠️ Please provide all inputs.", "", "", ""
-    prompt = f"Goal: {goal}\nSolution 1: {sol1}\nSolution 2: {sol2}\nWhich is better?"
-    evo = evo_predict(goal, sol1, sol2)
-    gpt = gpt_predict(prompt)
-    if evo == gpt:
-        verdict = "✅ Evo agrees with GPT-3.5"
-    else:
-        verdict = "⚖️ Evo disagrees with GPT-3.5 — explore why."
-    if correct.strip().lower() in ["solution 1", "solution 2"]:
-        if evo == correct and gpt == correct:
-            score_note = "✅ Both Evo and GPT-3.5 were correct."
-        elif evo == correct:
-            score_note = "🟢 Evo was correct. GPT-3.5 was wrong."
-        elif gpt == correct:
-            score_note = "🟢 GPT-3.5 was correct. Evo was wrong."
-        else:
-            score_note = "❌ Both were incorrect."
-    else:
-        score_note = "⚠️ Correct answer not provided or invalid (must be 'Solution 1' or 'Solution 2')."
-    return f"🧠 Evo: {evo}", f"🤖 GPT-3.5: {gpt}", verdict, score_note
-examples = [
-    ["Start a fire", "Use a match", "Pour water", "Solution 1"],
-    ["Warm up food", "Use microwave", "Put it in fridge", "Solution 1"],
-    ["Charge a phone", "Plug it in", "Put it on grass", "Solution 1"],
-    ["Stop a car", "Press the brake", "Press the horn", "Solution 1"]
-]
-with gr.Blocks(title="⚔️ Evo vs GPT-3.5 – Real-Time Commonsense Showdown") as demo:
-    gr.Markdown("""
-    # 🧠 EvoTransformer v2.1
-    **PIQA Accuracy:** 69.7% &nbsp;&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;&nbsp; **Model Size:** ~13M Parameters &nbsp;&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;&nbsp; **Baseline:** GPT-3.5 ≈ 81%
-    EvoTransformer is a scratch-built reasoning model trained on just 1K PIQA examples. No pretraining. No fine-tuning. Pure evolution.
-    Compare its decisions with GPT-3.5 in real-time and witness how intelligence can emerge even from lean, efficient architectures.
-    """)
     with gr.Row():
         goal = gr.Textbox(label="Goal")
     with gr.Row():
         sol1 = gr.Textbox(label="Solution 1")
         sol2 = gr.Textbox(label="Solution 2")
-    correct = gr.Textbox(label="Correct Answer (Solution 1 or Solution 2)")
-    evo_output = gr.Textbox(label="EvoTransformer Response")
-    gpt_output = gr.Textbox(label="GPT-3.5 Response")
-    verdict_output = gr.Textbox(label="Verdict")
-    score_output = gr.Textbox(label="Correctness Evaluation")
-    submit = gr.Button("Submit")
-    submit.click(fn=compare, inputs=[goal, sol1, sol2, correct], outputs=[evo_output, gpt_output, verdict_output, score_output])
-    gr.Markdown("### 🔍 Examples:")
-    gr.Examples(examples=examples, inputs=[goal, sol1, sol2, correct], outputs=[evo_output, gpt_output, verdict_output, score_output], fn=compare, cache_examples=False)
-    gr.Markdown("""
-    > 🧪 *Note: EvoTransformer is a scratch-built model trained on 1K PIQA examples. It may occasionally misinterpret context or idioms. That’s part of its evolution.*
-    ---
-    Made with ❤️ by Dr. Heman Mohabeer — EvoTransformer is not just code. It's evolution.
-    """)
-    demo.launch()

 import gradio as gr
+from inference import predict
+from logger import log_interaction
 import openai
+import os
+# --- Set your OpenAI key here (or use secrets/environment)
+openai.api_key = os.getenv("OPENAI_API_KEY") or "sk-..."  # Replace if needed
+def gpt3_predict(goal, sol1, sol2):
+    prompt = f"""You are solving a commonsense reasoning task.
+Given a goal and two possible solutions, choose which solution makes more sense.
+Goal: {goal}
+Option A: {sol1}
+Option B: {sol2}
+Which option is better? Reply only with "Solution 1" or "Solution 2"."""
     try:
+        response = openai.ChatCompletion.create(
             model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": prompt}],
+            max_tokens=10
         )
+        answer = response.choices[0].message.content.strip()
+        return answer
     except Exception as e:
+        return f"GPT Error: {e}"
+def compare(goal, sol1, sol2, correct_answer):
+    # EvoTransformer prediction
+    evo = predict(goal, sol1, sol2)
+    # GPT-3.5 prediction
+    gpt = gpt3_predict(goal, sol1, sol2)
+    # Log feedback
+    log_interaction(goal, sol1, sol2, evo, gpt, correct_answer)
+    # Verdict
+    if correct_answer:
+        verdict = "✅ Evo was RIGHT ✅" if evo == correct_answer else "❌ Evo was WRONG ❌"
+        verdict += "\n"
+        verdict += "✅ GPT-3.5 was RIGHT ✅" if gpt == correct_answer else "❌ GPT-3.5 was WRONG ❌"
+    else:
+        verdict = "⚖️ Evo and GPT-3.5 predictions compared."
+    return evo, gpt, verdict
+# Gradio UI: three text inputs plus an optional ground-truth selector feed
+# compare(); its three return values fill the two response boxes and the verdict.
+with gr.Blocks() as demo:
+    gr.Markdown("## ⚔️ Evo vs GPT-3.5 – Real-Time Commonsense Showdown")
+    gr.Markdown("> 🧠 EvoTransformer v2.1 – PIQA Accuracy: 69.7% (vs GPT-3.5 ≈ 81%) · 13M Parameters · Fully Scratch-Trained · Leans Smart")
+    gr.Markdown("> 🧪 *Note: EvoTransformer is a scratch-built model trained on 1K PIQA examples. It may occasionally misinterpret context or idioms. That’s part of its evolution.*")
     with gr.Row():
         goal = gr.Textbox(label="Goal")
     with gr.Row():
         sol1 = gr.Textbox(label="Solution 1")
         sol2 = gr.Textbox(label="Solution 2")
+    # Ground truth is optional and starts unselected (value=None).
+    # NOTE(review): None is also listed as a choice, presumably so a selection
+    # can be cleared again — confirm how the installed Gradio renders it.
+    correct = gr.Radio(choices=["Solution 1", "Solution 2", None], label="✅ Correct Answer (if known)", value=None)
+    btn = gr.Button("Submit")
+    with gr.Row():
+        evo_out = gr.Textbox(label="🧠 EvoTransformer Response")
+        gpt_out = gr.Textbox(label="🤖 GPT-3.5 Response")
+    verdict = gr.Textbox(label="Verdict", interactive=False)
+    # Each example row is [goal, solution 1, solution 2, correct answer],
+    # matching the order of `inputs` passed to gr.Examples below.
+    examples = [
+        ["Start a fire", "Use a match", "Pour water", "Solution 1"],
+        ["Warm up food", "Use microwave", "Put it in fridge", "Solution 1"],
+        ["Charge a phone", "Plug it in", "Put it on grass", "Solution 1"],
+        ["Get rid of bad smell", "Open window", "Close door", "Solution 1"],
+        ["Find your way", "Use a map", "Close your eyes", "Solution 1"]
+    ]
+    # Only `inputs` is supplied (no fn/outputs), so the examples are not wired
+    # to compare() here; the Submit button below is what runs the comparison.
+    gr.Examples(
+        examples=examples,
+        inputs=[goal, sol1, sol2, correct],
+        label="🔍 Try These Examples"
+    )
+    # Four inputs map to compare(goal, sol1, sol2, correct_answer); its
+    # (evo, gpt, verdict) tuple maps onto the three output boxes in order.
+    btn.click(fn=compare, inputs=[goal, sol1, sol2, correct], outputs=[evo_out, gpt_out, verdict])
+    gr.Markdown("Made with ❤️ by Dr. Heman Mohabeer — EvoTransformer is not just code. It's evolution.")
+# Launch at module level, after the Blocks context has closed.
+demo.launch()