Spaces:

HemanM
/

EvoTransformer-v2.1

Running

App Files Community

HemanM committed 1 day ago

Commit

7c4d437

verified ·

1 Parent(s): e6e2360

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -11

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# ✅ Evo Showcase Mode: Gradio App with Enhanced Info Panel + GPT-3.5 Comparison
 import gradio as gr
 import openai
@@ -7,7 +7,6 @@ from inference import predict as evo_predict
 # 🔐 SET YOUR GPT-3.5 API KEY HERE
 openai.api_key = "sk-..."  # Replace with your actual key
-# ✅ Use the new openai>=1.0.0 API
 client = openai.OpenAI()
 def gpt_predict(prompt):
@@ -23,9 +22,9 @@ def gpt_predict(prompt):
     except Exception as e:
         return f"GPT Error: {str(e)}"
-def compare(goal, sol1, sol2):
     if not goal.strip() or not sol1.strip() or not sol2.strip():
-        return "⚠️ Please provide all inputs.", "", ""
     prompt = f"Goal: {goal}\nSolution 1: {sol1}\nSolution 2: {sol2}\nWhich is better?"
     evo = evo_predict(goal, sol1, sol2)
@@ -36,13 +35,25 @@ def compare(goal, sol1, sol2):
     else:
         verdict = "⚖️ Evo disagrees with GPT-3.5 — explore why."
-    return f"🧠 Evo: {evo}", f"🤖 GPT-3.5: {gpt}", verdict
 examples = [
-    ["Start a fire", "Use a match", "Pour water"],
-    ["Warm up food", "Use microwave", "Put it in fridge"],
-    ["Charge a phone", "Plug it in", "Put it on grass"],
-    ["Stop a car", "Press the brake", "Press the horn"]
 ]
 with gr.Blocks(title="⚔️ Evo vs GPT-3.5 – Real-Time Commonsense Showdown") as demo:
@@ -60,15 +71,18 @@ with gr.Blocks(title="⚔️ Evo vs GPT-3.5 – Real-Time Commonsense Showdown")
     with gr.Row():
         sol1 = gr.Textbox(label="Solution 1")
         sol2 = gr.Textbox(label="Solution 2")
     evo_output = gr.Textbox(label="EvoTransformer Response")
     gpt_output = gr.Textbox(label="GPT-3.5 Response")
     verdict_output = gr.Textbox(label="Verdict")
     submit = gr.Button("Submit")
-    submit.click(fn=compare, inputs=[goal, sol1, sol2], outputs=[evo_output, gpt_output, verdict_output])
-    gr.Examples(examples=examples, inputs=[goal, sol1, sol2], outputs=[evo_output, gpt_output, verdict_output], fn=compare, cache_examples=False)
     gr.Markdown("""
     > 🧪 *Note: EvoTransformer is a scratch-built model trained on 1K PIQA examples. It may occasionally misinterpret context or idioms. That’s part of its evolution.*

+# ✅ Evo Showcase Mode: Full Evaluation with Correct Answer Comparison
 import gradio as gr
 import openai
 # 🔐 SET YOUR GPT-3.5 API KEY HERE
 openai.api_key = "sk-..."  # Replace with your actual key
 client = openai.OpenAI()
 def gpt_predict(prompt):
     except Exception as e:
         return f"GPT Error: {str(e)}"
+def compare(goal, sol1, sol2, correct):
     if not goal.strip() or not sol1.strip() or not sol2.strip():
+        return "⚠️ Please provide all inputs.", "", "", ""
     prompt = f"Goal: {goal}\nSolution 1: {sol1}\nSolution 2: {sol2}\nWhich is better?"
     evo = evo_predict(goal, sol1, sol2)
     else:
         verdict = "⚖️ Evo disagrees with GPT-3.5 — explore why."
+    if correct.strip().lower() in ["solution 1", "solution 2"]:
+        if evo == correct and gpt == correct:
+            score_note = "✅ Both Evo and GPT-3.5 were correct."
+        elif evo == correct:
+            score_note = "🟢 Evo was correct. GPT-3.5 was wrong."
+        elif gpt == correct:
+            score_note = "🟢 GPT-3.5 was correct. Evo was wrong."
+        else:
+            score_note = "❌ Both were incorrect."
+    else:
+        score_note = "⚠️ Correct answer not provided or invalid (must be 'Solution 1' or 'Solution 2')."
+    return f"🧠 Evo: {evo}", f"🤖 GPT-3.5: {gpt}", verdict, score_note
 examples = [
+    ["Start a fire", "Use a match", "Pour water", "Solution 1"],
+    ["Warm up food", "Use microwave", "Put it in fridge", "Solution 1"],
+    ["Charge a phone", "Plug it in", "Put it on grass", "Solution 1"],
+    ["Stop a car", "Press the brake", "Press the horn", "Solution 1"]
 ]
 with gr.Blocks(title="⚔️ Evo vs GPT-3.5 – Real-Time Commonsense Showdown") as demo:
     with gr.Row():
         sol1 = gr.Textbox(label="Solution 1")
         sol2 = gr.Textbox(label="Solution 2")
+    correct = gr.Textbox(label="Correct Answer (Solution 1 or Solution 2)")
     evo_output = gr.Textbox(label="EvoTransformer Response")
     gpt_output = gr.Textbox(label="GPT-3.5 Response")
     verdict_output = gr.Textbox(label="Verdict")
+    score_output = gr.Textbox(label="Correctness Evaluation")
     submit = gr.Button("Submit")
+    submit.click(fn=compare, inputs=[goal, sol1, sol2, correct], outputs=[evo_output, gpt_output, verdict_output, score_output])
+    gr.Markdown("### 🔍 Examples:")
+    gr.Examples(examples=examples, inputs=[goal, sol1, sol2, correct], outputs=[evo_output, gpt_output, verdict_output, score_output], fn=compare, cache_examples=False)
     gr.Markdown("""
     > 🧪 *Note: EvoTransformer is a scratch-built model trained on 1K PIQA examples. It may occasionally misinterpret context or idioms. That’s part of its evolution.*