Spaces:

HemanM
/

EvoTransformer-v2.1

Running

App Files Files Community

HemanM committed 2 days ago

Commit

19ba196

verified ·

1 Parent(s): 2c69375

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -52

app.py CHANGED Viewed

@@ -1,88 +1,73 @@
 import gradio as gr
 from inference import predict
-from logger import log_interaction
 import openai
 import os
-# --- Set your OpenAI key here (or use secrets/environment)
-openai.api_key = os.getenv("OPENAI_API_KEY") or "sk-..."  # Replace if needed
-def gpt3_predict(goal, sol1, sol2):
-    prompt = f"""You are solving a commonsense reasoning task.
-Given a goal and two possible solutions, choose which solution makes more sense.
-Goal: {goal}
-Option A: {sol1}
-Option B: {sol2}
-Which option is better? Reply only with "Solution 1" or "Solution 2"."""
     try:
         response = openai.ChatCompletion.create(
             model="gpt-3.5-turbo",
             messages=[{"role": "user", "content": prompt}],
-            max_tokens=10
         )
-        answer = response.choices[0].message.content.strip()
-        return answer
     except Exception as e:
-        return f"GPT Error: {e}"
-def compare(goal, sol1, sol2, correct_answer):
-    # EvoTransformer prediction
     evo = predict(goal, sol1, sol2)
-    # GPT-3.5 prediction
-    gpt = gpt3_predict(goal, sol1, sol2)
-    # Log feedback
-    log_interaction(goal, sol1, sol2, evo, gpt, correct_answer)
-    # Verdict
-    if correct_answer:
-        verdict = "✅ Evo was RIGHT ✅" if evo == correct_answer else "❌ Evo was WRONG ❌"
-        verdict += "\n"
-        verdict += "✅ GPT-3.5 was RIGHT ✅" if gpt == correct_answer else "❌ GPT-3.5 was WRONG ❌"
     else:
-        verdict = "⚖️ Evo and GPT-3.5 predictions compared."
     return evo, gpt, verdict
 with gr.Blocks() as demo:
     gr.Markdown("## ⚔️ Evo vs GPT-3.5 – Real-Time Commonsense Showdown")
-    gr.Markdown("> 🧠 EvoTransformer v2.1 – PIQA Accuracy: 69.7% (vs GPT-3.5 ≈ 81%) · 13M Parameters · Fully Scratch-Trained · Leans Smart")
-    gr.Markdown("> 🧪 *Note: EvoTransformer is a scratch-built model trained on 1K PIQA examples. It may occasionally misinterpret context or idioms. That’s part of its evolution.*")
     with gr.Row():
-        goal = gr.Textbox(label="Goal")
     with gr.Row():
-        sol1 = gr.Textbox(label="Solution 1")
-        sol2 = gr.Textbox(label="Solution 2")
-    correct = gr.Radio(choices=["Solution 1", "Solution 2", None], label="✅ Correct Answer (if known)", value=None)
     btn = gr.Button("Submit")
-    with gr.Row():
-        evo_out = gr.Textbox(label="🧠 EvoTransformer Response")
-        gpt_out = gr.Textbox(label="🤖 GPT-3.5 Response")
-    verdict = gr.Textbox(label="Verdict", interactive=False)
     examples = [
         ["Start a fire", "Use a match", "Pour water", "Solution 1"],
         ["Warm up food", "Use microwave", "Put it in fridge", "Solution 1"],
         ["Charge a phone", "Plug it in", "Put it on grass", "Solution 1"],
         ["Get rid of bad smell", "Open window", "Close door", "Solution 1"],
-        ["Find your way", "Use a map", "Close your eyes", "Solution 1"]
     ]
-    gr.Examples(
-        examples=examples,
-        inputs=[goal, sol1, sol2, correct],
-        label="🔍 Try These Examples"
-    )
-    btn.click(fn=compare, inputs=[goal, sol1, sol2, correct], outputs=[evo_out, gpt_out, verdict])
     gr.Markdown("Made with ❤️ by Dr. Heman Mohabeer — EvoTransformer is not just code. It's evolution.")

 import gradio as gr
 from inference import predict
+from logger import log_feedback_to_firebase
 import openai
 import os
+openai.api_key = os.environ["OPENAI_API_KEY"]  # Add this secret in HF Space
def gpt_predict(goal, sol1, sol2):
    """Ask GPT-3.5 which of two candidate solutions better achieves a goal.

    Args:
        goal: Text of the goal inserted into the prompt.
        sol1: Text presented to the model as "Option 1".
        sol2: Text presented to the model as "Option 2".

    Returns:
        ``"Solution 1"`` or ``"Solution 2"`` when the model's reply names a
        choice. If the API call fails, or the reply contains neither a
        standalone ``1`` nor ``2``, a string starting with ``"GPT Error:"``
        is returned instead of raising.
    """
    import re  # local import: only the reply parsing below needs it

    prompt = (
        "You're solving a commonsense reasoning problem. "
        "Choose the better solution to achieve the goal.\n\n"
        f"Goal: {goal}\n\n"
        f"Option 1: {sol1}\n"
        f"Option 2: {sol2}\n\n"
        "Which option is better? "
        "Reply with only 'Solution 1' or 'Solution 2'."
    )
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
        )
        reply = response.choices[0].message["content"] or ""
    except Exception as e:
        # UI boundary: surface the failure as text rather than crash the app.
        return f"GPT Error: {e}"

    # Take the first standalone 1 or 2. A plain substring test for "1" would
    # misread replies such as "Solution 2 is better than option 1", and would
    # silently map any unrelated reply to "Solution 2".
    choice = re.search(r"(?<!\d)([12])(?!\d)", reply)
    if choice is None:
        return f"GPT Error: unrecognized reply {reply!r}"
    return f"Solution {choice.group(1)}"
def compare(goal, sol1, sol2, correct):
    """Run both models on one example, build a verdict, and log the result.

    Args:
        goal: Goal text passed to both predictors.
        sol1: First candidate solution.
        sol2: Second candidate solution.
        correct: The reference answer each prediction is compared against
            with ``==`` (presumably "Solution 1" / "Solution 2" as supplied
            by the UI radio; confirm against ``predict``'s return values).

    Returns:
        A ``(evo, gpt, verdict)`` tuple: the two raw predictions followed by
        a human-readable verdict string.
    """
    evo = predict(goal, sol1, sol2)
    gpt = gpt_predict(goal, sol1, sol2)

    # Verdict keyed by (Evo matches the reference, GPT matches the reference).
    verdict_by_outcome = {
        (True, True): "✅ Both Evo and GPT-3.5 are correct!",
        (True, False): "🧠 Evo got it right. GPT-3.5 missed it.",
        (False, True): "🤖 GPT-3.5 got it right. Evo missed it.",
        (False, False): "❌ Both models got it wrong.",
    }
    evo_ok = bool(evo == correct)
    gpt_ok = bool(gpt == correct)
    verdict = verdict_by_outcome[(evo_ok, gpt_ok)]

    # Record the interaction; the trailing argument tags the call's origin.
    log_feedback_to_firebase(goal, sol1, sol2, evo, gpt, correct, "from app.py")
    return evo, gpt, verdict
 # Gradio UI: inputs for one example, a submit button wired to compare(),
 # three output boxes, and a small set of sample rows.
 with gr.Blocks() as demo:
     # Title followed by a three-line block-quote banner.
     gr.Markdown("## ⚔️ Evo vs GPT-3.5 – Real-Time Commonsense Showdown")
     banner_lines = [
         "> 🧠 EvoTransformer v2.1 – PIQA Accuracy: 69.7% (vs GPT-3.5 ≈ 81%)",
         "> 13M Parameters • Fully Scratch-Trained • Leans Smart",
         "> 🧪 *Note: EvoTransformer is a scratch-built model trained on 1K PIQA examples. It may occasionally misinterpret context or idioms. That’s part of its evolution.*",
     ]
     gr.Markdown("\n".join(banner_lines))

     # Input widgets: the goal on its own row, the two solutions side by side.
     with gr.Row():
         goal = gr.Text(label="Goal")
     with gr.Row():
         sol1 = gr.Text(label="Solution 1")
         sol2 = gr.Text(label="Solution 2")
     correct = gr.Radio(
         choices=["Solution 1", "Solution 2"],
         label="✅ Which is actually correct?",
         value="Solution 1",
     )
     btn = gr.Button("Submit")

     # Output widgets, in the order compare() returns its values.
     evo_out = gr.Text(label="🧠 EvoTransformer Response")
     gpt_out = gr.Text(label="🤖 GPT-3.5 Response")
     verdict_out = gr.Text(label="⚖️ Verdict")

     btn.click(
         fn=compare,
         inputs=[goal, sol1, sol2, correct],
         outputs=[evo_out, gpt_out, verdict_out],
     )

     # Sample rows; each fills goal, sol1, sol2 and the reference answer.
     gr.Markdown("#### 🔍 Try These Examples:")
     examples = [
         [task, first, second, "Solution 1"]
         for task, first, second in (
             ("Start a fire", "Use a match", "Pour water"),
             ("Warm up food", "Use microwave", "Put it in fridge"),
             ("Charge a phone", "Plug it in", "Put it on grass"),
             ("Get rid of bad smell", "Open window", "Close door"),
         )
     ]
     gr.Examples(examples=examples, inputs=[goal, sol1, sol2, correct])

     gr.Markdown("Made with ❤️ by Dr. Heman Mohabeer — EvoTransformer is not just code. It's evolution.")