Spaces:
Running
Running
File size: 3,251 Bytes
312dbba 568d79a 2eed809 568d79a 312dbba 568d79a 2eed809 568d79a 23012c7 568d79a 2eed809 568d79a 2eed809 568d79a 2eed809 568d79a 2eed809 568d79a e256998 568d79a 7c4d437 568d79a e256998 568d79a 2eed809 568d79a e6e2360 568d79a e6e2360 568d79a e6e2360 568d79a e6e2360 568d79a e6e2360 568d79a e6e2360 568d79a e6e2360 568d79a e6e2360 568d79a e6e2360 568d79a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
import gradio as gr
from inference import predict
from logger import log_interaction
import openai
import os
# The OpenAI key is taken from the environment only (e.g. a deployment secret
# named OPENAI_API_KEY). No literal fallback is kept in source so that a real
# key is never pasted into, and committed with, this file. When the variable
# is unset this is None and OpenAI calls are expected to fail with a
# missing-key error instead of an invalid-key one.
openai.api_key = os.getenv("OPENAI_API_KEY")
def _normalize_choice(reply):
    """Map a free-form reply onto "Solution 1" / "Solution 2" when unambiguous."""
    lowered = reply.lower()
    mentioned = [
        label for label in ("Solution 1", "Solution 2") if label.lower() in lowered
    ]
    # Canonicalise only when exactly one label is named; otherwise hand back
    # the reply untouched so the caller can see what the model actually said.
    return mentioned[0] if len(mentioned) == 1 else reply


def gpt3_predict(goal, sol1, sol2):
    """Ask gpt-3.5-turbo which of two solutions better achieves a goal.

    Args:
        goal: Text describing what should be achieved.
        sol1: Text of the first candidate solution.
        sol2: Text of the second candidate solution.

    Returns:
        "Solution 1" or "Solution 2" when the reply names exactly one of them
        (surrounding quotes, punctuation and letter case are ignored), the
        stripped raw reply otherwise, or a string starting with "GPT Error:"
        if the request or the reply handling raised.
    """
    # The candidates are labelled with the same words the model is asked to
    # answer with, so there is no "Option A" -> "Solution 1" mapping to guess.
    prompt = (
        "You are solving a commonsense reasoning task.\n"
        "Given a goal and two possible solutions, choose which solution makes more sense.\n"
        f"Goal: {goal}\n"
        f"Solution 1: {sol1}\n"
        f"Solution 2: {sol2}\n"
        'Which solution is better? Reply only with "Solution 1" or "Solution 2".'
    )
    try:
        # NOTE(review): `openai.ChatCompletion` is the pre-1.0 SDK interface;
        # confirm the installed openai version still provides it.
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=10,
        )
        answer = response.choices[0].message.content.strip()
    except Exception as e:
        # Deliberate catch-all: the result is shown in a UI text box, so any
        # failure is reported as text rather than raised.
        return f"GPT Error: {e}"
    return _normalize_choice(answer)
def compare(goal, sol1, sol2, correct_answer):
    """Run both predictors on one example, log the result and build a verdict.

    Args:
        goal: Text describing what should be achieved.
        sol1: Text of the first candidate solution.
        sol2: Text of the second candidate solution.
        correct_answer: The expected answer, or a falsy value (None / "") when
            it is not known.

    Returns:
        A tuple ``(evo, gpt, verdict)``: the value returned by ``predict``, the
        value returned by ``gpt3_predict``, and a human-readable verdict string.
    """
    # EvoTransformer prediction
    evo = predict(goal, sol1, sol2)
    # GPT-3.5 prediction
    gpt = gpt3_predict(goal, sol1, sol2)
    # The interaction is logged whether or not a correct answer was supplied.
    log_interaction(goal, sol1, sol2, evo, gpt, correct_answer)

    if correct_answer:
        # A prediction counts as right only on exact equality with the
        # supplied answer; one verdict line is produced per model.
        verdict = (
            "✅ Evo was RIGHT ✅" if evo == correct_answer else "❌ Evo was WRONG ❌"
        )
        verdict += "\n"
        verdict += (
            "✅ GPT-3.5 was RIGHT ✅"
            if gpt == correct_answer
            else "❌ GPT-3.5 was WRONG ❌"
        )
    else:
        verdict = "⚔️ Evo and GPT-3.5 predictions compared."
    return evo, gpt, verdict
# Gradio front end: three text inputs plus an optional "correct answer" radio,
# a submit button wired to compare(), and three output boxes.
# NOTE(review): emoji and punctuation in the UI strings were restored from
# mis-encoded text. The dash characters, the symbol before "81%" and the emoji
# before "Try These Examples" could not be recovered with certainty and are
# best guesses; confirm them against the intended wording.
with gr.Blocks() as demo:
    gr.Markdown("## ⚔️ Evo vs GPT-3.5 — Real-Time Commonsense Showdown")
    gr.Markdown("> 🧠 EvoTransformer v2.1 — PIQA Accuracy: 69.7% (vs GPT-3.5 ≈ 81%) · 13M Parameters · Fully Scratch-Trained · Leans Smart")
    gr.Markdown("> 🧪 *Note: EvoTransformer is a scratch-built model trained on 1K PIQA examples. It may occasionally misinterpret context or idioms. That’s part of its evolution.*")

    with gr.Row():
        goal = gr.Textbox(label="Goal")
    with gr.Row():
        sol1 = gr.Textbox(label="Solution 1")
        sol2 = gr.Textbox(label="Solution 2")

    # None is offered as a choice so that "answer not known" can be selected;
    # compare() treats a falsy value as "no correct answer supplied".
    correct = gr.Radio(
        choices=["Solution 1", "Solution 2", None],
        label="✅ Correct Answer (if known)",
        value=None,
    )
    btn = gr.Button("Submit")

    with gr.Row():
        evo_out = gr.Textbox(label="🧠 EvoTransformer Response")
        gpt_out = gr.Textbox(label="🤖 GPT-3.5 Response")
    verdict = gr.Textbox(label="Verdict", interactive=False)

    # Each example row is [goal, solution 1, solution 2, correct answer],
    # matching the order of the `inputs` list below.
    examples = [
        ["Start a fire", "Use a match", "Pour water", "Solution 1"],
        ["Warm up food", "Use microwave", "Put it in fridge", "Solution 1"],
        ["Charge a phone", "Plug it in", "Put it on grass", "Solution 1"],
        ["Get rid of bad smell", "Open window", "Close door", "Solution 1"],
        ["Find your way", "Use a map", "Close your eyes", "Solution 1"],
    ]
    gr.Examples(
        examples=examples,
        inputs=[goal, sol1, sol2, correct],
        label="🔍 Try These Examples",
    )

    btn.click(
        fn=compare,
        inputs=[goal, sol1, sol2, correct],
        outputs=[evo_out, gpt_out, verdict],
    )
    gr.Markdown("Made with ❤️ by Dr. Heman Mohabeer — EvoTransformer is not just code. It's evolution.")

demo.launch()
|