Spaces:
Sleeping
Sleeping
File size: 3,932 Bytes
7c4d437 e127a0b 312dbba 2eed809 312dbba 2eed809 e6e2360 2eed809 23012c7 2eed809 23012c7 2eed809 23012c7 2eed809 23012c7 2eed809 e256998 7c4d437 e127a0b 7c4d437 e256998 2eed809 e256998 2eed809 e256998 7c4d437 e256998 2eed809 7c4d437 2eed809 e6e2360 7c4d437 e6e2360 7c4d437 e6e2360 7c4d437 e6e2360 7c4d437 e6e2360 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
# ✅ Evo Showcase Mode: Full Evaluation with Correct Answer Comparison
import os

import gradio as gr
import openai

from inference import predict as evo_predict
# 🔑 OpenAI API key configuration.
# Read the key from the environment rather than hard-coding a secret in
# source control; the "sk-..." fallback is a non-functional placeholder.
# NOTE: openai.OpenAI() does not read the legacy module-level
# `openai.api_key` attribute, so the key must be passed explicitly.
openai.api_key = os.getenv("OPENAI_API_KEY", "sk-...")
client = openai.OpenAI(api_key=openai.api_key)
def gpt_predict(prompt):
    """Ask GPT-3.5 which solution is better for the given prompt.

    Returns the model's (stripped) reply, or a "GPT Error: ..." string if
    the API call fails for any reason.
    """
    system_msg = {
        "role": "system",
        "content": "You're a commonsense reasoning assistant. Only say: Solution 1 or Solution 2.",
    }
    user_msg = {"role": "user", "content": prompt}
    try:
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[system_msg, user_msg],
        )
        return completion.choices[0].message.content.strip()
    except Exception as exc:
        # Best-effort: surface the failure as text so the UI never crashes.
        return f"GPT Error: {str(exc)}"
def compare(goal, sol1, sol2, correct):
    """Run Evo and GPT-3.5 on one PIQA-style item and score both answers.

    Args:
        goal: The task description.
        sol1, sol2: The two candidate solutions.
        correct: Ground truth, expected to be "Solution 1" or "Solution 2"
            (matched case-insensitively).

    Returns:
        A 4-tuple of display strings: (evo answer, gpt answer, agreement
        verdict, correctness note).
    """
    if not goal.strip() or not sol1.strip() or not sol2.strip():
        return "⚠️ Please provide all inputs.", "", "", ""
    prompt = f"Goal: {goal}\nSolution 1: {sol1}\nSolution 2: {sol2}\nWhich is better?"
    evo = evo_predict(goal, sol1, sol2)
    gpt = gpt_predict(prompt)
    if evo == gpt:
        verdict = "✅ Evo agrees with GPT-3.5"
    else:
        verdict = "⚔️ Evo disagrees with GPT-3.5 — explore why."
    correct_norm = correct.strip().lower()
    if correct_norm in ("solution 1", "solution 2"):
        # Compare case-insensitively: the validity check above accepts any
        # casing of the ground truth, so scoring must tolerate it too
        # (previously "solution 1" passed validation but could never score).
        evo_right = evo.strip().lower() == correct_norm
        gpt_right = gpt.strip().lower() == correct_norm
        if evo_right and gpt_right:
            score_note = "✅ Both Evo and GPT-3.5 were correct."
        elif evo_right:
            score_note = "🟢 Evo was correct. GPT-3.5 was wrong."
        elif gpt_right:
            score_note = "🟢 GPT-3.5 was correct. Evo was wrong."
        else:
            score_note = "❌ Both were incorrect."
    else:
        score_note = "⚠️ Correct answer not provided or invalid (must be 'Solution 1' or 'Solution 2')."
    return f"🧠 Evo: {evo}", f"🤖 GPT-3.5: {gpt}", verdict, score_note
# Pre-filled demo rows for the Gradio Examples widget.
# Each entry is [goal, solution 1, solution 2, correct answer].
examples = [
["Start a fire", "Use a match", "Pour water", "Solution 1"],
["Warm up food", "Use microwave", "Put it in fridge", "Solution 1"],
["Charge a phone", "Plug it in", "Put it on grass", "Solution 1"],
["Stop a car", "Press the brake", "Press the horn", "Solution 1"]
]
# Gradio UI: side-by-side Evo vs GPT-3.5 comparison with correctness scoring.
# NOTE(review): the source this was recovered from had lost its indentation;
# the widget grouping below (sol1/sol2 sharing a Row, the rest at top level)
# is a reconstruction — confirm against the original layout.
with gr.Blocks(title="⚔️ Evo vs GPT-3.5 — Real-Time Commonsense Showdown") as demo:
    # Markdown content is kept at column 0 so leading spaces don't render
    # as a Markdown code block.
    gr.Markdown("""
# 🧠 EvoTransformer v2.1
**PIQA Accuracy:** 69.7% | **Model Size:** ~13M Parameters | **Baseline:** GPT-3.5 ≈ 81%
EvoTransformer is a scratch-built reasoning model trained on just 1K PIQA examples. No pretraining. No fine-tuning. Pure evolution.
Compare its decisions with GPT-3.5 in real-time and witness how intelligence can emerge even from lean, efficient architectures.
""")
    with gr.Row():
        goal = gr.Textbox(label="Goal")
    with gr.Row():
        sol1 = gr.Textbox(label="Solution 1")
        sol2 = gr.Textbox(label="Solution 2")
    correct = gr.Textbox(label="Correct Answer (Solution 1 or Solution 2)")
    evo_output = gr.Textbox(label="EvoTransformer Response")
    gpt_output = gr.Textbox(label="GPT-3.5 Response")
    verdict_output = gr.Textbox(label="Verdict")
    score_output = gr.Textbox(label="Correctness Evaluation")
    submit = gr.Button("Submit")
    submit.click(
        fn=compare,
        inputs=[goal, sol1, sol2, correct],
        outputs=[evo_output, gpt_output, verdict_output, score_output],
    )
    gr.Markdown("### 📌 Examples:")
    gr.Examples(
        examples=examples,
        inputs=[goal, sol1, sol2, correct],
        outputs=[evo_output, gpt_output, verdict_output, score_output],
        fn=compare,
        cache_examples=False,
    )
    gr.Markdown("""
> 🧪 *Note: EvoTransformer is a scratch-built model trained on 1K PIQA examples. It may occasionally misinterpret context or idioms. That's part of its evolution.*
---
Made with ❤️ by Dr. Heman Mohabeer — EvoTransformer is not just code. It's evolution.
""")

demo.launch()