Spaces:
Running
Running
File size: 3,274 Bytes
312dbba 568d79a 19ba196 2eed809 568d79a 6c2267b 312dbba 19ba196 2eed809 568d79a 2eed809 568d79a 19ba196 2eed809 19ba196 2eed809 19ba196 e256998 19ba196 568d79a 19ba196 568d79a 19ba196 e6e2360 568d79a e6e2360 568d79a 19ba196 6c2267b e6e2360 19ba196 e6e2360 19ba196 568d79a 19ba196 568d79a 19ba196 e6e2360 19ba196 568d79a 19ba196 e6e2360 568d79a e6e2360 568d79a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
import gradio as gr
from inference import predict
from logger import log_feedback_to_firebase
import openai
import os
# Configure the OpenAI client once, at import time. Indexing os.environ (rather
# than using .get) raises KeyError immediately if OPENAI_API_KEY is not set.
openai.api_key = os.environ["OPENAI_API_KEY"] # Add this secret in your Hugging Face Space
def gpt_predict(goal, sol1, sol2):
    """Ask GPT-3.5 which of two candidate solutions better achieves a goal.

    Args:
        goal: Text describing what should be achieved.
        sol1: First candidate, shown to the model as "Option 1".
        sol2: Second candidate, shown to the model as "Option 2".

    Returns:
        "Solution 1" or "Solution 2" when the model's reply names a choice.
        Otherwise a string starting with "GPT Error: " describing the
        failure (API/transport error, or a reply containing no choice).
        Errors are returned as strings, never raised.
    """
    import re  # local import: only this function needs it

    prompt = (
        "You're solving a commonsense reasoning problem. "
        "Choose the better solution to achieve the goal.\n\n"
        f"Goal: {goal}\n\n"
        f"Option 1: {sol1}\nOption 2: {sol2}\n\n"
        "Which option is better? Reply with only 'Solution 1' or 'Solution 2'."
    )
    try:
        # NOTE(review): openai.ChatCompletion is the legacy (pre-1.0) client
        # interface -- confirm the deployment pins a compatible openai version.
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
        )
        reply = response.choices[0].message["content"] or ""
    except Exception as e:
        # Boundary with an external service: surface the failure as text so
        # the caller can still display something.
        return f"GPT Error: {e}"

    # Take the first standalone 1 or 2. A plain substring test for "1" would
    # also fire on "10", or on a reply such as "Solution 2, not option 1".
    choice = re.search(r"(?<!\d)([12])(?!\d)", reply)
    if choice is None:
        # Do not silently guess when the model did not name either option.
        return f"GPT Error: unrecognized reply {reply!r}"
    return f"Solution {choice.group(1)}"
def compare(goal, sol1, sol2, correct):
    """Run both models on one problem, log the outcome, and build a verdict.

    Args:
        goal: Text describing what should be achieved.
        sol1: First candidate solution.
        sol2: Second candidate solution.
        correct: The label chosen as the right answer; it is compared for
            equality against each model's output.

    Returns:
        A tuple ``(evo, gpt, verdict)``: the value returned by ``predict``,
        the value returned by ``gpt_predict``, and a human-readable summary
        of which of the two matched ``correct``.
    """
    evo = predict(goal, sol1, sol2)
    gpt = gpt_predict(goal, sol1, sol2)

    # Determine outcome
    if evo == gpt == correct:
        verdict = "✅ Both Evo and GPT-3.5 are correct!"
    elif evo == correct and gpt != correct:
        verdict = "🧠 Evo got it right. GPT-3.5 missed it."
    elif gpt == correct and evo != correct:
        verdict = "🤖 GPT-3.5 got it right. Evo missed it."
    else:
        verdict = "❌ Both models got it wrong."

    # Log to Firebase
    log_feedback_to_firebase(goal, sol1, sol2, evo, gpt, correct, "from app.py")
    return evo, gpt, verdict
# Gradio UI: three text inputs plus a ground-truth selector feed `compare`,
# whose three return values fill the three output boxes.
with gr.Blocks() as demo:
    gr.Markdown("## ⚔️ Evo vs GPT-3.5 — Real-Time Commonsense Showdown")
    gr.Markdown(
        "> 🧠 EvoTransformer v2.1 — PIQA Accuracy: 69.7% (vs GPT-3.5 ≈ 81%)\n"
        "> 13M Parameters • Fully Scratch-Trained • Leans Smart\n"
        "> 🧪 *Note: EvoTransformer is a scratch-built model trained on 1K PIQA examples. It may occasionally misinterpret context or idioms. That’s part of its evolution.*"
    )
    # NOTE(review): the leading emoji was unrecoverable from the garbled
    # encoding; 🔁 is a best guess -- confirm against the original file.
    gr.Markdown(
        "> 🔁 *EvoTransformer is not static. Every time you provide feedback, Evo learns and evolves. Welcome to real-time neural evolution.*"
    )

    with gr.Row():
        goal = gr.Text(label="Goal")
    with gr.Row():
        sol1 = gr.Text(label="Solution 1")
        sol2 = gr.Text(label="Solution 2")

    # The choices must stay identical to the labels the models return,
    # because `compare` checks them with ==.
    correct = gr.Radio(
        choices=["Solution 1", "Solution 2"],
        label="✅ Which is actually correct?",
        value="Solution 1",
    )
    btn = gr.Button("Submit")

    evo_out = gr.Text(label="🧠 EvoTransformer Response")
    gpt_out = gr.Text(label="🤖 GPT-3.5 Response")
    # NOTE(review): emoji reconstructed from garbled encoding (⚖️ assumed).
    verdict_out = gr.Text(label="⚖️ Verdict")

    btn.click(
        fn=compare,
        inputs=[goal, sol1, sol2, correct],
        outputs=[evo_out, gpt_out, verdict_out],
    )

    # NOTE(review): the leading emoji was unrecoverable from the garbled
    # encoding; 🔍 is a best guess -- confirm against the original file.
    gr.Markdown("#### 🔍 Try These Examples:")
    # Each row is [goal, solution 1, solution 2, correct label], matching the
    # order of `inputs` passed to gr.Examples below.
    examples = [
        ["Start a fire", "Use a match", "Pour water", "Solution 1"],
        ["Warm up food", "Use microwave", "Put it in fridge", "Solution 1"],
        ["Charge a phone", "Plug it in", "Put it on grass", "Solution 1"],
        ["Get rid of bad smell", "Open window", "Close door", "Solution 1"],
    ]
    gr.Examples(examples=examples, inputs=[goal, sol1, sol2, correct])

    gr.Markdown("Made with ❤️ by Dr. Heman Mohabeer — EvoTransformer is not just code. It's evolution.")

demo.launch()
|