Spaces:
Sleeping
Sleeping
File size: 2,285 Bytes
2eed809 e127a0b 312dbba 2eed809 312dbba 2eed809 e256998 5f733b4 e127a0b 2eed809 e256998 2eed809 e256998 2eed809 e256998 2eed809 e256998 2eed809 e256998 2eed809 e256998 2eed809 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
# ✅ Evo Showcase Mode: Full Gradio App with GPT-3.5 Comparison
import os

import gradio as gr
import openai

from inference import predict as evo_predict
# SECURITY: never commit a real API key to source control. The key is read
# from the OPENAI_API_KEY environment variable (on Hugging Face Spaces, define
# it as a repository secret). The "sk-..." fallback is the original
# non-functional placeholder, kept only so the assignment always yields a
# string when the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY", "sk-...")
def gpt_predict(prompt):
    """Ask GPT-3.5 which of two candidate solutions better achieves a goal.

    Supports both generations of the ``openai`` SDK: the 1.x module-level
    client (``openai.chat.completions``) and the legacy 0.x
    ``openai.ChatCompletion`` interface, which 1.x removed.

    Args:
        prompt: User message describing the goal and the two solutions.

    Returns:
        The model's reply with surrounding whitespace stripped, or a string
        of the form ``"GPT Error: <details>"`` if the request fails. Failures
        are reported in-band rather than raised so the UI can display them.
    """
    system_msg = (
        "You're a commonsense reasoning assistant. Given a goal and two "
        "options, pick the better one. Only say: Solution 1 or Solution 2."
    )
    messages = [
        {"role": "system", "content": system_msg},
        {"role": "user", "content": prompt},
    ]
    try:
        if hasattr(openai, "OpenAI"):
            # openai>=1.0: ChatCompletion was removed; use the module-level
            # client, which still honours `openai.api_key`.
            completion = openai.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=messages,
            )
            content = completion.choices[0].message.content
        else:
            # Legacy openai<1.0 interface (what this file was written for).
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=messages,
            )
            content = completion["choices"][0]["message"]["content"]
        # The 1.x SDK types `content` as optional; treat a missing reply as
        # an empty answer instead of failing on None.strip().
        return (content or "").strip()
    except Exception as e:  # deliberate best-effort boundary for the UI
        return f"GPT Error: {e}"
def _normalize_answer(answer):
    """Canonicalize a model answer so cosmetic differences are ignored.

    Trims whitespace, drops trailing sentence punctuation and lower-cases,
    so that e.g. "Solution 1." and "solution 1" compare equal.
    """
    return str(answer).strip().rstrip(".!").strip().lower()


def compare(goal, sol1, sol2):
    """Run Evo and GPT-3.5 on the same task and report whether they agree.

    Args:
        goal: The task to accomplish.
        sol1: First candidate solution.
        sol2: Second candidate solution.

    Returns:
        A 3-tuple of display strings: (Evo's answer, GPT-3.5's answer,
        verdict). If any input is missing or blank, the first element is a
        warning and the other two are empty strings; no model is called.
    """
    # `or ""` guards against None, which would otherwise crash on .strip().
    if not all((field or "").strip() for field in (goal, sol1, sol2)):
        return "⚠️ Please provide all inputs.", "", ""

    prompt = (
        f"Goal: {goal}\nSolution 1: {sol1}\nSolution 2: {sol2}\n"
        "Which is better?"
    )
    evo = evo_predict(goal, sol1, sol2)
    gpt = gpt_predict(prompt)

    if str(gpt).startswith("GPT Error:"):
        # gpt_predict reports failures in-band; a failed request is not a
        # disagreement between the models, so say so explicitly.
        verdict = "⚠️ GPT-3.5 unavailable — no comparison made."
    elif _normalize_answer(evo) == _normalize_answer(gpt):
        verdict = "✅ Evo agrees with GPT-3.5"
    else:
        # NOTE(review): the leading symbol of this message was garbled in the
        # source and could not be recovered exactly; "⚖️" is a best guess —
        # confirm against the intended UI text.
        verdict = "⚖️ Evo disagrees with GPT-3.5 — explore why."
    return f"🧠 Evo: {evo}", f"🤖 GPT-3.5: {gpt}", verdict
# Pre-filled sample rows offered in the UI. Each row is
# [goal, solution 1, solution 2], i.e. one value per input of `compare`.
examples = [
    ["Start a fire", "Use a match", "Pour water"],
    ["Warm up food", "Use microwave", "Put it in fridge"],
    ["Charge a phone", "Plug it in", "Put it on grass"],
    ["Get rid of bad smell", "Open window", "Close door"],
]
# Gradio UI: the three text inputs are passed to `compare` in order, and its
# three return values fill the three output boxes in order.
# NOTE(review): the non-ASCII characters in `title` and `description` were
# mis-encoded in the source and have been restored. The leading symbol of the
# title could not be recovered exactly; "⚔️" is a best guess — confirm.
demo = gr.Interface(
    fn=compare,
    inputs=[
        gr.Textbox(label="Goal"),
        gr.Textbox(label="Solution 1"),
        gr.Textbox(label="Solution 2"),
    ],
    outputs=[
        gr.Textbox(label="EvoTransformer Response"),
        gr.Textbox(label="GPT-3.5 Response"),
        gr.Textbox(label="Verdict"),
    ],
    title="⚔️ Evo vs GPT-3.5 — Real-Time Commonsense Showdown",
    # Kept flush-left on purpose: leading indentation inside the string
    # would become part of the text shown in the UI.
    description="""
🧠 EvoTransformer v2.1 — PIQA Accuracy: 69.7% (vs GPT-3.5 ≈ 81%)
13M Parameters • Fully Scratch-Trained • Leans Smart
This live app shows Evo's answer side-by-side with GPT-3.5. Try it and witness evolution.
""",
    examples=examples,
    theme="default",
)

# Launch at import/run time, as the original script does.
demo.launch()
|