HemanM committed on
Commit
7c4d437
·
verified ·
1 Parent(s): e6e2360

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -11
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # ✅ Evo Showcase Mode: Gradio App with Enhanced Info Panel + GPT-3.5 Comparison
2
 
3
  import gradio as gr
4
  import openai
@@ -7,7 +7,6 @@ from inference import predict as evo_predict
7
  # 🔐 SET YOUR GPT-3.5 API KEY HERE
8
  openai.api_key = "sk-..." # Replace with your actual key
9
 
10
- # ✅ Use the new openai>=1.0.0 API
11
  client = openai.OpenAI()
12
 
13
  def gpt_predict(prompt):
@@ -23,9 +22,9 @@ def gpt_predict(prompt):
23
  except Exception as e:
24
  return f"GPT Error: {str(e)}"
25
 
26
- def compare(goal, sol1, sol2):
27
  if not goal.strip() or not sol1.strip() or not sol2.strip():
28
- return "⚠️ Please provide all inputs.", "", ""
29
 
30
  prompt = f"Goal: {goal}\nSolution 1: {sol1}\nSolution 2: {sol2}\nWhich is better?"
31
  evo = evo_predict(goal, sol1, sol2)
@@ -36,13 +35,25 @@ def compare(goal, sol1, sol2):
36
  else:
37
  verdict = "⚖️ Evo disagrees with GPT-3.5 — explore why."
38
 
39
- return f"🧠 Evo: {evo}", f"🤖 GPT-3.5: {gpt}", verdict
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  examples = [
42
- ["Start a fire", "Use a match", "Pour water"],
43
- ["Warm up food", "Use microwave", "Put it in fridge"],
44
- ["Charge a phone", "Plug it in", "Put it on grass"],
45
- ["Stop a car", "Press the brake", "Press the horn"]
46
  ]
47
 
48
  with gr.Blocks(title="⚔️ Evo vs GPT-3.5 – Real-Time Commonsense Showdown") as demo:
@@ -60,15 +71,18 @@ with gr.Blocks(title="⚔️ Evo vs GPT-3.5 – Real-Time Commonsense Showdown")
60
  with gr.Row():
61
  sol1 = gr.Textbox(label="Solution 1")
62
  sol2 = gr.Textbox(label="Solution 2")
 
63
 
64
  evo_output = gr.Textbox(label="EvoTransformer Response")
65
  gpt_output = gr.Textbox(label="GPT-3.5 Response")
66
  verdict_output = gr.Textbox(label="Verdict")
 
67
 
68
  submit = gr.Button("Submit")
69
- submit.click(fn=compare, inputs=[goal, sol1, sol2], outputs=[evo_output, gpt_output, verdict_output])
70
 
71
- gr.Examples(examples=examples, inputs=[goal, sol1, sol2], outputs=[evo_output, gpt_output, verdict_output], fn=compare, cache_examples=False)
 
72
 
73
  gr.Markdown("""
74
  > 🧪 *Note: EvoTransformer is a scratch-built model trained on 1K PIQA examples. It may occasionally misinterpret context or idioms. That’s part of its evolution.*
 
1
+ # ✅ Evo Showcase Mode: Full Evaluation with Correct Answer Comparison
2
 
3
  import gradio as gr
4
  import openai
 
7
  # 🔐 SET YOUR GPT-3.5 API KEY HERE
8
  openai.api_key = "sk-..." # Replace with your actual key
9
 
 
10
  client = openai.OpenAI()
11
 
12
  def gpt_predict(prompt):
 
22
  except Exception as e:
23
  return f"GPT Error: {str(e)}"
24
 
25
+ def compare(goal, sol1, sol2, correct):
26
  if not goal.strip() or not sol1.strip() or not sol2.strip():
27
+ return "⚠️ Please provide all inputs.", "", "", ""
28
 
29
  prompt = f"Goal: {goal}\nSolution 1: {sol1}\nSolution 2: {sol2}\nWhich is better?"
30
  evo = evo_predict(goal, sol1, sol2)
 
35
  else:
36
  verdict = "⚖️ Evo disagrees with GPT-3.5 — explore why."
37
 
38
+ if correct.strip().lower() in ["solution 1", "solution 2"]:
39
+ if evo == correct and gpt == correct:
40
+ score_note = "✅ Both Evo and GPT-3.5 were correct."
41
+ elif evo == correct:
42
+ score_note = "🟢 Evo was correct. GPT-3.5 was wrong."
43
+ elif gpt == correct:
44
+ score_note = "🟢 GPT-3.5 was correct. Evo was wrong."
45
+ else:
46
+ score_note = "❌ Both were incorrect."
47
+ else:
48
+ score_note = "⚠️ Correct answer not provided or invalid (must be 'Solution 1' or 'Solution 2')."
49
+
50
+ return f"🧠 Evo: {evo}", f"🤖 GPT-3.5: {gpt}", verdict, score_note
51
 
52
  examples = [
53
+ ["Start a fire", "Use a match", "Pour water", "Solution 1"],
54
+ ["Warm up food", "Use microwave", "Put it in fridge", "Solution 1"],
55
+ ["Charge a phone", "Plug it in", "Put it on grass", "Solution 1"],
56
+ ["Stop a car", "Press the brake", "Press the horn", "Solution 1"]
57
  ]
58
 
59
  with gr.Blocks(title="⚔️ Evo vs GPT-3.5 – Real-Time Commonsense Showdown") as demo:
 
71
  with gr.Row():
72
  sol1 = gr.Textbox(label="Solution 1")
73
  sol2 = gr.Textbox(label="Solution 2")
74
+ correct = gr.Textbox(label="Correct Answer (Solution 1 or Solution 2)")
75
 
76
  evo_output = gr.Textbox(label="EvoTransformer Response")
77
  gpt_output = gr.Textbox(label="GPT-3.5 Response")
78
  verdict_output = gr.Textbox(label="Verdict")
79
+ score_output = gr.Textbox(label="Correctness Evaluation")
80
 
81
  submit = gr.Button("Submit")
82
+ submit.click(fn=compare, inputs=[goal, sol1, sol2, correct], outputs=[evo_output, gpt_output, verdict_output, score_output])
83
 
84
+ gr.Markdown("### 🔍 Examples:")
85
+ gr.Examples(examples=examples, inputs=[goal, sol1, sol2, correct], outputs=[evo_output, gpt_output, verdict_output, score_output], fn=compare, cache_examples=False)
86
 
87
  gr.Markdown("""
88
  > 🧪 *Note: EvoTransformer is a scratch-built model trained on 1K PIQA examples. It may occasionally misinterpret context or idioms. That’s part of its evolution.*