HemanM committed on
Commit
568d79a
·
verified ·
1 Parent(s): e42fcaf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -67
app.py CHANGED
@@ -1,94 +1,89 @@
1
- # βœ… Evo Showcase Mode: Full Evaluation with Correct Answer Comparison
2
-
3
  import gradio as gr
 
 
4
  import openai
5
- from inference import predict as evo_predict
 
 
 
6
 
7
- # πŸ” SET YOUR GPT-3.5 API KEY HERE
8
- openai.api_key = "sk-..." # Replace with your actual key
 
9
 
10
- client = openai.OpenAI()
 
 
11
 
12
- def gpt_predict(prompt):
 
13
  try:
14
- response = client.chat.completions.create(
15
  model="gpt-3.5-turbo",
16
- messages=[
17
- {"role": "system", "content": "You're a commonsense reasoning assistant. Only say: Solution 1 or Solution 2."},
18
- {"role": "user", "content": prompt}
19
- ]
20
  )
21
- return response.choices[0].message.content.strip()
 
22
  except Exception as e:
23
- return f"GPT Error: {str(e)}"
24
-
25
- def compare(goal, sol1, sol2, correct):
26
- if not goal.strip() or not sol1.strip() or not sol2.strip():
27
- return "⚠️ Please provide all inputs.", "", "", ""
28
 
29
- prompt = f"Goal: {goal}\nSolution 1: {sol1}\nSolution 2: {sol2}\nWhich is better?"
30
- evo = evo_predict(goal, sol1, sol2)
31
- gpt = gpt_predict(prompt)
32
-
33
- if evo == gpt:
34
- verdict = "βœ… Evo agrees with GPT-3.5"
35
- else:
36
- verdict = "βš–οΈ Evo disagrees with GPT-3.5 β€” explore why."
37
-
38
- if correct.strip().lower() in ["solution 1", "solution 2"]:
39
- if evo == correct and gpt == correct:
40
- score_note = "βœ… Both Evo and GPT-3.5 were correct."
41
- elif evo == correct:
42
- score_note = "🟒 Evo was correct. GPT-3.5 was wrong."
43
- elif gpt == correct:
44
- score_note = "🟒 GPT-3.5 was correct. Evo was wrong."
45
- else:
46
- score_note = "❌ Both were incorrect."
47
- else:
48
- score_note = "⚠️ Correct answer not provided or invalid (must be 'Solution 1' or 'Solution 2')."
49
 
50
- return f"🧠 Evo: {evo}", f"πŸ€– GPT-3.5: {gpt}", verdict, score_note
 
51
 
52
- examples = [
53
- ["Start a fire", "Use a match", "Pour water", "Solution 1"],
54
- ["Warm up food", "Use microwave", "Put it in fridge", "Solution 1"],
55
- ["Charge a phone", "Plug it in", "Put it on grass", "Solution 1"],
56
- ["Stop a car", "Press the brake", "Press the horn", "Solution 1"]
57
- ]
58
 
59
- with gr.Blocks(title="βš”οΈ Evo vs GPT-3.5 – Real-Time Commonsense Showdown") as demo:
60
- gr.Markdown("""
61
- # 🧠 EvoTransformer v2.1
62
- **PIQA Accuracy:** 69.7%     |     **Model Size:** ~13M Parameters     |     **Baseline:** GPT-3.5 β‰ˆ 81%
 
 
 
63
 
64
- EvoTransformer is a scratch-built reasoning model trained on just 1K PIQA examples. No pretraining. No fine-tuning. Pure evolution.
65
 
66
- Compare its decisions with GPT-3.5 in real-time and witness how intelligence can emerge even from lean, efficient architectures.
67
- """)
 
 
68
 
69
  with gr.Row():
70
  goal = gr.Textbox(label="Goal")
71
  with gr.Row():
72
  sol1 = gr.Textbox(label="Solution 1")
73
  sol2 = gr.Textbox(label="Solution 2")
74
- correct = gr.Textbox(label="Correct Answer (Solution 1 or Solution 2)")
 
 
 
 
 
 
75
 
76
- evo_output = gr.Textbox(label="EvoTransformer Response")
77
- gpt_output = gr.Textbox(label="GPT-3.5 Response")
78
- verdict_output = gr.Textbox(label="Verdict")
79
- score_output = gr.Textbox(label="Correctness Evaluation")
80
 
81
- submit = gr.Button("Submit")
82
- submit.click(fn=compare, inputs=[goal, sol1, sol2, correct], outputs=[evo_output, gpt_output, verdict_output, score_output])
 
 
 
 
 
83
 
84
- gr.Markdown("### πŸ” Examples:")
85
- gr.Examples(examples=examples, inputs=[goal, sol1, sol2, correct], outputs=[evo_output, gpt_output, verdict_output, score_output], fn=compare, cache_examples=False)
 
 
 
86
 
87
- gr.Markdown("""
88
- > πŸ§ͺ *Note: EvoTransformer is a scratch-built model trained on 1K PIQA examples. It may occasionally misinterpret context or idioms. That’s part of its evolution.*
89
 
90
- ---
91
- Made with ❀️ by Dr. Heman Mohabeer β€” EvoTransformer is not just code. It's evolution.
92
- """)
93
 
94
- demo.launch()
 
 
 
1
  import gradio as gr
2
+ from inference import predict
3
+ from logger import log_interaction
4
  import openai
5
+ import os
6
+
7
+ # --- Set your OpenAI key here (or use secrets/environment)
8
+ openai.api_key = os.getenv("OPENAI_API_KEY") or "sk-..." # Replace if needed
9
 
10
def gpt3_predict(goal, sol1, sol2):
    """Ask GPT-3.5 which of two candidate solutions better achieves a goal.

    Args:
        goal: Text describing what should be achieved.
        sol1: First candidate solution.
        sol2: Second candidate solution.

    Returns:
        "Solution 1" or "Solution 2" when the reply names exactly one of
        them; otherwise the stripped reply text. If anything goes wrong while
        calling the API, a string starting with "GPT Error:" is returned
        instead of raising, so the UI can display it.
    """
    # Label the candidates with the same names the reply must use; the
    # previous "Option A"/"Option B" labels did not match the requested
    # "Solution 1"/"Solution 2" answer format.
    prompt = (
        "You are solving a commonsense reasoning task.\n"
        "Given a goal and two possible solutions, choose which solution makes more sense.\n"
        "\n"
        f"Goal: {goal}\n"
        f"Solution 1: {sol1}\n"
        f"Solution 2: {sol2}\n"
        "\n"
        'Which solution is better? Reply only with "Solution 1" or "Solution 2".'
    )

    try:
        # openai>=1.0 removed openai.ChatCompletion; its module-level client
        # is reached through openai.chat.completions and still honours
        # openai.api_key. Older releases only offer ChatCompletion.
        if hasattr(openai, "OpenAI"):
            create = openai.chat.completions.create
        else:
            create = openai.ChatCompletion.create
        response = create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=10,
        )
        answer = response.choices[0].message.content.strip()
    except Exception as e:  # UI boundary: report the failure as text
        return f"GPT Error: {e}"

    # Canonicalise replies such as "solution 1." so callers can compare them
    # with plain equality. Leave the reply alone when it is ambiguous.
    lowered = answer.lower()
    mentioned = [
        label for label in ("Solution 1", "Solution 2") if label.lower() in lowered
    ]
    if len(mentioned) == 1:
        return mentioned[0]
    return answer
 
 
 
 
30
 
31
def _canonical_choice(text):
    """Reduce an answer to a comparable form, ignoring case, spacing and edge punctuation."""
    return " ".join(str(text).split()).strip(" .!\"'").casefold()


def compare(goal, sol1, sol2, correct_answer):
    """Run EvoTransformer and GPT-3.5 on one task and build a verdict.

    Args:
        goal: Text describing what should be achieved.
        sol1: First candidate solution.
        sol2: Second candidate solution.
        correct_answer: The known correct choice, or a falsy value when it is
            not known.

    Returns:
        A 3-tuple ``(evo, gpt, verdict)`` matching the three output widgets:
        the EvoTransformer prediction, the GPT-3.5 prediction and a verdict
        text. When any of the three text inputs is blank, both predictions
        are empty strings and the verdict asks for the missing input.
    """
    # Refuse blank submissions up front so no model call, API call or log
    # entry is made for them.
    if not all((field or "").strip() for field in (goal, sol1, sol2)):
        return "", "", "⚠️ Please provide all inputs."

    # EvoTransformer prediction
    evo = predict(goal, sol1, sol2)

    # GPT-3.5 prediction
    gpt = gpt3_predict(goal, sol1, sol2)

    # Log feedback
    log_interaction(goal, sol1, sol2, evo, gpt, correct_answer)

    if not correct_answer:
        return evo, gpt, "⚖️ Evo and GPT-3.5 predictions compared."

    # Compare canonical forms so "solution 1." still counts as "Solution 1".
    expected = _canonical_choice(correct_answer)
    evo_line = (
        "✅ Evo was RIGHT ✅"
        if _canonical_choice(evo) == expected
        else "❌ Evo was WRONG ❌"
    )
    gpt_line = (
        "✅ GPT-3.5 was RIGHT ✅"
        if _canonical_choice(gpt) == expected
        else "❌ GPT-3.5 was WRONG ❌"
    )
    return evo, gpt, f"{evo_line}\n{gpt_line}"
50
 
51
# Gradio UI: three text inputs plus an optional known answer feed compare(),
# whose three return values fill the two response boxes and the verdict box.
# User-facing strings below are restored from mis-encoded text.
with gr.Blocks() as demo:
    gr.Markdown("## ⚔️ Evo vs GPT-3.5 – Real-Time Commonsense Showdown")
    gr.Markdown("> 🧠 EvoTransformer v2.1 – PIQA Accuracy: 69.7% (vs GPT-3.5 ≈ 81%) · 13M Parameters · Fully Scratch-Trained · Leans Smart")
    gr.Markdown("> 🧪 *Note: EvoTransformer is a scratch-built model trained on 1K PIQA examples. It may occasionally misinterpret context or idioms. That’s part of its evolution.*")

    with gr.Row():
        goal = gr.Textbox(label="Goal")
    with gr.Row():
        sol1 = gr.Textbox(label="Solution 1")
        sol2 = gr.Textbox(label="Solution 2")
    # The None choice lets the user leave the correct answer unspecified.
    correct = gr.Radio(choices=["Solution 1", "Solution 2", None], label="✅ Correct Answer (if known)", value=None)

    btn = gr.Button("Submit")

    with gr.Row():
        evo_out = gr.Textbox(label="🧠 EvoTransformer Response")
        gpt_out = gr.Textbox(label="🤖 GPT-3.5 Response")

    verdict = gr.Textbox(label="Verdict", interactive=False)

    examples = [
        ["Start a fire", "Use a match", "Pour water", "Solution 1"],
        ["Warm up food", "Use microwave", "Put it in fridge", "Solution 1"],
        ["Charge a phone", "Plug it in", "Put it on grass", "Solution 1"],
        ["Get rid of bad smell", "Open window", "Close door", "Solution 1"],
        ["Find your way", "Use a map", "Close your eyes", "Solution 1"],
    ]

    # Examples only pre-fill the inputs; nothing runs until Submit is clicked.
    gr.Examples(
        examples=examples,
        inputs=[goal, sol1, sol2, correct],
        label="🔍 Try These Examples",
    )

    btn.click(fn=compare, inputs=[goal, sol1, sol2, correct], outputs=[evo_out, gpt_out, verdict])

    gr.Markdown("Made with ❤️ by Dr. Heman Mohabeer — EvoTransformer is not just code. It's evolution.")

demo.launch()