HemanM committed on
Commit
19ba196
·
verified ·
1 Parent(s): 2c69375

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -52
app.py CHANGED
@@ -1,88 +1,73 @@
1
  import gradio as gr
2
  from inference import predict
3
- from logger import log_interaction
4
  import openai
5
  import os
6
 
7
- # --- Set your OpenAI key here (or use secrets/environment)
8
- openai.api_key = os.getenv("OPENAI_API_KEY") or "sk-..." # Replace if needed
9
 
10
def gpt3_predict(goal, sol1, sol2):
    """Ask GPT-3.5 which of two candidate solutions better achieves ``goal``.

    Args:
        goal: Text describing what should be achieved.
        sol1: Text of the first candidate solution.
        sol2: Text of the second candidate solution.

    Returns:
        ``"Solution 1"`` or ``"Solution 2"`` when the reply names one of
        them (trailing punctuation or extra words are tolerated), the
        stripped raw reply when no choice can be recognised, or a string
        starting with ``"GPT Error:"`` when the API call fails.
    """
    import re  # local import: only needed to normalise the reply

    # NOTE(review): the prompt labels the options A/B but asks for an answer
    # phrased as "Solution 1"/"Solution 2"; the wording is kept unchanged here.
    prompt = (
        "You are solving a commonsense reasoning task.\n"
        "Given a goal and two possible solutions, choose which solution makes more sense.\n"
        "\n"
        f"Goal: {goal}\n"
        f"Option A: {sol1}\n"
        f"Option B: {sol2}\n"
        "\n"
        'Which option is better? Reply only with "Solution 1" or "Solution 2".'
    )

    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=10,
        )
        answer = (response.choices[0].message.content or "").strip()
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the app.
        return f"GPT Error: {e}"

    # Callers compare the result with "Solution 1"/"Solution 2" by equality,
    # so collapse variants such as "Solution 1." to the canonical label.
    choice = re.search(r"(?<!\d)([12])(?!\d)", answer)
    if choice is not None:
        return f"Solution {choice.group(1)}"
    return answer
30
 
31
def compare(goal, sol1, sol2, correct_answer):
    """Run both models on one example, log the interaction, and build a verdict.

    Args:
        goal: Text describing what should be achieved.
        sol1: Text of the first candidate solution.
        sol2: Text of the second candidate solution.
        correct_answer: The expected label, or a falsy value when unknown.

    Returns:
        A tuple ``(evo, gpt, verdict)``: the EvoTransformer prediction, the
        GPT-3.5 prediction, and a human-readable verdict string.
    """
    evo = predict(goal, sol1, sol2)
    gpt = gpt3_predict(goal, sol1, sol2)

    # Record the interaction before deciding what to show.
    log_interaction(goal, sol1, sol2, evo, gpt, correct_answer)

    if correct_answer:
        # One line per model, each judged by equality with the expected label.
        evo_line = "✅ Evo was RIGHT ✅" if evo == correct_answer else "❌ Evo was WRONG ❌"
        gpt_line = "✅ GPT-3.5 was RIGHT ✅" if gpt == correct_answer else "❌ GPT-3.5 was WRONG ❌"
        verdict = f"{evo_line}\n{gpt_line}"
    else:
        # No ground truth supplied: nothing to score against.
        verdict = "⚖️ Evo and GPT-3.5 predictions compared."

    return evo, gpt, verdict
50
 
51
# Gradio UI: inputs for one example, a submit button, and the three outputs.
# NOTE(review): nesting below is reconstructed from a flattened listing —
# confirm which components belong inside each gr.Row().
with gr.Blocks() as demo:
    gr.Markdown("## ⚔️ Evo vs GPT-3.5 – Real-Time Commonsense Showdown")
    gr.Markdown("> 🧠 EvoTransformer v2.1 – PIQA Accuracy: 69.7% (vs GPT-3.5 ≈ 81%) · 13M Parameters · Fully Scratch-Trained · Leans Smart")
    gr.Markdown("> 🧪 *Note: EvoTransformer is a scratch-built model trained on 1K PIQA examples. It may occasionally misinterpret context or idioms. That’s part of its evolution.*")

    with gr.Row():
        goal = gr.Textbox(label="Goal")
    with gr.Row():
        sol1 = gr.Textbox(label="Solution 1")
        sol2 = gr.Textbox(label="Solution 2")
    correct = gr.Radio(choices=["Solution 1", "Solution 2", None], label="✅ Correct Answer (if known)", value=None)

    btn = gr.Button("Submit")

    with gr.Row():
        evo_out = gr.Textbox(label="🧠 EvoTransformer Response")
        gpt_out = gr.Textbox(label="🤖 GPT-3.5 Response")

    verdict = gr.Textbox(label="Verdict", interactive=False)

    # Each row is [goal, solution 1, solution 2, correct answer].
    examples = [
        ["Start a fire", "Use a match", "Pour water", "Solution 1"],
        ["Warm up food", "Use microwave", "Put it in fridge", "Solution 1"],
        ["Charge a phone", "Plug it in", "Put it on grass", "Solution 1"],
        ["Get rid of bad smell", "Open window", "Close door", "Solution 1"],
        ["Find your way", "Use a map", "Close your eyes", "Solution 1"],
    ]

    gr.Examples(
        examples=examples,
        inputs=[goal, sol1, sol2, correct],
        label="🔍 Try These Examples",
    )

    btn.click(fn=compare, inputs=[goal, sol1, sol2, correct], outputs=[evo_out, gpt_out, verdict])

    gr.Markdown("Made with ❤️ by Dr. Heman Mohabeer — EvoTransformer is not just code. It's evolution.")
88
 
 
1
  import gradio as gr
2
  from inference import predict
3
+ from logger import log_feedback_to_firebase
4
  import openai
5
  import os
6
 
7
+ openai.api_key = os.environ["OPENAI_API_KEY"] # Add this secret in HF Space
 
8
 
9
def gpt_predict(goal, sol1, sol2):
    """Ask GPT-3.5 which of two candidate solutions better achieves ``goal``.

    Args:
        goal: Text describing what should be achieved.
        sol1: Text of the first candidate solution.
        sol2: Text of the second candidate solution.

    Returns:
        ``"Solution 1"`` or ``"Solution 2"`` when the reply names one of
        them; otherwise a string starting with ``"GPT Error:"`` (API failure
        or a reply that names neither option).
    """
    import re  # local import: only needed to parse the reply

    prompt = (
        "You're solving a commonsense reasoning problem. "
        "Choose the better solution to achieve the goal.\n\n"
        f"Goal: {goal}\n\n"
        f"Option 1: {sol1}\n"
        f"Option 2: {sol2}\n\n"
        "Which option is better? Reply with only 'Solution 1' or 'Solution 2'."
    )

    try:
        # NOTE(review): openai.ChatCompletion is the pre-1.0 client interface;
        # confirm the pinned openai version still provides it.
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
        )
        reply = response.choices[0].message["content"] or ""
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the app.
        return f"GPT Error: {str(e)}"

    # Take the first standalone 1 or 2. A bare `"1" in reply` test would map
    # every reply without a "1" (empty text, refusals) to "Solution 2".
    choice = re.search(r"(?<!\d)([12])(?!\d)", reply)
    if choice is None:
        return f"GPT Error: unrecognised reply {reply!r}"
    return f"Solution {choice.group(1)}"
21
 
22
def compare(goal, sol1, sol2, correct):
    """Run both models on one example, build a verdict, and log the result.

    Args:
        goal: Text describing what should be achieved.
        sol1: Text of the first candidate solution.
        sol2: Text of the second candidate solution.
        correct: The label the user marked as correct.

    Returns:
        A tuple ``(evo, gpt, verdict)``: the EvoTransformer prediction, the
        GPT-3.5 prediction, and a human-readable verdict string.
    """
    evo = predict(goal, sol1, sol2)
    gpt = gpt_predict(goal, sol1, sol2)

    evo_ok = evo == correct
    gpt_ok = gpt == correct
    # gpt_predict reports failures as text with this prefix; treat that as
    # "no answer" rather than scoring it as a wrong answer.
    gpt_failed = isinstance(gpt, str) and gpt.startswith("GPT Error:")

    if gpt_failed:
        evo_part = "🧠 Evo got it right." if evo_ok else "❌ Evo got it wrong."
        verdict = f"{evo_part} GPT-3.5 could not answer."
    elif evo_ok and gpt_ok:
        verdict = "✅ Both Evo and GPT-3.5 are correct!"
    elif evo_ok:
        verdict = "🧠 Evo got it right. GPT-3.5 missed it."
    elif gpt_ok:
        verdict = "🤖 GPT-3.5 got it right. Evo missed it."
    else:
        verdict = "❌ Both models got it wrong."

    # Log to Firebase
    log_feedback_to_firebase(goal, sol1, sol2, evo, gpt, correct, "from app.py")

    return evo, gpt, verdict
40
 
41
# Gradio UI: inputs for one example, a submit button, and the three outputs.
# NOTE(review): nesting below is reconstructed from a flattened listing —
# confirm which components belong inside each gr.Row().
with gr.Blocks() as demo:
    gr.Markdown("## ⚔️ Evo vs GPT-3.5 – Real-Time Commonsense Showdown")
    gr.Markdown(
        "> 🧠 EvoTransformer v2.1 – PIQA Accuracy: 69.7% (vs GPT-3.5 ≈ 81%)\n"
        "> 13M Parameters • Fully Scratch-Trained • Leans Smart\n"
        "> 🧪 *Note: EvoTransformer is a scratch-built model trained on 1K PIQA examples. It may occasionally misinterpret context or idioms. That’s part of its evolution.*"
    )

    with gr.Row():
        goal = gr.Text(label="Goal")
    with gr.Row():
        sol1 = gr.Text(label="Solution 1")
        sol2 = gr.Text(label="Solution 2")
    correct = gr.Radio(choices=["Solution 1", "Solution 2"], label="✅ Which is actually correct?", value="Solution 1")

    btn = gr.Button("Submit")
    evo_out = gr.Text(label="🧠 EvoTransformer Response")
    gpt_out = gr.Text(label="🤖 GPT-3.5 Response")
    verdict_out = gr.Text(label="⚖️ Verdict")

    btn.click(fn=compare, inputs=[goal, sol1, sol2, correct], outputs=[evo_out, gpt_out, verdict_out])

    gr.Markdown("#### 🔍 Try These Examples:")
    # Each row is [goal, solution 1, solution 2, correct answer].
    examples = [
        ["Start a fire", "Use a match", "Pour water", "Solution 1"],
        ["Warm up food", "Use microwave", "Put it in fridge", "Solution 1"],
        ["Charge a phone", "Plug it in", "Put it on grass", "Solution 1"],
        ["Get rid of bad smell", "Open window", "Close door", "Solution 1"],
    ]
    gr.Examples(examples=examples, inputs=[goal, sol1, sol2, correct])

    gr.Markdown("Made with ❤️ by Dr. Heman Mohabeer — EvoTransformer is not just code. It's evolution.")
73