File size: 4,027 Bytes
480dd1d
 
312dbba
568d79a
19ba196
488dba6
 
1e3a2f8
923fd6e
 
1e3a2f8
568d79a
1e3a2f8
 
312dbba
19ba196
1e3a2f8
 
 
 
 
2eed809
1e3a2f8
2eed809
568d79a
19ba196
2eed809
1e3a2f8
19ba196
2eed809
19ba196
e256998
19ba196
568d79a
19ba196
 
 
 
 
 
 
 
568d79a
19ba196
 
 
e6e2360
568d79a
e6e2360
488dba6
 
 
 
568d79a
488dba6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e695177
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# Run the bootstrap script in this module's namespace before the imports
# below. The context manager closes the file handle promptly, and the explicit
# encoding keeps the read independent of the host's locale settings.
# NOTE(review): if init_save.py does not rely on sharing this module's globals,
# a plain `import init_save` would be easier to trace — confirm before changing.
with open("init_save.py", encoding="utf-8") as _init_save_file:
    exec(_init_save_file.read())

import gradio as gr
from inference import predict
from logger import log_feedback_to_firebase
from watchdog import manual_retrain
from dashboard import render_dashboard
from openai import OpenAI
from watchdog import manual_retrain

import os

# Module-wide OpenAI client used by gpt_predict(). The key is read from the
# OPENAI_API_KEY environment variable (add it as a secret in your HF Space);
# a missing variable raises KeyError right here.
client = OpenAI(
    api_key=os.environ["OPENAI_API_KEY"],
)

def gpt_predict(goal, sol1, sol2):
    """Ask GPT-3.5 which of two candidate solutions better achieves a goal.

    Args:
        goal: Text describing what should be achieved.
        sol1: Text of the first candidate solution.
        sol2: Text of the second candidate solution.

    Returns:
        "Solution 1" or "Solution 2" when the reply names an option number.
        Otherwise a string starting with "GPT Error: " — either the API call
        failed, or the reply mentioned neither "1" nor "2".
    """
    prompt = (
        f"You're solving a commonsense reasoning problem. Choose the better solution to achieve the goal.\n\n"
        f"Goal: {goal}\n\nOption 1: {sol1}\nOption 2: {sol2}\n\n"
        f"Which option is better? Reply with only 'Solution 1' or 'Solution 2'."
    )
    try:
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )
        # The message content may be missing; treat that as an empty reply.
        reply = response.choices[0].message.content or ""
    except Exception as e:
        # UI boundary: surface any failure as text instead of crashing the app.
        return f"GPT Error: {str(e)}"

    # Let the first option number mentioned decide. A plain `"1" in reply`
    # test would misread "Solution 2 is better than Solution 1" as option 1.
    first_one = reply.find("1")
    first_two = reply.find("2")
    if first_one == -1 and first_two == -1:
        # Do not silently guess an answer when the reply names neither option.
        return f"GPT Error: unexpected reply {reply!r}"
    if first_two == -1 or (first_one != -1 and first_one < first_two):
        return "Solution 1"
    return "Solution 2"

def compare(goal, sol1, sol2, correct):
    """Query both models on one problem, build a verdict, and log the result.

    Args:
        goal: Text describing what should be achieved.
        sol1: Text of the first candidate solution.
        sol2: Text of the second candidate solution.
        correct: The reference answer; each model's answer is compared to it
            with ``==``.

    Returns:
        A tuple ``(evo, gpt, verdict)``: the value returned by ``predict``,
        the value returned by ``gpt_predict``, and a message saying which
        of the two matched ``correct``.
    """
    evo = predict(goal, sol1, sol2)
    gpt = gpt_predict(goal, sol1, sol2)

    # A "GPT Error: ..." string from gpt_predict never equals `correct`, so an
    # API failure is scored as a miss for GPT-3.5.
    if evo == gpt == correct:
        verdict = "✅ Both Evo and GPT-3.5 are correct!"
    elif evo == correct and gpt != correct:
        verdict = "🧠 Evo got it right. GPT-3.5 missed it."
    elif gpt == correct and evo != correct:
        verdict = "🤖 GPT-3.5 got it right. Evo missed it."
    else:
        verdict = "❌ Both models got it wrong."

    # The last argument is presumably a tag for where the feedback came from —
    # confirm against log_feedback_to_firebase.
    log_feedback_to_firebase(goal, sol1, sol2, evo, gpt, correct, "from app.py")

    return evo, gpt, verdict

def trigger_retrain():
    """Run a manual retrain and return a status message for the UI.

    Returns:
        The success message when ``manual_retrain()`` returns a truthy value,
        otherwise the failure message.
    """
    success = manual_retrain()
    return "✅ Evo retrained successfully!" if success else "⚠️ Retraining failed."

# Gradio UI: one tab for the Evo vs GPT-3.5 comparison (with a retrain button)
# and one tab for the dashboard. User-facing text below is restored from
# mis-decoded UTF-8 so emoji and punctuation render correctly.
with gr.Blocks() as demo:
    with gr.Tab("⚔️ Evo vs GPT-3.5 Showdown"):
        gr.Markdown("## ⚔️ Evo vs GPT-3.5 – Real-Time Commonsense Showdown")
        gr.Markdown(
            "> 🧠 EvoTransformer v2.1 – PIQA Accuracy: 69.7% (vs GPT-3.5 ≈ 81%)\n"
            "> 13M Parameters • Fully Scratch-Trained • Leans Smart\n"
            "> 🧪 *Note: EvoTransformer is a scratch-built model trained on 1K PIQA examples. It may occasionally misinterpret context or idioms. That’s part of its evolution.*"
        )
        gr.Markdown(
            "> 🔄 *EvoTransformer is not static. Every time you provide feedback, Evo learns and evolves. Welcome to real-time neural evolution.*"
        )

        # Inputs: the goal, the two candidate solutions, and the reference answer.
        with gr.Row():
            goal = gr.Text(label="Goal")
        with gr.Row():
            sol1 = gr.Text(label="Solution 1")
            sol2 = gr.Text(label="Solution 2")
        correct = gr.Radio(choices=["Solution 1", "Solution 2"], label="✅ Which is actually correct?", value="Solution 1")

        # Outputs are filled from the (evo, gpt, verdict) tuple returned by compare().
        btn = gr.Button("Submit")
        evo_out = gr.Text(label="🧠 EvoTransformer Response")
        gpt_out = gr.Text(label="🤖 GPT-3.5 Response")
        verdict_out = gr.Text(label="⚖️ Verdict")

        btn.click(fn=compare, inputs=[goal, sol1, sol2, correct], outputs=[evo_out, gpt_out, verdict_out])

        # NOTE(review): the emoji's last byte was lost in the garbled text; 🔍 is a best guess — confirm.
        gr.Markdown("#### 🔍 Try These Examples:")
        # Each row is [goal, solution 1, solution 2, correct answer].
        examples = [
            ["Start a fire", "Use a match", "Pour water", "Solution 1"],
            ["Warm up food", "Use microwave", "Put it in fridge", "Solution 1"],
            ["Charge a phone", "Plug it in", "Put it on grass", "Solution 1"],
            ["Get rid of bad smell", "Open window", "Close door", "Solution 1"],
        ]
        gr.Examples(examples=examples, inputs=[goal, sol1, sol2, correct])

        # NOTE(review): the emoji's last byte was lost in the garbled text; 🔁 is a best guess — confirm.
        retrain_btn = gr.Button("🔁 Retrain EvoTransformer")
        retrain_status = gr.Text(label="📢 Retrain Status")
        retrain_btn.click(fn=trigger_retrain, outputs=[retrain_status])

        # NOTE(review): the garbled text showed a flower glyph here; ❤️ is assumed — confirm.
        gr.Markdown("Made with ❤️ by Dr. Heman Mohabeer — EvoTransformer is not just code. It's evolution.")

    with gr.Tab("📊 Evo Dashboard"):
        render_dashboard()

demo.launch()