File size: 2,196 Bytes
112a2a9
 
 
 
 
 
 
0ac88b2
112a2a9
0ac88b2
112a2a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ac88b2
 
 
 
 
 
 
112a2a9
0ac88b2
 
112a2a9
0ac88b2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import firebase_admin
from firebase_admin import credentials, firestore
import pandas as pd
import matplotlib.pyplot as plt
import io
import gradio as gr

# Initialize the Firebase Admin SDK, at most once per process.
# The guard reads the SDK's private `_apps` attribute and skips initialization
# when it is non-empty, so re-importing or re-running this module does not call
# `initialize_app` again.
# NOTE(review): presumably `_apps` is the registry of already-initialized apps
# and a second default `initialize_app` call would fail — confirm against the
# firebase_admin version in use.
if not firebase_admin._apps:
    # Service-account key file; the relative path is resolved against the
    # current working directory of the process.
    cred = credentials.Certificate("firebase_key.json")
    firebase_admin.initialize_app(cred)

# Module-level Firestore client shared by the functions in this file.
db = firestore.client()

def fetch_logs():
    """Load feedback log entries from Firestore into a DataFrame.

    Streams every document of the ``evo_feedback_logs`` collection and keeps
    only those whose dict form contains all of the keys ``goal``, ``evo``,
    ``gpt`` and ``correct``; documents missing any of them are skipped.

    Returns:
        A ``pandas.DataFrame`` with one row per retained document. It is
        empty when no document qualifies.
    """
    required_keys = ["goal", "evo", "gpt", "correct"]
    # Lazily convert each streamed snapshot so documents are processed one at
    # a time, exactly as they arrive from the stream.
    entries = (
        snapshot.to_dict()
        for snapshot in db.collection("evo_feedback_logs").stream()
    )
    complete_rows = [
        entry
        for entry in entries
        if all(key in entry for key in required_keys)
    ]
    return pd.DataFrame(complete_rows)

def generate_dashboard():
    """Build the accuracy summary text and bar chart from the feedback logs.

    Reads the logs through ``fetch_logs()``, scores each model's answer
    against the logged ``correct`` value, and renders a four-bar chart of
    correct/wrong counts for Evo and GPT.

    Returns:
        A ``(summary, image)`` tuple.

        * ``summary`` is a multi-line string with Evo accuracy, GPT-3.5
          accuracy, the Evo/GPT agreement rate and the number of rows.
        * ``image`` is an ``io.BytesIO`` rewound to offset 0 that holds the
          chart as PNG bytes.

        When ``fetch_logs()`` yields no rows the result is
        ``("No data yet.", None)``.
    """
    df = fetch_logs()
    if df.empty:
        return "No data yet.", None

    # A prediction counts as correct when it equals the logged "correct" value.
    evo_hits = df["evo"] == df["correct"]
    gpt_hits = df["gpt"] == df["correct"]

    evo_acc = evo_hits.mean()
    gpt_acc = gpt_hits.mean()
    agreement = (df["evo"] == df["gpt"]).mean()

    total = len(df)
    evo_right = int(evo_hits.sum())
    gpt_right = int(gpt_hits.sum())
    counts = [evo_right, total - evo_right, gpt_right, total - gpt_right]

    buf = io.BytesIO()
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        ax.bar(["Evo Correct", "Evo Wrong", "GPT Correct", "GPT Wrong"], counts)
        ax.set_title("Model Accuracy Summary")
        ax.set_ylabel("Count")
        # Go through the Figure object rather than pyplot's implicit
        # "current figure": a concurrent callback may have switched the
        # current figure, and pyplot's global state is shared.
        fig.tight_layout()
        fig.savefig(buf, format="png")
    finally:
        # Always unregister the figure from pyplot, even if drawing or saving
        # failed, so repeated refreshes do not accumulate open figures.
        plt.close(fig)
    buf.seek(0)

    summary = (
        f"🧠 Evo Accuracy: {evo_acc:.2%}\n"
        f"🤖 GPT-3.5 Accuracy: {gpt_acc:.2%}\n"
        f"⚖️ Agreement Rate: {agreement:.2%}\n"
        f"📝 Total Examples Logged: {total}"
    )

    return summary, buf

def render_dashboard():
    """Build the Gradio Blocks layout for the live evolution dashboard.

    The layout holds a heading, a read-only summary textbox, an image slot
    for the accuracy chart, and a refresh button that recomputes both from
    ``generate_dashboard()``.

    Returns:
        The constructed ``gr.Blocks`` object. This function does not launch
        it.
    """

    def _refresh():
        """Adapt ``generate_dashboard()`` output to what ``gr.Image`` accepts."""
        summary, chart = generate_dashboard()
        # The chart arrives as an in-memory PNG stream (or None when there is
        # no data). An image output takes a numpy array, a PIL image or a file
        # path, not a file-like object, so decode the stream to pixels first.
        if chart is not None and hasattr(chart, "read"):
            pixels = plt.imread(chart, format="png")  # float values in 0..1
            chart = (pixels * 255).round().astype("uint8")
        return summary, chart

    with gr.Blocks() as dashboard:
        gr.Markdown("## 📊 EvoTransformer Live Evolution Dashboard")
        gr.Markdown("> 🔄 *Below metrics are based on real-time user feedback.*")

        summary_text = gr.Textbox(label="Summary", interactive=False)
        plot = gr.Image(type="pil", label="Accuracy Plot")

        refresh = gr.Button("🔁 Refresh Data")
        refresh.click(fn=_refresh, outputs=[summary_text, plot])

    return dashboard