File size: 5,631 Bytes
0bbd367
08d1f1b
 
0bbd367
08d1f1b
b0ad3dc
 
08d1f1b
 
 
 
 
b0ad3dc
 
08d1f1b
 
b0ad3dc
 
08d1f1b
 
b0ad3dc
 
08d1f1b
 
b0ad3dc
 
08d1f1b
 
b0ad3dc
 
08d1f1b
 
b0ad3dc
 
 
08d1f1b
94c2f22
 
 
 
 
 
 
 
 
 
 
 
08d1f1b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0bbd367
94c2f22
 
08d1f1b
b0ad3dc
08d1f1b
b0ad3dc
 
08d1f1b
0bbd367
94c2f22
08d1f1b
94c2f22
08d1f1b
 
0bbd367
b0ad3dc
08d1f1b
0bbd367
 
b0ad3dc
 
 
 
 
 
 
 
 
 
 
 
 
08d1f1b
b0ad3dc
0bbd367
b0ad3dc
 
 
 
 
 
 
 
 
 
 
 
08d1f1b
b0ad3dc
0bbd367
94c2f22
 
 
 
 
 
 
 
 
 
 
 
08d1f1b
94c2f22
 
08d1f1b
 
 
 
 
 
 
 
 
0bbd367
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np

# Data for Tabular Models (normalized to 0-10 from original 0-1 data)
# Structure: {domain_name: {model_name: score}}.
# Domain names feed the "Tabular Models" dropdown; model names become the
# y-axis labels in plot_comparison(). Models containing "Nexa" in their
# name are highlighted in indigo by the plotting code.
TABULAR_MODEL_EVALS = {
    "Proteins": {
        "Nexa Bio1 (Secondary)": 7.1,
        "Porter6 (Secondary)": 8.5,
        "DeepCNF (Secondary)": 8.5,
        "AlphaFold2 (Tertiary GDT-TS)": 9.2,
        "Nexa Bio2 (Tertiary)": 9.0,
    },
    "Astro": {
        "Nexa Astro": 9.7,
        "Baseline CNN": 8.9,
    },
    "Materials": {
        "Nexa Materials": 10.0,
        "Random Forest Baseline": 9.2,
    },
    "QST": {
        "Nexa PIN Model": 8.0,
        "Quantum TomoNet": 8.5,
    },
    "HEP": {
        "Nexa HEP Model": 9.1,
        "CMSNet": 9.4,
    },
    "CFD": {
        "Nexa CFD Model": 9.2,
        "FlowNet": 8.9,
    },
}

# Data for Nexa Mistral Sci-7B Evaluation (from your image)
# Structure: {model: {metric: {rubric: score}}}. Each metric compares the
# general OSIR rubric against the physics-specialized OSIR-Field rubric
# on a 1-10 scale; metric names feed the "Nexa Mistral Sci-7B" dropdown.
NEXA_MISTRAL_EVALS = {
    "Nexa Mistral Sci-7B": {
        "Scientific Utility": {"OSIR (General)": 7.0, "OSIR-Field (Physics)": 8.5},
        "Symbolism & Math Logic": {"OSIR (General)": 6.0, "OSIR-Field (Physics)": 7.5},
        "Citation & Structure": {"OSIR (General)": 5.5, "OSIR-Field (Physics)": 6.0},
        "Thematic Grounding": {"OSIR (General)": 7.0, "OSIR-Field (Physics)": 8.0},
        "Hypothesis Framing": {"OSIR (General)": 6.0, "OSIR-Field (Physics)": 7.0},
        "Internal Consistency": {"OSIR (General)": 9.0, "OSIR-Field (Physics)": 9.5},
        "Entropy / Novelty": {"OSIR (General)": 6.5, "OSIR-Field (Physics)": 6.0},
    }
}

# Data for the "LLMs" tab: {domain: {model: score}}.
# This constant is read by plot_comparison() and the LLMs dropdown but was
# never defined, so the script crashed with NameError while building the UI.
# The "LLM (General OSIR)" key must exist — it is the dropdown's default value.
# NOTE(review): scores below are placeholders — replace with the real
# benchmark numbers before publishing.
LLM_MODEL_EVALS = {
    "LLM (General OSIR)": {
        "Nexa Mistral Sci-7B": 6.7,
        "Mistral-7B (Base)": 6.0,
    },
    "LLM (Field OSIR)": {
        "Nexa Mistral Sci-7B": 7.5,
        "Mistral-7B (Base)": 6.5,
    },
}

# Plotting function using Matplotlib
def plot_comparison(domain, data_type):
    """Build a horizontal bar chart for one benchmark view.

    Args:
        domain: For data_type "tabular"/"llm", a domain key of
            TABULAR_MODEL_EVALS / LLM_MODEL_EVALS; for "mistral", a metric
            name inside NEXA_MISTRAL_EVALS["Nexa Mistral Sci-7B"].
        data_type: One of "tabular", "llm", or "mistral".

    Returns:
        The matplotlib Figure, suitable for a gradio Plot output.
    """
    if data_type == "mistral":
        metric = domain
        data = NEXA_MISTRAL_EVALS["Nexa Mistral Sci-7B"][metric]
        models = list(data.keys())
        scores = list(data.values())
        fig, ax = plt.subplots(figsize=(8, 6), facecolor='#e0e0e0')
        y_pos = np.arange(len(models))
        width = 0.35
        # Draw one bar per rubric at its own y position. The previous code
        # passed the full y_pos array together with a single-element score
        # list, which matplotlib broadcast across BOTH y positions — drawing
        # four bars (two spurious) instead of two.
        ax.barh(y_pos[0], scores[0], width, label=models[0], color='yellow')
        ax.barh(y_pos[1], scores[1], width, label=models[1], color='orange')
    else:
        data = TABULAR_MODEL_EVALS[domain] if data_type == "tabular" else LLM_MODEL_EVALS[domain]
        models = list(data.keys())
        scores = list(data.values())
        fig, ax = plt.subplots(figsize=(8, 6), facecolor='#e0e0e0')
        y_pos = np.arange(len(models))
        width = 0.8
        # Highlight Nexa models; baselines get a neutral gray.
        colors = ['indigo' if 'Nexa' in model else 'lightgray' if data_type == "tabular" else 'gray' for model in models]
        ax.barh(y_pos, scores, width, color=colors)

    ax.set_yticks(y_pos)
    ax.set_yticklabels(models)
    ax.set_xlabel('Score (1-10)')
    ax.set_title(f"{('Nexa Mistral Sci-7B Evaluation: ' if data_type == 'mistral' else '')}{domain}")
    ax.set_xlim(0, 10)
    if data_type == "mistral":
        # Only the grouped mistral view needs a legend; the single-series
        # views label bars via the y-axis ticks.
        ax.legend()
    ax.grid(True, axis='x', linestyle='--', alpha=0.7)
    plt.tight_layout()

    return fig

# Display functions — thin adapters binding each Gradio tab to plot_comparison.
def display_tabular_eval(domain):
    """Render the tabular-model benchmark chart for *domain*."""
    figure = plot_comparison(domain, "tabular")
    return figure

def display_llm_eval(domain):
    """Render the LLM benchmark chart for *domain*."""
    figure = plot_comparison(domain, "llm")
    return figure

def display_mistral_eval(metric):
    """Render the Nexa Mistral Sci-7B chart for one evaluation *metric*."""
    figure = plot_comparison(metric, "mistral")
    return figure

# Gradio interface: one tab per benchmark family, each wiring a dropdown
# and a button to the matching display_* function.
with gr.Blocks(css="body {font-family: 'Inter', sans-serif; background-color: #e0e0e0; color: #333;}") as demo:
    gr.Markdown("""
    # 🔬 Nexa Evals — Scientific ML Benchmark Suite
    A benchmarking suite for Nexa models across various domains.
    """)

    with gr.Tabs():
        with gr.TabItem("Tabular Models"):
            with gr.Row():
                tabular_domain = gr.Dropdown(
                    choices=list(TABULAR_MODEL_EVALS.keys()),
                    label="Select Domain",
                    value="Proteins"
                )
                show_tabular_btn = gr.Button("Show Evaluation")
            tabular_plot = gr.Plot(label="Benchmark Plot")
            show_tabular_btn.click(
                fn=display_tabular_eval,
                inputs=tabular_domain,
                outputs=tabular_plot
            )

        with gr.TabItem("LLMs"):
            with gr.Row():
                llm_domain = gr.Dropdown(
                    choices=list(LLM_MODEL_EVALS.keys()),
                    label="Select Domain",
                    value="LLM (General OSIR)"
                )
                show_llm_btn = gr.Button("Show Evaluation")
            llm_plot = gr.Plot(label="Benchmark Plot")
            show_llm_btn.click(
                fn=display_llm_eval,
                inputs=llm_domain,
                outputs=llm_plot
            )

        with gr.TabItem("Nexa Mistral Sci-7B"):
            with gr.Row():
                mistral_metric = gr.Dropdown(
                    choices=list(NEXA_MISTRAL_EVALS["Nexa Mistral Sci-7B"].keys()),
                    label="Select Metric",
                    value="Scientific Utility"
                )
                show_mistral_btn = gr.Button("Show Evaluation")
            mistral_plot = gr.Plot(label="Benchmark Plot")
            show_mistral_btn.click(
                fn=display_mistral_eval,
                inputs=mistral_metric,
                outputs=mistral_plot
            )

        # Moved inside gr.Tabs() so "About" renders as a fourth tab alongside
        # the others; previously it sat at Blocks level and produced a
        # second, detached tab group below the main one.
        with gr.TabItem("About"):
            gr.Markdown("""
        # ℹ️ About Nexa Evals
        Nexa Evals benchmarks Nexa models across scientific domains:
        - **Tabular Models**: Compares Nexa models against baselines.
        - **LLMs**: Evaluates Nexa language models against competitors.
        - **Nexa Mistral Sci-7B**: Compares general and physics-specific performance.
        Scores are on a 1-10 scale.
        """)

demo.launch()