Spaces:
Running
Running
Delete app.py
Browse files
app.py
DELETED
@@ -1,176 +0,0 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
import matplotlib.pyplot as plt
|
3 |
-
import numpy as np
|
4 |
-
|
5 |
-
# Benchmark scores for the tabular models, keyed by scientific domain and then
# by model name. Values are on a 0-10 scale (normalized from original 0-1 data).
TABULAR_MODEL_EVALS = {
    "Proteins": {
        "Nexa Bio1 (Secondary)": 7.1,
        "Porter6 (Secondary)": 8.5,
        "DeepCNF (Secondary)": 8.5,
        "AlphaFold2 (Tertiary GDT-TS)": 9.2,
        "Nexa Bio2 (Tertiary)": 9.0,
    },
    "Astro": {"Nexa Astro": 9.7, "Baseline CNN": 8.9},
    "Materials": {"Nexa Materials": 10.0, "Random Forest Baseline": 9.2},
    "QST": {"Nexa PIN Model": 8.0, "Quantum TomoNet": 8.5},
    "HEP": {"Nexa HEP Model": 9.1, "CMSNet": 9.4},
    "CFD": {"Nexa CFD Model": 9.2, "FlowNet": 8.9},
}
|
35 |
-
|
36 |
-
# Scores for the language models (demo data), keyed by evaluation family and
# then by model name.
LLM_MODEL_EVALS = {
    "LLM (General OSIR)": {
        "Nexa Mistral Sci-7B": 6.1,
        "Llama-3-8B-Instruct": 3.9,
        "Mixtral-8x7B-Instruct-v0.1": 4.1,
        "Claude-3-Sonnet": 6.4,
        "GPT-4-Turbo": 6.8,
        "GPT-4o": 7.1,
    },
    "LLM (Field-Specific OSIR)": {
        "Nexa Bio Adapter": 6.6,
        "Nexa Astro Adapter": 7.0,
        "GPT-4o (Biomed)": 6.9,
        "Claude-3-Opus (Bio)": 6.7,
        "Llama-3-8B-Bio": 4.2,
        "Mixtral-8x7B-BioTune": 4.3,
    },
}
|
55 |
-
|
56 |
-
# Per-metric scores for Nexa Mistral Sci-7B under the two OSIR settings.
# (The original comment notes these values were taken from an image.)
# Each row is: metric name, general score, physics-field score.
_MISTRAL_SETTINGS = ("OSIR (General)", "OSIR-Field (Physics)")
_MISTRAL_SCORE_ROWS = (
    ("Scientific Utility", 7.0, 8.5),
    ("Symbolism & Math Logic", 6.0, 7.5),
    ("Citation & Structure", 5.5, 6.0),
    ("Thematic Grounding", 7.0, 8.0),
    ("Hypothesis Framing", 6.0, 7.0),
    ("Internal Consistency", 9.0, 9.5),
    ("Entropy / Novelty", 6.5, 6.0),
)
NEXA_MISTRAL_EVALS = {
    "Nexa Mistral Sci-7B": {
        metric: dict(zip(_MISTRAL_SETTINGS, setting_scores))
        for metric, *setting_scores in _MISTRAL_SCORE_ROWS
    }
}
|
68 |
-
|
69 |
-
# Plotting function using Matplotlib
|
70 |
-
def plot_comparison(domain, data_type):
    """Build a horizontal bar chart of benchmark scores for one selection.

    Args:
        domain: Lookup key. When ``data_type`` is ``"mistral"`` it is a metric
            name inside ``NEXA_MISTRAL_EVALS["Nexa Mistral Sci-7B"]``;
            otherwise it is a key of ``TABULAR_MODEL_EVALS`` or
            ``LLM_MODEL_EVALS``.
        data_type: ``"mistral"`` or ``"tabular"``; any other value selects
            the LLM table.

    Returns:
        A matplotlib ``Figure`` containing the chart.

    Raises:
        KeyError: If ``domain`` is not a key of the selected table.
    """
    # Build the figure directly instead of through pyplot: pyplot keeps every
    # figure it creates in a global registry until it is explicitly closed, so
    # a long-running server would accumulate one figure per button click.
    from matplotlib.figure import Figure

    is_mistral = data_type == "mistral"
    if is_mistral:
        data = NEXA_MISTRAL_EVALS["Nexa Mistral Sci-7B"][domain]
    elif data_type == "tabular":
        data = TABULAR_MODEL_EVALS[domain]
    else:
        data = LLM_MODEL_EVALS[domain]

    models = list(data)
    scores = list(data.values())
    y_pos = np.arange(len(models))

    fig = Figure(figsize=(8, 6), facecolor='#e0e0e0')
    ax = fig.subplots()

    if is_mistral:
        # One bar per evaluation setting, each on its own tick. The previous
        # code passed two y positions with a one-element score slice, which
        # matplotlib broadcast into duplicated bars so that every tick showed
        # both scores.
        palette = ('yellow', 'orange')
        for index, (label, score) in enumerate(zip(models, scores)):
            ax.barh(
                y_pos[index],
                score,
                0.35,
                label=label,
                color=palette[index % len(palette)],
            )
    else:
        # Highlight Nexa models; the other bars use a neutral shade that
        # differs between the tabular and LLM views.
        neutral = 'lightgray' if data_type == "tabular" else 'gray'
        colors = ['indigo' if 'Nexa' in model else neutral for model in models]
        ax.barh(y_pos, scores, 0.8, color=colors)

    ax.set_yticks(y_pos)
    ax.set_yticklabels(models)
    ax.set_xlabel('Score (1-10)')
    ax.set_title(f"{('Nexa Mistral Sci-7B Evaluation: ' if is_mistral else '')}{domain}")
    ax.set_xlim(0, 10)
    if is_mistral:
        ax.legend()
    ax.grid(True, axis='x', linestyle='--', alpha=0.7)
    fig.tight_layout()

    return fig
|
102 |
-
|
103 |
-
# Display functions
|
104 |
-
def display_tabular_eval(domain):
    """Return the benchmark figure for the tabular-model ``domain``."""
    figure = plot_comparison(domain, data_type="tabular")
    return figure
|
106 |
-
|
107 |
-
def display_llm_eval(domain):
    """Return the benchmark figure for the LLM evaluation ``domain``."""
    figure = plot_comparison(domain, data_type="llm")
    return figure
|
109 |
-
|
110 |
-
def display_mistral_eval(metric):
    """Return the Nexa Mistral Sci-7B figure for the selected ``metric``."""
    figure = plot_comparison(metric, data_type="mistral")
    return figure
|
112 |
-
|
113 |
-
# Gradio interface
|
114 |
-
# Page layout: a title banner followed by three benchmark tabs and an "About"
# tab. Each benchmark tab has a dropdown, a button, and a plot that the button
# refreshes from the dropdown's current value.
with gr.Blocks(
    css=(
        "body {font-family: 'Inter', sans-serif; "
        "background-color: #e0e0e0; color: #333;}"
    )
) as demo:
    gr.Markdown("""
    # 🔬 Nexa Evals — Scientific ML Benchmark Suite
    A benchmarking suite for Nexa models across various domains.
    """)

    with gr.Tabs():
        # Tab 1: Nexa tabular models versus baselines, one chart per domain.
        with gr.TabItem("Tabular Models"):
            with gr.Row():
                tabular_domain = gr.Dropdown(
                    label="Select Domain",
                    choices=list(TABULAR_MODEL_EVALS),
                    value="Proteins",
                )
                show_tabular_btn = gr.Button("Show Evaluation")
            tabular_plot = gr.Plot(label="Benchmark Plot")
            show_tabular_btn.click(
                fn=display_tabular_eval,
                inputs=tabular_domain,
                outputs=tabular_plot,
            )

        # Tab 2: language-model comparison, one chart per evaluation family.
        with gr.TabItem("LLMs"):
            with gr.Row():
                llm_domain = gr.Dropdown(
                    label="Select Domain",
                    choices=list(LLM_MODEL_EVALS),
                    value="LLM (General OSIR)",
                )
                show_llm_btn = gr.Button("Show Evaluation")
            llm_plot = gr.Plot(label="Benchmark Plot")
            show_llm_btn.click(
                fn=display_llm_eval,
                inputs=llm_domain,
                outputs=llm_plot,
            )

        # Tab 3: per-metric breakdown for Nexa Mistral Sci-7B.
        with gr.TabItem("Nexa Mistral Sci-7B"):
            with gr.Row():
                mistral_metric = gr.Dropdown(
                    label="Select Metric",
                    choices=list(NEXA_MISTRAL_EVALS["Nexa Mistral Sci-7B"]),
                    value="Scientific Utility",
                )
                show_mistral_btn = gr.Button("Show Evaluation")
            mistral_plot = gr.Plot(label="Benchmark Plot")
            show_mistral_btn.click(
                fn=display_mistral_eval,
                inputs=mistral_metric,
                outputs=mistral_plot,
            )

        # Tab 4: static description of the suite.
        with gr.TabItem("About"):
            gr.Markdown("""
            # ℹ️ About Nexa Evals
            Nexa Evals benchmarks Nexa models across scientific domains:
            - **Tabular Models**: Compares Nexa models against baselines.
            - **LLMs**: Evaluates Nexa language models against competitors.
            - **Nexa Mistral Sci-7B**: Compares general and physics-specific performance.
            Scores are on a 1-10 scale.
            """)

# Start the web server as soon as the script is executed.
demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|