Allanatrix committed on
Commit
f55f079
·
verified ·
1 Parent(s): 72311f1

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -176
app.py DELETED
@@ -1,176 +0,0 @@
1
- import gradio as gr
2
- import matplotlib.pyplot as plt
3
- import numpy as np
4
-
# Benchmark scores for the tabular models, grouped by scientific domain.
# Values are on a 0-10 scale (normalized from original 0-1 data).
# Insertion order is preserved and determines the bar order in the plots.
TABULAR_MODEL_EVALS = {
    # Protein structure prediction (secondary and tertiary structure).
    "Proteins": {
        "Nexa Bio1 (Secondary)": 7.1,
        "Porter6 (Secondary)": 8.5,
        "DeepCNF (Secondary)": 8.5,
        "AlphaFold2 (Tertiary GDT-TS)": 9.2,
        "Nexa Bio2 (Tertiary)": 9.0,
    },
    # Each remaining domain pairs one Nexa model with one reference model.
    "Astro": {"Nexa Astro": 9.7, "Baseline CNN": 8.9},
    "Materials": {"Nexa Materials": 10.0, "Random Forest Baseline": 9.2},
    "QST": {"Nexa PIN Model": 8.0, "Quantum TomoNet": 8.5},
    "HEP": {"Nexa HEP Model": 9.1, "CMSNet": 9.4},
    "CFD": {"Nexa CFD Model": 9.2, "FlowNet": 8.9},
}
35
-
# Scores for the language models (demo data), keyed by evaluation track
# and then by model name. Insertion order determines the bar order.
LLM_MODEL_EVALS = {
    # General-purpose OSIR track.
    "LLM (General OSIR)": {
        "Nexa Mistral Sci-7B": 6.1,
        "Llama-3-8B-Instruct": 3.9,
        "Mixtral-8x7B-Instruct-v0.1": 4.1,
        "Claude-3-Sonnet": 6.4,
        "GPT-4-Turbo": 6.8,
        "GPT-4o": 7.1,
    },
    # Field-specific OSIR track.
    "LLM (Field-Specific OSIR)": {
        "Nexa Bio Adapter": 6.6,
        "Nexa Astro Adapter": 7.0,
        "GPT-4o (Biomed)": 6.9,
        "Claude-3-Opus (Bio)": 6.7,
        "Llama-3-8B-Bio": 4.2,
        "Mixtral-8x7B-BioTune": 4.3,
    },
}
55
-
# Per-metric scores for Nexa Mistral Sci-7B (the original note says these were
# taken from an image). Each metric maps to a general OSIR score and a
# physics-specific OSIR-Field score.
NEXA_MISTRAL_EVALS = {
    "Nexa Mistral Sci-7B": {
        metric: {"OSIR (General)": general, "OSIR-Field (Physics)": physics}
        # Rows are (metric name, general score, physics score).
        for metric, general, physics in (
            ("Scientific Utility", 7.0, 8.5),
            ("Symbolism & Math Logic", 6.0, 7.5),
            ("Citation & Structure", 5.5, 6.0),
            ("Thematic Grounding", 7.0, 8.0),
            ("Hypothesis Framing", 6.0, 7.0),
            ("Internal Consistency", 9.0, 9.5),
            ("Entropy / Novelty", 6.5, 6.0),
        )
    }
}
68
-
# Plotting function using Matplotlib
def plot_comparison(domain, data_type):
    """Build a horizontal bar chart of benchmark scores.

    Args:
        domain: Lookup key for the scores to plot. When ``data_type`` is
            ``"mistral"`` this is a metric name inside
            ``NEXA_MISTRAL_EVALS["Nexa Mistral Sci-7B"]``; otherwise it is a
            key of ``TABULAR_MODEL_EVALS`` or ``LLM_MODEL_EVALS``.
        data_type: ``"mistral"``, ``"tabular"``, or any other value, which
            selects ``LLM_MODEL_EVALS``.

    Returns:
        A Matplotlib ``Figure`` holding the chart.

    Raises:
        KeyError: If ``domain`` is not a key of the selected table.
    """
    # Local import: the file already depends on matplotlib, and this keeps the
    # block self-contained without touching the top-of-file imports.
    from matplotlib.figure import Figure

    is_mistral = data_type == "mistral"
    if is_mistral:
        data = NEXA_MISTRAL_EVALS["Nexa Mistral Sci-7B"][domain]
    elif data_type == "tabular":
        data = TABULAR_MODEL_EVALS[domain]
    else:
        data = LLM_MODEL_EVALS[domain]

    models = list(data.keys())
    scores = list(data.values())
    y_pos = np.arange(len(models))

    # Create the figure directly instead of through pyplot. pyplot keeps a
    # global reference to every figure it creates until it is closed, so a
    # long-running app that builds one figure per click would grow without
    # bound.
    fig = Figure(figsize=(8, 6), facecolor='#e0e0e0')
    ax = fig.subplots()

    if is_mistral:
        # One bar per entry, centred on its own tick. The previous version
        # passed both y positions together with a one-element score slice;
        # Matplotlib broadcast that slice, so each score was drawn on both
        # rows (four bars for two values).
        palette = ['yellow', 'orange']
        bar_height = 0.35
        for idx, (label, score) in enumerate(zip(models, scores)):
            ax.barh(
                idx,
                score,
                bar_height,
                label=label,
                color=palette[idx % len(palette)],
            )
    else:
        # Highlight Nexa models; other models get a neutral shade that
        # differs between the tabular and LLM views.
        baseline = 'lightgray' if data_type == "tabular" else 'gray'
        colors = ['indigo' if 'Nexa' in model else baseline for model in models]
        ax.barh(y_pos, scores, 0.8, color=colors)

    ax.set_yticks(y_pos)
    ax.set_yticklabels(models)
    ax.set_xlabel('Score (1-10)')
    title_prefix = 'Nexa Mistral Sci-7B Evaluation: ' if is_mistral else ''
    ax.set_title(f"{title_prefix}{domain}")
    ax.set_xlim(0, 10)
    if is_mistral:
        ax.legend()
    ax.grid(True, axis='x', linestyle='--', alpha=0.7)
    fig.tight_layout()

    return fig
102
-
# Display functions
def display_tabular_eval(domain):
    """Return the benchmark chart for one tabular-model domain."""
    figure = plot_comparison(domain, data_type="tabular")
    return figure
106
-
def display_llm_eval(domain):
    """Return the benchmark chart for one LLM evaluation track."""
    figure = plot_comparison(domain, data_type="llm")
    return figure
109
-
def display_mistral_eval(metric):
    """Return the Nexa Mistral Sci-7B chart for one evaluation metric."""
    figure = plot_comparison(metric, data_type="mistral")
    return figure
112
-
# Gradio interface
_PAGE_CSS = "body {font-family: 'Inter', sans-serif; background-color: #e0e0e0; color: #333;}"


def _build_eval_tab(tab_title, choices, dropdown_label, default_choice, handler):
    """Create one evaluation tab and wire its button to ``handler``.

    The tab holds a dropdown and a "Show Evaluation" button in a row, with a
    plot component that receives the figure returned by ``handler``.

    NOTE(review): indentation was lost in the source view this was rebuilt
    from; the plot is assumed to sit below the row rather than inside it.
    Confirm against the original layout.

    Returns:
        The ``(dropdown, button, plot)`` components that were created.
    """
    with gr.TabItem(tab_title):
        with gr.Row():
            selector = gr.Dropdown(
                choices=choices,
                label=dropdown_label,
                value=default_choice,
            )
            trigger = gr.Button("Show Evaluation")
        chart = gr.Plot(label="Benchmark Plot")
        trigger.click(fn=handler, inputs=selector, outputs=chart)
    return selector, trigger, chart


with gr.Blocks(css=_PAGE_CSS) as demo:
    gr.Markdown("""
    # 🔬 Nexa Evals — Scientific ML Benchmark Suite
    A benchmarking suite for Nexa models across various domains.
    """)

    with gr.Tabs():
        # The three evaluation tabs share one layout and differ only in
        # their data source and callback.
        tabular_domain, show_tabular_btn, tabular_plot = _build_eval_tab(
            "Tabular Models",
            list(TABULAR_MODEL_EVALS.keys()),
            "Select Domain",
            "Proteins",
            display_tabular_eval,
        )

        llm_domain, show_llm_btn, llm_plot = _build_eval_tab(
            "LLMs",
            list(LLM_MODEL_EVALS.keys()),
            "Select Domain",
            "LLM (General OSIR)",
            display_llm_eval,
        )

        mistral_metric, show_mistral_btn, mistral_plot = _build_eval_tab(
            "Nexa Mistral Sci-7B",
            list(NEXA_MISTRAL_EVALS["Nexa Mistral Sci-7B"].keys()),
            "Select Metric",
            "Scientific Utility",
            display_mistral_eval,
        )

        with gr.TabItem("About"):
            gr.Markdown("""
            # ℹ️ About Nexa Evals
            Nexa Evals benchmarks Nexa models across scientific domains:
            - **Tabular Models**: Compares Nexa models against baselines.
            - **LLMs**: Evaluates Nexa language models against competitors.
            - **Nexa Mistral Sci-7B**: Compares general and physics-specific performance.
            Scores are on a 1-10 scale.
            """)

# Start the web server once the whole interface has been declared.
demo.launch()