# Nexa Evals — Gradio benchmark dashboard (Hugging Face Space).
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
# Data for Tabular Models (normalized to 0-10 from original 0-1 data)
# Maps domain name -> {model name -> score}. Model names containing
# "Nexa" are highlighted in indigo by plot_comparison().
TABULAR_MODEL_EVALS = {
    "Proteins": {
        "Nexa Bio1 (Secondary)": 7.1,
        "Porter6 (Secondary)": 8.5,
        "DeepCNF (Secondary)": 8.5,
        "AlphaFold2 (Tertiary GDT-TS)": 9.2,
        "Nexa Bio2 (Tertiary)": 9.0,
    },
    "Astro": {
        "Nexa Astro": 9.7,
        "Baseline CNN": 8.9,
    },
    "Materials": {
        "Nexa Materials": 10.0,
        "Random Forest Baseline": 9.2,
    },
    "QST": {
        "Nexa PIN Model": 8.0,
        "Quantum TomoNet": 8.5,
    },
    "HEP": {
        "Nexa HEP Model": 9.1,
        "CMSNet": 9.4,
    },
    "CFD": {
        "Nexa CFD Model": 9.2,
        "FlowNet": 8.9,
    },
}
# Data for Nexa Mistral Sci-7B Evaluation (from your image).
# Maps metric name -> {evaluation context -> score}: each metric is scored
# under the general OSIR benchmark and the physics-focused OSIR-Field.
NEXA_MISTRAL_EVALS = {
    "Nexa Mistral Sci-7B": {
        "Scientific Utility": {"OSIR (General)": 7.0, "OSIR-Field (Physics)": 8.5},
        "Symbolism & Math Logic": {"OSIR (General)": 6.0, "OSIR-Field (Physics)": 7.5},
        "Citation & Structure": {"OSIR (General)": 5.5, "OSIR-Field (Physics)": 6.0},
        "Thematic Grounding": {"OSIR (General)": 7.0, "OSIR-Field (Physics)": 8.0},
        "Hypothesis Framing": {"OSIR (General)": 6.0, "OSIR-Field (Physics)": 7.0},
        "Internal Consistency": {"OSIR (General)": 9.0, "OSIR-Field (Physics)": 9.5},
        "Entropy / Novelty": {"OSIR (General)": 6.5, "OSIR-Field (Physics)": 6.0},
    }
}

# Placeholder for the LLM benchmark data. plot_comparison() and the "LLMs"
# tab both reference LLM_MODEL_EVALS, but it was never defined, so the app
# crashed with a NameError at import time. Defining the expected domain key
# (matching the dropdown's default value) with no entries keeps the app
# launchable until real scores are supplied.
# TODO: populate with actual LLM benchmark results.
LLM_MODEL_EVALS = {
    "LLM (General OSIR)": {},
}
# Plotting function using Matplotlib
def plot_comparison(domain, data_type):
    """Render a horizontal bar chart of model scores and return the Figure.

    Args:
        domain: For data_type "tabular"/"llm", a domain key such as
            "Proteins"; for "mistral", a metric key such as
            "Scientific Utility".
        data_type: One of "tabular", "llm", or "mistral" — selects which
            module-level evaluation table to read.

    Returns:
        A matplotlib Figure suitable for display in gr.Plot.

    Raises:
        KeyError: if `domain` is not present in the selected table.
    """
    if data_type == "mistral":
        # Here each "model" is an evaluation context (general OSIR vs.
        # physics-specific OSIR-Field) for a single metric.
        data = NEXA_MISTRAL_EVALS["Nexa Mistral Sci-7B"][domain]
        models = list(data.keys())
        scores = list(data.values())
        fig, ax = plt.subplots(figsize=(8, 6), facecolor='#e0e0e0')
        y_pos = np.arange(len(models))
        width = 0.35
        # Draw one bar per context at its own y position. The original code
        # passed a length-1 score list against a length-2 y array, which
        # matplotlib broadcast into duplicate bars at every position.
        for pos, score, model, color in zip(y_pos, scores, models, ('yellow', 'orange')):
            ax.barh(pos, score, width, label=model, color=color)
    else:
        # NOTE(review): the "llm" path assumes LLM_MODEL_EVALS exists at
        # module level — verify it is defined before enabling that tab.
        data = TABULAR_MODEL_EVALS[domain] if data_type == "tabular" else LLM_MODEL_EVALS[domain]
        models = list(data.keys())
        scores = list(data.values())
        fig, ax = plt.subplots(figsize=(8, 6), facecolor='#e0e0e0')
        y_pos = np.arange(len(models))
        width = 0.8
        # Nexa models are highlighted; baselines get a neutral gray.
        colors = ['indigo' if 'Nexa' in model else 'lightgray' if data_type == "tabular" else 'gray' for model in models]
        ax.barh(y_pos, scores, width, color=colors)
    ax.set_yticks(y_pos)
    ax.set_yticklabels(models)
    ax.set_xlabel('Score (1-10)')
    ax.set_title(f"{('Nexa Mistral Sci-7B Evaluation: ' if data_type == 'mistral' else '')}{domain}")
    ax.set_xlim(0, 10)
    if data_type == "mistral":
        ax.legend()
    ax.grid(True, axis='x', linestyle='--', alpha=0.7)
    plt.tight_layout()
    return fig
# Thin adapters between the Gradio widgets and plot_comparison(): each one
# pins the data_type for its tab and forwards the user's selection.
def display_tabular_eval(domain):
    """Plot tabular-model benchmarks for the selected domain."""
    return plot_comparison(domain, data_type="tabular")


def display_llm_eval(domain):
    """Plot LLM benchmarks for the selected domain."""
    return plot_comparison(domain, data_type="llm")


def display_mistral_eval(metric):
    """Plot the Mistral Sci-7B context comparison for the selected metric."""
    return plot_comparison(metric, data_type="mistral")
# Gradio interface: one tab per benchmark family, each with a dropdown,
# a button, and a Plot output wired to the matching display_* function.
with gr.Blocks(css="body {font-family: 'Inter', sans-serif; background-color: #e0e0e0; color: #333;}") as demo:
    gr.Markdown("""
# 🔬 Nexa Evals — Scientific ML Benchmark Suite
A benchmarking suite for Nexa models across various domains.
""")
    with gr.Tabs():
        with gr.TabItem("Tabular Models"):
            with gr.Row():
                tabular_domain = gr.Dropdown(
                    choices=list(TABULAR_MODEL_EVALS.keys()),
                    label="Select Domain",
                    value="Proteins"
                )
                show_tabular_btn = gr.Button("Show Evaluation")
            tabular_plot = gr.Plot(label="Benchmark Plot")
            show_tabular_btn.click(
                fn=display_tabular_eval,
                inputs=tabular_domain,
                outputs=tabular_plot
            )
        with gr.TabItem("LLMs"):
            with gr.Row():
                # NOTE(review): LLM_MODEL_EVALS is referenced here but is not
                # defined anywhere in this file — as written, this line raises
                # NameError at import time. Define LLM_MODEL_EVALS (with a
                # "LLM (General OSIR)" key to match the default below) before
                # launching.
                llm_domain = gr.Dropdown(
                    choices=list(LLM_MODEL_EVALS.keys()),
                    label="Select Domain",
                    value="LLM (General OSIR)"
                )
                show_llm_btn = gr.Button("Show Evaluation")
            llm_plot = gr.Plot(label="Benchmark Plot")
            show_llm_btn.click(
                fn=display_llm_eval,
                inputs=llm_domain,
                outputs=llm_plot
            )
        with gr.TabItem("Nexa Mistral Sci-7B"):
            with gr.Row():
                # Dropdown lists metrics (not domains); each plot compares the
                # two OSIR evaluation contexts for the chosen metric.
                mistral_metric = gr.Dropdown(
                    choices=list(NEXA_MISTRAL_EVALS["Nexa Mistral Sci-7B"].keys()),
                    label="Select Metric",
                    value="Scientific Utility"
                )
                show_mistral_btn = gr.Button("Show Evaluation")
            mistral_plot = gr.Plot(label="Benchmark Plot")
            show_mistral_btn.click(
                fn=display_mistral_eval,
                inputs=mistral_metric,
                outputs=mistral_plot
            )
        with gr.TabItem("About"):
            gr.Markdown("""
# ℹ️ About Nexa Evals
Nexa Evals benchmarks Nexa models across scientific domains:
- **Tabular Models**: Compares Nexa models against baselines.
- **LLMs**: Evaluates Nexa language models against competitors.
- **Nexa Mistral Sci-7B**: Compares general and physics-specific performance.
Scores are on a 1-10 scale.
""")
# Module-level launch: starts the Gradio server when the script is run.
demo.launch()