Spaces:

Allanatrix
/

NexaEvals

Running

App Files Files Community

Allanatrix committed 18 days ago

Commit

f55f079

verified ·

1 Parent(s): 72311f1

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -176

app.py DELETED Viewed

@@ -1,176 +0,0 @@
-import gradio as gr
-import matplotlib.pyplot as plt
-import numpy as np
-# Data for Tabular Models (normalized to 0-10 from original 0-1 data)
-# Layout: domain name -> {model display name -> score}.
-# The domain names populate the "Tabular Models" dropdown, and
-# plot_comparison highlights every model whose name contains "Nexa".
-TABULAR_MODEL_EVALS = {
-    "Proteins": {
-        "Nexa Bio1 (Secondary)": 7.1,
-        "Porter6 (Secondary)": 8.5,
-        "DeepCNF (Secondary)": 8.5,
-        "AlphaFold2 (Tertiary GDT-TS)": 9.2,
-        "Nexa Bio2 (Tertiary)": 9.0,
-    },
-    "Astro": {
-        "Nexa Astro": 9.7,
-        "Baseline CNN": 8.9,
-    },
-    "Materials": {
-        "Nexa Materials": 10.0,
-        "Random Forest Baseline": 9.2,
-    },
-    "QST": {
-        "Nexa PIN Model": 8.0,
-        "Quantum TomoNet": 8.5,
-    },
-    "HEP": {
-        "Nexa HEP Model": 9.1,
-        "CMSNet": 9.4,
-    },
-    "CFD": {
-        "Nexa CFD Model": 9.2,
-        "FlowNet": 8.9,
-    },
-}
-# Data for LLMs (Demo Data)
-# Layout: evaluation track -> {model display name -> score}.
-# The track names populate the "LLMs" dropdown; scores are plotted on the
-# same 0-10 axis as the tabular data.
-LLM_MODEL_EVALS = {
-    "LLM (General OSIR)": {
-        "Nexa Mistral Sci-7B": 6.1,
-        "Llama-3-8B-Instruct": 3.9,
-        "Mixtral-8x7B-Instruct-v0.1": 4.1,
-        "Claude-3-Sonnet": 6.4,
-        "GPT-4-Turbo": 6.8,
-        "GPT-4o": 7.1,
-    },
-    "LLM (Field-Specific OSIR)": {
-        "Nexa Bio Adapter": 6.6,
-        "Nexa Astro Adapter": 7.0,
-        "GPT-4o (Biomed)": 6.9,
-        "Claude-3-Opus (Bio)": 6.7,
-        "Llama-3-8B-Bio": 4.2,
-        "Mixtral-8x7B-BioTune": 4.3,
-    },
-}
-# Data for the Nexa Mistral Sci-7B evaluation.
-# Layout: model name -> metric name -> {OSIR variant -> score}.
-# The metric names populate the "Nexa Mistral Sci-7B" dropdown; each metric
-# holds one score per OSIR variant (general and physics field-specific).
-# NOTE(review): the original comment says these values were taken from an
-# image; that image is not part of this file, so they cannot be verified here.
-NEXA_MISTRAL_EVALS = {
-    "Nexa Mistral Sci-7B": {
-        "Scientific Utility": {"OSIR (General)": 7.0, "OSIR-Field (Physics)": 8.5},
-        "Symbolism & Math Logic": {"OSIR (General)": 6.0, "OSIR-Field (Physics)": 7.5},
-        "Citation & Structure": {"OSIR (General)": 5.5, "OSIR-Field (Physics)": 6.0},
-        "Thematic Grounding": {"OSIR (General)": 7.0, "OSIR-Field (Physics)": 8.0},
-        "Hypothesis Framing": {"OSIR (General)": 6.0, "OSIR-Field (Physics)": 7.0},
-        "Internal Consistency": {"OSIR (General)": 9.0, "OSIR-Field (Physics)": 9.5},
-        "Entropy / Novelty": {"OSIR (General)": 6.5, "OSIR-Field (Physics)": 6.0},
-    }
-}
-# Plotting function using Matplotlib
-def plot_comparison(domain, data_type):
-    if data_type == "mistral":
-        metric = domain
-        data = NEXA_MISTRAL_EVALS["Nexa Mistral Sci-7B"][metric]
-        models = list(data.keys())
-        scores = list(data.values())
-        fig, ax = plt.subplots(figsize=(8, 6), facecolor='#e0e0e0')
-        y_pos = np.arange(len(models))
-        width = 0.35
-        ax.barh(y_pos - width/2, scores[:1], width, label=models[0], color='yellow')
-        ax.barh(y_pos + width/2, scores[1:], width, label=models[1], color='orange')
-    else:
-        data = TABULAR_MODEL_EVALS[domain] if data_type == "tabular" else LLM_MODEL_EVALS[domain]
-        models = list(data.keys())
-        scores = list(data.values())
-        fig, ax = plt.subplots(figsize=(8, 6), facecolor='#e0e0e0')
-        y_pos = np.arange(len(models))
-        width = 0.8
-        colors = ['indigo' if 'Nexa' in model else 'lightgray' if data_type == "tabular" else 'gray' for model in models]
-        ax.barh(y_pos, scores, width, color=colors)
-    ax.set_yticks(y_pos)
-    ax.set_yticklabels(models)
-    ax.set_xlabel('Score (1-10)')
-    ax.set_title(f"{('Nexa Mistral Sci-7B Evaluation: ' if data_type == 'mistral' else '')}{domain}")
-    ax.set_xlim(0, 10)
-    if data_type == "mistral":
-        ax.legend()
-    ax.grid(True, axis='x', linestyle='--', alpha=0.7)
-    plt.tight_layout()
-    return fig
-# Display functions
-def display_tabular_eval(domain):
-    return plot_comparison(domain, "tabular")
-def display_llm_eval(domain):
-    return plot_comparison(domain, "llm")
-def display_mistral_eval(metric):
-    return plot_comparison(metric, "mistral")
-# Gradio interface
-with gr.Blocks(css="body {font-family: 'Inter', sans-serif; background-color: #e0e0e0; color: #333;}") as demo:
-    gr.Markdown("""
-    # 🔬 Nexa Evals — Scientific ML Benchmark Suite
-    A benchmarking suite for Nexa models across various domains.
-    """)
-    with gr.Tabs():
-        with gr.TabItem("Tabular Models"):
-            with gr.Row():
-                tabular_domain = gr.Dropdown(
-                    choices=list(TABULAR_MODEL_EVALS.keys()),
-                    label="Select Domain",
-                    value="Proteins"
-                )
-                show_tabular_btn = gr.Button("Show Evaluation")
-            tabular_plot = gr.Plot(label="Benchmark Plot")
-            show_tabular_btn.click(
-                fn=display_tabular_eval,
-                inputs=tabular_domain,
-                outputs=tabular_plot
-            )
-        with gr.TabItem("LLMs"):
-            with gr.Row():
-                llm_domain = gr.Dropdown(
-                    choices=list(LLM_MODEL_EVALS.keys()),
-                    label="Select Domain",
-                    value="LLM (General OSIR)"
-                )
-                show_llm_btn = gr.Button("Show Evaluation")
-            llm_plot = gr.Plot(label="Benchmark Plot")
-            show_llm_btn.click(
-                fn=display_llm_eval,
-                inputs=llm_domain,
-                outputs=llm_plot
-            )
-        with gr.TabItem("Nexa Mistral Sci-7B"):
-            with gr.Row():
-                mistral_metric = gr.Dropdown(
-                    choices=list(NEXA_MISTRAL_EVALS["Nexa Mistral Sci-7B"].keys()),
-                    label="Select Metric",
-                    value="Scientific Utility"
-                )
-                show_mistral_btn = gr.Button("Show Evaluation")
-            mistral_plot = gr.Plot(label="Benchmark Plot")
-            show_mistral_btn.click(
-                fn=display_mistral_eval,
-                inputs=mistral_metric,
-                outputs=mistral_plot
-            )
-    with gr.TabItem("About"):
-        gr.Markdown("""
-        # ℹ️ About Nexa Evals
-        Nexa Evals benchmarks Nexa models across scientific domains:
-        - **Tabular Models**: Compares Nexa models against baselines.
-        - **LLMs**: Evaluates Nexa language models against competitors.
-        - **Nexa Mistral Sci-7B**: Compares general and physics-specific performance.
-        Scores are on a 1-10 scale.
-        """)
-demo.launch()