Spaces:

Allanatrix
/

NexaEvals

Sleeping

App Files Files Community

Allanatrix committed on Jun 26

Commit

e4b89b2

verified ·

1 Parent(s): 31a042b

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -58

app.py CHANGED Viewed

@@ -35,7 +35,7 @@ MODEL_EVALS = {
     },
 }
-# SCIEVAL/OSIR metrics data
 SCIEVAL_METRICS = {
     "Nexa Mistral Sci-7B": {
         "OSIR (General)": {
@@ -56,52 +56,11 @@ SCIEVAL_METRICS = {
             "Symbolism & Math Logic": 7.8,
             "Scientific Utility": 8.3
         }
-    },
-    # (Data below here is a demo⬇️)
-    "GPT-4 Scientific": {
-        "OSIR (General)": {
-            "Entropy / Novelty": 7.8,
-            "Internal Consistency": 8.2,
-            "Hypothesis Framing": 8.1,
-            "Thematic Grounding": 8.4,
-            "Citation & Structure": 8.9,
-            "Symbolism & Math Logic": 7.4,
-            "Scientific Utility": 8.1
-        },
-        "OSIR-Field (Physics)": {
-            "Entropy / Novelty": 7.2,
-            "Internal Consistency": 8.6,
-            "Hypothesis Framing": 8.3,
-            "Thematic Grounding": 8.7,
-            "Citation & Structure": 9.1,
-            "Symbolism & Math Logic": 8.2,
-            "Scientific Utility": 8.4
-        }
-    },
-    "Claude Scientific": {
-        "OSIR (General)": {
-            "Entropy / Novelty": 7.5,
-            "Internal Consistency": 9.1,
-            "Hypothesis Framing": 7.9,
-            "Thematic Grounding": 8.8,
-            "Citation & Structure": 8.7,
-            "Symbolism & Math Logic": 7.8,
-            "Scientific Utility": 8.3
-        },
-        "OSIR-Field (Physics)": {
-            "Entropy / Novelty": 7.4,
-            "Internal Consistency": 9.2,
-            "Hypothesis Framing": 8.1,
-            "Thematic Grounding": 8.9,
-            "Citation & Structure": 8.5,
-            "Symbolism & Math Logic": 8.4,
-            "Scientific Utility": 8.6
-        }
     }
 }
 def plot_domain_benchmark(domain):
-    """Create bar chart for domain-specific benchmarks"""
     models = list(MODEL_EVALS[domain].keys())
     scores = list(MODEL_EVALS[domain].values())
@@ -110,8 +69,9 @@ def plot_domain_benchmark(domain):
     fig = go.Figure()
     fig.add_trace(go.Bar(
-        x=models,
-        y=scores,
         marker_color=colors,
         text=[f'{score:.3f}' for score in scores],
         textposition='auto'
@@ -119,9 +79,9 @@ def plot_domain_benchmark(domain):
     fig.update_layout(
         title=f"Model Benchmark Scores — {domain}",
-        xaxis_title="Model",
-        yaxis_title="Score",
-        yaxis_range=[0, 1.0],
         template="plotly_white",
         height=500,
         showlegend=False
@@ -129,7 +89,7 @@ def plot_domain_benchmark(domain):
     return fig
 def plot_scieval_comparison(model_name):
-    """Create comparison chart for SCIEVAL metrics"""
     if model_name not in SCIEVAL_METRICS:
         return go.Figure()
@@ -141,8 +101,9 @@ def plot_scieval_comparison(model_name):
     fig.add_trace(go.Bar(
         name='OSIR (General)',
-        x=metrics,
-        y=osir_scores,
         marker_color='#FFD700',
         text=[f'{score:.1f}' for score in osir_scores],
         textposition='auto'
@@ -150,8 +111,9 @@ def plot_scieval_comparison(model_name):
     fig.add_trace(go.Bar(
         name='OSIR-Field (Physics)',
-        x=metrics,
-        y=field_scores,
         marker_color='#FF6B35',
         text=[f'{score:.1f}' for score in field_scores],
         textposition='auto'
@@ -159,13 +121,12 @@ def plot_scieval_comparison(model_name):
     fig.update_layout(
         title=f"SCIEVAL Metrics Comparison — {model_name}",
-        xaxis_title="Metric",
-        yaxis_title="Score (1-10)",
-        yaxis_range=[0, 10],
         template="plotly_white",
         height=500,
-        barmode='group',
-        xaxis_tickangle=-45
     )
     return fig

     },
 }
+# SCIEVAL/OSIR metrics data
 SCIEVAL_METRICS = {
     "Nexa Mistral Sci-7B": {
         "OSIR (General)": {
             "Symbolism & Math Logic": 7.8,
             "Scientific Utility": 8.3
         }
     }
 }
 def plot_domain_benchmark(domain):
+    """Create horizontal bar chart for domain-specific benchmarks"""
     models = list(MODEL_EVALS[domain].keys())
     scores = list(MODEL_EVALS[domain].values())
     fig = go.Figure()
     fig.add_trace(go.Bar(
+        y=models,
+        x=scores,
+        orientation='h',
         marker_color=colors,
         text=[f'{score:.3f}' for score in scores],
         textposition='auto'
     fig.update_layout(
         title=f"Model Benchmark Scores — {domain}",
+        yaxis_title="Model",
+        xaxis_title="Score",
+        xaxis_range=[0, 1.0],
         template="plotly_white",
         height=500,
         showlegend=False
     return fig
 def plot_scieval_comparison(model_name):
+    """Create horizontal comparison chart for SCIEVAL metrics"""
     if model_name not in SCIEVAL_METRICS:
         return go.Figure()
     fig.add_trace(go.Bar(
         name='OSIR (General)',
+        y=metrics,
+        x=osir_scores,
+        orientation='h',
         marker_color='#FFD700',
         text=[f'{score:.1f}' for score in osir_scores],
         textposition='auto'
     fig.add_trace(go.Bar(
         name='OSIR-Field (Physics)',
+        y=metrics,
+        x=field_scores,
+        orientation='h',
         marker_color='#FF6B35',
         text=[f'{score:.1f}' for score in field_scores],
         textposition='auto'
     fig.update_layout(
         title=f"SCIEVAL Metrics Comparison — {model_name}",
+        yaxis_title="Metric",
+        xaxis_title="Score (1-10)",
+        xaxis_range=[0, 10],
         template="plotly_white",
         height=500,
+        barmode='group'
     )
     return fig