Allanatrix committed on
Commit
e4b89b2
·
verified ·
1 Parent(s): 31a042b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -58
app.py CHANGED
@@ -35,7 +35,7 @@ MODEL_EVALS = {
35
  },
36
  }
37
 
38
- # SCIEVAL/OSIR metrics data
39
  SCIEVAL_METRICS = {
40
  "Nexa Mistral Sci-7B": {
41
  "OSIR (General)": {
@@ -56,52 +56,11 @@ SCIEVAL_METRICS = {
56
  "Symbolism & Math Logic": 7.8,
57
  "Scientific Utility": 8.3
58
  }
59
- },
60
- # (Data below here is a demo⬇️)
61
- "GPT-4 Scientific": {
62
- "OSIR (General)": {
63
- "Entropy / Novelty": 7.8,
64
- "Internal Consistency": 8.2,
65
- "Hypothesis Framing": 8.1,
66
- "Thematic Grounding": 8.4,
67
- "Citation & Structure": 8.9,
68
- "Symbolism & Math Logic": 7.4,
69
- "Scientific Utility": 8.1
70
- },
71
- "OSIR-Field (Physics)": {
72
- "Entropy / Novelty": 7.2,
73
- "Internal Consistency": 8.6,
74
- "Hypothesis Framing": 8.3,
75
- "Thematic Grounding": 8.7,
76
- "Citation & Structure": 9.1,
77
- "Symbolism & Math Logic": 8.2,
78
- "Scientific Utility": 8.4
79
- }
80
- },
81
- "Claude Scientific": {
82
- "OSIR (General)": {
83
- "Entropy / Novelty": 7.5,
84
- "Internal Consistency": 9.1,
85
- "Hypothesis Framing": 7.9,
86
- "Thematic Grounding": 8.8,
87
- "Citation & Structure": 8.7,
88
- "Symbolism & Math Logic": 7.8,
89
- "Scientific Utility": 8.3
90
- },
91
- "OSIR-Field (Physics)": {
92
- "Entropy / Novelty": 7.4,
93
- "Internal Consistency": 9.2,
94
- "Hypothesis Framing": 8.1,
95
- "Thematic Grounding": 8.9,
96
- "Citation & Structure": 8.5,
97
- "Symbolism & Math Logic": 8.4,
98
- "Scientific Utility": 8.6
99
- }
100
  }
101
  }
102
 
103
  def plot_domain_benchmark(domain):
104
- """Create bar chart for domain-specific benchmarks"""
105
  models = list(MODEL_EVALS[domain].keys())
106
  scores = list(MODEL_EVALS[domain].values())
107
 
@@ -110,8 +69,9 @@ def plot_domain_benchmark(domain):
110
 
111
  fig = go.Figure()
112
  fig.add_trace(go.Bar(
113
- x=models,
114
- y=scores,
 
115
  marker_color=colors,
116
  text=[f'{score:.3f}' for score in scores],
117
  textposition='auto'
@@ -119,9 +79,9 @@ def plot_domain_benchmark(domain):
119
 
120
  fig.update_layout(
121
  title=f"Model Benchmark Scores — {domain}",
122
- xaxis_title="Model",
123
- yaxis_title="Score",
124
- yaxis_range=[0, 1.0],
125
  template="plotly_white",
126
  height=500,
127
  showlegend=False
@@ -129,7 +89,7 @@ def plot_domain_benchmark(domain):
129
  return fig
130
 
131
  def plot_scieval_comparison(model_name):
132
- """Create comparison chart for SCIEVAL metrics"""
133
  if model_name not in SCIEVAL_METRICS:
134
  return go.Figure()
135
 
@@ -141,8 +101,9 @@ def plot_scieval_comparison(model_name):
141
 
142
  fig.add_trace(go.Bar(
143
  name='OSIR (General)',
144
- x=metrics,
145
- y=osir_scores,
 
146
  marker_color='#FFD700',
147
  text=[f'{score:.1f}' for score in osir_scores],
148
  textposition='auto'
@@ -150,8 +111,9 @@ def plot_scieval_comparison(model_name):
150
 
151
  fig.add_trace(go.Bar(
152
  name='OSIR-Field (Physics)',
153
- x=metrics,
154
- y=field_scores,
 
155
  marker_color='#FF6B35',
156
  text=[f'{score:.1f}' for score in field_scores],
157
  textposition='auto'
@@ -159,13 +121,12 @@ def plot_scieval_comparison(model_name):
159
 
160
  fig.update_layout(
161
  title=f"SCIEVAL Metrics Comparison — {model_name}",
162
- xaxis_title="Metric",
163
- yaxis_title="Score (1-10)",
164
- yaxis_range=[0, 10],
165
  template="plotly_white",
166
  height=500,
167
- barmode='group',
168
- xaxis_tickangle=-45
169
  )
170
  return fig
171
 
 
35
  },
36
  }
37
 
38
+ # SCIEVAL/OSIR metrics data
39
  SCIEVAL_METRICS = {
40
  "Nexa Mistral Sci-7B": {
41
  "OSIR (General)": {
 
56
  "Symbolism & Math Logic": 7.8,
57
  "Scientific Utility": 8.3
58
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  }
60
  }
61
 
62
  def plot_domain_benchmark(domain):
63
+ """Create horizontal bar chart for domain-specific benchmarks"""
64
  models = list(MODEL_EVALS[domain].keys())
65
  scores = list(MODEL_EVALS[domain].values())
66
 
 
69
 
70
  fig = go.Figure()
71
  fig.add_trace(go.Bar(
72
+ y=models,
73
+ x=scores,
74
+ orientation='h',
75
  marker_color=colors,
76
  text=[f'{score:.3f}' for score in scores],
77
  textposition='auto'
 
79
 
80
  fig.update_layout(
81
  title=f"Model Benchmark Scores — {domain}",
82
+ yaxis_title="Model",
83
+ xaxis_title="Score",
84
+ xaxis_range=[0, 1.0],
85
  template="plotly_white",
86
  height=500,
87
  showlegend=False
 
89
  return fig
90
 
91
  def plot_scieval_comparison(model_name):
92
+ """Create horizontal comparison chart for SCIEVAL metrics"""
93
  if model_name not in SCIEVAL_METRICS:
94
  return go.Figure()
95
 
 
101
 
102
  fig.add_trace(go.Bar(
103
  name='OSIR (General)',
104
+ y=metrics,
105
+ x=osir_scores,
106
+ orientation='h',
107
  marker_color='#FFD700',
108
  text=[f'{score:.1f}' for score in osir_scores],
109
  textposition='auto'
 
111
 
112
  fig.add_trace(go.Bar(
113
  name='OSIR-Field (Physics)',
114
+ y=metrics,
115
+ x=field_scores,
116
+ orientation='h',
117
  marker_color='#FF6B35',
118
  text=[f'{score:.1f}' for score in field_scores],
119
  textposition='auto'
 
121
 
122
  fig.update_layout(
123
  title=f"SCIEVAL Metrics Comparison — {model_name}",
124
+ yaxis_title="Metric",
125
+ xaxis_title="Score (1-10)",
126
+ xaxis_range=[0, 10],
127
  template="plotly_white",
128
  height=500,
129
+ barmode='group'
 
130
  )
131
  return fig
132