lixuejing committed on
Commit
72d4c29
·
1 Parent(s): b8b086c
Files changed (1) hide show
  1. src/display/utils.py +46 -46
src/display/utils.py CHANGED
@@ -138,49 +138,49 @@ NUMERIC_INTERVALS = {
138
  }
139
 
140
  # Define the baselines
141
- baseline_row = {
142
- AutoEvalColumn.model.name: "<p>Baseline</p>",
143
- AutoEvalColumn.revision.name: "N/A",
144
- AutoEvalColumn.precision.name: None,
145
- AutoEvalColumn.average.name: 92.75,
146
- #AutoEvalColumn.merged.name: False,
147
- AutoEvalColumn.CMMMU.name: 100,
148
- AutoEvalColumn.MMMU.name: 100,
149
- AutoEvalColumn.MMMU_Pro_standard.name: 100,
150
- AutoEvalColumn.MMMU_Pro_vision.name: 100,
151
- AutoEvalColumn.MathVision.name: 100,
152
- AutoEvalColumn.CII_Bench.name: 100,
153
- AutoEvalColumn.Blink.name: 100,
154
- AutoEvalColumn.CharXiv.name: 100,
155
- AutoEvalColumn.MathVerse.name: 100,
156
- AutoEvalColumn.MmvetV2.name: 100,
157
- AutoEvalColumn.Ocrlite.name: 100,
158
- AutoEvalColumn.OcrliteZh.name: 100,
159
- AutoEvalColumn.dummy.name: "baseline",
160
- AutoEvalColumn.model_type.name: "",
161
- AutoEvalColumn.flagged.name: False,
162
- }
163
-
164
- # Define the human baselines
165
- human_baseline_row = {
166
- AutoEvalColumn.model.name: "<p>Human performance</p>",
167
- AutoEvalColumn.revision.name: "N/A",
168
- AutoEvalColumn.precision.name: None,
169
- AutoEvalColumn.average.name: 92.75,
170
- #AutoEvalColumn.merged.name: False,
171
- AutoEvalColumn.CMMMU.name: 100,
172
- AutoEvalColumn.MMMU.name: 100,
173
- AutoEvalColumn.MMMU_Pro_standard.name: 100,
174
- AutoEvalColumn.MMMU_Pro_vision.name: 100,
175
- AutoEvalColumn.MathVision.name: 100,
176
- AutoEvalColumn.CII_Bench.name: 100,
177
- AutoEvalColumn.Blink.name: 100,
178
- AutoEvalColumn.CharXiv.name: 100,
179
- AutoEvalColumn.MathVerse.name: 100,
180
- AutoEvalColumn.MmvetV2.name: 100,
181
- AutoEvalColumn.Ocrlite.name: 100,
182
- AutoEvalColumn.OcrliteZh.name: 100,
183
- AutoEvalColumn.dummy.name: "human_baseline",
184
- AutoEvalColumn.model_type.name: "",
185
- AutoEvalColumn.flagged.name: False,
186
- }
 
138
  }
139
 
140
  # Define the baselines
141
+ #baseline_row = {
142
+ # AutoEvalColumn.model.name: "<p>Baseline</p>",
143
+ # AutoEvalColumn.revision.name: "N/A",
144
+ # AutoEvalColumn.precision.name: None,
145
+ # AutoEvalColumn.average.name: 92.75,
146
+ # #AutoEvalColumn.merged.name: False,
147
+ # AutoEvalColumn.CMMMU.name: 100,
148
+ # AutoEvalColumn.MMMU.name: 100,
149
+ # AutoEvalColumn.MMMU_Pro_standard.name: 100,
150
+ # AutoEvalColumn.MMMU_Pro_vision.name: 100,
151
+ # AutoEvalColumn.MathVision.name: 100,
152
+ # AutoEvalColumn.CII_Bench.name: 100,
153
+ # AutoEvalColumn.Blink.name: 100,
154
+ # AutoEvalColumn.CharXiv.name: 100,
155
+ # AutoEvalColumn.MathVerse.name: 100,
156
+ # AutoEvalColumn.MmvetV2.name: 100,
157
+ # AutoEvalColumn.Ocrlite.name: 100,
158
+ # AutoEvalColumn.OcrliteZh.name: 100,
159
+ # AutoEvalColumn.dummy.name: "baseline",
160
+ # AutoEvalColumn.model_type.name: "",
161
+ # AutoEvalColumn.flagged.name: False,
162
+ #}
163
+ #
164
+ ## Define the human baselines
165
+ #human_baseline_row = {
166
+ # AutoEvalColumn.model.name: "<p>Human performance</p>",
167
+ # AutoEvalColumn.revision.name: "N/A",
168
+ # AutoEvalColumn.precision.name: None,
169
+ # AutoEvalColumn.average.name: 92.75,
170
+ # #AutoEvalColumn.merged.name: False,
171
+ # AutoEvalColumn.CMMMU.name: 100,
172
+ # AutoEvalColumn.MMMU.name: 100,
173
+ # AutoEvalColumn.MMMU_Pro_standard.name: 100,
174
+ # AutoEvalColumn.MMMU_Pro_vision.name: 100,
175
+ # AutoEvalColumn.MathVision.name: 100,
176
+ # AutoEvalColumn.CII_Bench.name: 100,
177
+ # AutoEvalColumn.Blink.name: 100,
178
+ # AutoEvalColumn.CharXiv.name: 100,
179
+ # AutoEvalColumn.MathVerse.name: 100,
180
+ # AutoEvalColumn.MmvetV2.name: 100,
181
+ # AutoEvalColumn.Ocrlite.name: 100,
182
+ # AutoEvalColumn.OcrliteZh.name: 100,
183
+ # AutoEvalColumn.dummy.name: "human_baseline",
184
+ # AutoEvalColumn.model_type.name: "",
185
+ # AutoEvalColumn.flagged.name: False,
186
+ #}