bhys committed on
Commit
b7695f0
·
verified ·
1 Parent(s): e2800b2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -29
app.py CHANGED
@@ -73,16 +73,17 @@ def get_dataframe_from_results(eval_results, split):
73
  local_df = local_df.remove_columns(["url"])
74
  local_df = local_df.rename_column("model", "Model name")
75
  local_df = local_df.rename_column("model_family", "Model family")
76
- # local_df = local_df.rename_column("score", "Average score (%)")
77
- # for i in [1, 2, 3]:
78
- # local_df = local_df.rename_column(f"score_level{i}", f"Level {i} score (%)")
79
  df = pd.DataFrame(local_df)
80
  df = df.sort_values(by=["completion_level"], ascending=False)
81
 
82
- numeric_cols = [c for c in local_df.column_names
83
- if c in ["success_rate", "completion_level"]]
84
- df[numeric_cols] = df[numeric_cols].multiply(100).round(decimals=2)
85
- df = df.style.format("{:.2%}", subset=numeric_cols)
 
 
 
 
86
  return df
87
 
88
 
@@ -103,20 +104,6 @@ TYPES = ["markdown", "str", "str", "str", "number", "number", "number", "number"
103
  LEVELS = ["all", 1, 2, 3]
104
 
105
 
106
- def round_and_pad(number, ndigits=2):
107
- # 四舍五入到指定的小数位数
108
- rounded_number = round(number, ndigits)
109
- # 转换为字符串
110
- number_str = str(rounded_number)
111
- # 分离整数部分和小数部分
112
- integer_part, decimal_part = number_str.split('.')
113
- # 如果小数部分不足指定的位数,补零
114
- while len(decimal_part) < ndigits:
115
- decimal_part += '0'
116
- # 拼接回去,并转换回数字
117
- return '.'.join([integer_part, decimal_part])
118
-
119
-
120
  def add_new_eval(
121
  dataset_version: str,
122
  model: str,
@@ -156,7 +143,6 @@ def add_new_eval(
156
  comprehension = {'all': 0, 1: 0, 2: 0, 3: 0}
157
  num = {'all': 0, 1: 0, 2: 0, 3: 0}
158
 
159
- # with open(f"scored/{organisation}_{model}.jsonl", "w") as scored_file:
160
  with open(f"scored/{organisation}_{model}.jsonl", "w") as scored_file:
161
  with open(file_path, 'r') as f:
162
  for ix, line in enumerate(f):
@@ -173,7 +159,8 @@ def add_new_eval(
173
  level = int(gold_results[val_or_test][task_name]["Level"])
174
  score = question_scorer(task, gold_results[val_or_test][task_name])
175
  except KeyError:
176
- return format_error(f"{task_name} not found in split {val_or_test}. Are you sure you submitted the correct file?")
 
177
 
178
  scored_file.write(
179
  json.dumps({
@@ -201,11 +188,11 @@ def add_new_eval(
201
  success_rate['all'] += 1
202
 
203
  for key in LEVELS:
204
- success_rate[key] = round_and_pad(success_rate[key] / num[key] / 100)
205
- completion_level[key] = round_and_pad(completion_level[key] / num[key] / 1000)
206
- expertise[key] = round_and_pad(expertise[key] / num[key])
207
- reasoning[key] = round_and_pad(reasoning[key] / num[key])
208
- comprehension[key] = round_and_pad(comprehension[key] / num[key])
209
 
210
  print(success_rate, completion_level, expertise, reasoning, comprehension)
211
 
@@ -265,7 +252,8 @@ def refresh():
265
  dataset_version,
266
  token=TOKEN,
267
  download_mode="force_redownload",
268
- verification_mode="no_checks"
 
269
  )
270
 
271
  new_eval_dataframe = {}
 
73
  local_df = local_df.remove_columns(["url"])
74
  local_df = local_df.rename_column("model", "Model name")
75
  local_df = local_df.rename_column("model_family", "Model family")
 
 
 
76
  df = pd.DataFrame(local_df)
77
  df = df.sort_values(by=["completion_level"], ascending=False)
78
 
79
+ numeric_cols = [c for c in local_df.column_names if c in ["expertise", "reasoning", "comprehension"]]
80
+ df[numeric_cols] = df[numeric_cols].round(decimals=2)
81
+
82
+ percent_cols = [c for c in local_df.column_names if c in ["success_rate", "completion_level"]]
83
+ df = df.style.format("{:.2%}", subset=percent_cols)
84
+
85
+ df = df[["Model name", "Model family", "organisation", "completion_level", "success_rate", "expertise", "reasoning",
86
+ "comprehension"]]
87
  return df
88
 
89
 
 
104
  LEVELS = ["all", 1, 2, 3]
105
 
106
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  def add_new_eval(
108
  dataset_version: str,
109
  model: str,
 
143
  comprehension = {'all': 0, 1: 0, 2: 0, 3: 0}
144
  num = {'all': 0, 1: 0, 2: 0, 3: 0}
145
 
 
146
  with open(f"scored/{organisation}_{model}.jsonl", "w") as scored_file:
147
  with open(file_path, 'r') as f:
148
  for ix, line in enumerate(f):
 
159
  level = int(gold_results[val_or_test][task_name]["Level"])
160
  score = question_scorer(task, gold_results[val_or_test][task_name])
161
  except KeyError:
162
+ return format_error(
163
+ f"{task_name} not found in split {val_or_test}. Are you sure you submitted the correct file?")
164
 
165
  scored_file.write(
166
  json.dumps({
 
188
  success_rate['all'] += 1
189
 
190
  for key in LEVELS:
191
+ success_rate[key] = success_rate[key] / num[key]
192
+ completion_level[key] = completion_level[key] / num[key] / 10
193
+ expertise[key] = expertise[key] / num[key]
194
+ reasoning[key] = reasoning[key] / num[key]
195
+ comprehension[key] = comprehension[key] / num[key]
196
 
197
  print(success_rate, completion_level, expertise, reasoning, comprehension)
198
 
 
252
  dataset_version,
253
  token=TOKEN,
254
  download_mode="force_redownload",
255
+ verification_mode="no_checks",
256
+ trust_remote_code=True
257
  )
258
 
259
  new_eval_dataframe = {}