bhys committed on
Commit
2d6e814
·
verified ·
1 Parent(s): 6d991bb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -7
app.py CHANGED
@@ -17,6 +17,7 @@ from content import format_error, format_warning, format_log, TITLE, INTRODUCTIO
17
  CITATION_BUTTON_TEXT, model_hyperlink
18
 
19
  TOKEN = os.environ.get("TOKEN", None)
 
20
 
21
  OWNER = "autogenCTF"
22
  DATA_DATASET = f"{OWNER}/CTFAIA"
@@ -78,11 +79,10 @@ def get_dataframe_from_results(eval_results, split):
78
  df = pd.DataFrame(local_df)
79
  df = df.sort_values(by=["completion_level"], ascending=False)
80
 
81
- numeric_cols = [c for c in local_df.column_names if
82
- c in ["success_rate", "completion_level", "expertise", "reasoning", "comprehension"]]
83
  df[numeric_cols] = df[numeric_cols].multiply(100).round(decimals=2)
84
  df = df.style.format("{:.2%}", subset=numeric_cols)
85
- print(type(df))
86
  return df
87
 
88
 
@@ -134,6 +134,7 @@ def add_new_eval(
134
 
135
  print("Adding new eval")
136
 
 
137
  if model.lower() in set(
138
  [m.lower() for m in eval_results[dataset_version][val_or_test]["model"]]) and organisation.lower() in set(
139
  [o.lower() for o in eval_results[dataset_version][val_or_test]["organisation"]]):
@@ -155,6 +156,7 @@ def add_new_eval(
155
  comprehension = {'all': 0, 1: 0, 2: 0, 3: 0}
156
  num = {'all': 0, 1: 0, 2: 0, 3: 0}
157
 
 
158
  with open(f"scored/{organisation}_{model}.jsonl", "w") as scored_file:
159
  with open(file_path, 'r') as f:
160
  for ix, line in enumerate(f):
@@ -171,8 +173,7 @@ def add_new_eval(
171
  level = int(gold_results[val_or_test][task_name]["Level"])
172
  score = question_scorer(task, gold_results[val_or_test][task_name])
173
  except KeyError:
174
- return format_error(
175
- f"{task_name} not found in split {val_or_test}. Are you sure you submitted the correct file?")
176
 
177
  scored_file.write(
178
  json.dumps({
@@ -200,8 +201,8 @@ def add_new_eval(
200
  success_rate['all'] += 1
201
 
202
  for key in LEVELS:
203
- success_rate[key] = round_and_pad(success_rate[key] / num[key])
204
- completion_level[key] = round_and_pad(completion_level[key] / num[key])
205
  expertise[key] = round_and_pad(expertise[key] / num[key])
206
  reasoning[key] = round_and_pad(reasoning[key] / num[key])
207
  comprehension[key] = round_and_pad(comprehension[key] / num[key])
 
17
  CITATION_BUTTON_TEXT, model_hyperlink
18
 
19
  TOKEN = os.environ.get("TOKEN", None)
20
+ print(TOKEN)
21
 
22
  OWNER = "autogenCTF"
23
  DATA_DATASET = f"{OWNER}/CTFAIA"
 
79
  df = pd.DataFrame(local_df)
80
  df = df.sort_values(by=["completion_level"], ascending=False)
81
 
82
+ numeric_cols = [c for c in local_df.column_names
83
+ if c in ["success_rate", "completion_level"]]
84
  df[numeric_cols] = df[numeric_cols].multiply(100).round(decimals=2)
85
  df = df.style.format("{:.2%}", subset=numeric_cols)
 
86
  return df
87
 
88
 
 
134
 
135
  print("Adding new eval")
136
 
137
+ # Check whether the model/organisation combination already exists and print a warning message if it does
138
  if model.lower() in set(
139
  [m.lower() for m in eval_results[dataset_version][val_or_test]["model"]]) and organisation.lower() in set(
140
  [o.lower() for o in eval_results[dataset_version][val_or_test]["organisation"]]):
 
156
  comprehension = {'all': 0, 1: 0, 2: 0, 3: 0}
157
  num = {'all': 0, 1: 0, 2: 0, 3: 0}
158
 
159
+ # with open(f"scored/{organisation}_{model}.jsonl", "w") as scored_file:
160
  with open(f"scored/{organisation}_{model}.jsonl", "w") as scored_file:
161
  with open(file_path, 'r') as f:
162
  for ix, line in enumerate(f):
 
173
  level = int(gold_results[val_or_test][task_name]["Level"])
174
  score = question_scorer(task, gold_results[val_or_test][task_name])
175
  except KeyError:
176
+ return format_error(f"{task_name} not found in split {val_or_test}. Are you sure you submitted the correct file?")
 
177
 
178
  scored_file.write(
179
  json.dumps({
 
201
  success_rate['all'] += 1
202
 
203
  for key in LEVELS:
204
+ success_rate[key] = round_and_pad(success_rate[key] / num[key] / 100)
205
+ completion_level[key] = round_and_pad(completion_level[key] / num[key] / 1000)
206
  expertise[key] = round_and_pad(expertise[key] / num[key])
207
  reasoning[key] = round_and_pad(reasoning[key] / num[key])
208
  comprehension[key] = round_and_pad(comprehension[key] / num[key])