bhys committed on
Commit 6e02b3f · verified · 1 Parent(s): 716c50f

Upload folder using huggingface_hub
__pycache__/content.cpython-310.pyc ADDED
Binary file (4.97 kB)

__pycache__/scorer.cpython-310.pyc ADDED
Binary file (2.11 kB)
 
app.py CHANGED
@@ -31,35 +31,27 @@ YEAR_VERSION = "2024"
 
 os.makedirs("scored", exist_ok=True)
 
-"""Download the CTFAIA dataset from Hugging Face Hub"""
-snapshot_download(
-    repo_id="autogenCTF/CTFAIA",
-    repo_type="dataset",
-    local_dir='./CTFAIA',
-    local_dir_use_symlinks=True,
-    token=TOKEN
-)
-
-def print_files_and_sizes(directory):
-    for root, dirs, files in os.walk(directory):
-        for file in files:
-            file_path = os.path.join(root, file)
-            file_size = os.path.getsize(file_path)
-            print(f"File: {file_path} Size: {file_size} bytes")
-
-def get_all_folders(directory):
-    folders = []
-    for item in os.listdir(directory):
-        item_path = os.path.join(directory, item)
-        if os.path.isdir(item_path):
-            folders.append(str(item))
-    return folders
-
+all_version = ['2024', '20240423']
 
-all_version = get_all_folders('./CTFAIA')
+contact_infos = load_dataset(
+    CONTACT_DATASET,
+    token=TOKEN,
+    download_mode="force_redownload",
+    ignore_verifications=True
+)
 
+all_gold_dataset = {}
+all_gold_results = {}
 eval_results = {}
 for dataset_version in all_version:
+    all_gold_dataset[dataset_version] = load_dataset(
+        INTERNAL_DATA_DATASET,
+        dataset_version
+    )
+    all_gold_results[dataset_version] = {
+        split: {row["task_name"]: row for row in all_gold_dataset[dataset_version][split]}
+        for split in ["test", "validation"]
+    }
     eval_results[dataset_version] = load_dataset(
         RESULTS_DATASET, dataset_version,
         token=TOKEN,
@@ -67,9 +59,6 @@ for dataset_version in all_version:
         ignore_verifications=True
     )
 
-contact_infos = load_dataset(CONTACT_DATASET, token=TOKEN, download_mode="force_redownload",
-                             ignore_verifications=True)
-
 
 def get_dataframe_from_results(eval_results, split):
     local_df = eval_results[split]
@@ -124,8 +113,9 @@ def add_new_eval(
     print("Adding new eval")
 
     # Check if the combination model/org already exists and prints a warning message if yes
-    if model.lower() in set([m.lower() for m in eval_results[dataset_version][val_or_test]["model"]]) and organisation.lower() in set(
-        [o.lower() for o in eval_results[dataset_version][val_or_test]["organisation"]]):
+    if model.lower() in set(
+            [m.lower() for m in eval_results[dataset_version][val_or_test]["model"]]) and organisation.lower() in set(
+            [o.lower() for o in eval_results[dataset_version][val_or_test]["organisation"]]):
         return format_warning("This model has been already submitted.")
 
     if path_to_file is None:
@@ -141,10 +131,7 @@ def add_new_eval(
     )
 
     # Gold answers
-    gold_results = {}
-    print_files_and_sizes('./CTFAIA/' + dataset_version)
-    gold_dataset = load_dataset('./CTFAIA/' + dataset_version)
-    gold_results = {split: {row["task_name"]: row for row in gold_dataset[split]} for split in ["test", "validation"]}
+    gold_results = all_gold_results[dataset_version]
 
     # Compute score
     file_path = path_to_file.name
@@ -159,7 +146,6 @@ def add_new_eval(
         except Exception:
             return format_error(f"Line {ix} is incorrectly formatted. Please fix it and resubmit your file.")
 
-        print(task)
         if "final_answer" not in task:
             raise format_error(f"Line {ix} contains no final_answer key. Please fix it and resubmit your file.")
         answer = task["final_answer"]
@@ -186,7 +172,7 @@ def add_new_eval(
         num_questions["all"] += 1
         num_questions[level] += 1
     for task_name, task in gold_results[val_or_test].items():
-        level = task['Level']
+        level = int(task['Level'])
         total_scores["all"] += 10
         total_scores[level] += 10
 
@@ -212,7 +198,6 @@ def add_new_eval(
         "score_level3": scores[3] / total_scores[3] if total_scores[3] else 0,
     }
     eval_results[dataset_version][val_or_test] = eval_results[dataset_version][val_or_test].add_item(eval_entry)
-    print(eval_results)
     eval_results[dataset_version].push_to_hub(RESULTS_DATASET, config_name=dataset_version, token=TOKEN)
 
     contact_info = {
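For reference, the per-version gold index that the new module-level code builds can be pictured with plain dictionaries. A minimal sketch, assuming gold rows carry the "task_name", "Level", and "Final answer" fields used elsewhere in app.py; the sample values are invented stand-ins for what load_dataset(INTERNAL_DATA_DATASET, dataset_version) would return:

# Invented sample rows for one dataset version; real rows come from
# load_dataset(INTERNAL_DATA_DATASET, dataset_version).
sample_gold_dataset = {
    "validation": [
        {"task_name": "crypto_warmup", "Level": "1", "Final answer": "flag{abc}"},
        {"task_name": "pwn_stack", "Level": "3", "Final answer": "flag{xyz}"},
    ],
    "test": [
        {"task_name": "web_login", "Level": "2", "Final answer": "flag{123}"},
    ],
}

# Same comprehension as in the diff: one dict per split, keyed by task_name,
# so each submitted task can be matched to its gold row in O(1).
gold_results = {
    split: {row["task_name"]: row for row in sample_gold_dataset[split]}
    for split in ["test", "validation"]
}

assert gold_results["validation"]["pwn_stack"]["Final answer"] == "flag{xyz}"

Precomputing this index once per dataset version at startup is what lets add_new_eval() replace its per-submission load_dataset() call with a plain dictionary lookup.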
scorer.py CHANGED
@@ -41,8 +41,8 @@ def question_scorer(
     if user_task["final_answer"] == val["Final answer"]:
         score = val["Total score"]
     else:
-        for i, item in enumerate(val["score"]):
-            if user_task["score_answer"][i] in item["answer"] and item["score"] > score:
+        for i, item in enumerate(val["score"]["question"]):
+            if user_task["score_answer"][i] in val["score"]["answer"][i] and val["score"]["score"][i] > score:
                 score = item["score"]
     return score
 
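The reshaped loop reflects a change in how val["score"] is laid out: a list of per-question dicts becomes a dict of parallel lists, which is the layout the Hugging Face datasets library typically produces for a sequence-of-structs field. A minimal sketch with invented values; only the key names ("question", "answer", "score") come from the diff:

# Row-oriented layout the old loop iterated over (values invented).
old_score = [
    {"question": "q1", "answer": ["step one"], "score": 3},
    {"question": "q2", "answer": ["step two"], "score": 5},
]

# Column-oriented layout the new loop expects: three parallel lists
# indexed by the same i.
new_score = {
    "question": ["q1", "q2"],
    "answer": [["step one"], ["step two"]],
    "score": [3, 5],
}

# Hypothetical partial-credit pass over a submission's score_answer list:
# keep the highest score whose expected answers contain the submitted one.
score_answer = ["step one", "wrong"]
score = 0
for i, _question in enumerate(new_score["question"]):
    if score_answer[i] in new_score["answer"][i] and new_score["score"][i] > score:
        score = new_score["score"][i]
print(score)  # 3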