Upload folder using huggingface_hub
Browse files- __pycache__/content.cpython-310.pyc +0 -0
- __pycache__/scorer.cpython-310.pyc +0 -0
- app.py +22 -37
- scorer.py +2 -2
__pycache__/content.cpython-310.pyc
ADDED
Binary file (4.97 kB). View file
|
|
__pycache__/scorer.cpython-310.pyc
ADDED
Binary file (2.11 kB). View file
|
|
app.py
CHANGED
@@ -31,35 +31,27 @@ YEAR_VERSION = "2024"
|
|
31 |
|
32 |
os.makedirs("scored", exist_ok=True)
|
33 |
|
34 |
-
|
35 |
-
snapshot_download(
|
36 |
-
repo_id="autogenCTF/CTFAIA",
|
37 |
-
repo_type="dataset",
|
38 |
-
local_dir='./CTFAIA',
|
39 |
-
local_dir_use_symlinks=True,
|
40 |
-
token=TOKEN
|
41 |
-
)
|
42 |
-
|
43 |
-
def print_files_and_sizes(directory):
|
44 |
-
for root, dirs, files in os.walk(directory):
|
45 |
-
for file in files:
|
46 |
-
file_path = os.path.join(root, file)
|
47 |
-
file_size = os.path.getsize(file_path)
|
48 |
-
print(f"File: {file_path} Size: {file_size} bytes")
|
49 |
-
|
50 |
-
def get_all_folders(directory):
|
51 |
-
folders = []
|
52 |
-
for item in os.listdir(directory):
|
53 |
-
item_path = os.path.join(directory, item)
|
54 |
-
if os.path.isdir(item_path):
|
55 |
-
folders.append(str(item))
|
56 |
-
return folders
|
57 |
-
|
58 |
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
60 |
|
|
|
|
|
61 |
eval_results = {}
|
62 |
for dataset_version in all_version:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
eval_results[dataset_version] = load_dataset(
|
64 |
RESULTS_DATASET, dataset_version,
|
65 |
token=TOKEN,
|
@@ -67,9 +59,6 @@ for dataset_version in all_version:
|
|
67 |
ignore_verifications=True
|
68 |
)
|
69 |
|
70 |
-
contact_infos = load_dataset(CONTACT_DATASET, token=TOKEN, download_mode="force_redownload",
|
71 |
-
ignore_verifications=True)
|
72 |
-
|
73 |
|
74 |
def get_dataframe_from_results(eval_results, split):
|
75 |
local_df = eval_results[split]
|
@@ -124,8 +113,9 @@ def add_new_eval(
|
|
124 |
print("Adding new eval")
|
125 |
|
126 |
# Check if the combination model/org already exists and prints a warning message if yes
|
127 |
-
if model.lower() in set(
|
128 |
-
[
|
|
|
129 |
return format_warning("This model has been already submitted.")
|
130 |
|
131 |
if path_to_file is None:
|
@@ -141,10 +131,7 @@ def add_new_eval(
|
|
141 |
)
|
142 |
|
143 |
# Gold answers
|
144 |
-
gold_results =
|
145 |
-
print_files_and_sizes('./CTFAIA/' + dataset_version)
|
146 |
-
gold_dataset = load_dataset('./CTFAIA/' + dataset_version)
|
147 |
-
gold_results = {split: {row["task_name"]: row for row in gold_dataset[split]} for split in ["test", "validation"]}
|
148 |
|
149 |
# Compute score
|
150 |
file_path = path_to_file.name
|
@@ -159,7 +146,6 @@ def add_new_eval(
|
|
159 |
except Exception:
|
160 |
return format_error(f"Line {ix} is incorrectly formatted. Please fix it and resubmit your file.")
|
161 |
|
162 |
-
print(task)
|
163 |
if "final_answer" not in task:
|
164 |
raise format_error(f"Line {ix} contains no final_answer key. Please fix it and resubmit your file.")
|
165 |
answer = task["final_answer"]
|
@@ -186,7 +172,7 @@ def add_new_eval(
|
|
186 |
num_questions["all"] += 1
|
187 |
num_questions[level] += 1
|
188 |
for task_name, task in gold_results[val_or_test].items():
|
189 |
-
level = task['Level']
|
190 |
total_scores["all"] += 10
|
191 |
total_scores[level] += 10
|
192 |
|
@@ -212,7 +198,6 @@ def add_new_eval(
|
|
212 |
"score_level3": scores[3] / total_scores[3] if total_scores[3] else 0,
|
213 |
}
|
214 |
eval_results[dataset_version][val_or_test] = eval_results[dataset_version][val_or_test].add_item(eval_entry)
|
215 |
-
print(eval_results)
|
216 |
eval_results[dataset_version].push_to_hub(RESULTS_DATASET, config_name=dataset_version, token=TOKEN)
|
217 |
|
218 |
contact_info = {
|
|
|
31 |
|
32 |
os.makedirs("scored", exist_ok=True)
|
33 |
|
34 |
+
all_version = ['2024', '20240423']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
+
contact_infos = load_dataset(
|
37 |
+
CONTACT_DATASET,
|
38 |
+
token=TOKEN,
|
39 |
+
download_mode="force_redownload",
|
40 |
+
ignore_verifications=True
|
41 |
+
)
|
42 |
|
43 |
+
all_gold_dataset = {}
|
44 |
+
all_gold_results = {}
|
45 |
eval_results = {}
|
46 |
for dataset_version in all_version:
|
47 |
+
all_gold_dataset[dataset_version] = load_dataset(
|
48 |
+
INTERNAL_DATA_DATASET,
|
49 |
+
dataset_version
|
50 |
+
)
|
51 |
+
all_gold_results[dataset_version] = {
|
52 |
+
split: {row["task_name"]: row for row in all_gold_dataset[dataset_version][split]}
|
53 |
+
for split in ["test", "validation"]
|
54 |
+
}
|
55 |
eval_results[dataset_version] = load_dataset(
|
56 |
RESULTS_DATASET, dataset_version,
|
57 |
token=TOKEN,
|
|
|
59 |
ignore_verifications=True
|
60 |
)
|
61 |
|
|
|
|
|
|
|
62 |
|
63 |
def get_dataframe_from_results(eval_results, split):
|
64 |
local_df = eval_results[split]
|
|
|
113 |
print("Adding new eval")
|
114 |
|
115 |
# Check if the combination model/org already exists and prints a warning message if yes
|
116 |
+
if model.lower() in set(
|
117 |
+
[m.lower() for m in eval_results[dataset_version][val_or_test]["model"]]) and organisation.lower() in set(
|
118 |
+
[o.lower() for o in eval_results[dataset_version][val_or_test]["organisation"]]):
|
119 |
return format_warning("This model has been already submitted.")
|
120 |
|
121 |
if path_to_file is None:
|
|
|
131 |
)
|
132 |
|
133 |
# Gold answers
|
134 |
+
gold_results = all_gold_results[dataset_version]
|
|
|
|
|
|
|
135 |
|
136 |
# Compute score
|
137 |
file_path = path_to_file.name
|
|
|
146 |
except Exception:
|
147 |
return format_error(f"Line {ix} is incorrectly formatted. Please fix it and resubmit your file.")
|
148 |
|
|
|
149 |
if "final_answer" not in task:
|
150 |
raise format_error(f"Line {ix} contains no final_answer key. Please fix it and resubmit your file.")
|
151 |
answer = task["final_answer"]
|
|
|
172 |
num_questions["all"] += 1
|
173 |
num_questions[level] += 1
|
174 |
for task_name, task in gold_results[val_or_test].items():
|
175 |
+
level = int(task['Level'])
|
176 |
total_scores["all"] += 10
|
177 |
total_scores[level] += 10
|
178 |
|
|
|
198 |
"score_level3": scores[3] / total_scores[3] if total_scores[3] else 0,
|
199 |
}
|
200 |
eval_results[dataset_version][val_or_test] = eval_results[dataset_version][val_or_test].add_item(eval_entry)
|
|
|
201 |
eval_results[dataset_version].push_to_hub(RESULTS_DATASET, config_name=dataset_version, token=TOKEN)
|
202 |
|
203 |
contact_info = {
|
scorer.py
CHANGED
@@ -41,8 +41,8 @@ def question_scorer(
|
|
41 |
if user_task["final_answer"] == val["Final answer"]:
|
42 |
score = val["Total score"]
|
43 |
else:
|
44 |
-
for i, item in enumerate(val["score"]):
|
45 |
-
if user_task["score_answer"][i] in
|
46 |
score = item["score"]
|
47 |
return score
|
48 |
|
|
|
41 |
if user_task["final_answer"] == val["Final answer"]:
|
42 |
score = val["Total score"]
|
43 |
else:
|
44 |
+
for i, item in enumerate(val["score"]["question"]):
|
45 |
+
if user_task["score_answer"][i] in val["score"]["answer"][i] and val["score"]["score"][i] > score:
|
46 |
score = item["score"]
|
47 |
return score
|
48 |
|