Commit
·
444cb2e
1
Parent(s):
0030c6c
Add base LLM and modelling framework to submission metadata; update leaderboard columns
Browse files
- src/config.py +1 -1
- src/hf_utils.py +29 -7
- src/ui.py +14 -7
src/config.py
CHANGED
@@ -7,7 +7,7 @@ DS_RESULTS_PATH = "results"
|
|
7 |
|
8 |
# leaderboard
|
9 |
# LDB_COLS = ["Submission Name", "Solution Found (%)", "Consistency (%)", "Final Solution Accuracy (%)", "# of Models submitted"]
|
10 |
-
LDB_COLS = ["Name", "Models Submitted (%)", "Accuracy (%)", "Runtime Errors (%)"]
|
11 |
|
12 |
|
13 |
# modelling frameworks
|
|
|
7 |
|
8 |
# leaderboard
|
9 |
# LDB_COLS = ["Submission Name", "Solution Found (%)", "Consistency (%)", "Final Solution Accuracy (%)", "# of Models submitted"]
|
10 |
+
LDB_COLS = ["Name", 'Modelling Framework', 'Base LLM', "Models Submitted (%)", "Accuracy (%)", "Runtime Errors (%)"]
|
11 |
|
12 |
|
13 |
# modelling frameworks
|
src/hf_utils.py
CHANGED
@@ -43,17 +43,38 @@ def load_leaderboard_data():
|
|
43 |
if f.endswith("submission.jsonl") and f.startswith(DS_SUBMISSIONS_PATH + "/")
|
44 |
]
|
45 |
|
|
|
|
|
|
|
|
|
|
|
46 |
# for file_path in summary_files:
|
47 |
for file_path in submissions:
|
48 |
dir_name = Path(file_path).parent.name
|
49 |
if dir_name in processed_result_dirs:
|
50 |
continue
|
51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
processed_result_dirs.add(dir_name)
|
53 |
entry = {LDB_COLS[0]: dir_name,
|
54 |
-
LDB_COLS[1]:
|
55 |
-
LDB_COLS[2]:
|
56 |
-
LDB_COLS[3]: '*Calculating...*'
|
|
|
|
|
57 |
|
58 |
# check if summary file exists, otherwise skip
|
59 |
if f"{DS_RESULTS_PATH}/{dir_name}/summary.txt" not in repo_files:
|
@@ -72,11 +93,11 @@ def load_leaderboard_data():
|
|
72 |
with open(local_summary_path, "r", encoding="utf-8") as f:
|
73 |
for line in f:
|
74 |
if 'Error perc' in line:
|
75 |
-
entry[LDB_COLS[
|
76 |
if 'Final Solution Accuracy' in line:
|
77 |
-
entry[LDB_COLS[
|
78 |
if 'Submission coverage perc' in line:
|
79 |
-
entry[LDB_COLS[
|
80 |
os.remove(local_summary_path)
|
81 |
else:
|
82 |
print(f"Warning: Summary file {local_summary_path} does not exist or is empty.")
|
@@ -92,7 +113,7 @@ def load_leaderboard_data():
|
|
92 |
return pd.DataFrame(leaderboard_entries)
|
93 |
|
94 |
|
95 |
-
def upload_submission(uploaded_file, dir_name, report_file, model_framework):
|
96 |
"""Upload submission to Hugging Face Dataset."""
|
97 |
if not HF_API:
|
98 |
return False, "Hugging Face API not initialized"
|
@@ -119,6 +140,7 @@ def upload_submission(uploaded_file, dir_name, report_file, model_framework):
|
|
119 |
metadata = {
|
120 |
"submission_name": dir_name,
|
121 |
"modelling_framework": model_framework,
|
|
|
122 |
}
|
123 |
HF_API.upload_file(
|
124 |
path_or_fileobj=io.BytesIO(json.dumps(metadata, indent=4).encode('utf-8')),
|
|
|
43 |
if f.endswith("submission.jsonl") and f.startswith(DS_SUBMISSIONS_PATH + "/")
|
44 |
]
|
45 |
|
46 |
+
metadata_files = [
|
47 |
+
f for f in repo_files
|
48 |
+
if f.endswith("metadata.json") and f.startswith(DS_SUBMISSIONS_PATH + "/")
|
49 |
+
]
|
50 |
+
|
51 |
# for file_path in summary_files:
|
52 |
for file_path in submissions:
|
53 |
dir_name = Path(file_path).parent.name
|
54 |
if dir_name in processed_result_dirs:
|
55 |
continue
|
56 |
|
57 |
+
# download metadata file of this submission
|
58 |
+
metadata_file = next((f for f in metadata_files if f.startswith(f"{DS_SUBMISSIONS_PATH}/{dir_name}/")), None)
|
59 |
+
if metadata_file:
|
60 |
+
local_metadata_path = hf_hub_download(
|
61 |
+
repo_id=DATASET_REPO_ID,
|
62 |
+
filename=metadata_file,
|
63 |
+
repo_type="dataset",
|
64 |
+
local_dir=os.path.join("local_hf_downloads", dir_name),
|
65 |
+
)
|
66 |
+
with open(local_metadata_path, "r", encoding="utf-8") as f:
|
67 |
+
metadata = json.load(f)
|
68 |
+
os.remove(local_metadata_path)
|
69 |
+
|
70 |
+
|
71 |
processed_result_dirs.add(dir_name)
|
72 |
entry = {LDB_COLS[0]: dir_name,
|
73 |
+
LDB_COLS[1]: metadata.get("modelling_framework", "Unknown"),
|
74 |
+
LDB_COLS[2]: metadata.get("base_llm", "Unknown"),
|
75 |
+
LDB_COLS[3]: '*Calculating...*',
|
76 |
+
LDB_COLS[4]: '*Calculating...*',
|
77 |
+
LDB_COLS[5]: '*Calculating...*'}
|
78 |
|
79 |
# check if summary file exists, otherwise skip
|
80 |
if f"{DS_RESULTS_PATH}/{dir_name}/summary.txt" not in repo_files:
|
|
|
93 |
with open(local_summary_path, "r", encoding="utf-8") as f:
|
94 |
for line in f:
|
95 |
if 'Error perc' in line:
|
96 |
+
entry[LDB_COLS[4]] = float(line.split(":")[1].strip().replace("%", ""))
|
97 |
if 'Final Solution Accuracy' in line:
|
98 |
+
entry[LDB_COLS[3]] = float(line.split(":")[1].strip().replace("%", ""))
|
99 |
if 'Submission coverage perc' in line:
|
100 |
+
entry[LDB_COLS[2]] = float(line.split(":")[1].strip().replace("%", ""))
|
101 |
os.remove(local_summary_path)
|
102 |
else:
|
103 |
print(f"Warning: Summary file {local_summary_path} does not exist or is empty.")
|
|
|
113 |
return pd.DataFrame(leaderboard_entries)
|
114 |
|
115 |
|
116 |
+
def upload_submission(uploaded_file, dir_name, report_file, model_framework, base_llm):
|
117 |
"""Upload submission to Hugging Face Dataset."""
|
118 |
if not HF_API:
|
119 |
return False, "Hugging Face API not initialized"
|
|
|
140 |
metadata = {
|
141 |
"submission_name": dir_name,
|
142 |
"modelling_framework": model_framework,
|
143 |
+
"base_llm": base_llm,
|
144 |
}
|
145 |
HF_API.upload_file(
|
146 |
path_or_fileobj=io.BytesIO(json.dumps(metadata, indent=4).encode('utf-8')),
|
src/ui.py
CHANGED
@@ -8,7 +8,7 @@ from src.hf_utils import load_leaderboard_data, upload_submission, check_name_ex
|
|
8 |
from src.eval import start_background_evaluation
|
9 |
|
10 |
|
11 |
-
def handle_upload(submission_name, uploaded_file, report_file, model_framework, progress=gr.Progress()):
|
12 |
"""Handle file upload and start evaluation."""
|
13 |
if model_framework not in SUPPORTED_FRAMEWORKS:
|
14 |
return f"Unsupported modelling framework: {model_framework}. Supported frameworks are: {', '.join(SUPPORTED_FRAMEWORKS)}"
|
@@ -22,13 +22,14 @@ def handle_upload(submission_name, uploaded_file, report_file, model_framework,
|
|
22 |
# normalize the submission name
|
23 |
submission_name = submission_name.strip().replace(" ", "_").lower()
|
24 |
# keep only alphanumeric characters and underscores, restrict to 30 characters
|
25 |
-
submission_name = "".join(
|
26 |
-
c for c in submission_name if c.isalnum() or c == "_"
|
27 |
-
)[:30]
|
28 |
|
29 |
if not submission_name or submission_name.strip() == "":
|
30 |
return "Submission name is required"
|
31 |
|
|
|
|
|
|
|
32 |
if check_name_exists(submission_name):
|
33 |
return f"Submission name '{submission_name}' already exists. Please choose a different name."
|
34 |
|
@@ -50,7 +51,7 @@ def handle_upload(submission_name, uploaded_file, report_file, model_framework,
|
|
50 |
if not found_one:
|
51 |
return "Empty file. Please upload a valid JSONL file."
|
52 |
|
53 |
-
success, result = upload_submission(uploaded_file, submission_name, report_file, model_framework)
|
54 |
if not success:
|
55 |
return f"Upload failed: {result}"
|
56 |
|
@@ -116,7 +117,7 @@ def create_ui():
|
|
116 |
label="Submission Name (required)",
|
117 |
placeholder="Enter a unique name for your submission",
|
118 |
interactive=True,
|
119 |
-
info="This name will appear on the leaderboard"
|
120 |
)
|
121 |
model_framework = gr.Dropdown(
|
122 |
label="Modelling Framework (required)",
|
@@ -128,6 +129,12 @@ def create_ui():
|
|
128 |
allow_custom_value=False,
|
129 |
filterable=False,
|
130 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
|
132 |
with gr.Row():
|
133 |
report_file = gr.File(
|
@@ -153,7 +160,7 @@ def create_ui():
|
|
153 |
# Event handlers
|
154 |
upload_button.click(
|
155 |
fn=handle_upload,
|
156 |
-
inputs=[submission_name, submission_file, report_file, model_framework],
|
157 |
outputs=[status_box],
|
158 |
show_progress="full",
|
159 |
)
|
|
|
8 |
from src.eval import start_background_evaluation
|
9 |
|
10 |
|
11 |
+
def handle_upload(submission_name, uploaded_file, report_file, model_framework, base_llm, progress=gr.Progress()):
|
12 |
"""Handle file upload and start evaluation."""
|
13 |
if model_framework not in SUPPORTED_FRAMEWORKS:
|
14 |
return f"Unsupported modelling framework: {model_framework}. Supported frameworks are: {', '.join(SUPPORTED_FRAMEWORKS)}"
|
|
|
22 |
# normalize the submission name
|
23 |
submission_name = submission_name.strip().replace(" ", "_").lower()
|
24 |
# keep only alphanumeric characters and underscores, restrict to 30 characters
|
25 |
+
submission_name = "".join(c for c in submission_name if c.isalnum() or c == "_")[:30]
|
|
|
|
|
26 |
|
27 |
if not submission_name or submission_name.strip() == "":
|
28 |
return "Submission name is required"
|
29 |
|
30 |
+
if not base_llm or base_llm.strip() == "":
|
31 |
+
return "Base LLM is required. Please specify the base language model used for generating the models."
|
32 |
+
|
33 |
if check_name_exists(submission_name):
|
34 |
return f"Submission name '{submission_name}' already exists. Please choose a different name."
|
35 |
|
|
|
51 |
if not found_one:
|
52 |
return "Empty file. Please upload a valid JSONL file."
|
53 |
|
54 |
+
success, result = upload_submission(uploaded_file, submission_name, report_file, model_framework, base_llm)
|
55 |
if not success:
|
56 |
return f"Upload failed: {result}"
|
57 |
|
|
|
117 |
label="Submission Name (required)",
|
118 |
placeholder="Enter a unique name for your submission",
|
119 |
interactive=True,
|
120 |
+
info="This name will appear on the leaderboard. It is recommended that it represents the approach you used to generate the models (e.g. 'smart_prompting')",
|
121 |
)
|
122 |
model_framework = gr.Dropdown(
|
123 |
label="Modelling Framework (required)",
|
|
|
129 |
allow_custom_value=False,
|
130 |
filterable=False,
|
131 |
)
|
132 |
+
base_llm = gr.Textbox(
|
133 |
+
label="Base LLM (required)",
|
134 |
+
placeholder="Enter the base LLM used for generating the models (e.g., GPT-4, Llama-3.3)",
|
135 |
+
interactive=True,
|
136 |
+
info="The base LLM used for generating the models."
|
137 |
+
)
|
138 |
|
139 |
with gr.Row():
|
140 |
report_file = gr.File(
|
|
|
160 |
# Event handlers
|
161 |
upload_button.click(
|
162 |
fn=handle_upload,
|
163 |
+
inputs=[submission_name, submission_file, report_file, model_framework, base_llm],
|
164 |
outputs=[status_box],
|
165 |
show_progress="full",
|
166 |
)
|