kostis-init committed on
Commit
444cb2e
·
1 Parent(s): 0030c6c

Add base LLM and modelling framework to submission metadata; update leaderboard columns

Browse files
Files changed (3) hide show
  1. src/config.py +1 -1
  2. src/hf_utils.py +29 -7
  3. src/ui.py +14 -7
src/config.py CHANGED
@@ -7,7 +7,7 @@ DS_RESULTS_PATH = "results"
7
 
8
  # leaderboard
9
  # LDB_COLS = ["Submission Name", "Solution Found (%)", "Consistency (%)", "Final Solution Accuracy (%)", "# of Models submitted"]
10
- LDB_COLS = ["Name", "Models Submitted (%)", "Accuracy (%)", "Runtime Errors (%)"]
11
 
12
 
13
  # modelling frameworks
 
7
 
8
  # leaderboard
9
  # LDB_COLS = ["Submission Name", "Solution Found (%)", "Consistency (%)", "Final Solution Accuracy (%)", "# of Models submitted"]
10
+ LDB_COLS = ["Name", 'Modelling Framework', 'Base LLM', "Models Submitted (%)", "Accuracy (%)", "Runtime Errors (%)"]
11
 
12
 
13
  # modelling frameworks
src/hf_utils.py CHANGED
@@ -43,17 +43,38 @@ def load_leaderboard_data():
43
  if f.endswith("submission.jsonl") and f.startswith(DS_SUBMISSIONS_PATH + "/")
44
  ]
45
 
 
 
 
 
 
46
  # for file_path in summary_files:
47
  for file_path in submissions:
48
  dir_name = Path(file_path).parent.name
49
  if dir_name in processed_result_dirs:
50
  continue
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  processed_result_dirs.add(dir_name)
53
  entry = {LDB_COLS[0]: dir_name,
54
- LDB_COLS[1]: '*Calculating...*',
55
- LDB_COLS[2]: '*Calculating...*',
56
- LDB_COLS[3]: '*Calculating...*'}
 
 
57
 
58
  # check if summary file exists, otherwise skip
59
  if f"{DS_RESULTS_PATH}/{dir_name}/summary.txt" not in repo_files:
@@ -72,11 +93,11 @@ def load_leaderboard_data():
72
  with open(local_summary_path, "r", encoding="utf-8") as f:
73
  for line in f:
74
  if 'Error perc' in line:
75
- entry[LDB_COLS[3]] = float(line.split(":")[1].strip().replace("%", ""))
76
  if 'Final Solution Accuracy' in line:
77
- entry[LDB_COLS[2]] = float(line.split(":")[1].strip().replace("%", ""))
78
  if 'Submission coverage perc' in line:
79
- entry[LDB_COLS[1]] = float(line.split(":")[1].strip().replace("%", ""))
80
  os.remove(local_summary_path)
81
  else:
82
  print(f"Warning: Summary file {local_summary_path} does not exist or is empty.")
@@ -92,7 +113,7 @@ def load_leaderboard_data():
92
  return pd.DataFrame(leaderboard_entries)
93
 
94
 
95
- def upload_submission(uploaded_file, dir_name, report_file, model_framework):
96
  """Upload submission to Hugging Face Dataset."""
97
  if not HF_API:
98
  return False, "Hugging Face API not initialized"
@@ -119,6 +140,7 @@ def upload_submission(uploaded_file, dir_name, report_file, model_framework):
119
  metadata = {
120
  "submission_name": dir_name,
121
  "modelling_framework": model_framework,
 
122
  }
123
  HF_API.upload_file(
124
  path_or_fileobj=io.BytesIO(json.dumps(metadata, indent=4).encode('utf-8')),
 
43
  if f.endswith("submission.jsonl") and f.startswith(DS_SUBMISSIONS_PATH + "/")
44
  ]
45
 
46
+ metadata_files = [
47
+ f for f in repo_files
48
+ if f.endswith("metadata.json") and f.startswith(DS_SUBMISSIONS_PATH + "/")
49
+ ]
50
+
51
  # for file_path in summary_files:
52
  for file_path in submissions:
53
  dir_name = Path(file_path).parent.name
54
  if dir_name in processed_result_dirs:
55
  continue
56
 
57
+ # download metadata file of this submission
58
+ metadata_file = next((f for f in metadata_files if f.startswith(f"{DS_SUBMISSIONS_PATH}/{dir_name}/")), None)
59
+ if metadata_file:
60
+ local_metadata_path = hf_hub_download(
61
+ repo_id=DATASET_REPO_ID,
62
+ filename=metadata_file,
63
+ repo_type="dataset",
64
+ local_dir=os.path.join("local_hf_downloads", dir_name),
65
+ )
66
+ with open(local_metadata_path, "r", encoding="utf-8") as f:
67
+ metadata = json.load(f)
68
+ os.remove(local_metadata_path)
69
+
70
+
71
  processed_result_dirs.add(dir_name)
72
  entry = {LDB_COLS[0]: dir_name,
73
+ LDB_COLS[1]: metadata.get("modelling_framework", "Unknown"),
74
+ LDB_COLS[2]: metadata.get("base_llm", "Unknown"),
75
+ LDB_COLS[3]: '*Calculating...*',
76
+ LDB_COLS[4]: '*Calculating...*',
77
+ LDB_COLS[5]: '*Calculating...*'}
78
 
79
  # check if summary file exists, otherwise skip
80
  if f"{DS_RESULTS_PATH}/{dir_name}/summary.txt" not in repo_files:
 
93
  with open(local_summary_path, "r", encoding="utf-8") as f:
94
  for line in f:
95
  if 'Error perc' in line:
96
+ entry[LDB_COLS[4]] = float(line.split(":")[1].strip().replace("%", ""))
97
  if 'Final Solution Accuracy' in line:
98
+ entry[LDB_COLS[3]] = float(line.split(":")[1].strip().replace("%", ""))
99
  if 'Submission coverage perc' in line:
100
+ entry[LDB_COLS[2]] = float(line.split(":")[1].strip().replace("%", ""))
101
  os.remove(local_summary_path)
102
  else:
103
  print(f"Warning: Summary file {local_summary_path} does not exist or is empty.")
 
113
  return pd.DataFrame(leaderboard_entries)
114
 
115
 
116
+ def upload_submission(uploaded_file, dir_name, report_file, model_framework, base_llm):
117
  """Upload submission to Hugging Face Dataset."""
118
  if not HF_API:
119
  return False, "Hugging Face API not initialized"
 
140
  metadata = {
141
  "submission_name": dir_name,
142
  "modelling_framework": model_framework,
143
+ "base_llm": base_llm,
144
  }
145
  HF_API.upload_file(
146
  path_or_fileobj=io.BytesIO(json.dumps(metadata, indent=4).encode('utf-8')),
src/ui.py CHANGED
@@ -8,7 +8,7 @@ from src.hf_utils import load_leaderboard_data, upload_submission, check_name_ex
8
  from src.eval import start_background_evaluation
9
 
10
 
11
- def handle_upload(submission_name, uploaded_file, report_file, model_framework, progress=gr.Progress()):
12
  """Handle file upload and start evaluation."""
13
  if model_framework not in SUPPORTED_FRAMEWORKS:
14
  return f"Unsupported modelling framework: {model_framework}. Supported frameworks are: {', '.join(SUPPORTED_FRAMEWORKS)}"
@@ -22,13 +22,14 @@ def handle_upload(submission_name, uploaded_file, report_file, model_framework,
22
  # normalize the submission name
23
  submission_name = submission_name.strip().replace(" ", "_").lower()
24
  # keep only alphanumeric characters and underscores, restrict to 30 characters
25
- submission_name = "".join(
26
- c for c in submission_name if c.isalnum() or c == "_"
27
- )[:30]
28
 
29
  if not submission_name or submission_name.strip() == "":
30
  return "Submission name is required"
31
 
 
 
 
32
  if check_name_exists(submission_name):
33
  return f"Submission name '{submission_name}' already exists. Please choose a different name."
34
 
@@ -50,7 +51,7 @@ def handle_upload(submission_name, uploaded_file, report_file, model_framework,
50
  if not found_one:
51
  return "Empty file. Please upload a valid JSONL file."
52
 
53
- success, result = upload_submission(uploaded_file, submission_name, report_file, model_framework)
54
  if not success:
55
  return f"Upload failed: {result}"
56
 
@@ -116,7 +117,7 @@ def create_ui():
116
  label="Submission Name (required)",
117
  placeholder="Enter a unique name for your submission",
118
  interactive=True,
119
- info="This name will appear on the leaderboard"
120
  )
121
  model_framework = gr.Dropdown(
122
  label="Modelling Framework (required)",
@@ -128,6 +129,12 @@ def create_ui():
128
  allow_custom_value=False,
129
  filterable=False,
130
  )
 
 
 
 
 
 
131
 
132
  with gr.Row():
133
  report_file = gr.File(
@@ -153,7 +160,7 @@ def create_ui():
153
  # Event handlers
154
  upload_button.click(
155
  fn=handle_upload,
156
- inputs=[submission_name, submission_file, report_file, model_framework],
157
  outputs=[status_box],
158
  show_progress="full",
159
  )
 
8
  from src.eval import start_background_evaluation
9
 
10
 
11
+ def handle_upload(submission_name, uploaded_file, report_file, model_framework, base_llm, progress=gr.Progress()):
12
  """Handle file upload and start evaluation."""
13
  if model_framework not in SUPPORTED_FRAMEWORKS:
14
  return f"Unsupported modelling framework: {model_framework}. Supported frameworks are: {', '.join(SUPPORTED_FRAMEWORKS)}"
 
22
  # normalize the submission name
23
  submission_name = submission_name.strip().replace(" ", "_").lower()
24
  # keep only alphanumeric characters and underscores, restrict to 30 characters
25
+ submission_name = "".join(c for c in submission_name if c.isalnum() or c == "_")[:30]
 
 
26
 
27
  if not submission_name or submission_name.strip() == "":
28
  return "Submission name is required"
29
 
30
+ if not base_llm or base_llm.strip() == "":
31
+ return "Base LLM is required. Please specify the base language model used for generating the models."
32
+
33
  if check_name_exists(submission_name):
34
  return f"Submission name '{submission_name}' already exists. Please choose a different name."
35
 
 
51
  if not found_one:
52
  return "Empty file. Please upload a valid JSONL file."
53
 
54
+ success, result = upload_submission(uploaded_file, submission_name, report_file, model_framework, base_llm)
55
  if not success:
56
  return f"Upload failed: {result}"
57
 
 
117
  label="Submission Name (required)",
118
  placeholder="Enter a unique name for your submission",
119
  interactive=True,
120
+ info="This name will appear on the leaderboard. It is recommended that it represents the approach you used to generate the models (e.g. 'smart_prompting')",
121
  )
122
  model_framework = gr.Dropdown(
123
  label="Modelling Framework (required)",
 
129
  allow_custom_value=False,
130
  filterable=False,
131
  )
132
+ base_llm = gr.Textbox(
133
+ label="Base LLM (required)",
134
+ placeholder="Enter the base LLM used for generating the models (e.g., GPT-4, Llama-3.3)",
135
+ interactive=True,
136
+ info="The base LLM used for generating the models."
137
+ )
138
 
139
  with gr.Row():
140
  report_file = gr.File(
 
160
  # Event handlers
161
  upload_button.click(
162
  fn=handle_upload,
163
+ inputs=[submission_name, submission_file, report_file, model_framework, base_llm],
164
  outputs=[status_box],
165
  show_progress="full",
166
  )