Spaces: Running

mehran committed · Commit 0babe14 · 1 Parent(s): 352d5dc

remove comments

Browse files
- about.py +1 -5
- app.py +12 -29
- leaderboard/leaderboard.py +10 -35
- submission.py +17 -42
about.py
CHANGED
@@ -84,8 +84,4 @@ def render_about():
     MIZAN is a significant step towards the scientific and localized evaluation of language models for Persian, aiming to serve as a valuable assessment reference for researchers, developers, and anyone interested in practical language models.
     """)

-    return about_page
-
-# To test this function directly (if in a separate file):
-# if __name__ == '__main__':
-#     render_about().launch()
+    return about_page
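The deleted lines described an ad-hoc way to preview the About tab on its own. If that workflow is still useful, it can live in a throwaway script instead of the module itself; a minimal sketch, assuming render_about() returns a launchable gr.Blocks as the diff's `return about_page` suggests (the file name preview_about.py is hypothetical):

# preview_about.py -- hypothetical scratch script, not part of the Space
from about import render_about

if __name__ == "__main__":
    # Launch only the About page locally to check the Markdown rendering.
    render_about().launch()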
app.py
CHANGED
@@ -2,16 +2,11 @@ import gradio as gr
 from pathlib import Path
 import logging

-# Import LeaderboardApp from the correct location within the 'leaderboard' package
 from leaderboard.leaderboard import LeaderboardApp

-# Import UI rendering functions for other tabs
 from about import render_about
 from submission import render_submit

-# --- Logging Setup (Optional but Recommended) ---
-# You can centralize logging configuration here or ensure each module handles its own.
-# For simplicity, if other modules already configure logging, this might not be strictly needed here.
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s - %(levelname)s - %(module)s - %(message)s"
@@ -24,27 +19,19 @@ def create_app():
     """
     logger.info("Initializing MIZAN: A Persian LLM Leaderboard application...")

-    # Define the path to the leaderboard's configuration file
-    # This assumes app.py is in the project root, and leaderboard_config.yaml is inside the 'leaderboard' directory.
     config_file_path = Path("leaderboard/leaderboard_config.yaml")

     if not config_file_path.exists():
         logger.error(f"CRITICAL: Leaderboard configuration file not found at {config_file_path}. The application may not function correctly.")
-        # Optionally, you could raise an error here or return a Gradio interface indicating the error.

-    # Initialize the LeaderboardApp with the configuration path
     leaderboard_processor = LeaderboardApp(config_path=config_file_path)

-    # Load and process data for the leaderboard
     logger.info("Loading and processing leaderboard data...")
     leaderboard_processor.load_data()
     leaderboard_processor.handle_nulls_in_averages()
     leaderboard_processor.generate_model_rankings()
-    # leaderboard_processor.apply_rankings_to_dataframes() # This might be redundant if generate_model_rankings covers it
-    # leaderboard_processor.format_dataframes()
     logger.info("Leaderboard data processing complete.")

-    # Create the main Gradio interface using gr.Blocks
     with gr.Blocks(title="MIZAN: A Persian LLM Leaderboard") as demo:
         gr.Markdown("<h1 style='text-align: center; width: 100%; margin-bottom: 10px;'>🇮🇷 MIZAN: A Persian LLM Leaderboard</h1>")
         gr.Markdown("""<p style='font-size: 1.1em; text-align: center; max-width: 800px; margin: 0 auto 20px auto;'>
@@ -57,31 +44,27 @@ def create_app():
         with gr.Tabs():
             with gr.TabItem("LLM Benchmark"):
                 logger.info("Creating 'LLM Benchmark' tab content...")
-
-                # The create_gradio_interface method of LeaderboardApp should return a gr.Blocks or gr.Interface instance
-                leaderboard_processor.create_gradio_interface() # This directly adds its components to the current gr.Blocks scope
+                leaderboard_processor.create_gradio_interface()
                 logger.info("'LLM Benchmark' tab content created.")

-            with gr.TabItem("About MIZAN"):
-                logger.info("Creating 'About MIZAN' tab content...")
-                render_about()
-                logger.info("'About MIZAN' tab content created.")
+            with gr.TabItem("About MIZAN"):
+                logger.info("Creating 'About MIZAN' tab content...")
+                render_about()
+                logger.info("'About MIZAN' tab content created.")

             with gr.TabItem("Request New Model"):
                 logger.info("Creating 'Submit Your Model' tab content...")
-                render_submit()
+                render_submit()
                 logger.info("'Submit Your Model' tab content created.")

-    logger.info("MIZAN: A Persian LLM Leaderboard application interface created.")
+    logger.info("MIZAN: A Persian LLM Leaderboard application interface created.")
     return demo

 if __name__ == "__main__":
-    logger.info("Launching MIZAN: A Persian LLM Leaderboard application...")
-    pull_app = create_app()
+    logger.info("Launching MIZAN: A Persian LLM Leaderboard application...")
+    pull_app = create_app()
     pull_app.launch(
-        debug=True,
-        share=True
-        # server_name="0.0.0.0" # Uncomment to make accessible on your local network
+        debug=True,
+        share=True
     )
-    logger.info("MIZAN: A Persian LLM Leaderboard application has been launched.")
-    # Ensure there are no hidden/invalid characters after this line. A single newline character is standard.
+    logger.info("MIZAN: A Persian LLM Leaderboard application has been launched.")
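The tab wiring above leans on Gradio's context-manager composition: components instantiated while a `with gr.TabItem(...)` block is active attach to that tab, which is why plain calls to render_about(), render_submit(), and leaderboard_processor.create_gradio_interface() are enough to populate the tabs. A minimal sketch of the pattern, with render_greeting() as a hypothetical stand-in for those helpers:

import gradio as gr

def render_greeting():
    # Anything created here binds to whichever Blocks/Tab context is currently open.
    gr.Markdown("### Hello from a helper function")

def create_demo():
    with gr.Blocks(title="Tab composition sketch") as demo:
        with gr.Tabs():
            with gr.TabItem("First tab"):
                render_greeting()  # adds its Markdown to this tab
            with gr.TabItem("Second tab"):
                gr.Markdown("Content defined inline")
    return demo

if __name__ == "__main__":
    create_demo().launch()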
leaderboard/leaderboard.py
CHANGED
@@ -1,4 +1,3 @@
-# leaderboard/leaderboard.py
 import gradio as gr
 import pandas as pd
 import logging
@@ -7,14 +6,12 @@ import yaml
 from typing import Dict, List, Union, Optional, Any
 import numpy as np

-# --- Logging Setup ---
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s - %(levelname)s - %(module)s - %(message)s"
 )
 logger = logging.getLogger(__name__)

-# --- Path Definitions ---
 LEADERBOARD_DIR = Path(__file__).resolve().parent
 CONFIG_FILE_PATH = LEADERBOARD_DIR / "leaderboard_config.yaml"
 DATA_DIR = LEADERBOARD_DIR / "boards_data"
@@ -87,7 +84,6 @@ class LeaderboardApp:
         self._load_model_display_configs()

     def _load_global_settings(self) -> None:
-        # ... (unchanged from your previous version) ...
         if self.config_path and self.config_path.exists():
             try:
                 with open(self.config_path, 'r', encoding='utf-8') as f:
@@ -174,7 +170,6 @@
         return df_row[benchmark_cols].notna().sum()

     def generate_model_rankings(self) -> None:
-        # ... (unchanged - the current logic already pushes models without an average to the bottom) ...
         logger.info("Generating model rankings for each tab.")
         if not self.model_identifier_column:
             logger.error("`model_identifier_column` is not set. Cannot perform ranking.")
@@ -185,7 +180,7 @@
             main_score_col_for_tab = self.main_scores_map.get(task_key)
             if not main_score_col_for_tab or main_score_col_for_tab not in ranked_df.columns:
                 logger.warning(f"No main score column for task '{task_key}'. Ranking skipped.")
-                ranked_df["Rank"] = pd.NA
+                ranked_df["Rank"] = pd.NA
                 self.raw_dataframes[task_key] = ranked_df
                 continue
             ranked_df[main_score_col_for_tab] = pd.to_numeric(ranked_df[main_score_col_for_tab], errors='coerce')
@@ -194,7 +189,6 @@
             sort_by_cols = ['_has_main_score', '_sortable_main_score', self.model_identifier_column]
             ascending_order = [False, False, True]
             ranked_df = ranked_df.sort_values(by=sort_by_cols, ascending=ascending_order, na_position='last')
-            # Assign ranks only to rows that have a main score; others get NA
             ranked_df["Rank"] = pd.NA
             ranked_df.loc[ranked_df['_has_main_score'], "Rank"] = range(1, ranked_df['_has_main_score'].sum() + 1)

@@ -205,7 +199,6 @@

     @staticmethod
     def _format_value_as_percentage(value: Any, score_cutoff_for_percentage: float = 0.0) -> Any:
-        # ... (unchanged - this method may be used elsewhere) ...
         if pd.isna(value) or not isinstance(value, (int, float)): return value
         if value >= score_cutoff_for_percentage and 0 <= value <= 1.0: return f"{value * 100:.2f}%"
         return f"{value:.2f}" if isinstance(value, float) else value
@@ -235,13 +228,11 @@
                 formatted_df[col_name] = formatted_df[col_name].apply(self._format_parameters_count)
                 continue

-            if col_name == "Rank":
-                # Convert Rank to integer if possible, otherwise keep as is (e.g. for NA)
+            if col_name == "Rank":
                 try:
-                    # Attempt to convert to Int64 to handle pd.NA
                     formatted_df[col_name] = formatted_df[col_name].astype(pd.Int64Dtype())
                 except Exception:
-                    pass
+                    pass
                 continue


@@ -260,29 +251,27 @@
                     is_cell_numeric_type = False

                 if pd.isna(numeric_x):
-                    new_col_values.append("")
+                    new_col_values.append("")
                     continue

                 formatted_cell_value = original_value_for_cell

                 if is_cell_numeric_type:
-                    if is_mt_bench_tab:
+                    if is_mt_bench_tab:
                         if isinstance(numeric_x, float):
                             formatted_cell_value = f"{numeric_x:.2f}"
                         else:
                             formatted_cell_value = numeric_x
-                    else:
+                    else:
                         if isinstance(numeric_x, (int, float)) and 0 <= numeric_x <= 1.0:
                             val_multiplied = numeric_x * 100
-                            # If original was 0 or 1 (resulting in 0 or 100), format as integer
                             if numeric_x == 1.0 or numeric_x == 0.0:
                                 formatted_cell_value = f"{val_multiplied:.0f}" # "100" or "0"
                             else:
-                                # Otherwise, format to 2 decimal places (e.g., 88.00, 75.50)
                                 formatted_cell_value = f"{val_multiplied:.2f}"
                         elif isinstance(numeric_x, float):
                             formatted_cell_value = f"{numeric_x:.2f}"
-                        else:
+                        else:
                             formatted_cell_value = numeric_x

                 new_col_values.append(formatted_cell_value)
@@ -290,7 +279,6 @@
         return formatted_df

     def _apply_markdown_and_bolding(self, df_with_general_formats: pd.DataFrame) -> pd.DataFrame:
-        # ... (unchanged from your previous version) ...
         if df_with_general_formats.empty: return df_with_general_formats
         formatted_df = df_with_general_formats.copy()

@@ -312,10 +300,6 @@
             if col_name_original in formatted_df.columns:
                 def to_numeric_for_max(val):
                     if isinstance(val, str):
-                        # Percentage sign is no longer added, so no need to check for it here
-                        # if val.endswith('%'):
-                        #     try: return float(val[:-1])
-                        #     except ValueError: return -np.inf
                         try: return float(val) # Handles "88.00", "75.50", "100", "0"
                         except ValueError: return -np.inf
                     return val if pd.notna(val) else -np.inf
@@ -324,9 +308,8 @@

                 if not numeric_series_for_max.empty and numeric_series_for_max.notna().any() and \
                    pd.api.types.is_numeric_dtype(numeric_series_for_max) and not numeric_series_for_max.eq(-np.inf).all():
-                    max_val_numeric = numeric_series_for_max.max(skipna=True)
+                    max_val_numeric = numeric_series_for_max.max(skipna=True)
                     if pd.notna(max_val_numeric) and max_val_numeric != -np.inf:
-                        # Iterate using index to ensure correct .loc access
                         for i in numeric_series_for_max.index:
                             current_numeric_val = numeric_series_for_max.loc[i]
                             if pd.notna(current_numeric_val) and current_numeric_val == max_val_numeric:
@@ -340,7 +323,6 @@
                            cell_content.strip().lower() in ["n/a", "", "unknown", "nan"]): # Standardize NA display
                             formatted_df.loc[i, col_name_original] = ""
         return formatted_df
-    # ... (the rest of the LeaderboardApp methods remain unchanged, including _get_gr_datatypes, get_prepared_dataframe, make_update_fn_for_task_closure, _create_and_bind_dataframe_component, create_gradio_interface, run_standalone) ...

     @staticmethod
     def _get_gr_datatypes(df_with_original_cols: pd.DataFrame, model_id_col_original_name: str, score_cols_original_names: List[str]) -> List[str]:
@@ -353,13 +335,10 @@

         for col_name_original in df_with_original_cols.columns:
             if col_name_original == "Rank":
-                datatypes.append("number")
+                datatypes.append("number")
             elif col_name_original in markdown_cols_original_names:
                 datatypes.append("markdown")
             else:
-                # Most other formatted cells become strings
-                # Checking the dtype of the formatted column can be more robust
-                # For now, default to str for non-markdown, non-rank
                 datatypes.append("str")
         return datatypes

@@ -392,9 +371,7 @@

         if processed_df.empty: return pd.DataFrame()

-        # Apply cell formatting (this now includes the new number formatting rules)
         processed_df = self._apply_general_formatting_to_cells(processed_df, task_key)
-        # Apply markdown and bolding
         processed_df = self._apply_markdown_and_bolding(processed_df)

         if self.columns_to_hide:
@@ -403,16 +380,14 @@
             processed_df = processed_df.drop(columns=columns_to_drop_existing, errors='ignore')

         if "Rank" in processed_df.columns:
-            # Ensure Rank is first, if it exists
             cols_order = ["Rank"] + [col for col in processed_df.columns if col != "Rank"]
             processed_df = processed_df[cols_order]

-        # Convert Rank to string for display after all operations, to handle NA consistently with other strings
         if "Rank" in processed_df.columns:
             processed_df["Rank"] = processed_df["Rank"].apply(lambda x: str(int(x)) if pd.notna(x) and isinstance(x, (float,int)) and x == int(x) else (str(x) if pd.notna(x) else ""))


-        processed_df = processed_df.fillna("")
+        processed_df = processed_df.fillna("")
         return processed_df

     def make_update_fn_for_task_closure(self, task_key_for_df_data: str):
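The ranking hunks sort each tab on a has-score flag, then the numeric main score, then the model name, and assign Rank only to rows that actually have a score. A standalone sketch of that logic; the column names Model and Average and the helper-column construction are illustrative, since only the sort and rank-assignment lines appear in the diff:

import pandas as pd

df = pd.DataFrame({
    "Model": ["model-a", "model-b", "model-c", "model-d"],
    "Average": [0.61, None, 0.83, 0.61],
})

df["Average"] = pd.to_numeric(df["Average"], errors="coerce")
df["_has_main_score"] = df["Average"].notna()
df["_sortable_main_score"] = df["Average"].fillna(-float("inf"))

# Scored models first (descending score), ties broken by name; unscored models sink to the bottom.
df = df.sort_values(
    by=["_has_main_score", "_sortable_main_score", "Model"],
    ascending=[False, False, True],
    na_position="last",
)

# Rank only the rows that have a main score; the rest keep <NA>.
df["Rank"] = pd.NA
df.loc[df["_has_main_score"], "Rank"] = range(1, df["_has_main_score"].sum() + 1)

print(df.drop(columns=["_has_main_score", "_sortable_main_score"]))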
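The cell-formatting hunk keeps the earlier display rules: fractional scores in [0, 1] are scaled to a 0-100 display without a percent sign, exact 0 and 1 are shown without decimals, and other floats keep two decimals. A condensed sketch of that non-MT-Bench path (the real method also has a separate MT-Bench branch and NA handling):

def format_score(x):
    # Fractions in [0, 1] are shown on a 0-100 scale; whole 0/1 become "0"/"100".
    if isinstance(x, (int, float)) and 0 <= x <= 1.0:
        scaled = x * 100
        return f"{scaled:.0f}" if x in (0.0, 1.0) else f"{scaled:.2f}"
    if isinstance(x, float):
        return f"{x:.2f}"
    return x

assert format_score(0.755) == "75.50"
assert format_score(1.0) == "100"
assert format_score(7.25) == "7.25"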
submission.py
CHANGED
@@ -4,27 +4,23 @@ import os
 from datetime import datetime
 from pathlib import Path
 import pandas as pd
-import io
+import io
 import logging

 from huggingface_hub import HfApi, HfFolder, hf_hub_download
-from huggingface_hub.utils import HfHubHTTPError, EntryNotFoundError
+from huggingface_hub.utils import HfHubHTTPError, EntryNotFoundError

-# --- Logging Setup ---
-# (Add this if not already present, or integrate with a central logging config)
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s - %(levelname)s - %(module)s - %(message)s"
 )
 logger = logging.getLogger(__name__)

-# --- Hugging Face Hub Configuration ---
-# IMPORTANT: Replace with your actual repository details
-TARGET_REPO_ID = "MCINext/submitted-models" # e.g., "MehranS/MIZAN_submissions" # Suggested change for consistency
-TARGET_REPO_TYPE = "dataset" # Recommended type for storing data
-FILENAME_IN_REPO = "model_submissions.csv" # The name of the CSV file within the Hub repository
+TARGET_REPO_ID = "MCINext/submitted-models"
+TARGET_REPO_TYPE = "dataset"
+FILENAME_IN_REPO = "model_submissions.csv"

-
+
 CSV_HEADER = [
     'timestamp', 'model_name', 'base_model', 'revision',
     'precision', 'weight_type', 'model_type', 'status', 'submission_type'
@@ -32,10 +28,10 @@ CSV_HEADER = [

 def get_hf_token() -> str | None:
     """Retrieves the Hugging Face token from environment variables or HfFolder."""
-    token = os.environ.get("HF_TOKEN")
+    token = os.environ.get("HF_TOKEN")
     if not token:
         try:
-            token = HfFolder.get_token()
+            token = HfFolder.get_token()
         except Exception:
             logger.warning("Hugging Face token not found in HfFolder and HF_TOKEN env var is not set.")
             token = None
@@ -60,54 +56,48 @@ def add_new_eval_hf_to_hub(model_name_hf_id: str, revision_hf: str) -> gr.Markdo
     submission_data = {
         'timestamp': timestamp,
         'model_name': model_name_hf_id.strip(),
-        'base_model': 'N/A',
+        'base_model': 'N/A',
         'revision': revision_hf.strip() if revision_hf else 'main',
         'precision': 'To be fetched/determined',
         'weight_type': 'To be fetched/determined',
         'model_type': 'To be fetched/determined',
-        'status': 'pending_hub_submission',
-        'submission_type': 'huggingface_simple_form_to_hub'
+        'status': 'pending_hub_submission',
+        'submission_type': 'huggingface_simple_form_to_hub'
     }

     try:
-        # 1. Attempt to download the existing CSV from the Hub
         try:
             local_download_path = hf_hub_download(
                 repo_id=TARGET_REPO_ID,
                 filename=FILENAME_IN_REPO,
                 repo_type=TARGET_REPO_TYPE,
                 token=token,
-                # force_download=True,
+                # force_download=True,
             )
-            # Read the downloaded CSV into a pandas DataFrame
             df = pd.read_csv(local_download_path)
-            # Ensure columns match CSV_HEADER, add missing ones with NaN if necessary
             for col in CSV_HEADER:
                 if col not in df.columns:
                     df[col] = pd.NA
-            df = df[CSV_HEADER]
+            df = df[CSV_HEADER]
             file_exists_on_hub = True
             logger.info(f"Successfully downloaded existing '{FILENAME_IN_REPO}' from '{TARGET_REPO_ID}'.")
         except EntryNotFoundError:
             logger.info(f"'{FILENAME_IN_REPO}' not found in '{TARGET_REPO_ID}'. A new file will be created.")
-            df = pd.DataFrame(columns=CSV_HEADER)
+            df = pd.DataFrame(columns=CSV_HEADER)
             file_exists_on_hub = False
         except HfHubHTTPError as e:
             logger.error(f"HTTP error downloading '{FILENAME_IN_REPO}' from '{TARGET_REPO_ID}': {e.status_code} - {e.hf_raise}")
             error_html = f"<div style='color:red; padding:10px; border:1px solid red; border-radius:5px;'>⚠️ **Hub Error:** Could not access the repository '{TARGET_REPO_ID}'. (HTTP {e.status_code}). Please check token permissions and repository ID.</div>"
             return gr.Markdown(error_html)

-        # 2. Append the new submission data
         new_row_df = pd.DataFrame([submission_data])
         df = pd.concat([df, new_row_df], ignore_index=True)

-        # 3. Convert the DataFrame back to CSV in-memory
         csv_buffer = io.StringIO()
-        df.to_csv(csv_buffer, index=False, header=True)
+        df.to_csv(csv_buffer, index=False, header=True)
         csv_content_bytes = csv_buffer.getvalue().encode('utf-8')
         csv_buffer.close()

-        # 4. Upload the updated CSV content to the Hub
         commit_message = f"Add submission: {submission_data['model_name']} (rev: {submission_data['revision']})"
         if not file_exists_on_hub:
             commit_message = f"Create '{FILENAME_IN_REPO}' and add first submission: {submission_data['model_name']}"
@@ -135,7 +125,6 @@ def add_new_eval_hf_to_hub(model_name_hf_id: str, revision_hf: str) -> gr.Markdo


 def render_submit():
-    # Text for Introduction and Option 1 (Hugging Face Form)
     intro_and_option1_guidance = """
 # Request Model Evaluation for MIZAN

@@ -149,7 +138,6 @@ Please choose the submission path that best fits how your model can be accessed
 If your model and its tokenizer can be loaded directly using their Hugging Face identifier (e.g., `username/model_name`), you can use the simplified form below to submit its key identifiers. Your submission will be added to our central tracking repository on the Hugging Face Hub. Our team will attempt to gather other necessary details from the Hub.
 """

-    # Text for Option 2 (Email Submission)
     option2_email_guidance = """
 ---

@@ -183,7 +171,7 @@ Our team will review your email and work with you to facilitate the evaluation p
         submission_result_hf_form = gr.Markdown()

         request_hf_button.click(
-            fn=add_new_eval_hf_to_hub,
+            fn=add_new_eval_hf_to_hub,
             inputs=[
                 model_name_textbox_hf,
                 revision_name_textbox_hf,
@@ -193,17 +181,4 @@ Our team will review your email and work with you to facilitate the evaluation p

     gr.Markdown(option2_email_guidance)

-    return submit_tab_interface
-
-# For direct testing of this file:
-if __name__ == '__main__':
-    # You would need to set TARGET_REPO_ID and have a valid HF_TOKEN env var or be logged in.
-    # Example: os.environ["HF_TOKEN"] = "your_hf_write_token"
-    # TARGET_REPO_ID = "your-user/your-test-dataset" # Make sure this repo exists
-
-    if not TARGET_REPO_ID.startswith("YOUR_"): # Basic check to prevent running with placeholder
-        print(f"Testing submission to Hub. Target repo: {TARGET_REPO_ID}")
-        test_interface = render_submit()
-        test_interface.launch(debug=True)
-    else:
-        print("Please update TARGET_REPO_ID in submission.py before running this test.")
+    return submit_tab_interface
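The submission handler round-trips a single CSV on the Hub: download it if it exists, append one row, serialize it in memory, and push it back. The upload call itself sits outside the hunks shown here, so the sketch below assumes the standard HfApi.upload_file API; the repo id is a hypothetical placeholder, not the MCINext/submitted-models dataset used above:

import io
import pandas as pd
from huggingface_hub import HfApi, hf_hub_download
from huggingface_hub.utils import EntryNotFoundError

REPO_ID = "your-org/your-submissions-dataset"  # hypothetical placeholder
FILENAME = "model_submissions.csv"

def append_submission(row: dict, token: str) -> None:
    # 1. Fetch the current CSV, or start a fresh frame if the file does not exist yet.
    try:
        path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME,
                               repo_type="dataset", token=token)
        df = pd.read_csv(path)
    except EntryNotFoundError:
        df = pd.DataFrame(columns=list(row))

    # 2. Append the new submission and serialize to CSV in memory.
    df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)
    buffer = io.StringIO()
    df.to_csv(buffer, index=False)

    # 3. Upload the updated file back to the Hub in a single commit.
    HfApi().upload_file(
        path_or_fileobj=buffer.getvalue().encode("utf-8"),
        path_in_repo=FILENAME,
        repo_id=REPO_ID,
        repo_type="dataset",
        token=token,
        commit_message=f"Add submission: {row.get('model_name', 'unknown')}",
    )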