Spaces: Running

mehran committed · Commit 0babe14 · 1 Parent(s): 352d5dc

remove comments

Browse files
- about.py +1 -5
- app.py +12 -29
- leaderboard/leaderboard.py +10 -35
- submission.py +17 -42
about.py
CHANGED
@@ -84,8 +84,4 @@ def render_about():
     MIZAN is a significant step towards the scientific and localized evaluation of language models for Persian, aiming to serve as a valuable assessment reference for researchers, developers, and anyone interested in practical language models.
     """)

-    return about_page
-
-# To test this function directly (if in a separate file):
-# if __name__ == '__main__':
-#     render_about().launch()
+    return about_page
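The deleted lines described an ad-hoc way to preview the About tab on its own. If that workflow is still useful, it can live in a throwaway script instead of the module itself; a minimal sketch, assuming render_about() returns a launchable gr.Blocks as the diff's `return about_page` suggests (the file name preview_about.py is hypothetical):

# preview_about.py -- hypothetical scratch script, not part of the Space
from about import render_about

if __name__ == "__main__":
    # Launch only the About page locally to check the Markdown rendering.
    render_about().launch()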
app.py
CHANGED
@@ -2,16 +2,11 @@ import gradio as gr
 from pathlib import Path
 import logging

-# Import LeaderboardApp from the correct location within the 'leaderboard' package
 from leaderboard.leaderboard import LeaderboardApp

-# Import UI rendering functions for other tabs
 from about import render_about
 from submission import render_submit

-# --- Logging Setup (Optional but Recommended) ---
-# You can centralize logging configuration here or ensure each module handles its own.
-# For simplicity, if other modules already configure logging, this might not be strictly needed here.
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s - %(levelname)s - %(module)s - %(message)s"
@@ -24,27 +19,19 @@ def create_app():
     """
     logger.info("Initializing MIZAN: A Persian LLM Leaderboard application...")

-    # Define the path to the leaderboard's configuration file
-    # This assumes app.py is in the project root, and leaderboard_config.yaml is inside the 'leaderboard' directory.
     config_file_path = Path("leaderboard/leaderboard_config.yaml")

     if not config_file_path.exists():
         logger.error(f"CRITICAL: Leaderboard configuration file not found at {config_file_path}. The application may not function correctly.")
-        # Optionally, you could raise an error here or return a Gradio interface indicating the error.

-    # Initialize the LeaderboardApp with the configuration path
     leaderboard_processor = LeaderboardApp(config_path=config_file_path)

-    # Load and process data for the leaderboard
     logger.info("Loading and processing leaderboard data...")
     leaderboard_processor.load_data()
     leaderboard_processor.handle_nulls_in_averages()
     leaderboard_processor.generate_model_rankings()
-    # leaderboard_processor.apply_rankings_to_dataframes() # This might be redundant if generate_model_rankings covers it
-    # leaderboard_processor.format_dataframes()
     logger.info("Leaderboard data processing complete.")

-    # Create the main Gradio interface using gr.Blocks
     with gr.Blocks(title="MIZAN: A Persian LLM Leaderboard") as demo:
         gr.Markdown("<h1 style='text-align: center; width: 100%; margin-bottom: 10px;'>🇮🇷 MIZAN: A Persian LLM Leaderboard</h1>")
         gr.Markdown("""<p style='font-size: 1.1em; text-align: center; max-width: 800px; margin: 0 auto 20px auto;'>
@@ -57,31 +44,27 @@ def create_app():
         with gr.Tabs():
             with gr.TabItem("LLM Benchmark"):
                 logger.info("Creating 'LLM Benchmark' tab content...")
-
-                # The create_gradio_interface method of LeaderboardApp should return a gr.Blocks or gr.Interface instance
-                leaderboard_processor.create_gradio_interface() # This directly adds its components to the current gr.Blocks scope
+                leaderboard_processor.create_gradio_interface()
                 logger.info("'LLM Benchmark' tab content created.")

-            with gr.TabItem("About MIZAN"):
-                logger.info("Creating 'About MIZAN' tab content...")
-                render_about()
-                logger.info("'About MIZAN' tab content created.")
+            with gr.TabItem("About MIZAN"):
+                logger.info("Creating 'About MIZAN' tab content...")
+                render_about()
+                logger.info("'About MIZAN' tab content created.")

             with gr.TabItem("Request New Model"):
                 logger.info("Creating 'Submit Your Model' tab content...")
-                render_submit()
+                render_submit()
                 logger.info("'Submit Your Model' tab content created.")

-    logger.info("MIZAN: A Persian LLM Leaderboard application interface created.")
+    logger.info("MIZAN: A Persian LLM Leaderboard application interface created.")
     return demo

 if __name__ == "__main__":
-    logger.info("Launching MIZAN: A Persian LLM Leaderboard application...")
-    pull_app = create_app()
+    logger.info("Launching MIZAN: A Persian LLM Leaderboard application...")
+    pull_app = create_app()
     pull_app.launch(
-        debug=True,
-        share=True
-        # server_name="0.0.0.0" # Uncomment to make accessible on your local network
+        debug=True,
+        share=True
     )
-    logger.info("MIZAN: A Persian LLM Leaderboard application has been launched.")
-    # Ensure there are no hidden/invalid characters after this line. A single newline character is standard.
+    logger.info("MIZAN: A Persian LLM Leaderboard application has been launched.")
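The tab wiring above leans on Gradio's context-manager composition: components instantiated while a `with gr.TabItem(...)` block is active attach to that tab, which is why plain calls to render_about(), render_submit(), and leaderboard_processor.create_gradio_interface() are enough to populate the tabs. A minimal sketch of the pattern, with render_greeting() as a hypothetical stand-in for those helpers:

import gradio as gr

def render_greeting():
    # Anything created here binds to whichever Blocks/Tab context is currently open.
    gr.Markdown("### Hello from a helper function")

def create_demo():
    with gr.Blocks(title="Tab composition sketch") as demo:
        with gr.Tabs():
            with gr.TabItem("First tab"):
                render_greeting()  # adds its Markdown to this tab
            with gr.TabItem("Second tab"):
                gr.Markdown("Content defined inline")
    return demo

if __name__ == "__main__":
    create_demo().launch()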
leaderboard/leaderboard.py
CHANGED
@@ -1,4 +1,3 @@
-# leaderboard/leaderboard.py
 import gradio as gr
 import pandas as pd
 import logging
@@ -7,14 +6,12 @@ import yaml
 from typing import Dict, List, Union, Optional, Any
 import numpy as np

-# --- Logging Setup ---
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s - %(levelname)s - %(module)s - %(message)s"
 )
 logger = logging.getLogger(__name__)

-# --- Path Definitions ---
 LEADERBOARD_DIR = Path(__file__).resolve().parent
 CONFIG_FILE_PATH = LEADERBOARD_DIR / "leaderboard_config.yaml"
 DATA_DIR = LEADERBOARD_DIR / "boards_data"
@@ -87,7 +84,6 @@ class LeaderboardApp:
         self._load_model_display_configs()

     def _load_global_settings(self) -> None:
-        # ... (unchanged from your previous version) ...
         if self.config_path and self.config_path.exists():
             try:
                 with open(self.config_path, 'r', encoding='utf-8') as f:
@@ -174,7 +170,6 @@
         return df_row[benchmark_cols].notna().sum()

     def generate_model_rankings(self) -> None:
-        # ... (unchanged - the current logic already pushes models without an average to the bottom) ...
         logger.info("Generating model rankings for each tab.")
         if not self.model_identifier_column:
             logger.error("`model_identifier_column` is not set. Cannot perform ranking.")
@@ -185,7 +180,7 @@
             main_score_col_for_tab = self.main_scores_map.get(task_key)
             if not main_score_col_for_tab or main_score_col_for_tab not in ranked_df.columns:
                 logger.warning(f"No main score column for task '{task_key}'. Ranking skipped.")
-                ranked_df["Rank"] = pd.NA
+                ranked_df["Rank"] = pd.NA
                 self.raw_dataframes[task_key] = ranked_df
                 continue
             ranked_df[main_score_col_for_tab] = pd.to_numeric(ranked_df[main_score_col_for_tab], errors='coerce')
@@ -194,7 +189,6 @@
             sort_by_cols = ['_has_main_score', '_sortable_main_score', self.model_identifier_column]
             ascending_order = [False, False, True]
             ranked_df = ranked_df.sort_values(by=sort_by_cols, ascending=ascending_order, na_position='last')
-            # Assign ranks only to rows that have a main score; others get NA
             ranked_df["Rank"] = pd.NA
             ranked_df.loc[ranked_df['_has_main_score'], "Rank"] = range(1, ranked_df['_has_main_score'].sum() + 1)

@@ -205,7 +199,6 @@

     @staticmethod
     def _format_value_as_percentage(value: Any, score_cutoff_for_percentage: float = 0.0) -> Any:
-        # ... (unchanged - this method may be used elsewhere) ...
         if pd.isna(value) or not isinstance(value, (int, float)): return value
         if value >= score_cutoff_for_percentage and 0 <= value <= 1.0: return f"{value * 100:.2f}%"
         return f"{value:.2f}" if isinstance(value, float) else value
@@ -235,13 +228,11 @@
                 formatted_df[col_name] = formatted_df[col_name].apply(self._format_parameters_count)
                 continue

-            if col_name == "Rank":
-                # Convert Rank to integer if possible, otherwise keep as is (e.g. for NA)
+            if col_name == "Rank":
                 try:
-                    # Attempt to convert to Int64 to handle pd.NA
                     formatted_df[col_name] = formatted_df[col_name].astype(pd.Int64Dtype())
                 except Exception:
-                    pass
+                    pass
                 continue


@@ -260,29 +251,27 @@
                     is_cell_numeric_type = False

                 if pd.isna(numeric_x):
-                    new_col_values.append("")
+                    new_col_values.append("")
                     continue

                 formatted_cell_value = original_value_for_cell

                 if is_cell_numeric_type:
-                    if is_mt_bench_tab:
+                    if is_mt_bench_tab:
                         if isinstance(numeric_x, float):
                             formatted_cell_value = f"{numeric_x:.2f}"
                         else:
                             formatted_cell_value = numeric_x
-                    else:
+                    else:
                         if isinstance(numeric_x, (int, float)) and 0 <= numeric_x <= 1.0:
                             val_multiplied = numeric_x * 100
-                            # If original was 0 or 1 (resulting in 0 or 100), format as integer
                             if numeric_x == 1.0 or numeric_x == 0.0:
                                 formatted_cell_value = f"{val_multiplied:.0f}" # "100" or "0"
                             else:
-                                # Otherwise, format to 2 decimal places (e.g., 88.00, 75.50)
                                 formatted_cell_value = f"{val_multiplied:.2f}"
                         elif isinstance(numeric_x, float):
                             formatted_cell_value = f"{numeric_x:.2f}"
-                        else:
+                        else:
                             formatted_cell_value = numeric_x

                 new_col_values.append(formatted_cell_value)
@@ -290,7 +279,6 @@
         return formatted_df

     def _apply_markdown_and_bolding(self, df_with_general_formats: pd.DataFrame) -> pd.DataFrame:
-        # ... (unchanged from your previous version) ...
         if df_with_general_formats.empty: return df_with_general_formats
         formatted_df = df_with_general_formats.copy()

@@ -312,10 +300,6 @@
             if col_name_original in formatted_df.columns:
                 def to_numeric_for_max(val):
                     if isinstance(val, str):
-                        # Percentage sign is no longer added, so no need to check for it here
-                        # if val.endswith('%'):
-                        #     try: return float(val[:-1])
-                        #     except ValueError: return -np.inf
                         try: return float(val) # Handles "88.00", "75.50", "100", "0"
                         except ValueError: return -np.inf
                     return val if pd.notna(val) else -np.inf
@@ -324,9 +308,8 @@

                 if not numeric_series_for_max.empty and numeric_series_for_max.notna().any() and \
                    pd.api.types.is_numeric_dtype(numeric_series_for_max) and not numeric_series_for_max.eq(-np.inf).all():
-                    max_val_numeric = numeric_series_for_max.max(skipna=True)
+                    max_val_numeric = numeric_series_for_max.max(skipna=True)
                     if pd.notna(max_val_numeric) and max_val_numeric != -np.inf:
-                        # Iterate using index to ensure correct .loc access
                         for i in numeric_series_for_max.index:
                             current_numeric_val = numeric_series_for_max.loc[i]
                             if pd.notna(current_numeric_val) and current_numeric_val == max_val_numeric:
@@ -340,7 +323,6 @@
                            cell_content.strip().lower() in ["n/a", "", "unknown", "nan"]): # Standardize NA display
                             formatted_df.loc[i, col_name_original] = ""
         return formatted_df
-    # ... (the rest of the LeaderboardApp methods remain unchanged, including _get_gr_datatypes, get_prepared_dataframe, make_update_fn_for_task_closure, _create_and_bind_dataframe_component, create_gradio_interface, run_standalone) ...

     @staticmethod
     def _get_gr_datatypes(df_with_original_cols: pd.DataFrame, model_id_col_original_name: str, score_cols_original_names: List[str]) -> List[str]:
@@ -353,13 +335,10 @@

         for col_name_original in df_with_original_cols.columns:
             if col_name_original == "Rank":
-                datatypes.append("number")
+                datatypes.append("number")
             elif col_name_original in markdown_cols_original_names:
                 datatypes.append("markdown")
             else:
-                # Most other formatted cells become strings
-                # Checking the dtype of the formatted column can be more robust
-                # For now, default to str for non-markdown, non-rank
                 datatypes.append("str")
         return datatypes

@@ -392,9 +371,7 @@

         if processed_df.empty: return pd.DataFrame()

-        # Apply cell formatting (this now includes the new number formatting rules)
         processed_df = self._apply_general_formatting_to_cells(processed_df, task_key)
-        # Apply markdown and bolding
         processed_df = self._apply_markdown_and_bolding(processed_df)

         if self.columns_to_hide:
@@ -403,16 +380,14 @@
             processed_df = processed_df.drop(columns=columns_to_drop_existing, errors='ignore')

         if "Rank" in processed_df.columns:
-            # Ensure Rank is first, if it exists
             cols_order = ["Rank"] + [col for col in processed_df.columns if col != "Rank"]
             processed_df = processed_df[cols_order]

-        # Convert Rank to string for display after all operations, to handle NA consistently with other strings
         if "Rank" in processed_df.columns:
             processed_df["Rank"] = processed_df["Rank"].apply(lambda x: str(int(x)) if pd.notna(x) and isinstance(x, (float,int)) and x == int(x) else (str(x) if pd.notna(x) else ""))


-        processed_df = processed_df.fillna("")
+        processed_df = processed_df.fillna("")
         return processed_df

     def make_update_fn_for_task_closure(self, task_key_for_df_data: str):
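The ranking hunks sort each tab on a has-score flag, then the numeric main score, then the model name, and assign Rank only to rows that actually have a score. A standalone sketch of that logic; the column names Model and Average and the helper-column construction are illustrative, since only the sort and rank-assignment lines appear in the diff:

import pandas as pd

df = pd.DataFrame({
    "Model": ["model-a", "model-b", "model-c", "model-d"],
    "Average": [0.61, None, 0.83, 0.61],
})

df["Average"] = pd.to_numeric(df["Average"], errors="coerce")
df["_has_main_score"] = df["Average"].notna()
df["_sortable_main_score"] = df["Average"].fillna(-float("inf"))

# Scored models first (descending score), ties broken by name; unscored models sink to the bottom.
df = df.sort_values(
    by=["_has_main_score", "_sortable_main_score", "Model"],
    ascending=[False, False, True],
    na_position="last",
)

# Rank only the rows that have a main score; the rest keep <NA>.
df["Rank"] = pd.NA
df.loc[df["_has_main_score"], "Rank"] = range(1, df["_has_main_score"].sum() + 1)

print(df.drop(columns=["_has_main_score", "_sortable_main_score"]))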
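The cell-formatting hunk keeps the earlier display rules: fractional scores in [0, 1] are scaled to a 0-100 display without a percent sign, exact 0 and 1 are shown without decimals, and other floats keep two decimals. A condensed sketch of that non-MT-Bench path (the real method also has a separate MT-Bench branch and NA handling):

def format_score(x):
    # Fractions in [0, 1] are shown on a 0-100 scale; whole 0/1 become "0"/"100".
    if isinstance(x, (int, float)) and 0 <= x <= 1.0:
        scaled = x * 100
        return f"{scaled:.0f}" if x in (0.0, 1.0) else f"{scaled:.2f}"
    if isinstance(x, float):
        return f"{x:.2f}"
    return x

assert format_score(0.755) == "75.50"
assert format_score(1.0) == "100"
assert format_score(7.25) == "7.25"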
submission.py
CHANGED
@@ -4,27 +4,23 @@ import os
 from datetime import datetime
 from pathlib import Path
 import pandas as pd
-import io
+import io
 import logging

 from huggingface_hub import HfApi, HfFolder, hf_hub_download
-from huggingface_hub.utils import HfHubHTTPError, EntryNotFoundError
+from huggingface_hub.utils import HfHubHTTPError, EntryNotFoundError

-# --- Logging Setup ---
-# (Add this if not already present, or integrate with a central logging config)
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s - %(levelname)s - %(module)s - %(message)s"
 )
 logger = logging.getLogger(__name__)

-# --- Hugging Face Hub Configuration ---
-# IMPORTANT: Replace with your actual repository details
-TARGET_REPO_ID = "MCINext/submitted-models" # e.g., "MehranS/MIZAN_submissions" # Suggested change for consistency
-TARGET_REPO_TYPE = "dataset" # Recommended type for storing data
-FILENAME_IN_REPO = "model_submissions.csv" # The name of the CSV file within the Hub repository
+TARGET_REPO_ID = "MCINext/submitted-models"
+TARGET_REPO_TYPE = "dataset"
+FILENAME_IN_REPO = "model_submissions.csv"

-
+
 CSV_HEADER = [
     'timestamp', 'model_name', 'base_model', 'revision',
     'precision', 'weight_type', 'model_type', 'status', 'submission_type'
@@ -32,10 +28,10 @@ CSV_HEADER = [

 def get_hf_token() -> str | None:
     """Retrieves the Hugging Face token from environment variables or HfFolder."""
-    token = os.environ.get("HF_TOKEN")
+    token = os.environ.get("HF_TOKEN")
     if not token:
         try:
-            token = HfFolder.get_token()
+            token = HfFolder.get_token()
         except Exception:
             logger.warning("Hugging Face token not found in HfFolder and HF_TOKEN env var is not set.")
             token = None
@@ -60,54 +56,48 @@ def add_new_eval_hf_to_hub(model_name_hf_id: str, revision_hf: str) -> gr.Markdo
     submission_data = {
         'timestamp': timestamp,
         'model_name': model_name_hf_id.strip(),
-        'base_model': 'N/A',
+        'base_model': 'N/A',
         'revision': revision_hf.strip() if revision_hf else 'main',
         'precision': 'To be fetched/determined',
         'weight_type': 'To be fetched/determined',
         'model_type': 'To be fetched/determined',
-        'status': 'pending_hub_submission',
-        'submission_type': 'huggingface_simple_form_to_hub'
+        'status': 'pending_hub_submission',
+        'submission_type': 'huggingface_simple_form_to_hub'
     }

     try:
-        # 1. Attempt to download the existing CSV from the Hub
         try:
             local_download_path = hf_hub_download(
                 repo_id=TARGET_REPO_ID,
                 filename=FILENAME_IN_REPO,
                 repo_type=TARGET_REPO_TYPE,
                 token=token,
-                # force_download=True,
+                # force_download=True,
             )
-            # Read the downloaded CSV into a pandas DataFrame
             df = pd.read_csv(local_download_path)
-            # Ensure columns match CSV_HEADER, add missing ones with NaN if necessary
             for col in CSV_HEADER:
                 if col not in df.columns:
                     df[col] = pd.NA
-            df = df[CSV_HEADER]
+            df = df[CSV_HEADER]
             file_exists_on_hub = True
             logger.info(f"Successfully downloaded existing '{FILENAME_IN_REPO}' from '{TARGET_REPO_ID}'.")
         except EntryNotFoundError:
             logger.info(f"'{FILENAME_IN_REPO}' not found in '{TARGET_REPO_ID}'. A new file will be created.")
-            df = pd.DataFrame(columns=CSV_HEADER)
+            df = pd.DataFrame(columns=CSV_HEADER)
             file_exists_on_hub = False
         except HfHubHTTPError as e:
             logger.error(f"HTTP error downloading '{FILENAME_IN_REPO}' from '{TARGET_REPO_ID}': {e.status_code} - {e.hf_raise}")
             error_html = f"<div style='color:red; padding:10px; border:1px solid red; border-radius:5px;'>⚠️ **Hub Error:** Could not access the repository '{TARGET_REPO_ID}'. (HTTP {e.status_code}). Please check token permissions and repository ID.</div>"
             return gr.Markdown(error_html)

-        # 2. Append the new submission data
         new_row_df = pd.DataFrame([submission_data])
         df = pd.concat([df, new_row_df], ignore_index=True)

-        # 3. Convert the DataFrame back to CSV in-memory
         csv_buffer = io.StringIO()
-        df.to_csv(csv_buffer, index=False, header=True)
+        df.to_csv(csv_buffer, index=False, header=True)
         csv_content_bytes = csv_buffer.getvalue().encode('utf-8')
         csv_buffer.close()

-        # 4. Upload the updated CSV content to the Hub
         commit_message = f"Add submission: {submission_data['model_name']} (rev: {submission_data['revision']})"
         if not file_exists_on_hub:
             commit_message = f"Create '{FILENAME_IN_REPO}' and add first submission: {submission_data['model_name']}"
@@ -135,7 +125,6 @@ def add_new_eval_hf_to_hub(model_name_hf_id: str, revision_hf: str) -> gr.Markdo


 def render_submit():
-    # Text for Introduction and Option 1 (Hugging Face Form)
     intro_and_option1_guidance = """
 # Request Model Evaluation for MIZAN

@@ -149,7 +138,6 @@ Please choose the submission path that best fits how your model can be accessed
 If your model and its tokenizer can be loaded directly using their Hugging Face identifier (e.g., `username/model_name`), you can use the simplified form below to submit its key identifiers. Your submission will be added to our central tracking repository on the Hugging Face Hub. Our team will attempt to gather other necessary details from the Hub.
 """

-    # Text for Option 2 (Email Submission)
     option2_email_guidance = """
 ---

@@ -183,7 +171,7 @@ Our team will review your email and work with you to facilitate the evaluation p
         submission_result_hf_form = gr.Markdown()

         request_hf_button.click(
-            fn=add_new_eval_hf_to_hub,
+            fn=add_new_eval_hf_to_hub,
             inputs=[
                 model_name_textbox_hf,
                 revision_name_textbox_hf,
@@ -193,17 +181,4 @@ Our team will review your email and work with you to facilitate the evaluation p

     gr.Markdown(option2_email_guidance)

-    return submit_tab_interface
-
-# For direct testing of this file:
-if __name__ == '__main__':
-    # You would need to set TARGET_REPO_ID and have a valid HF_TOKEN env var or be logged in.
-    # Example: os.environ["HF_TOKEN"] = "your_hf_write_token"
-    # TARGET_REPO_ID = "your-user/your-test-dataset" # Make sure this repo exists
-
-    if not TARGET_REPO_ID.startswith("YOUR_"): # Basic check to prevent running with placeholder
-        print(f"Testing submission to Hub. Target repo: {TARGET_REPO_ID}")
-        test_interface = render_submit()
-        test_interface.launch(debug=True)
-    else:
-        print("Please update TARGET_REPO_ID in submission.py before running this test.")
+    return submit_tab_interface
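The submission handler round-trips a single CSV on the Hub: download it if it exists, append one row, serialize it in memory, and push it back. The upload call itself sits outside the hunks shown here, so the sketch below assumes the standard HfApi.upload_file API; the repo id is a hypothetical placeholder, not the MCINext/submitted-models dataset used above:

import io
import pandas as pd
from huggingface_hub import HfApi, hf_hub_download
from huggingface_hub.utils import EntryNotFoundError

REPO_ID = "your-org/your-submissions-dataset"  # hypothetical placeholder
FILENAME = "model_submissions.csv"

def append_submission(row: dict, token: str) -> None:
    # 1. Fetch the current CSV, or start a fresh frame if the file does not exist yet.
    try:
        path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME,
                               repo_type="dataset", token=token)
        df = pd.read_csv(path)
    except EntryNotFoundError:
        df = pd.DataFrame(columns=list(row))

    # 2. Append the new submission and serialize to CSV in memory.
    df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)
    buffer = io.StringIO()
    df.to_csv(buffer, index=False)

    # 3. Upload the updated file back to the Hub in a single commit.
    HfApi().upload_file(
        path_or_fileobj=buffer.getvalue().encode("utf-8"),
        path_in_repo=FILENAME,
        repo_id=REPO_ID,
        repo_type="dataset",
        token=token,
        commit_message=f"Add submission: {row.get('model_name', 'unknown')}",
    )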