File size: 7,941 Bytes
fe79a14
 
 
 
 
 
0babe14
fe79a14
 
 
0babe14
fe79a14
 
 
 
 
 
 
 
0babe14
 
 
 
fe79a14
 
 
 
 
 
 
0babe14
fe79a14
 
0babe14
fe79a14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0babe14
fe79a14
 
 
 
0babe14
 
fe79a14
 
 
 
 
 
 
 
 
0babe14
fe79a14
 
 
 
 
0babe14
fe79a14
 
 
 
0babe14
fe79a14
 
 
 
 
 
 
 
 
 
0babe14
fe79a14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0babe14
fe79a14
 
 
 
 
 
 
 
 
0babe14
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
import gradio as gr
import csv
import os
from datetime import datetime
from pathlib import Path
import pandas as pd
import io 
import logging

from huggingface_hub import HfApi, HfFolder, hf_hub_download
from huggingface_hub.utils import HfHubHTTPError, EntryNotFoundError 

# Root logging setup: INFO and above, each record prefixed with its timestamp,
# level and originating module name.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(module)s - %(message)s",
    level=logging.INFO,
)

# Module-level logger used by the functions in this file.
logger = logging.getLogger(__name__)


# Hub repository (and its repo type) that holds the submissions CSV.
TARGET_REPO_ID = "MCINext/submitted-models"
TARGET_REPO_TYPE = "dataset"
# Path of the submissions CSV inside that repository.
FILENAME_IN_REPO = "model_submissions.csv"

# Column order of the submissions CSV; every stored row follows this layout.
CSV_HEADER = [
    "timestamp",
    "model_name",
    "base_model",
    "revision",
    "precision",
    "weight_type",
    "model_type",
    "status",
    "submission_type",
]

def get_hf_token() -> str | None:
    """Retrieves the Hugging Face token from environment variables or HfFolder."""
    token = os.environ.get("HF_TOKEN")  
    if not token:
        try:
            token = HfFolder.get_token() 
        except Exception:
            logger.warning("Hugging Face token not found in HfFolder and HF_TOKEN env var is not set.")
            token = None
    return token

def _error_box(message: str) -> gr.Markdown:
    """Return *message* wrapped in the red bordered box used for error notices."""
    return gr.Markdown(
        "<div style='color:red; padding:10px; border:1px solid red; border-radius:5px;'>"
        f"{message}</div>"
    )


def add_new_eval_hf_to_hub(model_name_hf_id: str, revision_hf: str) -> gr.Markdown:
    """Record an evaluation request for a Hugging Face model in the Hub CSV.

    Downloads ``FILENAME_IN_REPO`` from ``TARGET_REPO_ID`` (starting from an
    empty table when the file does not exist yet), appends one row for the
    request and uploads the whole CSV back in a single commit.

    Args:
        model_name_hf_id: Hugging Face model ID entered by the user. Required;
            blank or whitespace-only input is rejected.
        revision_hf: Optional revision or commit. Blank input falls back to
            ``'main'``.

    Returns:
        A ``gr.Markdown`` component carrying either the success notice or an
        error notice. Failures are reported through the returned component and
        are not raised to the caller.
    """
    # Function-scope import: only needed to escape text echoed back as HTML.
    import html

    # Strip before validating so whitespace-only input counts as missing.
    model_name = (model_name_hf_id or "").strip()
    if not model_name:
        return gr.Markdown("⚠️ **Model Name (Hugging Face ID) is required.** Please enter a valid Hugging Face model ID.")
    revision = (revision_hf or "").strip() or 'main'

    token = get_hf_token()
    if not token:
        return _error_box(
            "⚠️ **Configuration Error:** Hugging Face Token not found. Cannot save submission to the Hub. Please ensure the `HF_TOKEN` Space secret is set with write permissions to the target repository."
        )

    submission_data = {
        'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'model_name': model_name,
        'base_model': 'N/A',
        'revision': revision,
        'precision': 'To be fetched/determined',
        'weight_type': 'To be fetched/determined',
        'model_type': 'To be fetched/determined',
        'status': 'pending_hub_submission',
        'submission_type': 'huggingface_simple_form_to_hub'
    }

    try:
        # NOTE(review): download -> append -> upload is not atomic; two
        # simultaneous submissions could overwrite each other - confirm
        # whether that matters for this Space.
        try:
            local_download_path = hf_hub_download(
                repo_id=TARGET_REPO_ID,
                filename=FILENAME_IN_REPO,
                repo_type=TARGET_REPO_TYPE,
                token=token,
            )
        except EntryNotFoundError:
            # Must precede HfHubHTTPError: a missing file is the expected
            # first-submission case, not an access failure.
            logger.info(f"'{FILENAME_IN_REPO}' not found in '{TARGET_REPO_ID}'. A new file will be created.")
            df = pd.DataFrame(columns=CSV_HEADER)
            file_exists_on_hub = False
        except HfHubHTTPError as e:
            # The status code lives on the attached response object (which
            # may be absent), not on the exception itself.
            status_code = getattr(getattr(e, "response", None), "status_code", "unknown")
            logger.error(f"HTTP error downloading '{FILENAME_IN_REPO}' from '{TARGET_REPO_ID}': {status_code} - {e}")
            return _error_box(
                f"⚠️ **Hub Error:** Could not access the repository '{TARGET_REPO_ID}'. (HTTP {status_code}). Please check token permissions and repository ID."
            )
        else:
            try:
                # Read everything as plain text so existing rows survive the
                # round trip: no 'N/A' -> NaN conversion and no numeric
                # re-typing of values such as all-digit revisions.
                df = pd.read_csv(local_download_path, dtype=str, keep_default_na=False)
            except pd.errors.EmptyDataError:
                # A zero-byte file on the Hub is treated as an empty table.
                df = pd.DataFrame(columns=CSV_HEADER)
            # Add any missing columns and enforce the canonical column order.
            df = df.reindex(columns=CSV_HEADER)
            file_exists_on_hub = True
            logger.info(f"Successfully downloaded existing '{FILENAME_IN_REPO}' from '{TARGET_REPO_ID}'.")

        new_row_df = pd.DataFrame([submission_data], columns=CSV_HEADER)
        df = pd.concat([df, new_row_df], ignore_index=True)

        # With no target path, to_csv returns the CSV text directly.
        csv_content_bytes = df.to_csv(index=False, header=True).encode('utf-8')

        if file_exists_on_hub:
            commit_message = f"Add submission: {submission_data['model_name']} (rev: {submission_data['revision']})"
        else:
            commit_message = f"Create '{FILENAME_IN_REPO}' and add first submission: {submission_data['model_name']}"

        HfApi(token=token).upload_file(
            path_or_fileobj=csv_content_bytes,  # raw bytes are uploaded as the file content
            path_in_repo=FILENAME_IN_REPO,
            repo_id=TARGET_REPO_ID,
            repo_type=TARGET_REPO_TYPE,
            commit_message=commit_message
        )

        logger.info(f"Submission for '{submission_data['model_name']}' pushed to '{TARGET_REPO_ID}/{FILENAME_IN_REPO}'.")
        # User-supplied values are escaped before being embedded in HTML.
        safe_model_name = html.escape(submission_data['model_name'])
        safe_revision = html.escape(submission_data['revision'])
        success_message_html = f"""
        <div style='color:green; padding:10px; border:1px solid green; border-radius:5px;'>
            ✅ Request for Hugging Face model '<strong>{safe_model_name}</strong>' (Revision: {safe_revision}) has been successfully submitted to the central repository on Hugging Face Hub!
        </div>
        """
        return gr.Markdown(success_message_html)

    except Exception as e:
        # Top-level boundary for the UI callback: log with traceback and
        # report the failure to the user instead of raising.
        logger.exception(f"An unexpected error occurred while processing submission to Hugging Face Hub: {e}")
        return _error_box(
            f"⚠️ **System Error:** An unexpected error occurred: {html.escape(str(e))}. Please try again or contact support."
        )


def render_submit():
    """Build and return the model-submission tab as a ``gr.Blocks`` layout.

    The layout contains, top to bottom: the introduction with Option 1
    guidance, a grouped form (model ID, optional revision, submit button), a
    Markdown area that receives the result of ``add_new_eval_hf_to_hub``, and
    the Option 2 (e-mail) guidance.
    """
    hub_route_md = """
# Request Model Evaluation for MIZAN

We're excited to evaluate new models for **MIZAN: A Persian LLM Leaderboard**!
Please choose the submission path that best fits how your model can be accessed for evaluation.

---

### **Option 1: Your model is publicly available on Hugging Face Hub**

If your model and its tokenizer can be loaded directly using their Hugging Face identifier (e.g., `username/model_name`), you can use the simplified form below to submit its key identifiers. Your submission will be added to our central tracking repository on the Hugging Face Hub. Our team will attempt to gather other necessary details from the Hub.
"""

    email_route_md = """
---

### **Option 2: Your model is NOT on Hugging Face, is private, or requires custom setup**

If your model is hosted elsewhere, is private, requires specific access permissions, needs custom inference code, or involves a more complex setup for evaluation, please initiate your submission request via email.

**To submit via email, please send comprehensive details to:**
📧 **[email protected]**

Our team will review your email and work with you to facilitate the evaluation process.
    """

    with gr.Blocks() as submit_tab:
        gr.Markdown(hub_route_md)

        # Option 1 form: the two inputs and the button share one visual group.
        with gr.Group():
            gr.Markdown("### ✨ Form for Option 1: Submit a Hugging Face Model to the Hub")

            model_id_input = gr.Textbox(
                placeholder="bigscience/bloom-560m",
                label="Model Name (Hugging Face ID: e.g., username/model_name)",
            )
            revision_input = gr.Textbox(
                placeholder="e.g., main, or a specific commit hash",
                label="Revision/Commit (Optional, defaults to 'main' if left empty)",
            )

            submit_button = gr.Button("🚀 Request Evaluation & Submit to Hub", variant="primary")

        # Created outside the group; receives the Markdown returned by the handler.
        result_panel = gr.Markdown()

        submit_button.click(
            fn=add_new_eval_hf_to_hub,
            inputs=[model_id_input, revision_input],
            outputs=result_panel,
        )

        gr.Markdown(email_route_md)

    return submit_tab