Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -2,20 +2,22 @@ import gradio as gr
|
|
2 |
import pandas as pd
|
3 |
from apscheduler.schedulers.background import BackgroundScheduler
|
4 |
# Removed Hugging Face Hub imports as they are not needed for the simplified leaderboard
|
5 |
-
|
|
|
6 |
CITATION_BUTTON_LABEL,
|
7 |
CITATION_BUTTON_TEXT,
|
8 |
-
EVALUATION_QUEUE_TEXT,
|
9 |
INTRODUCTION_TEXT,
|
10 |
LLM_BENCHMARKS_TEXT,
|
11 |
TITLE,
|
12 |
)
|
13 |
-
|
14 |
-
|
|
|
15 |
from src.envs import REPO_ID # Keep if needed for restart_space or other functions
|
16 |
-
#
|
17 |
-
#
|
18 |
-
|
19 |
|
20 |
# --- Elo Leaderboard Configuration ---
|
21 |
# Enhanced data with Rank (placeholder), Organizer, License, and URL
|
@@ -33,6 +35,7 @@ data = [
|
|
33 |
]
|
34 |
|
35 |
# Create a master DataFrame
|
|
|
36 |
master_df = pd.DataFrame(data)
|
37 |
|
38 |
# Define categories for selection (user-facing)
|
@@ -60,15 +63,17 @@ def update_leaderboard(category):
|
|
60 |
score_column = category_to_column[DEFAULT_CATEGORY]
|
61 |
if score_column not in master_df.columns: # Check fallback column too
|
62 |
# Return empty df with correct columns if still invalid
|
|
|
63 |
return pd.DataFrame({
|
64 |
"Rank": [],
|
65 |
"Model": [],
|
66 |
-
"
|
67 |
-
"
|
68 |
"Elo Score": []
|
69 |
})
|
70 |
|
71 |
# Select base columns + the score column for sorting
|
|
|
72 |
cols_to_select = ['model_name', 'url', 'organizer', 'license', score_column]
|
73 |
df = master_df[cols_to_select].copy()
|
74 |
|
@@ -79,27 +84,30 @@ def update_leaderboard(category):
|
|
79 |
df.reset_index(drop=True, inplace=True)
|
80 |
df.insert(0, 'Rank', df.index + 1)
|
81 |
|
82 |
-
# Format Model Name as
|
83 |
-
#
|
84 |
df['Model'] = df.apply(
|
85 |
lambda row: f"<a href='{row['url'] if pd.notna(row['url']) else '#'}' target='_blank' style='color: #007bff; text-decoration: none;'>{row['model_name']}</a>",
|
86 |
axis=1
|
87 |
)
|
88 |
-
# Alternative simpler Markdown format (might render differently in gr.Dataframe):
|
89 |
-
# df['Model'] = df.apply(lambda row: f"[{row['model_name']}]({row['url'] if pd.notna(row['url']) else '#'})", axis=1)
|
90 |
-
|
91 |
|
92 |
# Rename the score column to 'Elo Score' for consistent display
|
93 |
df.rename(columns={score_column: 'Elo Score'}, inplace=True)
|
94 |
|
95 |
-
#
|
96 |
-
|
|
|
|
|
97 |
df = df[final_columns]
|
|
|
|
|
|
|
|
|
98 |
|
99 |
return df
|
100 |
|
101 |
# --- Mock/Placeholder functions/data for other tabs ---
|
102 |
-
# (
|
103 |
print("Warning: Evaluation queue data fetching is disabled/mocked due to leaderboard changes.")
|
104 |
finished_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
|
105 |
running_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
|
@@ -118,42 +126,45 @@ def restart_space():
|
|
118 |
# Example CSS (add to your custom_css or inline):
|
119 |
# table { width: 100%; border-collapse: collapse; }
|
120 |
# th, td { padding: 8px 12px; border: 1px solid #ddd; text-align: left; }
|
121 |
-
# th { background-color: #f2f2f2; }
|
122 |
# tr:nth-child(even) { background-color: #f9f9f9; }
|
123 |
# tr:hover { background-color: #e9e9e9; }
|
|
|
|
|
124 |
|
125 |
-
|
|
|
126 |
|
127 |
with demo:
|
128 |
-
|
|
|
|
|
129 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
130 |
|
131 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
132 |
with gr.TabItem("🏅 MLE-Dojo Benchmark", elem_id="llm-benchmark-tab-table", id=0):
|
133 |
with gr.Column():
|
134 |
-
gr.Markdown("## Model Elo Rankings by Category")
|
135 |
category_selector = gr.Radio(
|
136 |
choices=CATEGORIES,
|
137 |
-
label="Select Category:",
|
138 |
-
value=DEFAULT_CATEGORY,
|
139 |
interactive=True,
|
140 |
-
# container=False, # Removed for default layout
|
141 |
-
# info="Select a category to see the rankings based on its specific Elo score." # Optional info tooltip
|
142 |
)
|
143 |
leaderboard_df_component = gr.Dataframe(
|
144 |
# Initialize with sorted data for the default category
|
145 |
value=update_leaderboard(DEFAULT_CATEGORY),
|
|
|
146 |
headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
|
147 |
-
#
|
148 |
-
#
|
149 |
datatype=["number", "html", "str", "str", "number"],
|
150 |
interactive=False,
|
151 |
-
|
152 |
-
height=400 + (len(master_df) * 20), # Base height + dynamic part
|
153 |
row_count=(len(master_df), "fixed"),
|
154 |
col_count=(5, "fixed"),
|
155 |
-
wrap=True, # Allow text wrapping
|
156 |
-
|
157 |
)
|
158 |
# Link the radio button change to the update function
|
159 |
category_selector.change(
|
@@ -162,23 +173,58 @@ with demo:
|
|
162 |
outputs=leaderboard_df_component
|
163 |
)
|
164 |
|
165 |
-
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-about", id=1):
|
166 |
-
|
167 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
168 |
|
169 |
# --- Submit Tab (Commented out as in original request) ---
|
|
|
170 |
# with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-submit", id=2):
|
171 |
-
#
|
172 |
-
#
|
173 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
174 |
|
175 |
# --- Citation Row (at the bottom, outside Tabs) ---
|
176 |
with gr.Accordion("📙 Citation", open=False):
|
177 |
-
#
|
178 |
citation_button = gr.Textbox(
|
179 |
value=CITATION_BUTTON_TEXT,
|
180 |
label=CITATION_BUTTON_LABEL,
|
181 |
-
lines=
|
182 |
elem_id="citation-button",
|
183 |
show_copy_button=True,
|
184 |
)
|
@@ -189,5 +235,8 @@ with demo:
|
|
189 |
# scheduler.start()
|
190 |
|
191 |
# --- Launch the app ---
|
|
|
192 |
if __name__ == "__main__":
|
|
|
|
|
193 |
demo.launch()
|
|
|
2 |
import pandas as pd
|
3 |
from apscheduler.schedulers.background import BackgroundScheduler
|
4 |
# Removed Hugging Face Hub imports as they are not needed for the simplified leaderboard
|
5 |
+
# Assuming these are correctly defined in your src.about module
|
6 |
+
from src.about import (
|
7 |
CITATION_BUTTON_LABEL,
|
8 |
CITATION_BUTTON_TEXT,
|
9 |
+
EVALUATION_QUEUE_TEXT, # Keep if used by commented-out submit tab
|
10 |
INTRODUCTION_TEXT,
|
11 |
LLM_BENCHMARKS_TEXT,
|
12 |
TITLE,
|
13 |
)
|
14 |
+
# Assuming this is correctly defined in your src.display.css_html_js module
|
15 |
+
from src.display.css_html_js import custom_css
|
16 |
+
# Assuming this is correctly defined in your src.envs module
|
17 |
from src.envs import REPO_ID # Keep if needed for restart_space or other functions
|
18 |
+
# Assuming this is correctly defined in your src.submission.submit module
|
19 |
+
from src.submission.submit import add_new_eval # Keep if using the submit tab
|
20 |
+
|
21 |
|
22 |
# --- Elo Leaderboard Configuration ---
|
23 |
# Enhanced data with Rank (placeholder), Organizer, License, and URL
|
|
|
35 |
]
|
36 |
|
37 |
# Create a master DataFrame
|
38 |
+
# Note: Columns 'organizer' and 'license' are created in lowercase here.
|
39 |
master_df = pd.DataFrame(data)
|
40 |
|
41 |
# Define categories for selection (user-facing)
|
|
|
63 |
score_column = category_to_column[DEFAULT_CATEGORY]
|
64 |
if score_column not in master_df.columns: # Check fallback column too
|
65 |
# Return empty df with correct columns if still invalid
|
66 |
+
# *** FIX APPLIED HERE: Use lowercase keys consistent with DataFrame ***
|
67 |
return pd.DataFrame({
|
68 |
"Rank": [],
|
69 |
"Model": [],
|
70 |
+
"organizer": [], # lowercase
|
71 |
+
"license": [], # lowercase
|
72 |
"Elo Score": []
|
73 |
})
|
74 |
|
75 |
# Select base columns + the score column for sorting
|
76 |
+
# Ensure 'organizer' and 'license' are selected correctly (lowercase)
|
77 |
cols_to_select = ['model_name', 'url', 'organizer', 'license', score_column]
|
78 |
df = master_df[cols_to_select].copy()
|
79 |
|
|
|
84 |
df.reset_index(drop=True, inplace=True)
|
85 |
df.insert(0, 'Rank', df.index + 1)
|
86 |
|
87 |
+
# Format Model Name as HTML Hyperlink
|
88 |
+
# The resulting column name will be 'Model' (capitalized)
|
89 |
df['Model'] = df.apply(
|
90 |
lambda row: f"<a href='{row['url'] if pd.notna(row['url']) else '#'}' target='_blank' style='color: #007bff; text-decoration: none;'>{row['model_name']}</a>",
|
91 |
axis=1
|
92 |
)
|
|
|
|
|
|
|
93 |
|
94 |
# Rename the score column to 'Elo Score' for consistent display
|
95 |
df.rename(columns={score_column: 'Elo Score'}, inplace=True)
|
96 |
|
97 |
+
# --- FIX APPLIED HERE ---
|
98 |
+
# Select and reorder columns for final display using the ACTUAL column names in df
|
99 |
+
# Use lowercase 'organizer' and 'license' here because they haven't been renamed.
|
100 |
+
final_columns = ["Rank", "Model", "organizer", "license", "Elo Score"]
|
101 |
df = df[final_columns]
|
102 |
+
# -----------------------
|
103 |
+
|
104 |
+
# Note: The DataFrame returned now has columns:
|
105 |
+
# 'Rank', 'Model', 'organizer', 'license', 'Elo Score'
|
106 |
|
107 |
return df
|
108 |
|
109 |
# --- Mock/Placeholder functions/data for other tabs ---
|
110 |
+
# (If the Submit tab is used, ensure these variables are appropriately populated or handled)
|
111 |
print("Warning: Evaluation queue data fetching is disabled/mocked due to leaderboard changes.")
|
112 |
finished_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
|
113 |
running_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
|
|
|
126 |
# Example CSS (add to your custom_css or inline):
|
127 |
# table { width: 100%; border-collapse: collapse; }
|
128 |
# th, td { padding: 8px 12px; border: 1px solid #ddd; text-align: left; }
|
129 |
+
# th { background-color: #f2f2f2; font-weight: bold; } /* Added bold */
|
130 |
# tr:nth-child(even) { background-color: #f9f9f9; }
|
131 |
# tr:hover { background-color: #e9e9e9; }
|
132 |
+
# td a { color: #007bff; text-decoration: none; } /* Style links in table */
|
133 |
+
# td a:hover { text-decoration: underline; }
|
134 |
|
135 |
+
# Use a theme for better default styling
|
136 |
+
demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())
|
137 |
|
138 |
with demo:
|
139 |
+
# Use the TITLE variable imported from src.about
|
140 |
+
gr.HTML(TITLE)
|
141 |
+
# Use the INTRODUCTION_TEXT variable imported from src.about
|
142 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
143 |
|
144 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
145 |
with gr.TabItem("🏅 MLE-Dojo Benchmark", elem_id="llm-benchmark-tab-table", id=0):
|
146 |
with gr.Column():
|
147 |
+
gr.Markdown("## Model Elo Rankings by Category")
|
148 |
category_selector = gr.Radio(
|
149 |
choices=CATEGORIES,
|
150 |
+
label="Select Category:",
|
151 |
+
value=DEFAULT_CATEGORY,
|
152 |
interactive=True,
|
|
|
|
|
153 |
)
|
154 |
leaderboard_df_component = gr.Dataframe(
|
155 |
# Initialize with sorted data for the default category
|
156 |
value=update_leaderboard(DEFAULT_CATEGORY),
|
157 |
+
# Headers for DISPLAY remain capitalized
|
158 |
headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
|
159 |
+
# Datatype maps to the final df columns: Rank, Model, organizer, license, Elo Score
|
160 |
+
# 'html' is used for the 'Model' column containing the <a> tag
|
161 |
datatype=["number", "html", "str", "str", "number"],
|
162 |
interactive=False,
|
163 |
+
height=400 + (len(master_df) * 25), # Adjusted height slightly
|
|
|
164 |
row_count=(len(master_df), "fixed"),
|
165 |
col_count=(5, "fixed"),
|
166 |
+
wrap=True, # Allow text wrapping
|
167 |
+
elem_id="leaderboard-table" # CSS hook
|
168 |
)
|
169 |
# Link the radio button change to the update function
|
170 |
category_selector.change(
|
|
|
173 |
outputs=leaderboard_df_component
|
174 |
)
|
175 |
|
176 |
+
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-about", id=1):
|
177 |
+
# Use the LLM_BENCHMARKS_TEXT variable imported from src.about
|
178 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
179 |
|
180 |
# --- Submit Tab (Commented out as in original request) ---
|
181 |
+
# Make sure EVALUATION_QUEUE_TEXT and add_new_eval are imported if uncommented
|
182 |
# with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-submit", id=2):
|
183 |
+
# with gr.Column():
|
184 |
+
# with gr.Row():
|
185 |
+
# gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text") # Requires import
|
186 |
+
# with gr.Column():
|
187 |
+
# with gr.Accordion(f"✅ Finished Evaluations ({len(finished_eval_queue_df)})", open=False):
|
188 |
+
# finished_eval_table = gr.components.Dataframe(
|
189 |
+
# value=finished_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
|
190 |
+
# )
|
191 |
+
# with gr.Accordion(f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})", open=False):
|
192 |
+
# running_eval_table = gr.components.Dataframe(
|
193 |
+
# value=running_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
|
194 |
+
# )
|
195 |
+
# with gr.Accordion(f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})", open=False):
|
196 |
+
# pending_eval_table = gr.components.Dataframe(
|
197 |
+
# value=pending_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
|
198 |
+
# )
|
199 |
+
# with gr.Row():
|
200 |
+
# gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
|
201 |
+
# with gr.Row():
|
202 |
+
# with gr.Column():
|
203 |
+
# model_name_textbox = gr.Textbox(label="Model name (on Hugging Face Hub)") # Clarified label
|
204 |
+
# revision_name_textbox = gr.Textbox(label="Revision / Commit Hash", placeholder="main")
|
205 |
+
# # Example choices, adjust as needed for your actual submission logic
|
206 |
+
# model_type = gr.Dropdown(choices=["Type A", "Type B", "Type C"], label="Model type", multiselect=False, value=None, interactive=True)
|
207 |
+
# with gr.Column():
|
208 |
+
# precision = gr.Dropdown(choices=["float16", "bfloat16", "float32", "int8", "auto"], label="Precision", multiselect=False, value="auto", interactive=True)
|
209 |
+
# weight_type = gr.Dropdown(choices=["Original", "Adapter", "Delta"], label="Weights type", multiselect=False, value="Original", interactive=True)
|
210 |
+
# base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
|
211 |
+
# submit_button = gr.Button("Submit Eval")
|
212 |
+
# submission_result = gr.Markdown() # To display success/failure message
|
213 |
+
# # Ensure add_new_eval is correctly imported and handles these inputs
|
214 |
+
# submit_button.click(
|
215 |
+
# add_new_eval, # Requires import
|
216 |
+
# [ model_name_textbox, base_model_name_textbox, revision_name_textbox, precision, weight_type, model_type, ],
|
217 |
+
# submission_result,
|
218 |
+
# )
|
219 |
+
|
220 |
|
221 |
# --- Citation Row (at the bottom, outside Tabs) ---
|
222 |
with gr.Accordion("📙 Citation", open=False):
|
223 |
+
# Use the CITATION_BUTTON_TEXT and CITATION_BUTTON_LABEL variables imported from src.about
|
224 |
citation_button = gr.Textbox(
|
225 |
value=CITATION_BUTTON_TEXT,
|
226 |
label=CITATION_BUTTON_LABEL,
|
227 |
+
lines=10, # Reduced lines slightly
|
228 |
elem_id="citation-button",
|
229 |
show_copy_button=True,
|
230 |
)
|
|
|
235 |
# scheduler.start()
|
236 |
|
237 |
# --- Launch the app ---
|
238 |
+
# Ensures the app launches only when the script is run directly
|
239 |
if __name__ == "__main__":
|
240 |
+
# Ensure you have installed necessary libraries: pip install gradio pandas apscheduler
|
241 |
+
# Make sure your src module files (about.py etc.) are in the same directory or accessible in PYTHONPATH
|
242 |
demo.launch()
|