Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -290,12 +290,12 @@ def evaluate_scientific_submission(
|
|
290 |
description: str,
|
291 |
detected_category: str,
|
292 |
validation_info: Dict,
|
293 |
-
) -> Tuple[str, pd.DataFrame, object, object
|
294 |
"""Evaluate validated predictions using scientific methodology."""
|
295 |
|
296 |
try:
|
297 |
if predictions_df is None:
|
298 |
-
return "β No valid predictions to evaluate", None, None, None
|
299 |
|
300 |
# Get complete test set with targets
|
301 |
global complete_test_set, current_leaderboard
|
@@ -310,7 +310,7 @@ def evaluate_scientific_submission(
|
|
310 |
|
311 |
if any(track_data.get('error') for track_data in evaluation_results.get('tracks', {}).values()):
|
312 |
errors = [track_data['error'] for track_data in evaluation_results['tracks'].values() if track_data.get('error')]
|
313 |
-
return f"β Evaluation errors: {'; '.join(errors)}", None, None, None
|
314 |
|
315 |
# Add to scientific leaderboard
|
316 |
print("π Adding to scientific leaderboard...")
|
@@ -387,11 +387,11 @@ def evaluate_scientific_submission(
|
|
387 |
{report}
|
388 |
"""
|
389 |
|
390 |
-
return success_msg, display_leaderboard, summary_plot, cross_track_plot
|
391 |
|
392 |
except Exception as e:
|
393 |
error_msg = f"β Scientific evaluation failed: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
|
394 |
-
return error_msg, None, None, None
|
395 |
|
396 |
def refresh_track_leaderboard(
|
397 |
track: str,
|
@@ -1217,7 +1217,7 @@ with gr.Blocks(
|
|
1217 |
# Submit for evaluation
|
1218 |
def handle_scientific_submission(predictions, model_name, author, description, category, validation_info):
|
1219 |
if predictions is None:
|
1220 |
-
return "β Please validate your submission first", None, None, None
|
1221 |
|
1222 |
return evaluate_scientific_submission(
|
1223 |
predictions, model_name, author, description, category, validation_info
|
@@ -1226,7 +1226,7 @@ with gr.Blocks(
|
|
1226 |
submit_btn.click(
|
1227 |
fn=handle_scientific_submission,
|
1228 |
inputs=[predictions_validated, model_name_input, author_input, description_input, detected_category_state, validation_info_state],
|
1229 |
-
outputs=[evaluation_output, results_table, submission_plot, cross_track_plot
|
1230 |
)
|
1231 |
|
1232 |
# Track leaderboard refresh functions
|
@@ -1274,27 +1274,31 @@ with gr.Blocks(
|
|
1274 |
outputs=[comparison_output, comparison_plot]
|
1275 |
)
|
1276 |
|
1277 |
-
#
|
1278 |
-
def
|
|
|
|
|
|
|
|
|
1279 |
if current_leaderboard is not None and not current_leaderboard.empty:
|
1280 |
model_choices = current_leaderboard['model_name'].tolist()
|
1281 |
else:
|
1282 |
model_choices = []
|
1283 |
|
1284 |
return (
|
1285 |
-
|
1286 |
-
|
|
|
|
|
|
|
|
|
1287 |
)
|
1288 |
|
1289 |
-
# Load initial data and update dropdowns
|
1290 |
demo.load(
|
1291 |
-
fn=
|
1292 |
-
refresh_google_track("", "all", 0.0),
|
1293 |
-
update_dropdown_choices()
|
1294 |
-
),
|
1295 |
outputs=[
|
1296 |
-
|
1297 |
-
|
1298 |
]
|
1299 |
)
|
1300 |
|
|
|
290 |
description: str,
|
291 |
detected_category: str,
|
292 |
validation_info: Dict,
|
293 |
+
) -> Tuple[str, pd.DataFrame, object, object]:
|
294 |
"""Evaluate validated predictions using scientific methodology."""
|
295 |
|
296 |
try:
|
297 |
if predictions_df is None:
|
298 |
+
return "β No valid predictions to evaluate", None, None, None
|
299 |
|
300 |
# Get complete test set with targets
|
301 |
global complete_test_set, current_leaderboard
|
|
|
310 |
|
311 |
if any(track_data.get('error') for track_data in evaluation_results.get('tracks', {}).values()):
|
312 |
errors = [track_data['error'] for track_data in evaluation_results['tracks'].values() if track_data.get('error')]
|
313 |
+
return f"β Evaluation errors: {'; '.join(errors)}", None, None, None
|
314 |
|
315 |
# Add to scientific leaderboard
|
316 |
print("π Adding to scientific leaderboard...")
|
|
|
387 |
{report}
|
388 |
"""
|
389 |
|
390 |
+
return success_msg, display_leaderboard, summary_plot, cross_track_plot
|
391 |
|
392 |
except Exception as e:
|
393 |
error_msg = f"β Scientific evaluation failed: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
|
394 |
+
return error_msg, None, None, None
|
395 |
|
396 |
def refresh_track_leaderboard(
|
397 |
track: str,
|
|
|
1217 |
# Submit for evaluation
|
1218 |
def handle_scientific_submission(predictions, model_name, author, description, category, validation_info):
|
1219 |
if predictions is None:
|
1220 |
+
return "β Please validate your submission first", None, None, None
|
1221 |
|
1222 |
return evaluate_scientific_submission(
|
1223 |
predictions, model_name, author, description, category, validation_info
|
|
|
1226 |
submit_btn.click(
|
1227 |
fn=handle_scientific_submission,
|
1228 |
inputs=[predictions_validated, model_name_input, author_input, description_input, detected_category_state, validation_info_state],
|
1229 |
+
outputs=[evaluation_output, results_table, submission_plot, cross_track_plot]
|
1230 |
)
|
1231 |
|
1232 |
# Track leaderboard refresh functions
|
|
|
1274 |
outputs=[comparison_output, comparison_plot]
|
1275 |
)
|
1276 |
|
1277 |
+
# Load initial data and update dropdowns
|
1278 |
+
def load_initial_data():
    """Build the initial UI state: Google-track views plus model dropdown choices.

    Returns a 6-tuple matching the ``demo.load`` outputs:
    (google_leaderboard, google_ranking_plot, google_comparison_plot,
    google_stats, model_select dropdown, comparison_models checkbox group).
    """
    # Initial Google-track refresh: no name filter, all categories, 0.0 threshold.
    track_views = refresh_google_track("", "all", 0.0)

    # Model choices come from the in-memory leaderboard when it has rows;
    # otherwise the dropdowns start out empty.
    choices = (
        current_leaderboard['model_name'].tolist()
        if current_leaderboard is not None and not current_leaderboard.empty
        else []
    )

    leaderboard_view, ranking_plot, comparison_plot, stats_view = (
        track_views[0],
        track_views[1],
        track_views[2],
        track_views[3],
    )
    return (
        leaderboard_view,                 # google_leaderboard
        ranking_plot,                     # google_ranking_plot
        comparison_plot,                  # google_comparison_plot
        stats_view,                       # google_stats
        gr.Dropdown(choices=choices),     # model_select
        gr.CheckboxGroup(choices=choices) # comparison_models
    )
|
1296 |
|
|
|
1297 |
# On app startup, populate the Google-track views and both model pickers.
demo.load(
    fn=load_initial_data,
    outputs=[
        google_leaderboard,
        google_ranking_plot,
        google_comparison_plot,
        google_stats,
        model_select,
        comparison_models,
    ],
)
|
1304 |
|