import gradio as gr
import pandas as pd
import os
import shutil
from pathlib import Path
import subprocess
import time
import threading
import sys

SUBMISSIONS_DIR = "submissions"
RESULTS_DIR = "results"
EVAL_SCRIPT_PATH = "eval.py"
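
# Expected eval.py contract (inferred from its invocation and the leaderboard parser
# below): it is called as `<python> eval.py <submission_dir> <results_dir>` and should
# write a result folder under RESULTS_DIR containing a summary.txt with a
# "Score: <value>" line.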


def setup_directories():
    """Creates the submissions and results directories if they don't exist."""
    os.makedirs(SUBMISSIONS_DIR, exist_ok=True)
    os.makedirs(RESULTS_DIR, exist_ok=True)
    if not os.listdir(RESULTS_DIR):
        initial_result_demo_path = Path(RESULTS_DIR) / "initial_example_result"
        if not initial_result_demo_path.exists():
            os.makedirs(initial_result_demo_path, exist_ok=True)
            with open(initial_result_demo_path / "summary.txt", "w") as f:
                f.write("This is a placeholder initial result.\nScore: 0\n")
            print(f"Created a sample directory in '{RESULTS_DIR}' for demonstration.")


def load_leaderboard_data():
    """
    Scans the RESULTS_DIR for subdirectories and returns a DataFrame.
    Each subdirectory name is an entry. Tries to parse a 'Score' from 'summary.txt'.
    """
    if not os.path.exists(RESULTS_DIR):
        return pd.DataFrame(columns=["Result Directory", "Score", "Files"])

    result_dirs = [d for d in os.listdir(RESULTS_DIR) if os.path.isdir(Path(RESULTS_DIR) / d)]

    leaderboard_entries = []

    sorted_result_dirs = sorted(
        result_dirs,
        key=lambda d: (Path(RESULTS_DIR) / d).stat().st_mtime,
        reverse=True
    )
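
    # Newest results first; each result directory becomes one leaderboard row with its
    # name, a file count, and a Score parsed from an optional "Score: <value>" line in
    # summary.txt.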
    for dir_name in sorted_result_dirs:
        entry = {"Result Directory": dir_name, "Score": "N/A", "Files": 0}
        result_dir_path = Path(RESULTS_DIR) / dir_name

        try:
            entry["Files"] = len([f for f in os.listdir(result_dir_path) if os.path.isfile(result_dir_path / f)])
        except Exception:
            pass

        summary_file = result_dir_path / "summary.txt"
        if summary_file.exists():
            try:
                with open(summary_file, "r") as f:
                    for line in f:
                        if line.lower().startswith("score:"):
                            entry["Score"] = line.split(":", 1)[1].strip()
                            break
            except Exception as e:
                print(f"Error parsing summary for {dir_name}: {e}")

        leaderboard_entries.append(entry)

    if not leaderboard_entries:
        return pd.DataFrame(columns=["Result Directory", "Score", "Files"])

    return pd.DataFrame(leaderboard_entries)


def run_evaluation_in_background(submission_dir_path_str: str, results_dir_str: str, submission_name_for_log: str):
    """
    This function runs eval.py in a subprocess. It's intended to be run in a separate thread.
    Outputs from eval.py will go to the console where app.py is running.
    """
    print(
        f"BACKGROUND THREAD: Starting evaluation for '{submission_name_for_log}' using path '{submission_dir_path_str}'...")

    if not Path(EVAL_SCRIPT_PATH).exists():
        print(
            f"BACKGROUND THREAD: CRITICAL ERROR - Evaluation script '{EVAL_SCRIPT_PATH}' not found. Eval aborted for '{submission_name_for_log}'.")
        return

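    # Run eval.py with the same interpreter serving this app; the submission directory
    # and the results directory are passed as positional arguments.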
    command = [sys.executable, EVAL_SCRIPT_PATH, submission_dir_path_str, results_dir_str]

    try:
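        # check=False: a non-zero exit from eval.py is reported via returncode below
        # instead of raising; timeout=300 aborts evaluations after five minutes.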
        process = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=False,
            timeout=300
        )

        eval_output = process.stdout.strip()
        eval_error = process.stderr.strip()

        print(
            f"--- BACKGROUND Eval STDOUT ({submission_name_for_log}) ---\n{eval_output if eval_output else '<No stdout>'}")
        if eval_error:
            print(f"--- BACKGROUND Eval STDERR ({submission_name_for_log}) ---\n{eval_error}")

        if process.returncode == 0:
            print(f"BACKGROUND THREAD: Evaluation successful for '{submission_name_for_log}'.")
        else:
            print(
                f"BACKGROUND THREAD: Evaluation FAILED for '{submission_name_for_log}'. Script exit code: {process.returncode}")

    except subprocess.TimeoutExpired:
        print(f"BACKGROUND THREAD: Evaluation for '{submission_name_for_log}' TIMED OUT after 5 minutes.")
    except FileNotFoundError:
        print(
            f"BACKGROUND THREAD: FileNotFoundError - Could not execute command. Ensure the Python interpreter and '{EVAL_SCRIPT_PATH}' are accessible for '{submission_name_for_log}'.")
    except Exception as e:
        print(
            f"BACKGROUND THREAD: An unexpected error occurred during evaluation for '{submission_name_for_log}': {str(e)}")

    print(f"BACKGROUND THREAD: Finished evaluation attempt for '{submission_name_for_log}'.")


def handle_upload_and_kickoff_eval(uploaded_files_list, progress=gr.Progress(track_tqdm=True)):
    """
    Handles a directory upload, saves its files, and starts eval.py in a background thread.
    Yields status messages for the UI. The leaderboard updates separately.
    """
    yield "Processing upload..."

    if not uploaded_files_list:
        yield "No directory uploaded. Please select a directory."
        return

    try:
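        # file_count="directory" hands this handler a flat list of temp-file objects;
        # the uploaded directory's name is recovered from the first file's parent folder.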
        first_temp_file_path = Path(uploaded_files_list[0].name)
        original_uploaded_dir_name = first_temp_file_path.parent.name

        submission_dir_path = Path(SUBMISSIONS_DIR) / original_uploaded_dir_name

        if submission_dir_path.exists():
            timestamp = time.strftime("%Y%m%d-%H%M%S")
            descriptive_name_for_log_and_status = f"{original_uploaded_dir_name}_{timestamp}"
            submission_dir_path = Path(SUBMISSIONS_DIR) / descriptive_name_for_log_and_status
            status_update_msg = f"Directory '{original_uploaded_dir_name}' already existed. Saving as '{descriptive_name_for_log_and_status}'."
            original_uploaded_dir_name = descriptive_name_for_log_and_status
        else:
            descriptive_name_for_log_and_status = original_uploaded_dir_name
            status_update_msg = f"Copying files for '{descriptive_name_for_log_and_status}'..."

        os.makedirs(submission_dir_path, exist_ok=True)
        progress(0.1, desc=status_update_msg)

        for temp_file_obj in progress.tqdm(uploaded_files_list, desc="Copying files"):
            temp_file_path = Path(temp_file_obj.name)
            file_name_in_dir = temp_file_path.name
            target_file_path = submission_dir_path / file_name_in_dir
            shutil.copy(str(temp_file_path), str(target_file_path))

        upload_completion_msg = f"Upload of '{descriptive_name_for_log_and_status}' complete."
        progress(0.8, desc=upload_completion_msg)

    except Exception as e:
        yield f"Error during upload: {str(e)}"
        return

    if not Path(EVAL_SCRIPT_PATH).exists():
        yield f"{upload_completion_msg} BUT CRITICAL ERROR: Evaluation script '{EVAL_SCRIPT_PATH}' not found. Evaluation cannot be started."
        return

    abs_submission_path = str(submission_dir_path.resolve())
    abs_results_path = str(Path(RESULTS_DIR).resolve())
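
    # Launch the evaluation on a daemon thread: the handler returns immediately and a
    # hung evaluation will not keep the app from shutting down.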
    eval_thread = threading.Thread(
        target=run_evaluation_in_background,
        args=(abs_submission_path, abs_results_path, descriptive_name_for_log_and_status),
        daemon=True
    )
    eval_thread.start()

    final_status_msg = (
        f"{upload_completion_msg} Evaluation for '{descriptive_name_for_log_and_status}' has started in the background. "
        "The leaderboard will auto-refresh (or use manual refresh)."
    )
    progress(1.0, desc="Background evaluation initiated.")
    yield final_status_msg
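

# Create the working directories up front so the leaderboard loader has something to
# scan when the UI first renders.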
setup_directories()

with gr.Blocks(title="Background Submission, Evaluation, and Leaderboard") as demo:
    gr.Markdown("# Background Submission, Evaluation & Results")
    gr.Markdown(
        f"Upload submissions (directories) to **'{SUBMISSIONS_DIR}'**. "
        f"The evaluation script (`{EVAL_SCRIPT_PATH}`) will process them in the background. "
        f"Results appear in **'{RESULTS_DIR}'**. The leaderboard auto-refreshes."
    )

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("## Upload & Evaluate Submission")
            upload_button = gr.UploadButton(
                "Click to Upload Directory for Evaluation",
                file_count="directory",
            )
            upload_status_textbox = gr.Textbox(label="Current Status", interactive=False, lines=4)

        with gr.Column(scale=2):
            gr.Markdown("## Results Leaderboard")
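            # Passing the loader function itself (not its return value) makes Gradio call
            # it on every page load and on each `every` refresh tick configured below.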
            leaderboard_df_component = gr.DataFrame(
                value=load_leaderboard_data,
                label="Leaderboard (auto-refreshes)",
                interactive=False,
                every=30,  # assumed polling interval (seconds); some value is needed for the advertised auto-refresh
            )
            refresh_leaderboard_button = gr.Button("Refresh Leaderboard Manually")

    upload_button.upload(
        fn=handle_upload_and_kickoff_eval,
        inputs=[upload_button],
        outputs=[upload_status_textbox],
        show_progress="full"
    )

    refresh_leaderboard_button.click(
        fn=load_leaderboard_data,
        inputs=None,
        outputs=[leaderboard_df_component]
    )
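
# queue() is needed here: the upload handler is a generator that streams status text,
# and gr.Progress / timed component refreshes only run with the queue enabled.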
if __name__ == "__main__":
    demo.queue().launch()