import gradio as gr
import pandas as pd
import os
import shutil
from pathlib import Path
import subprocess # For running eval.py
import time
import threading # For background tasks
import sys
# --- Configuration ---
SUBMISSIONS_DIR = "submissions"
RESULTS_DIR = "results"
EVAL_SCRIPT_PATH = "eval.py"
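
# Contract assumed for eval.py (inferred from how it is invoked and parsed below):
# it is run as `python eval.py <submission_dir> <results_dir>` and is expected to
# write a subdirectory inside <results_dir> containing a summary.txt whose
# "Score: <value>" line is what the leaderboard displays. Everything else eval.py
# does is up to that script; this app only launches it and reads its results.
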
# --- Helper Functions ---
def setup_directories():
    """Creates the submissions and results directories if they don't exist."""
    os.makedirs(SUBMISSIONS_DIR, exist_ok=True)
    os.makedirs(RESULTS_DIR, exist_ok=True)
    if not os.listdir(RESULTS_DIR):  # Add a placeholder if results is empty
        initial_result_demo_path = Path(RESULTS_DIR) / "initial_example_result"
        if not initial_result_demo_path.exists():
            os.makedirs(initial_result_demo_path, exist_ok=True)
            with open(initial_result_demo_path / "summary.txt", "w") as f:
                f.write("This is a placeholder initial result.\nScore: 0\n")
            print(f"Created a sample directory in '{RESULTS_DIR}' for demonstration.")
def load_leaderboard_data():
    """
    Scans the RESULTS_DIR for subdirectories and returns a DataFrame.
    Each subdirectory name is an entry. Tries to parse a 'Score' from 'summary.txt'.
    """
    if not os.path.exists(RESULTS_DIR):
        return pd.DataFrame(columns=["Result Directory", "Score", "Files"])

    result_dirs = [d for d in os.listdir(RESULTS_DIR) if os.path.isdir(Path(RESULTS_DIR) / d)]
    leaderboard_entries = []

    # Sort by modification time of the directory (newest first);
    # this requires getting the mtime for each directory.
    sorted_result_dirs = sorted(
        result_dirs,
        key=lambda d: (Path(RESULTS_DIR) / d).stat().st_mtime,
        reverse=True
    )

    for dir_name in sorted_result_dirs:
        entry = {"Result Directory": dir_name, "Score": "N/A", "Files": 0}
        result_dir_path = Path(RESULTS_DIR) / dir_name
        try:
            entry["Files"] = len([f for f in os.listdir(result_dir_path) if os.path.isfile(result_dir_path / f)])
        except Exception:
            pass  # Directory might have been removed during scan

        summary_file = result_dir_path / "summary.txt"
        if summary_file.exists():
            try:
                with open(summary_file, "r") as f:
                    for line in f:
                        if line.lower().startswith("score:"):
                            entry["Score"] = line.split(":", 1)[1].strip()
                            break
            except Exception as e:
                print(f"Error parsing summary for {dir_name}: {e}")
        leaderboard_entries.append(entry)

    if not leaderboard_entries:
        return pd.DataFrame(columns=["Result Directory", "Score", "Files"])
    return pd.DataFrame(leaderboard_entries)
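
# Illustrative results layout that load_leaderboard_data() can parse (every name
# except summary.txt is a hypothetical example, not something this app creates):
#
#   results/
#       my_submission_20240101-120000/
#           summary.txt        <- the first line starting with "Score:" feeds the Score column
#           predictions.csv    <- additional files only affect the "Files" count
#
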
def run_evaluation_in_background(submission_dir_path_str: str, results_dir_str: str, submission_name_for_log: str):
    """
    Runs eval.py in a subprocess. Intended to be executed in a separate thread.
    Output from eval.py goes to the console where app.py is running.
    """
    print(
        f"BACKGROUND THREAD: Starting evaluation for '{submission_name_for_log}' using path '{submission_dir_path_str}'...")

    if not Path(EVAL_SCRIPT_PATH).exists():
        print(
            f"BACKGROUND THREAD: CRITICAL ERROR - Evaluation script '{EVAL_SCRIPT_PATH}' not found. Eval aborted for '{submission_name_for_log}'.")
        return

    command = [sys.executable, EVAL_SCRIPT_PATH, submission_dir_path_str, results_dir_str]
    try:
        # subprocess.run is the simpler choice for a blocking call inside this worker thread.
        process = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=False,  # Handle non-zero exit codes manually
            timeout=300   # 5-minute timeout for the evaluation script
        )
        eval_output = process.stdout.strip()
        eval_error = process.stderr.strip()

        print(
            f"--- BACKGROUND Eval STDOUT ({submission_name_for_log}) ---\n{eval_output if eval_output else '<No stdout>'}")
        if eval_error:  # Only print stderr if it's not empty
            print(f"--- BACKGROUND Eval STDERR ({submission_name_for_log}) ---\n{eval_error}")

        if process.returncode == 0:
            print(f"BACKGROUND THREAD: Evaluation successful for '{submission_name_for_log}'.")
        else:
            print(
                f"BACKGROUND THREAD: Evaluation FAILED for '{submission_name_for_log}'. Script exit code: {process.returncode}")
    except subprocess.TimeoutExpired:
        print(f"BACKGROUND THREAD: Evaluation for '{submission_name_for_log}' TIMED OUT after 5 minutes.")
    except FileNotFoundError:
        # Raised when the interpreter (sys.executable) or EVAL_SCRIPT_PATH cannot be found.
        print(
            f"BACKGROUND THREAD: FileNotFoundError - Could not execute command. Ensure the Python interpreter is available and '{EVAL_SCRIPT_PATH}' is correct for '{submission_name_for_log}'.")
    except Exception as e:
        print(
            f"BACKGROUND THREAD: An unexpected error occurred during evaluation for '{submission_name_for_log}': {str(e)}")

    print(f"BACKGROUND THREAD: Finished evaluation attempt for '{submission_name_for_log}'.")
def handle_upload_and_kickoff_eval(uploaded_files_list, progress=gr.Progress(track_tqdm=True)):
    """
    Handles a directory upload, saves its files, and starts eval.py in a background thread.
    Yields status messages for the UI. The leaderboard updates separately.
    """
    yield "Processing upload..."  # Initial status

    if not uploaded_files_list:
        yield "No directory uploaded. Please select a directory."
        return

    try:
        # Determine the original uploaded directory name from the first temp file's parent.
        first_temp_file_path = Path(uploaded_files_list[0].name)
        original_uploaded_dir_name = first_temp_file_path.parent.name
        submission_dir_path = Path(SUBMISSIONS_DIR) / original_uploaded_dir_name

        # Handle a potential name collision with an existing submission.
        if submission_dir_path.exists():
            timestamp = time.strftime("%Y%m%d-%H%M%S")
            descriptive_name_for_log_and_status = f"{original_uploaded_dir_name}_{timestamp}"
            submission_dir_path = Path(SUBMISSIONS_DIR) / descriptive_name_for_log_and_status
            status_update_msg = f"Directory '{original_uploaded_dir_name}' existed. Saving as '{descriptive_name_for_log_and_status}'."
            original_uploaded_dir_name = descriptive_name_for_log_and_status  # Use the new name for logging
        else:
            descriptive_name_for_log_and_status = original_uploaded_dir_name
            status_update_msg = f"Copying files for '{descriptive_name_for_log_and_status}'..."

        os.makedirs(submission_dir_path, exist_ok=True)
        progress(0.1, desc=status_update_msg)

        for temp_file_obj in progress.tqdm(uploaded_files_list, desc="Copying files"):
            temp_file_path = Path(temp_file_obj.name)
            file_name_in_dir = temp_file_path.name
            target_file_path = submission_dir_path / file_name_in_dir
            shutil.copy(str(temp_file_path), str(target_file_path))

        upload_completion_msg = f"Upload of '{descriptive_name_for_log_and_status}' complete."
        progress(0.8, desc=upload_completion_msg)
    except Exception as e:
        yield f"Error during upload: {str(e)}"
        return

    # --- Start evaluation in a background thread ---
    if not Path(EVAL_SCRIPT_PATH).exists():
        yield f"{upload_completion_msg} BUT CRITICAL ERROR: Evaluation script '{EVAL_SCRIPT_PATH}' not found. Evaluation cannot be started."
        return

    # Pass absolute paths as plain strings to the background thread.
    abs_submission_path = str(submission_dir_path.resolve())
    abs_results_path = str(Path(RESULTS_DIR).resolve())

    eval_thread = threading.Thread(
        target=run_evaluation_in_background,
        args=(abs_submission_path, abs_results_path, descriptive_name_for_log_and_status),
        daemon=True  # Daemon thread: it won't keep the app alive on exit
    )
    eval_thread.start()

    final_status_msg = (
        f"{upload_completion_msg} Evaluation for '{descriptive_name_for_log_and_status}' has started in the background. "
        "Refresh the leaderboard to see the result once it finishes."
    )
    progress(1.0, desc="Background evaluation initiated.")
    yield final_status_msg
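
# Because handle_upload_and_kickoff_eval is a generator, Gradio streams each yielded
# string into the status textbox, so the user sees "Processing upload..." and then
# the final message without the UI blocking on the evaluation itself.
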
# --- Create Directories ---
setup_directories()
# --- Gradio App Definition ---
with gr.Blocks(title="Background Submission, Evaluation, and Leaderboard") as demo:
    gr.Markdown("# Background Submission, Evaluation & Results")
    gr.Markdown(
        f"Upload submissions (directories) to **'{SUBMISSIONS_DIR}'**. "
        f"The evaluation script (`{EVAL_SCRIPT_PATH}`) will process them in the background. "
        f"Results appear in **'{RESULTS_DIR}'**. Refresh the leaderboard to see new results."
    )

    with gr.Row():
        with gr.Column(scale=1):  # Upload column
            gr.Markdown("## 📤 Upload & Evaluate Submission")
            upload_button = gr.UploadButton(
                "Click to Upload Directory for Evaluation",
                file_count="directory",
            )
            upload_status_textbox = gr.Textbox(label="Current Status", interactive=False, lines=4)

        with gr.Column(scale=2):  # Leaderboard column
            gr.Markdown("## 🏆 Results Leaderboard")
            leaderboard_df_component = gr.DataFrame(
                value=load_leaderboard_data,  # Load initial data
                label="Leaderboard (refresh to see new results)",
                interactive=False,
                # every=20  # Uncomment to auto-refresh the leaderboard every 20 seconds
            )
            refresh_leaderboard_button = gr.Button("🔄 Refresh Leaderboard Manually")

    # --- Event Handlers ---
    upload_button.upload(
        fn=handle_upload_and_kickoff_eval,
        inputs=[upload_button],
        outputs=[upload_status_textbox],  # Single output: the status message
        show_progress="full"
    )
    refresh_leaderboard_button.click(
        fn=load_leaderboard_data,
        inputs=None,
        outputs=[leaderboard_df_component]
    )

if __name__ == "__main__":
    demo.queue().launch()
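
# ---------------------------------------------------------------------------
# eval.py is not part of this file. A minimal sketch that would satisfy the
# contract used above (invoked as `python eval.py <submission_dir> <results_dir>`,
# writes a result subdirectory containing a summary.txt with a "Score: ..." line)
# might look like the commented example below. The scoring logic (counting the
# submitted files) is purely illustrative and not the real metric.
#
#   import sys
#   import time
#   from pathlib import Path
#
#   def main():
#       submission_dir = Path(sys.argv[1])
#       results_dir = Path(sys.argv[2])
#       out_dir = results_dir / f"{submission_dir.name}_{time.strftime('%Y%m%d-%H%M%S')}"
#       out_dir.mkdir(parents=True, exist_ok=True)
#       score = len(list(submission_dir.iterdir()))  # placeholder "score"
#       (out_dir / "summary.txt").write_text(
#           f"Submission: {submission_dir.name}\nScore: {score}\n"
#       )
#       print(f"Wrote results to {out_dir}")
#
#   if __name__ == "__main__":
#       main()
# ---------------------------------------------------------------------------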