import gradio as gr
import pandas as pd
import os
import shutil
from pathlib import Path
import subprocess  # For running eval.py
import time
import threading  # For background tasks
import sys

# --- Configuration ---
SUBMISSIONS_DIR = "submissions"
RESULTS_DIR = "results"
EVAL_SCRIPT_PATH = "eval.py"
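
# The evaluation script is invoked below as:
#     <python interpreter> eval.py <submission_dir> <results_dir>
# and load_leaderboard_data() expects each result directory it produces to contain
# a summary.txt with a "Score: <value>" line. The real eval.py is not part of this
# file; a minimal, hypothetical stand-in that satisfies this contract (a sketch for
# local testing only, with assumed names and output layout) could look like:
#
#     import sys
#     import time
#     from pathlib import Path
#
#     def main():
#         submission_dir, results_dir = sys.argv[1], sys.argv[2]
#         out_dir = Path(results_dir) / f"{Path(submission_dir).name}_{int(time.time())}"
#         out_dir.mkdir(parents=True, exist_ok=True)
#         n_files = len([p for p in Path(submission_dir).iterdir() if p.is_file()])
#         (out_dir / "summary.txt").write_text(f"Evaluated {n_files} file(s).\nScore: 0\n")
#
#     if __name__ == "__main__":
#         main()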


# --- Helper Functions ---

def setup_directories():
    """Creates the submissions and results directories if they don't exist."""
    os.makedirs(SUBMISSIONS_DIR, exist_ok=True)
    os.makedirs(RESULTS_DIR, exist_ok=True)
    if not os.listdir(RESULTS_DIR):  # Add a placeholder if results is empty
        initial_result_demo_path = Path(RESULTS_DIR) / "initial_example_result"
        if not initial_result_demo_path.exists():
            os.makedirs(initial_result_demo_path, exist_ok=True)
            with open(initial_result_demo_path / "summary.txt", "w") as f:
                f.write("This is a placeholder initial result.\nScore: 0\n")
            print(f"Created a sample directory in '{RESULTS_DIR}' for demonstration.")


def load_leaderboard_data():
    """
    Scans the RESULTS_DIR for subdirectories and returns a DataFrame.
    Each subdirectory name is an entry. Tries to parse a 'Score' from 'summary.txt'.
    """
    if not os.path.exists(RESULTS_DIR):
        return pd.DataFrame(columns=["Result Directory", "Score", "Files"])

    result_dirs = [d for d in os.listdir(RESULTS_DIR) if os.path.isdir(Path(RESULTS_DIR) / d)]

    leaderboard_entries = []
    # Sort result directories by modification time (newest first).
    sorted_result_dirs = sorted(
        result_dirs,
        key=lambda d: (Path(RESULTS_DIR) / d).stat().st_mtime,
        reverse=True
    )

    for dir_name in sorted_result_dirs:
        entry = {"Result Directory": dir_name, "Score": "N/A", "Files": 0}
        result_dir_path = Path(RESULTS_DIR) / dir_name

        try:
            entry["Files"] = len([f for f in os.listdir(result_dir_path) if os.path.isfile(result_dir_path / f)])
        except Exception:
            pass  # Directory might have been removed during scan

        summary_file = result_dir_path / "summary.txt"
        if summary_file.exists():
            try:
                with open(summary_file, "r") as f:
                    for line in f:
                        if line.lower().startswith("score:"):
                            entry["Score"] = line.split(":", 1)[1].strip()
                            break
            except Exception as e:
                print(f"Error parsing summary for {dir_name}: {e}")

        leaderboard_entries.append(entry)

    if not leaderboard_entries:
        return pd.DataFrame(columns=["Result Directory", "Score", "Files"])

    return pd.DataFrame(leaderboard_entries)


def run_evaluation_in_background(submission_dir_path_str: str, results_dir_str: str, submission_name_for_log: str):
    """
    This function runs eval.py in a subprocess. It's intended to be run in a separate thread.
    Outputs from eval.py will go to the console where app.py is running.
    """
    print(
        f"BACKGROUND THREAD: Starting evaluation for '{submission_name_for_log}' using path '{submission_dir_path_str}'...")

    if not Path(EVAL_SCRIPT_PATH).exists():
        print(
            f"BACKGROUND THREAD: CRITICAL ERROR - Evaluation script '{EVAL_SCRIPT_PATH}' not found. Eval aborted for '{submission_name_for_log}'.")
        return

    command = [sys.executable, EVAL_SCRIPT_PATH, submission_dir_path_str, results_dir_str]

    try:
        # Using subprocess.run which is simpler for blocking calls within this thread
        process = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=False,  # Handle non-zero exit codes manually
            timeout=300  # 5-minute timeout for the evaluation script
        )

        eval_output = process.stdout.strip()
        eval_error = process.stderr.strip()

        print(
            f"--- BACKGROUND Eval STDOUT ({submission_name_for_log}) ---\n{eval_output if eval_output else '<No stdout>'}")
        if eval_error:  # Only print stderr if it's not empty
            print(f"--- BACKGROUND Eval STDERR ({submission_name_for_log}) ---\n{eval_error}")

        if process.returncode == 0:
            print(f"BACKGROUND THREAD: Evaluation successful for '{submission_name_for_log}'.")
        else:
            print(
                f"BACKGROUND THREAD: Evaluation FAILED for '{submission_name_for_log}'. Script exit code: {process.returncode}")

    except subprocess.TimeoutExpired:
        print(f"BACKGROUND THREAD: Evaluation for '{submission_name_for_log}' TIMED OUT after 5 minutes.")
    except FileNotFoundError:  # The interpreter or the eval script path could not be resolved by subprocess
        print(
            f"BACKGROUND THREAD: FileNotFoundError - Could not execute command. Ensure '{sys.executable}' and '{EVAL_SCRIPT_PATH}' are valid paths for '{submission_name_for_log}'.")
    except Exception as e:
        print(
            f"BACKGROUND THREAD: An unexpected error occurred during evaluation for '{submission_name_for_log}': {str(e)}")

    print(f"BACKGROUND THREAD: Finished evaluation attempt for '{submission_name_for_log}'.")


def handle_upload_and_kickoff_eval(uploaded_files_list, progress=gr.Progress(track_tqdm=True)):
    """
    Handles directory upload, saves files, and starts eval.py in a background thread.
    Yields a status message for the UI. The leaderboard updates separately.
    """
    yield "Processing upload..."  # Initial status

    if not uploaded_files_list:
        yield "No directory uploaded. Please select a directory."
        return

    try:
        # Determine original uploaded directory name
        first_temp_file_path = Path(uploaded_files_list[0].name)
        original_uploaded_dir_name = first_temp_file_path.parent.name

        submission_dir_path = Path(SUBMISSIONS_DIR) / original_uploaded_dir_name

        # Handle potential name collision
        if submission_dir_path.exists():
            timestamp = time.strftime("%Y%m%d-%H%M%S")
            descriptive_name_for_log_and_status = f"{original_uploaded_dir_name}_{timestamp}"
            submission_dir_path = Path(SUBMISSIONS_DIR) / descriptive_name_for_log_and_status
            status_update_msg = f"Directory '{original_uploaded_dir_name}' existed. Saving as '{descriptive_name_for_log_and_status}'."
            original_uploaded_dir_name = descriptive_name_for_log_and_status  # Use new name for logging
        else:
            descriptive_name_for_log_and_status = original_uploaded_dir_name
            status_update_msg = f"Copying files for '{descriptive_name_for_log_and_status}'..."

        os.makedirs(submission_dir_path, exist_ok=True)
        progress(0.1, desc=status_update_msg)

        # Note: files are copied flat by filename into the submission directory;
        # any nested subdirectory structure from the upload is not preserved.
        for temp_file_obj in progress.tqdm(uploaded_files_list, desc="Copying files"):
            temp_file_path = Path(temp_file_obj.name)
            file_name_in_dir = temp_file_path.name
            target_file_path = submission_dir_path / file_name_in_dir
            shutil.copy(str(temp_file_path), str(target_file_path))

        upload_completion_msg = f"Upload of '{descriptive_name_for_log_and_status}' complete."
        progress(0.8, desc=upload_completion_msg)

    except Exception as e:
        yield f"Error during upload: {str(e)}"
        return

    # --- Start evaluation in a background thread ---
    if not Path(EVAL_SCRIPT_PATH).exists():
        yield f"{upload_completion_msg} BUT CRITICAL ERROR: Evaluation script '{EVAL_SCRIPT_PATH}' not found. Evaluation cannot be started."
        return

    # Pass absolute paths (as strings) to the background thread so they don't depend on the working directory.
    abs_submission_path = str(submission_dir_path.resolve())
    abs_results_path = str(Path(RESULTS_DIR).resolve())

    eval_thread = threading.Thread(
        target=run_evaluation_in_background,
        args=(abs_submission_path, abs_results_path, descriptive_name_for_log_and_status),
        daemon=True  # Set as daemon so it exits when main app exits
    )
    eval_thread.start()

    final_status_msg = (
        f"{upload_completion_msg} Evaluation for '{descriptive_name_for_log_and_status}' has started in the background. "
        "The leaderboard will auto-refresh (or use manual refresh)."
    )
    progress(1.0, desc="Background evaluation initiated.")
    yield final_status_msg


# --- Create Directories ---
setup_directories()

# --- Gradio App Definition ---
with gr.Blocks(title="Background Submission, Evaluation, and Leaderboard") as demo:
    gr.Markdown("# Background Submission, Evaluation & Results")
    gr.Markdown(
        f"Upload submissions (directories) to **'{SUBMISSIONS_DIR}'**. "
        f"The evaluation script (`{EVAL_SCRIPT_PATH}`) will process them in the background. "
        f"Results appear in **'{RESULTS_DIR}'**. The leaderboard auto-refreshes."
    )

    with gr.Row():
        with gr.Column(scale=1):  # Upload Column
            gr.Markdown("## πŸ“€ Upload & Evaluate Submission")
            upload_button = gr.UploadButton(
                "Click to Upload Directory for Evaluation",
                file_count="directory",
            )
            upload_status_textbox = gr.Textbox(label="Current Status", interactive=False, lines=4)

        with gr.Column(scale=2):  # Leaderboard Column
            gr.Markdown("## πŸ† Results Leaderboard")
            leaderboard_df_component = gr.DataFrame(
                value=load_leaderboard_data,  # Load initial data
                label="Leaderboard (auto-refreshes)",
                interactive=False,
                every=20,  # Auto-refresh leaderboard data every 20 seconds (needs the queue enabled at launch)
            )
            refresh_leaderboard_button = gr.Button("πŸ”„ Refresh Leaderboard Manually")

    # --- Event Handlers ---
    upload_button.upload(
        fn=handle_upload_and_kickoff_eval,
        inputs=[upload_button],
        outputs=[upload_status_textbox],  # Single output: the status message
        show_progress="full"
    )

    refresh_leaderboard_button.click(
        fn=load_leaderboard_data,
        inputs=None,
        outputs=[leaderboard_df_component]
    )

if __name__ == "__main__":
    demo.queue().launch()
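
# To run this app locally (assuming Gradio and pandas are installed, e.g. via
# `pip install gradio pandas`): run `python app.py` and open the local URL that
# Gradio prints to the console.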