"""Gradio front end for running the GAIA agent and submitting its answers.

The module wires three buttons (run one question, run all questions, submit)
to the project's ``Evaluator`` and ``Runner`` objects and exposes the
resulting ``gr.Blocks`` app as the module-level name ``demo``.
"""

import logging
import os
import time

import gradio as gr
import pandas as pd

# Import the new Settings, Evaluator, and Runner classes
from settings import Settings
from evaluator import Evaluator
from runner import Runner

# Configure logging
logging.basicConfig(level=logging.INFO, force=True)
logger = logging.getLogger(__name__)

# Initialize settings, evaluator, and runner
settings = Settings()
evaluator = Evaluator(settings)
runner = Runner(settings)

LOGIN_MESSAGE = "Please Login to Hugging Face with the button."
EMPTY_RESULTS_TABLE = pd.DataFrame(columns=['task_id', 'question', 'answer'])


def _format_elapsed_time(elapsed_time: float) -> str:
    """Format a duration in seconds as a human-readable status string.

    Args:
        elapsed_time: Duration in seconds.

    Returns:
        ``"Elapsed time: M minutes S.SS seconds"`` when the duration is at
        least one minute, otherwise ``"Elapsed time: S.SS seconds"``.
    """
    # Round to the displayed precision *before* splitting into minutes and
    # seconds; otherwise 59.999 s would be rendered as "60.00 seconds" and
    # 119.999 s as "1 minutes 60.00 seconds".
    total_seconds = round(elapsed_time, 2)
    whole_minutes, seconds = divmod(total_seconds, 60)
    minutes = int(whole_minutes)
    if minutes > 0:
        return f"Elapsed time: {minutes} minutes {seconds:.2f} seconds"
    return f"Elapsed time: {seconds:.2f} seconds"


def _run_agent_on_questions(questions_list: list, username: str) -> tuple[str, pd.DataFrame]:
    """Run the agent on a list of questions and report status and results.

    Args:
        questions_list: Questions to pass to ``runner.run_agent``.
        username: Username passed through to ``runner.run_agent``.

    Returns:
        A ``(message, results)`` pair: a completion message that includes the
        elapsed time, and whatever ``runner.run_agent`` returned (annotated
        here as a DataFrame of question/answer pairs).
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # wall-clock adjustments during a long run.
    start_time = time.perf_counter()
    logger.info(f"Starting agent run for user: {username} on {len(questions_list)} questions.")

    # The runner handles the agent execution and saving of answers
    question_answer_pairs_df = runner.run_agent(questions_list, username)

    elapsed_time_str = _format_elapsed_time(time.perf_counter() - start_time)
    message = f"Agent run complete. \n{elapsed_time_str}"
    logger.info(message)
    return message, question_answer_pairs_df


def run_one(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame]:
    """Run the agent on one question obtained from ``evaluator.get_one_question``.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status, results)`` pair. When not logged in, or when fetching or
        running fails, the results are the empty placeholder table and the
        status explains why.
    """
    if not profile:
        return LOGIN_MESSAGE, EMPTY_RESULTS_TABLE
    try:
        question = evaluator.get_one_question()
        return _run_agent_on_questions([question], profile.username)
    except Exception as e:
        # UI boundary: report the failure in the status box instead of
        # crashing the handler, but keep the traceback in the logs.
        logger.exception(f"Error getting one question: {e}")
        return f"Error getting question: {e}", EMPTY_RESULTS_TABLE


def run_all(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame]:
    """Run the agent on every question returned by ``evaluator.get_questions``.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status, results)`` pair. When not logged in, or when fetching or
        running fails, the results are the empty placeholder table and the
        status explains why.
    """
    if not profile:
        return LOGIN_MESSAGE, EMPTY_RESULTS_TABLE
    try:
        questions = evaluator.get_questions()
        return _run_agent_on_questions(questions, profile.username)
    except Exception as e:
        # UI boundary: report the failure in the status box instead of
        # crashing the handler, but keep the traceback in the logs.
        logger.exception(f"Error getting all questions: {e}")
        return f"Error getting questions: {e}", EMPTY_RESULTS_TABLE


def submit(profile: gr.OAuthProfile | None) -> str:
    """Submit answers for evaluation via ``evaluator.submit_answers``.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        The value returned by ``evaluator.submit_answers`` for the user, or
        the login prompt when nobody is logged in.
    """
    if not profile:
        return LOGIN_MESSAGE
    return evaluator.submit_answers(profile.username)


# --- Build Gradio Interface using Blocks ---
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. Log in to your Hugging Face account using the button below.
        2. Click 'Get One Answer' to run the agent on a random question or 'Get All Answers' to run all.
        3. Click 'Submit Answers' to submit answers for evaluation. **Your HF username will be submitted for leaderboard tracking.**

        ---
        **Disclaimers:**
        * Running 'Get All Answers' can take significant time as the agent processes all 20 questions.
        * Agent logs are detailed (DEBUG level) and may appear interleaved due to parallel execution.
        * The 'Submit Answers' button uses the most recent agent answers cached locally for your username.
        * **API Keys Required:** Ensure `GEMINI_API_KEY` is set as a Space Secret (or environment variable if running locally).
        """
    )

    gr.LoginButton()

    run_one_button = gr.Button("Get One Answer")
    run_all_button = gr.Button("Get All Answers")
    submit_button = gr.Button("Submit Answers")

    status_output = gr.Textbox(
        label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(
        label="Questions and Agent Answers", wrap=True)

    # No explicit inputs: each handler only takes the OAuth profile, which is
    # expected to be supplied by Gradio from the `gr.OAuthProfile | None`
    # annotation on the handler.
    run_one_button.click(
        fn=run_one, outputs=[status_output, results_table]
    )
    run_all_button.click(
        fn=run_all, outputs=[status_output, results_table]
    )
    submit_button.click(
        fn=submit, outputs=[status_output]
    )

if __name__ == "__main__":
    logger.info("\n" + "-"*30 + " App Starting " + "-"*30)

    # Check for SPACE_HOST and SPACE_ID at startup for information
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        logger.info(f"✅ SPACE_HOST found: {space_host_startup}")
        # SPACE_HOST may already be a full hostname ending in ".hf.space";
        # only append the suffix when it is missing so the logged URL is not
        # doubled up as "...hf.space.hf.space".
        hf_suffix = ".hf.space"
        if space_host_startup.endswith(hf_suffix):
            runtime_host = space_host_startup
        else:
            runtime_host = f"{space_host_startup}{hf_suffix}"
        logger.info(f" Runtime URL should be: https://{runtime_host}")
    else:
        logger.info("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:
        logger.info(f"✅ SPACE_ID found: {space_id_startup}")
        logger.info(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        logger.info(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        logger.info("ℹ️ SPACE_ID environment variable not found. Repo URL cannot be determined.")

    logger.info("-"*(60 + len(" App Starting ")) + "\n")

    logger.info("Launching Gradio Interface for GAIA Agent Evaluation...")
    demo.launch(debug=True, share=False)