Spaces:
Runtime error
Runtime error
import os | |
import pandas as pd | |
import gradio as gr | |
import logging | |
import time | |
# Import the new Settings, Evaluator, and Runner classes | |
from settings import Settings | |
from evaluator import Evaluator | |
from runner import Runner | |
# Configure logging.
# force=True removes any handlers already attached to the root logger so this
# INFO-level configuration is the one that takes effect.
logging.basicConfig(level=logging.INFO, force=True)
logger = logging.getLogger(__name__)

# Initialize settings, evaluator, and runner.
# Both the evaluator and the runner are built from the same Settings instance.
# In this file the evaluator is used to fetch questions and submit answers,
# and the runner is used to execute the agent on a list of questions.
settings = Settings()
evaluator = Evaluator(settings)
runner = Runner(settings)

# Status text returned by every UI handler when no user profile is available.
LOGIN_MESSAGE = "Please Login to Hugging Face with the button."
# Empty table returned alongside a status message when there are no results
# to show (not logged in, or an error occurred).
EMPTY_RESULTS_TABLE = pd.DataFrame(columns=['task_id', 'question', 'answer'])
def _format_elapsed_time(elapsed_time): | |
"""Formats elapsed time into minutes and seconds.""" | |
minutes = int(elapsed_time // 60) | |
seconds = elapsed_time % 60 | |
if minutes > 0: | |
return f"Elapsed time: {minutes} minutes {seconds:.2f} seconds" | |
else: | |
return f"Elapsed time: {seconds:.2f} seconds" | |
def _run_agent_on_questions(questions_list: list, username: str) -> tuple[str, pd.DataFrame]:
    """Run the agent on a list of questions and report how long it took.

    Args:
        questions_list: Questions to hand to ``runner.run_agent``.
        username: Name of the user the run is performed for; it is passed
            through to the runner and included in the log output.

    Returns:
        A ``(message, results)`` tuple: ``message`` is a completion status
        that includes the elapsed time, and ``results`` is whatever
        ``runner.run_agent`` returned (annotated here as a DataFrame).

    Raises:
        Anything raised by ``runner.run_agent`` propagates to the caller.
    """
    # perf_counter() is monotonic, so the measured duration cannot jump or go
    # negative when the system clock is adjusted mid-run (time.time() can).
    started_at = time.perf_counter()
    logger.info(
        "Starting agent run for user: %s on %d questions.",
        username,
        len(questions_list),
    )

    # The runner handles the agent execution and saving of answers.
    question_answer_pairs_df = runner.run_agent(questions_list, username)

    elapsed_time_str = _format_elapsed_time(time.perf_counter() - started_at)
    message = f"Agent run complete. {elapsed_time_str}"
    logger.info(message)
    return message, question_answer_pairs_df
def run_one(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame]:
    """Run the agent on a single question for the logged-in user.

    The question comes from ``evaluator.get_one_question()``.

    Args:
        profile: The user's Hugging Face OAuth profile, or ``None`` (or any
            falsy value) when nobody is logged in.

    Returns:
        A ``(status_message, results_table)`` tuple. When the user is not
        logged in, or when fetching the question or running the agent fails,
        the table is ``EMPTY_RESULTS_TABLE`` and the message explains why.
    """
    if not profile:
        return LOGIN_MESSAGE, EMPTY_RESULTS_TABLE

    # Fetching and running are guarded separately so that an agent failure is
    # not reported to the user as a question-fetch failure.
    try:
        question = evaluator.get_one_question()
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("Error getting one question: %s", e)
        return f"Error getting question: {e}", EMPTY_RESULTS_TABLE

    try:
        return _run_agent_on_questions([question], profile.username)
    except Exception as e:
        logger.exception("Error running agent on one question: %s", e)
        return f"Error running agent: {e}", EMPTY_RESULTS_TABLE
def run_all(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame]:
    """Run the agent on every question for the logged-in user.

    The questions come from ``evaluator.get_questions()``.

    Args:
        profile: The user's Hugging Face OAuth profile, or ``None`` (or any
            falsy value) when nobody is logged in.

    Returns:
        A ``(status_message, results_table)`` tuple. When the user is not
        logged in, or when fetching the questions or running the agent fails,
        the table is ``EMPTY_RESULTS_TABLE`` and the message explains why.
    """
    if not profile:
        return LOGIN_MESSAGE, EMPTY_RESULTS_TABLE

    # Fetching and running are guarded separately so that an agent failure is
    # not reported to the user as a question-fetch failure.
    try:
        questions = evaluator.get_questions()
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("Error getting all questions: %s", e)
        return f"Error getting questions: {e}", EMPTY_RESULTS_TABLE

    try:
        return _run_agent_on_questions(questions, profile.username)
    except Exception as e:
        logger.exception("Error running agent on all questions: %s", e)
        return f"Error running agent: {e}", EMPTY_RESULTS_TABLE
def submit(profile: gr.OAuthProfile | None) -> str:
    """Submit the user's cached answers for evaluation.

    Returns the login prompt when no profile is available; otherwise returns
    whatever ``evaluator.submit_answers`` reports for the profile's username.
    """
    if not profile:
        return LOGIN_MESSAGE
    return evaluator.submit_answers(profile.username)
# --- Build Gradio Interface using Blocks ---
# Components are created in the order they should appear on the page:
# title, instructions, login button, the three action buttons, then the
# status box and the results table. `demo` is launched from the
# `__main__` guard at the bottom of the file.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent Evaluation Runner")
    # User-facing instructions and disclaimers, rendered as Markdown.
    gr.Markdown(
        """
        **Instructions:**
        1. Log in to your Hugging Face account using the button below.
        2. Click 'Get One Answer' to run the agent on a random question or 'Get All Answers' to run all.
        3. Click 'Submit Answers' to submit answers for evaluation. **Your HF username will be submitted for leaderboard tracking.**
        ---
        **Disclaimers:**
        * Running 'Get All Answers' can take significant time as the agent processes all 20 questions.
        * Agent logs are detailed (DEBUG level) and may appear interleaved due to parallel execution.
        * The 'Submit Answers' button uses the most recent agent answers cached locally for your username.
        * **API Keys Required:** Ensure `GEMINI_API_KEY` is set as a Space Secret (or environment variable if running locally).
        """
    )
    gr.LoginButton()
    run_one_button = gr.Button("Get One Answer")
    run_all_button = gr.Button("Get All Answers")
    submit_button = gr.Button("Submit Answers")
    # Read-only text area shared by all three actions for their status text.
    status_output = gr.Textbox(
        label="Run Status / Submission Result", lines=5, interactive=False)
    # Table filled by the two "run" actions; submit does not write to it.
    results_table = gr.DataFrame(
        label="Questions and Agent Answers", wrap=True)
    # No `inputs` are declared for any handler. Each handler's only parameter
    # is `profile: gr.OAuthProfile | None`.
    # NOTE(review): presumably Gradio fills that parameter from the login
    # session based on the type annotation - confirm against the Gradio docs.
    run_one_button.click(
        fn=run_one, outputs=[status_output, results_table]
    )
    run_all_button.click(
        fn=run_all, outputs=[status_output, results_table]
    )
    submit_button.click(
        fn=submit, outputs=[status_output]
    )
if __name__ == "__main__":
    # Script entry point: log some deployment information, then start the UI.
    logger.info("\n" + "-"*30 + " App Starting " + "-"*30)

    # Check for SPACE_HOST and SPACE_ID at startup for information only;
    # neither value changes how the app is launched.
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        logger.info(f"✅ SPACE_HOST found: {space_host_startup}")
        # SPACE_HOST may already be the full hostname (ending in ".hf.space").
        # Only append the suffix when it is missing, so the logged URL never
        # ends in ".hf.space.hf.space".
        if space_host_startup.endswith(".hf.space"):
            runtime_host = space_host_startup
        else:
            runtime_host = f"{space_host_startup}.hf.space"
        logger.info(f" Runtime URL should be: https://{runtime_host}")
    else:
        logger.info("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:
        logger.info(f"✅ SPACE_ID found: {space_id_startup}")
        logger.info(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        logger.info(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        logger.info("ℹ️ SPACE_ID environment variable not found. Repo URL cannot be determined.")

    # Closing rule, the same width as the opening "App Starting" banner.
    logger.info("-"*(60 + len(" App Starting ")) + "\n")
    logger.info("Launching Gradio Interface for GAIA Agent Evaluation...")
    demo.launch(debug=True, share=False)