# hassenhamdi's picture
# Update app.py
# 068bf30 verified
import os
import pandas as pd
import gradio as gr
import logging
import time
# Import the new Settings, Evaluator, and Runner classes
from settings import Settings
from evaluator import Evaluator
from runner import Runner
# Configure logging.
# force=True removes any handlers already attached to the root logger, so this
# INFO-level configuration applies even if logging was set up earlier.
logging.basicConfig(level=logging.INFO, force=True)
logger = logging.getLogger(__name__)
# Initialize settings, evaluator, and runner.
# These are module-level objects shared by every callback defined below; the
# Evaluator and the Runner are both constructed from the same Settings instance.
settings = Settings()
evaluator = Evaluator(settings)
runner = Runner(settings)
# Status text returned by the callbacks when no OAuth profile is available.
LOGIN_MESSAGE = "Please Login to Hugging Face with the button."
# Header-only table returned together with login/error messages so the results
# output always receives a DataFrame.
EMPTY_RESULTS_TABLE = pd.DataFrame(columns=['task_id', 'question', 'answer'])
def _format_elapsed_time(elapsed_time):
"""Formats elapsed time into minutes and seconds."""
minutes = int(elapsed_time // 60)
seconds = elapsed_time % 60
if minutes > 0:
return f"Elapsed time: {minutes} minutes {seconds:.2f} seconds"
else:
return f"Elapsed time: {seconds:.2f} seconds"
def _run_agent_on_questions(questions_list: list, username: str) -> tuple[str, pd.DataFrame]:
    """Run the agent on a list of questions and report how long the run took.

    Args:
        questions_list: Questions forwarded unchanged to ``runner.run_agent``.
        username: User the run is performed for; logged and forwarded to the
            runner.

    Returns:
        A ``(message, results)`` tuple: a completion message that includes the
        elapsed time, and the value returned by ``runner.run_agent``
        (annotated here as a DataFrame of question/answer pairs).
    """
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # (or become negative) when the system wall clock is adjusted mid-run.
    started_at = time.perf_counter()
    logger.info(f"Starting agent run for user: {username} on {len(questions_list)} questions.")
    # The runner handles the agent execution and saving of answers
    question_answer_pairs_df = runner.run_agent(questions_list, username)
    elapsed_time_str = _format_elapsed_time(time.perf_counter() - started_at)
    message = f"Agent run complete. {elapsed_time_str}"
    logger.info(message)
    return message, question_answer_pairs_df
def run_one(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame]:
    """Run the agent on a single question obtained from the evaluator.

    Fetching the question and running the agent are guarded separately so the
    status message names the step that actually failed; a failure inside the
    agent run is no longer reported as "Error getting question".

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status, results)`` tuple. Without a profile the status is
        ``LOGIN_MESSAGE``; on any error it is an error description. In both
        cases the results are ``EMPTY_RESULTS_TABLE``.
    """
    if not profile:
        return LOGIN_MESSAGE, EMPTY_RESULTS_TABLE
    try:
        question = evaluator.get_one_question()
    except Exception as e:
        # UI boundary: report the failure instead of raising, but keep the
        # traceback in the log.
        logger.exception(f"Error getting one question: {e}")
        return f"Error getting question: {e}", EMPTY_RESULTS_TABLE
    try:
        return _run_agent_on_questions([question], profile.username)
    except Exception as e:
        logger.exception(f"Error running agent on one question: {e}")
        return f"Error running agent: {e}", EMPTY_RESULTS_TABLE
def run_all(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame]:
    """Run the agent on every question returned by the evaluator.

    Fetching the questions and running the agent are guarded separately so the
    status message names the step that actually failed; a failure inside the
    agent run is no longer reported as "Error getting questions".

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status, results)`` tuple. Without a profile the status is
        ``LOGIN_MESSAGE``; on any error it is an error description. In both
        cases the results are ``EMPTY_RESULTS_TABLE``.
    """
    if not profile:
        return LOGIN_MESSAGE, EMPTY_RESULTS_TABLE
    try:
        questions = evaluator.get_questions()
    except Exception as e:
        # UI boundary: report the failure instead of raising, but keep the
        # traceback in the log.
        logger.exception(f"Error getting all questions: {e}")
        return f"Error getting questions: {e}", EMPTY_RESULTS_TABLE
    try:
        return _run_agent_on_questions(questions, profile.username)
    except Exception as e:
        logger.exception(f"Error running agent on all questions: {e}")
        return f"Error running agent: {e}", EMPTY_RESULTS_TABLE
def submit(profile: gr.OAuthProfile | None) -> str:
    """Submit the answers stored for the logged-in user for evaluation.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        ``LOGIN_MESSAGE`` when there is no profile; otherwise whatever
        ``evaluator.submit_answers`` returns for the profile's username.
    """
    # Guard clause: nothing can be submitted without a logged-in user.
    if not profile:
        return LOGIN_MESSAGE
    return evaluator.submit_answers(profile.username)
# --- Build Gradio Interface using Blocks ---
# `demo` is the Blocks app launched by the __main__ section. Components are
# declared first (title, instructions, login button, three action buttons,
# status box, results table) and the button callbacks are wired up at the end.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent Evaluation Runner")
    # User-facing instructions. The literal is kept flush-left so the Markdown
    # text itself carries no leading whitespace.
    gr.Markdown(
        """
**Instructions:**
1. Log in to your Hugging Face account using the button below.
2. Click 'Get One Answer' to run the agent on a random question or 'Get All Answers' to run all.
3. Click 'Submit Answers' to submit answers for evaluation. **Your HF username will be submitted for leaderboard tracking.**
---
**Disclaimers:**
* Running 'Get All Answers' can take significant time as the agent processes all 20 questions.
* Agent logs are detailed (DEBUG level) and may appear interleaved due to parallel execution.
* The 'Submit Answers' button uses the most recent agent answers cached locally for your username.
* **API Keys Required:** Ensure `GEMINI_API_KEY` is set as a Space Secret (or environment variable if running locally).
"""
    )
    # Login button; the callbacks below return LOGIN_MESSAGE until a profile
    # is available.
    gr.LoginButton()
    run_one_button = gr.Button("Get One Answer")
    run_all_button = gr.Button("Get All Answers")
    submit_button = gr.Button("Submit Answers")
    # Read-only text box shared by all three actions for status/result text.
    status_output = gr.Textbox(
        label="Run Status / Submission Result", lines=5, interactive=False)
    # Table that receives the DataFrame returned by run_one / run_all.
    results_table = gr.DataFrame(
        label="Questions and Agent Answers", wrap=True)
    # Event wiring. No `inputs` are declared: each callback's only parameter
    # is annotated `gr.OAuthProfile | None`.
    # NOTE(review): presumably Gradio supplies that argument from the login
    # session based on the annotation - confirm against the Gradio docs.
    run_one_button.click(
        fn=run_one, outputs=[status_output, results_table]
    )
    run_all_button.click(
        fn=run_all, outputs=[status_output, results_table]
    )
    # submit() returns only a status string, so only the text box is updated.
    submit_button.click(
        fn=submit, outputs=[status_output]
    )
if __name__ == "__main__":
    # Script entry point: log deployment information, then launch the UI.
    logger.info("\n" + "-"*30 + " App Starting " + "-"*30)
    # Check for SPACE_HOST and SPACE_ID at startup for information
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")
    if space_host_startup:
        logger.info(f"✅ SPACE_HOST found: {space_host_startup}")
        # On Hugging Face Spaces SPACE_HOST is already the full host name
        # ("<subdomain>.hf.space"), so the suffix is appended only when it is
        # missing; appending it unconditionally logged "...hf.space.hf.space".
        runtime_host = (
            space_host_startup
            if space_host_startup.endswith(".hf.space")
            else f"{space_host_startup}.hf.space"
        )
        logger.info(f" Runtime URL should be: https://{runtime_host}")
    else:
        logger.info("ℹ️ SPACE_HOST environment variable not found (running locally?).")
    if space_id_startup:
        logger.info(f"✅ SPACE_ID found: {space_id_startup}")
        logger.info(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        logger.info(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        logger.info("ℹ️ SPACE_ID environment variable not found. Repo URL cannot be determined.")
    # Closing rule with the same width as the opening banner line.
    logger.info("-"*(60 + len(" App Starting ")) + "\n")
    logger.info("Launching Gradio Interface for GAIA Agent Evaluation...")
    demo.launch(debug=True, share=False)