hassenhamdi's picture
Update app.py
e205d28 verified
raw
history blame
5.57 kB
import os
import pandas as pd
import gradio as gr
import logging
import time
from settings import Settings
from evaluator import Evaluator
from runner import Runner
# --- Module-level setup: logging, settings, and the shared service objects ---

# force=True makes basicConfig drop any handlers already attached to the root
# logger, so this configuration wins even if something configured logging first.
logging.basicConfig(force=True, level=logging.INFO)
logger = logging.getLogger(__name__)

# Both values are read from the environment; os.getenv yields None when unset.
settings = Settings(username=os.getenv("USERNAME"), space_id=os.getenv("SPACE_ID"))

# Single shared instances, both built from the same settings object.
evaluator = Evaluator(settings)
runner = Runner(settings)

# Values returned by the UI callbacks when there is nothing to show.
LOGIN_MESSAGE = "Please Login to Hugging Face with the button."
EMPTY_RESULTS_TABLE = pd.DataFrame(columns=["task_id", "question", "answer"])
def _format_elapsed_time(elapsed_time):
    """Format a duration in seconds as a human-readable elapsed-time string.

    Args:
        elapsed_time: Duration in seconds (int or float). Negative values
            are treated as zero.

    Returns:
        ``"Elapsed time: M minutes S.SS seconds"`` when the duration, rounded
        to hundredths of a second, is at least one minute; otherwise
        ``"Elapsed time: S.SS seconds"``.
    """
    # Round to the displayed precision *before* splitting into minutes and
    # seconds. Splitting first lets a remainder such as 59.999 be printed as
    # "60.00 seconds" instead of carrying into the minutes field.
    total_seconds = round(max(elapsed_time, 0.0), 2)
    minutes, seconds = divmod(total_seconds, 60)
    minutes = int(minutes)
    if minutes > 0:
        return f"Elapsed time: {minutes} minutes {seconds:.2f} seconds"
    return f"Elapsed time: {seconds:.2f} seconds"
def _run_agent_on_questions(questions_list: list, username: str) -> tuple[str, pd.DataFrame]:
    """Run the agent on a list of questions and report how long it took.

    Args:
        questions_list: Questions to hand to the runner.
        username: Passed through to ``runner.run_agent`` and included in the
            start-of-run log line.

    Returns:
        A ``(message, results)`` pair: the completion message including the
        formatted elapsed time, and whatever ``runner.run_agent`` returned
        (annotated here as a DataFrame of question/answer pairs).
    """
    # perf_counter is monotonic, so the measured interval cannot be skewed
    # (or go negative) if the system wall clock is adjusted during the run.
    start = time.perf_counter()
    logger.info(
        "Starting agent run for user: %s on %d questions.",
        username,
        len(questions_list),
    )

    # Per the original author's note, the runner handles both agent execution
    # and saving of the answers.
    question_answer_pairs_df = runner.run_agent(questions_list, username)

    elapsed_time_str = _format_elapsed_time(time.perf_counter() - start)
    message = f"Agent run complete. {elapsed_time_str}"
    logger.info(message)
    return message, question_answer_pairs_df
def run_one(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame]:
    """Run the agent on a single question obtained from the evaluator.

    Args:
        profile: OAuth profile of the logged-in user, or None when nobody is
            logged in.

    Returns:
        A ``(status message, results table)`` pair. When no profile is given
        or a step fails, the message explains why and the table is
        ``EMPTY_RESULTS_TABLE``.
    """
    if not profile:
        return LOGIN_MESSAGE, EMPTY_RESULTS_TABLE

    # Fetching and running are guarded separately so a failure inside the
    # agent run is not misreported to the user as a question-fetch error.
    try:
        question = evaluator.get_one_question()
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error discarded.
        logger.exception("Error getting one question: %s", e)
        return f"Error getting question: {e}", EMPTY_RESULTS_TABLE

    try:
        return _run_agent_on_questions([question], profile.username)
    except Exception as e:
        logger.exception("Error running agent on one question: %s", e)
        return f"Error running agent: {e}", EMPTY_RESULTS_TABLE
def run_all(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame]:
    """Run the agent on every question returned by the evaluator.

    Args:
        profile: OAuth profile of the logged-in user, or None when nobody is
            logged in.

    Returns:
        A ``(status message, results table)`` pair. When no profile is given
        or a step fails, the message explains why and the table is
        ``EMPTY_RESULTS_TABLE``.
    """
    if not profile:
        return LOGIN_MESSAGE, EMPTY_RESULTS_TABLE

    # Fetching and running are guarded separately so a failure inside the
    # agent run is not misreported to the user as a question-fetch error.
    try:
        questions = evaluator.get_questions()
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error discarded.
        logger.exception("Error getting all questions: %s", e)
        return f"Error getting questions: {e}", EMPTY_RESULTS_TABLE

    try:
        return _run_agent_on_questions(questions, profile.username)
    except Exception as e:
        logger.exception("Error running agent on all questions: %s", e)
        return f"Error running agent: {e}", EMPTY_RESULTS_TABLE
def submit(profile: gr.OAuthProfile | None) -> str:
    """Submit the answers held by the evaluator for the logged-in user.

    Args:
        profile: OAuth profile of the logged-in user, or None when nobody is
            logged in.

    Returns:
        The string returned by ``evaluator.submit_answers`` for the profile's
        username, or ``LOGIN_MESSAGE`` when no profile is available.
    """
    if not profile:
        return LOGIN_MESSAGE
    return evaluator.submit_answers(profile.username)
# --- Gradio UI, assembled with the Blocks API ---
with gr.Blocks() as demo:
    # Page title followed by the usage notes shown to the user.
    gr.Markdown("# GAIA Agent Evaluation Runner")
    gr.Markdown(
        """
**Instructions:**
1. Log in to your Hugging Face account using the button below.
2. Click 'Get One Answer' to run the agent on a random question or 'Get All Answers' to run all.
3. Click 'Submit Answers' to submit answers for evaluation. **Your HF username will be submitted for leaderboard tracking.**
---
**Disclaimers:**
* Running 'Get All Answers' can take significant time as the agent processes all 20 questions.
* Agent logs are detailed (DEBUG level) and may appear interleaved due to parallel execution.
* The 'Submit Answers' button uses the most recent agent answers cached locally for your username.
* **API Keys Required:** Ensure `GEMINI_API_KEY`, `USERNAME`, and `SPACE_ID` are set as Space Secrets (or environment variables if running locally).
"""
    )

    # Controls, in the order they are laid out on the page.
    gr.LoginButton()
    run_one_button = gr.Button("Get One Answer")
    run_all_button = gr.Button("Get All Answers")
    submit_button = gr.Button("Submit Answers")

    # Read-only status text plus the table of question/answer pairs.
    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No inputs= are declared: each callback's only parameter is the OAuth
    # profile (presumably supplied by Gradio from the login session - confirm
    # against the Gradio OAuth documentation).
    run_one_button.click(fn=run_one, outputs=[status_output, results_table])
    run_all_button.click(fn=run_all, outputs=[status_output, results_table])
    submit_button.click(fn=submit, outputs=[status_output])
if __name__ == "__main__":
    # Script entry point: log a startup banner describing the environment,
    # then launch the Gradio app.
    logger.info("\n" + "-" * 30 + " App Starting " + "-" * 30)

    # SPACE_HOST and SPACE_ID are read purely for informational logging.
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        logger.info(f"✅ SPACE_HOST found: {space_host_startup}")
        # SPACE_HOST already holds the full domain (e.g. "user-space.hf.space"),
        # so appending ".hf.space" again would log a URL that does not resolve.
        logger.info(f" Runtime URL should be: https://{space_host_startup}")
    else:
        logger.info("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:
        logger.info(f"✅ SPACE_ID found: {space_id_startup}")
        logger.info(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        logger.info(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        logger.info("ℹ️ SPACE_ID environment variable not found. Repo URL cannot be determined.")

    # Closing rule sized to match the opening banner line.
    logger.info("-" * (60 + len(" App Starting ")) + "\n")
    logger.info("Launching Gradio Interface for GAIA Agent Evaluation...")
    demo.launch(debug=True, share=False)