Spaces:
Runtime error
Runtime error
File size: 5,572 Bytes
10e9b7d 3c4371f 4ca33ca 2f798b2 ceee7cf 068bf30 4ca33ca ceee7cf 4ca33ca ceee7cf 9c7bde9 068bf30 4ca33ca ceee7cf 068bf30 4ca33ca 33eedd4 4ca33ca 31243f4 4ca33ca 31243f4 4ca33ca 7e4a06b 4ca33ca e80aab9 4ca33ca e80aab9 4ca33ca e80aab9 4ca33ca e80aab9 4ca33ca 0ee0419 e514fd7 4ca33ca 068bf30 e514fd7 e80aab9 7e4a06b e80aab9 4ca33ca e80aab9 4ca33ca e80aab9 4ca33ca e80aab9 4ca33ca 3c4371f ceee7cf 7d65c66 3c4371f 4ca33ca 3c4371f 4ca33ca 7d65c66 ceee7cf 4ca33ca 7d65c66 4ca33ca 068bf30 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
import os
import pandas as pd
import gradio as gr
import logging
import time
# Import the new Settings, Evaluator, and Runner classes
from settings import Settings
from evaluator import Evaluator
from runner import Runner
# Configure logging
# force=True replaces any handlers already installed on the root logger, so
# this INFO-level configuration takes effect even if an imported library
# configured logging first.
logging.basicConfig(level=logging.INFO, force=True)
logger = logging.getLogger(__name__)
# Initialize settings, evaluator, and runner
# These module-level singletons are shared by every UI callback below:
# `evaluator` supplies questions and submits answers, `runner` executes the
# agent. Both are constructed from the same Settings instance.
settings = Settings()
evaluator = Evaluator(settings)
runner = Runner(settings)
# Status text returned by the callbacks when no OAuth profile is available.
LOGIN_MESSAGE = "Please Login to Hugging Face with the button."
# Empty placeholder table returned alongside a status message when there are
# no results to show (not logged in, or an error occurred).
EMPTY_RESULTS_TABLE = pd.DataFrame(columns=['task_id', 'question', 'answer'])
def _format_elapsed_time(elapsed_time):
"""Formats elapsed time into minutes and seconds."""
minutes = int(elapsed_time // 60)
seconds = elapsed_time % 60
if minutes > 0:
return f"Elapsed time: {minutes} minutes {seconds:.2f} seconds"
else:
return f"Elapsed time: {seconds:.2f} seconds"
def _run_agent_on_questions(questions_list: list, username: str) -> tuple[str, pd.DataFrame]:
    """Run the agent on a batch of questions and report how long it took.

    Args:
        questions_list: Questions to pass to ``runner.run_agent``.
        username: Hugging Face username the run is attributed to.

    Returns:
        A ``(status_message, results)`` tuple. The status message contains the
        elapsed time; ``results`` is whatever ``runner.run_agent`` returned
        (annotated as a DataFrame of question/answer pairs).
    """
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # (or become negative) if the wall clock is adjusted during a long run.
    start_time = time.perf_counter()
    logger.info(
        "Starting agent run for user: %s on %d questions.",
        username,
        len(questions_list),
    )
    # Agent execution is delegated entirely to the runner (per the original
    # author's note, the runner also takes care of saving the answers).
    question_answer_pairs_df = runner.run_agent(questions_list, username)
    elapsed_time_str = _format_elapsed_time(time.perf_counter() - start_time)
    message = f"Agent run complete. {elapsed_time_str}"
    logger.info(message)
    return message, question_answer_pairs_df
def run_one(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame]:
    """Run the agent on one question (described in the UI as a random one).

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_message, results_table)`` tuple. When the user is not
        logged in, or when fetching the question or running the agent fails,
        the table is ``EMPTY_RESULTS_TABLE`` and the message explains why.
    """
    if not profile:
        return LOGIN_MESSAGE, EMPTY_RESULTS_TABLE

    # Fetching and running are handled separately so the status message names
    # the stage that actually failed instead of always blaming the fetch.
    try:
        question = evaluator.get_one_question()
    except Exception as e:
        logger.exception("Error getting one question: %s", e)
        return f"Error getting question: {e}", EMPTY_RESULTS_TABLE

    try:
        return _run_agent_on_questions([question], profile.username)
    except Exception as e:
        logger.exception("Error running agent on one question: %s", e)
        return f"Error running agent: {e}", EMPTY_RESULTS_TABLE
def run_all(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame]:
    """Run the agent on every question returned by the evaluator.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_message, results_table)`` tuple. When the user is not
        logged in, or when fetching the questions or running the agent fails,
        the table is ``EMPTY_RESULTS_TABLE`` and the message explains why.
    """
    if not profile:
        return LOGIN_MESSAGE, EMPTY_RESULTS_TABLE

    # Fetching and running are handled separately so the status message names
    # the stage that actually failed instead of always blaming the fetch.
    try:
        questions = evaluator.get_questions()
    except Exception as e:
        logger.exception("Error getting all questions: %s", e)
        return f"Error getting questions: {e}", EMPTY_RESULTS_TABLE

    try:
        return _run_agent_on_questions(questions, profile.username)
    except Exception as e:
        logger.exception("Error running agent on all questions: %s", e)
        return f"Error running agent: {e}", EMPTY_RESULTS_TABLE
def submit(profile: gr.OAuthProfile | None) -> str:
    """Submit the cached answers of the logged-in user for evaluation.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        The string returned by ``evaluator.submit_answers``, the login prompt
        when no user is logged in, or an error description if the submission
        raised.
    """
    if not profile:
        return LOGIN_MESSAGE

    # Mirror run_one/run_all: report failures in the status box instead of
    # letting the exception escape the UI callback.
    try:
        return evaluator.submit_answers(profile.username)
    except Exception as e:
        logger.exception("Error submitting answers: %s", e)
        return f"Error submitting answers: {e}"
# --- Gradio UI (Blocks): header, instructions, login, actions, outputs ---
with gr.Blocks() as demo:
    # Components are created in display order; do not reorder them.
    gr.Markdown("# GAIA Agent Evaluation Runner")
    # The instructions text is kept flush-left inside the literal so the
    # Markdown source contains no leading indentation.
    gr.Markdown(
        """
**Instructions:**
1. Log in to your Hugging Face account using the button below.
2. Click 'Get One Answer' to run the agent on a random question or 'Get All Answers' to run all.
3. Click 'Submit Answers' to submit answers for evaluation. **Your HF username will be submitted for leaderboard tracking.**
---
**Disclaimers:**
* Running 'Get All Answers' can take significant time as the agent processes all 20 questions.
* Agent logs are detailed (DEBUG level) and may appear interleaved due to parallel execution.
* The 'Submit Answers' button uses the most recent agent answers cached locally for your username.
* **API Keys Required:** Ensure `GEMINI_API_KEY` is set as a Space Secret (or environment variable if running locally).
"""
    )
    gr.LoginButton()

    run_one_button = gr.Button("Get One Answer")
    run_all_button = gr.Button("Get All Answers")
    submit_button = gr.Button("Submit Answers")

    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )
    results_table = gr.DataFrame(
        label="Questions and Agent Answers",
        wrap=True,
    )

    # Wire each button to its callback. The run buttons refresh both the
    # status box and the results table; submitting only updates the status.
    for _button, _handler, _targets in (
        (run_one_button, run_one, [status_output, results_table]),
        (run_all_button, run_all, [status_output, results_table]),
        (submit_button, submit, [status_output]),
    ):
        _button.click(fn=_handler, outputs=_targets)
if __name__ == "__main__":
    # Script entry point: log where the app is running, then start Gradio.
    logger.info("\n" + "-"*30 + " App Starting " + "-"*30)
    # Check for SPACE_HOST and SPACE_ID at startup for information
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")
    if space_host_startup:
        logger.info(f"✅ SPACE_HOST found: {space_host_startup}")
        # SPACE_HOST normally already ends in ".hf.space"; strip the suffix
        # before re-adding it so the logged URL never reads
        # "...hf.space.hf.space". A bare subdomain still works as before.
        space_subdomain = space_host_startup.removesuffix(".hf.space")
        logger.info(f" Runtime URL should be: https://{space_subdomain}.hf.space")
    else:
        logger.info("ℹ️ SPACE_HOST environment variable not found (running locally?).")
    if space_id_startup:
        logger.info(f"✅ SPACE_ID found: {space_id_startup}")
        logger.info(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        logger.info(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        logger.info("ℹ️ SPACE_ID environment variable not found. Repo URL cannot be determined.")
    # Closing rule matches the width of the opening banner line.
    logger.info("-"*(60 + len(" App Starting ")) + "\n")
    logger.info("Launching Gradio Interface for GAIA Agent Evaluation...")
    demo.launch(debug=True, share=False)