File size: 3,386 Bytes
51df914 cfc7eb3 10e9b7d 3bce169 4c200bf 188585a 51df914 f21f66c 51df914 cfc7eb3 188585a 3bce169 2c0ba2f 51df914 188585a 2c0ba2f f21f66c 2c0ba2f 188585a 2c0ba2f 188585a f21f66c 188585a 4c200bf 2c0ba2f 188585a 2c0ba2f 188585a 2c0ba2f 188585a 2c0ba2f 4c200bf 2c0ba2f f21f66c 2c0ba2f 188585a 2c0ba2f 188585a 4c200bf 2c0ba2f 4c200bf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
# app.py – restored benchmark layout with a corrected agent invocation
import os
import requests
import pandas as pd
import gradio as gr
import asyncio
from agent import answer_question
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
class GAIALlamaAgent:
    """Synchronous, callable front end for the async ``answer_question`` helper."""

    def __call__(self, question: str) -> str:
        """Answer a single question and block until the result is available.

        Args:
            question: The question text handed to ``answer_question``.

        Returns:
            Whatever the ``answer_question`` coroutine resolves to.
        """
        # Build the coroutine first, then drive it to completion with
        # asyncio.run so callers can stay fully synchronous.
        pending_answer = answer_question(question)
        return asyncio.run(pending_answer)
def run_and_submit_all(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame | None]:
    """Fetch the benchmark questions, answer each one, and submit the answers.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or ``None``
            (or a profile without a username) when nobody is logged in.

    Returns:
        A ``(status, results)`` pair. ``status`` is a human-readable message
        describing the outcome. ``results`` is a ``pandas.DataFrame`` with one
        row per processed question (task id, question, submitted answer), or
        ``None`` when the run stopped before any question was processed.
    """
    space_id = os.getenv("SPACE_ID")
    if not profile or not profile.username:
        return "Please Login to Hugging Face with the button.", None

    username = profile.username.strip()
    # Link to this Space's source tree, sent with the submission; empty when
    # the SPACE_ID environment variable is not set.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    # Any failure here (network, HTTP status, invalid JSON) is reported to the
    # UI as a status message rather than raised.
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # The loop below expects a list of dicts. A JSON object, string, or empty
    # list would otherwise crash on ``item.get`` or lead to an empty submission.
    if not isinstance(questions_data, list) or not questions_data:
        return "Fetched questions list is empty or invalid format.", None

    agent = GAIALlamaAgent()
    results_log = []
    answers_payload = []
    for item in questions_data:
        if not isinstance(item, dict):
            # Malformed entry: skip it instead of raising AttributeError.
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            continue
        # A failing question must not abort the whole run: the error text is
        # recorded and submitted as that task's answer.
        try:
            submitted_answer = agent(question_text)
        except Exception as e:
            submitted_answer = f"[ERROR] {e}"
        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})

    # Nothing usable was found in the question list; do not POST an empty payload.
    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        return final_status, pd.DataFrame(results_log)
    except Exception as e:
        # Still return the per-question log so the answers remain visible.
        return f"Submission Failed: {e}", pd.DataFrame(results_log)
# --- Gradio interface mirroring the original benchmark layout ---
with gr.Blocks() as demo:
    # Page contents, in the order they are declared: title, instructions,
    # login button, run button, status box, and results table.
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
**Instructions:**
1. Please clone this space and modify the agent logic.
2. Log in to Hugging Face with the button.
3. Click 'Run Evaluation & Submit All Answers' to run the full GAIA test.
"""
    )
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )
    results_table = gr.DataFrame(label="Questions and Agent Answers")
    # No explicit inputs are wired here; the handler's `profile` argument is
    # presumably supplied by Gradio from its OAuthProfile type hint — confirm
    # against the Gradio OAuth docs.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )
if __name__ == "__main__":
    # Script entry point: print a short startup banner, then serve the UI.
    print("\n===== Application Startup =====")
    # SPACE_ID is read from the environment; the link is printed only when
    # it is set to a non-empty value.
    space_id = os.getenv("SPACE_ID")
    if space_id:
        print(f"🔗 Space: https://huggingface.co/spaces/{space_id}")
    demo.launch(debug=True)