# app.py
import asyncio
import os

import gradio as gr
import pandas as pd
import requests

from agent import answer_question

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

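# Thin synchronous wrapper around the async answer_question() coroutine so the
# rest of the app (and Gradio) can call the agent like a regular function.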
class GAIALlamaAgent:
    def __init__(self):
        print("✅ LangChain/LlamaIndex Agent initialized.")

    def __call__(self, question: str) -> str:
        print(f"📨 Agent received: {question[:50]}...")
        try:
            return asyncio.run(answer_question(question))
        except Exception as e:
            return f"[ERROR] {str(e)}"

def run_and_submit_all(profile: gr.OAuthProfile | None):
    space_id = os.getenv("SPACE_ID")
    username = profile.username if profile else None
    if not username:
        return "Please log in to Hugging Face.", None
    print(f"👤 User: {username}")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""

    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        print(f"📥 Fetched {len(questions_data)} questions")
    except Exception as e:
        return f"❌ Error fetching questions: {e}", None

    agent = GAIALlamaAgent()
    answers_payload = []
    results_log = []
    for item in questions_data:
        qid = item.get("task_id")
        question = item.get("question")
        if not qid or not question:
            continue
        try:
            answer = agent(question)
        except Exception as e:
            answer = f"[AGENT ERROR] {e}"
        answers_payload.append({"task_id": qid, "submitted_answer": answer})
        results_log.append({"Task ID": qid, "Question": question, "Submitted Answer": answer})

    if not answers_payload:
        return "No answers to submit.", pd.DataFrame(results_log)

    submission_data = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload,
    }

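    # Submit all answers in a single POST and report the returned score.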
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        status = (
            f"✅ Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Score: {result_data.get('score')}%\n"
            f"Correct: {result_data.get('correct_count')}/{result_data.get('total_attempted')}\n"
            f"Message: {result_data.get('message')}"
        )
        return status, pd.DataFrame(results_log)
    except Exception as e:
        return f"❌ Submission failed: {e}", pd.DataFrame(results_log)

# --- Build Gradio Interface using Blocks ---
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # 🧠 GAIA Agent Evaluation

        This app runs a LlamaIndex + LangChain powered agent through the GAIA benchmark.

        1. Log in to Hugging Face below.
        2. Click **Run Evaluation & Submit All Answers** to run the agent on every question.
        3. Answers are submitted and scored automatically.
        """
    )
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Status", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
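
# Entry point: when running on a Hugging Face Space, the SPACE_ID environment
# variable identifies this repo and is used below to print its public URL.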
if __name__ == "__main__":
    print("\n🚀 App Starting Up...")
    if os.getenv("SPACE_ID"):
        print(f"🔗 Space: https://huggingface.co/spaces/{os.getenv('SPACE_ID')}")
    demo.launch(debug=True)