File size: 3,615 Bytes
3bce169
cfc7eb3
10e9b7d
3bce169
4c200bf
188585a
3bce169
4c200bf
cfc7eb3
188585a
 
3bce169
188585a
4c200bf
188585a
 
4c200bf
 
 
 
 
188585a
 
 
 
 
4c200bf
188585a
4c200bf
188585a
4c200bf
 
 
188585a
 
4c200bf
188585a
 
4c200bf
188585a
4c200bf
188585a
4c200bf
188585a
 
 
 
4c200bf
 
 
 
188585a
4c200bf
188585a
4c200bf
 
 
 
 
 
188585a
3bce169
 
 
 
 
188585a
 
4c200bf
188585a
 
3bce169
 
188585a
 
 
 
 
3bce169
188585a
4c200bf
188585a
4c200bf
188585a
4c200bf
 
 
 
 
 
 
 
 
188585a
4c200bf
 
 
 
 
188585a
4c200bf
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# app.py

import os
import requests
import pandas as pd
import gradio as gr
from agent import answer_question
import asyncio

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

class GAIALlamaAgent:
    """Synchronous, callable wrapper around the ``answer_question`` coroutine.

    An instance is called with a question string and returns the answer as a
    string. Failures are reported in-band as an ``"[ERROR] <message>"`` string
    instead of being raised, so a single bad question cannot abort a batch.
    """

    def __init__(self) -> None:
        # The wrapper keeps no state; this is only a startup log line.
        print("✅ LangChain/LlamaIndex Agent initialized.")

    def __call__(self, question: str) -> str:
        """Answer *question* by running ``answer_question`` to completion.

        Args:
            question: The question text; only its first 50 characters are
                echoed to the log.

        Returns:
            Whatever ``answer_question`` produces, or ``"[ERROR] <message>"``
            when calling or awaiting it raises any ``Exception``.
        """
        print(f"📨 Agent received: {question[:50]}...")
        try:
            # asyncio.run drives the coroutine on a fresh event loop per call.
            return asyncio.run(answer_question(question))
        except Exception as e:
            # Deliberate best-effort: surface the failure as the answer text.
            return f"[ERROR] {e}"

def run_and_submit_all(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame | None]:
    """Fetch the benchmark questions, answer each one, and submit the answers.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or ``None``
            when nobody is logged in. The click handler declares no inputs,
            so this argument is expected to be supplied by Gradio from the
            login session based on the type annotation.

    Returns:
        A ``(status, results)`` pair. ``status`` is a human-readable message.
        ``results`` is a DataFrame with one row per answered question
        (columns ``Task ID``, ``Question``, ``Submitted Answer``), or ``None``
        when the run stopped before any question was processed (not logged
        in, or the question fetch failed).

    Network/HTTP failures while fetching or submitting are reported through
    ``status`` rather than raised.
    """
    space_id = os.getenv("SPACE_ID")
    username = profile.username if profile else None
    if not username:
        return "Please log in to Hugging Face.", None

    print(f"👤 User: {username}")
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"
    # Link to this Space's code, sent with the submission; empty when SPACE_ID is unset.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""

    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"❌ Error fetching questions: {e}", None

    # A JSON object or scalar here would otherwise blow up inside the loop
    # below (outside any try), so reject it with a readable status instead.
    if not isinstance(questions_data, list):
        return (
            "❌ Error fetching questions: expected a list of questions, "
            f"got {type(questions_data).__name__}",
            None,
        )
    print(f"📥 Fetched {len(questions_data)} questions")

    agent = GAIALlamaAgent()
    answers_payload = []
    results_log = []

    for item in questions_data:
        # Skip malformed entries instead of crashing on ``item.get``.
        if not isinstance(item, dict):
            continue
        qid = item.get("task_id")
        question = item.get("question")
        if not qid or not question:
            continue
        try:
            answer = agent(question)
        except Exception as e:
            # Keep going: one failing question must not lose the whole run.
            answer = f"[AGENT ERROR] {e}"
        answers_payload.append({"task_id": qid, "submitted_answer": answer})
        results_log.append({"Task ID": qid, "Question": question, "Submitted Answer": answer})

    if not answers_payload:
        return "No answers to submit.", pd.DataFrame(results_log)

    submission_data = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        status = (
            "✅ Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Score: {result_data.get('score')}%\n"
            f"Correct: {result_data.get('correct_count')}/{result_data.get('total_attempted')}\n"
            f"Message: {result_data.get('message')}"
        )
        return status, pd.DataFrame(results_log)
    except Exception as e:
        # Still return the per-question log so the user can see the answers.
        return f"❌ Submission failed: {e}", pd.DataFrame(results_log)

# --- Gradio UI: intro text, login, trigger button, and result widgets ---
with gr.Blocks() as demo:
    # Static introduction shown at the top of the page.
    gr.Markdown(
        value="""
        # 🧠 GAIA Agent Evaluation

        This app runs a LlamaIndex + LangChain powered agent through the GAIA benchmark.

        1. Login to Hugging Face below
        2. Click **Run Evaluation** to test all questions
        3. Answers will be submitted and scored
        """
    )

    # Hugging Face login; the evaluation handler refuses to run without a user.
    gr.LoginButton()

    run_button = gr.Button(value="Run Evaluation & Submit All Answers")

    # Read-only status message returned by the evaluation handler.
    status_output = gr.Textbox(
        interactive=False,
        lines=5,
        label="Status",
    )
    # Per-question log returned by the evaluation handler.
    results_table = gr.DataFrame(
        wrap=True,
        label="Questions and Agent Answers",
    )

    # No explicit inputs: the handler only takes the OAuth profile.
    run_button.click(
        run_and_submit_all,
        outputs=[status_output, results_table],
    )

if __name__ == "__main__":
    # Script entry point: log where the app is hosted (when known), then serve.
    # NOTE(review): the emoji below was garbled with its last byte lost, so the
    # original glyph is not recoverable; this is the closest reconstruction.
    print("\n🔁 App Starting Up...")
    space_id = os.getenv("SPACE_ID")
    if space_id:
        print(f"🔗 Space: https://huggingface.co/spaces/{space_id}")
    demo.launch(debug=True)