File size: 3,904 Bytes
17268b7
cfc7eb3
10e9b7d
3bce169
4c200bf
188585a
4c200bf
17268b7
 
cfc7eb3
188585a
 
3bce169
17268b7
 
 
 
 
 
 
188585a
17268b7
188585a
 
 
 
 
17268b7
188585a
 
4c200bf
 
 
188585a
 
4c200bf
188585a
 
 
4c200bf
188585a
4c200bf
188585a
 
 
 
4c200bf
 
 
 
188585a
4c200bf
188585a
4c200bf
 
 
 
3bce169
 
 
 
 
188585a
 
4c200bf
188585a
 
3bce169
 
188585a
 
 
 
 
3bce169
188585a
4c200bf
188585a
17268b7
188585a
4c200bf
 
 
17268b7
 
188585a
4c200bf
17268b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188585a
4c200bf
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# app.py — updated for GAIA tools (file upload for audio/Excel)

import os
import requests
import pandas as pd
import gradio as gr
import asyncio
import tempfile
from agent import answer_question, transcribe_audio, extract_excel_total_food_sales

# Base URL of the scoring service; run_and_submit_all appends "/questions"
# and "/submit" to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

class GAIALlamaAgent:
    """Callable agent that answers a question, optionally using an attached file.

    When a file path is supplied, the file's extension decides the route:
    ``.mp3`` goes to ``transcribe_audio`` and ``.xlsx``/``.xls`` go to
    ``extract_excel_total_food_sales``. Every other call (no file, or an
    unrecognised extension) is answered by ``answer_question``.
    """

    # Lower-case extensions (with the leading dot) recognised by the shortcuts.
    _AUDIO_SUFFIXES = (".mp3",)
    _EXCEL_SUFFIXES = (".xlsx", ".xls")

    def __call__(self, question: str, file_path: str | None = None) -> str:
        """Return the agent's answer for *question*.

        Args:
            question: The question text.
            file_path: Optional path of a file that accompanies the question.

        Returns:
            The result of the file-specific helper when the extension is
            recognised, otherwise the result of ``answer_question``.
        """
        if file_path:
            # Compare the real extension, case-insensitively, rather than
            # searching the whole path: a substring test would mis-route
            # names such as "mp3_catalog.xlsx" or directories containing "xls".
            suffix = os.path.splitext(file_path)[1].lower()
            if suffix in self._AUDIO_SUFFIXES:
                return transcribe_audio(file_path)
            if suffix in self._EXCEL_SUFFIXES:
                return extract_excel_total_food_sales(file_path)

        # answer_question is a coroutine function; drive it to completion here
        # so callers can stay synchronous.
        return asyncio.run(answer_question(question))

def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the benchmark questions, answer each one, and submit the answers.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or ``None``
            when nobody is logged in.

    Returns:
        A ``(status, table)`` pair. ``status`` is a human-readable message.
        ``table`` is a ``pandas.DataFrame`` with one row per attempted
        question, or ``None`` when the run stopped before any question was
        attempted (not logged in, fetch failure, nothing to answer).
    """
    username = profile.username if profile else None
    if not username:
        return "Please login to Hugging Face.", None

    space_id = os.getenv("SPACE_ID")
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""

    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"❌ Error fetching questions: {e}", None

    # Anything other than a non-empty list cannot be iterated as questions;
    # report it instead of crashing on `.get` further down.
    if not isinstance(questions_data, list) or not questions_data:
        return "❌ Fetched questions list is empty or has an invalid format.", None

    agent = GAIALlamaAgent()
    answers_payload = []
    results_log = []

    # NOTE(review): questions are answered from their text alone; the agent's
    # optional file_path argument is never supplied in this loop.
    for item in questions_data:
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question = item.get("question")
        if not task_id or not question:
            continue
        try:
            answer = agent(question)
        except Exception as e:
            # One failing question must not abort the whole run; record the
            # error text as that question's answer.
            answer = f"[AGENT ERROR] {e}"
        answers_payload.append({"task_id": task_id, "submitted_answer": answer})
        results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": answer})

    if not answers_payload:
        # Every entry was skipped, so there is nothing worth POSTing.
        return "❌ No answerable questions were returned; nothing to submit.", None

    submission_data = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        status = (
            f"✅ Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Score: {result_data.get('score')}%\n"
            f"Correct: {result_data.get('correct_count')}/{result_data.get('total_attempted')}\n"
            f"Message: {result_data.get('message')}"
        )
        return status, pd.DataFrame(results_log)
    except Exception as e:
        # Still return the table so the answers can be inspected after a
        # failed submission.
        return f"❌ Submission failed: {e}", pd.DataFrame(results_log)

# --- Gradio UI ---
def _ask_agent(question, uploaded_file):
    """Answer one manual question from the UI, optionally with an uploaded file.

    Args:
        question: Text from the question box (may be empty).
        uploaded_file: Value delivered by the ``gr.File`` input, or ``None``
            when no file was uploaded.

    Returns:
        The agent's answer, a short prompt when there is nothing to ask, or
        an ``[AGENT ERROR] ...`` message if the agent raised.
    """
    file_path = None
    if uploaded_file:
        # The upload may arrive as a plain path string or as an object that
        # exposes the path as `.name`; accept both.
        file_path = getattr(uploaded_file, "name", uploaded_file)

    question = question or ""
    if not question.strip() and not file_path:
        return "Please enter a question or upload a file."

    try:
        return GAIALlamaAgent()(question, file_path)
    except Exception as e:
        # Same error format as the batch run, so failures read alike in both paths.
        return f"[AGENT ERROR] {e}"


with gr.Blocks() as demo:
    gr.Markdown("""
        # 🧠 GAIA Agent Evaluation

        Upload your files if needed and evaluate the agent's answers.
    """)
    gr.LoginButton()

    # Manual, single-question mode.
    with gr.Row():
        question_box = gr.Textbox(label="Manual Question (optional)", lines=3)
        file_input = gr.File(label="Optional File (.mp3 or .xlsx)", file_types=[".mp3", ".xlsx"])
        submit_btn = gr.Button("Ask Agent")

    output_text = gr.Textbox(label="Answer")
    submit_btn.click(
        fn=_ask_agent,
        inputs=[question_box, file_input],
        outputs=output_text,
    )

    # Full benchmark mode: fetch all questions, answer them, submit the results.
    gr.Markdown("## Or run full benchmark submission")
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    run_out = gr.Textbox(label="Status", lines=4)
    run_table = gr.DataFrame(label="Questions and Agent Answers")
    run_btn.click(fn=run_and_submit_all, outputs=[run_out, run_table])

if __name__ == "__main__":
    # Startup banner; the Space link is printed only when SPACE_ID is set.
    print("\nApp Starting Up...")
    space_id = os.getenv("SPACE_ID")
    if space_id:
        print(f"🔗 Space: https://huggingface.co/spaces/{space_id}")
    demo.launch(debug=True)