dawid-lorek committed on
Commit
88fa1a5
·
verified ·
1 Parent(s): 51df914

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +76 -85
agent.py CHANGED
@@ -1,99 +1,90 @@
1
- # agent.py full GAIA-ready agent with working WikipediaQueryRun + tools
2
 
3
  import os
 
 
 
4
  import asyncio
5
 
6
- from llama_index.llms.openai import OpenAI
7
- from llama_index.core.agent.react.base import ReActAgent
8
- from llama_index.core.tools import FunctionTool
9
 
10
- from langchain_community.tools.wikipedia.tool import WikipediaQueryRun
11
- from langchain_community.utilities.wikipedia import WikipediaAPIWrapper
12
- from langchain_experimental.tools.python.tool import PythonREPLTool
13
- from langchain_community.document_loaders import YoutubeLoader
14
 
15
- import whisper
16
- import openpyxl
 
17
 
18
# Import-time sanity check: report whether OPENAI_API_KEY is set to a
# non-empty value. Nothing is raised when it is missing; only a warning is
# printed.
print(
    "✅ Detected OPENAI_API_KEY"
    if os.getenv("OPENAI_API_KEY")
    else "⚠️ Missing OPENAI_API_KEY – LLM may fail"
)
23
 
24
- # Tools definitions
 
 
 
25
 
26
# Shared Wikipedia API wrapper, configured with top_k_results=1 and
# doc_content_chars_max=1000.
api_wrapper = WikipediaAPIWrapper(
    top_k_results=1,
    doc_content_chars_max=1000,
)


def wikipedia_search(query: str) -> str:
    """Run ``query`` through a Wikipedia query tool and return its output.

    A new ``WikipediaQueryRun`` bound to the module-level ``api_wrapper`` is
    created on every call.
    """
    wiki_tool = WikipediaQueryRun(api_wrapper=api_wrapper)
    tool_input = {"query": query}
    return wiki_tool.run(tool_input)
29
-
30
def _as_printing_code(code: str) -> str:
    """Return ``code`` wrapped in ``print(...)`` when it is a lone expression.

    Code that already contains ``print(`` is returned unchanged. Code that is
    not a single valid expression (assignments, loops, several statements,
    syntax errors) is also returned unchanged, because wrapping it in
    ``print(...)`` would produce invalid or mis-parsed Python
    (e.g. ``x = 1`` would become ``print(x = 1)``).
    """
    import ast  # local import so the file's top-level import list is untouched

    if "print(" in code:
        return code

    expression = code.strip()
    try:
        # The snippet itself must be an expression ...
        ast.parse(expression, mode="eval")
        wrapped = f"print({expression})"
        # ... and the wrapped form must still parse (a trailing comment in
        # the snippet would otherwise swallow the closing parenthesis).
        ast.parse(wrapped, mode="eval")
    except (SyntaxError, ValueError):
        return code
    return wrapped


def run_python_with_output(code: str) -> str:
    """Execute ``code`` with ``PythonREPLTool`` and return the tool's output.

    A bare expression such as ``2 + 2`` is wrapped in ``print(...)`` so that
    its value shows up in the captured output; everything else is executed
    as given.

    Args:
        code: Python source to execute.

    Returns:
        Whatever ``PythonREPLTool().run`` returns for the (possibly wrapped)
        code.
    """
    # NOTE(review): this runs arbitrary code supplied by the caller (in this
    # file, an LLM-driven agent). Only use it in an environment where that is
    # acceptable; it is flagged here rather than silently replaced.
    return PythonREPLTool().run(_as_printing_code(code))
34
-
35
def get_youtube_transcript(url: str) -> str:
    """Return the text loaded by ``YoutubeLoader`` for the video at ``url``.

    The ``page_content`` of every loaded document is joined with single
    spaces. Any exception is turned into a string starting with
    ``[YOUTUBE ERROR]`` instead of being raised.
    """
    try:
        documents = YoutubeLoader.from_youtube_url(url, add_video_info=False).load()
        pieces = [document.page_content for document in documents]
        return " ".join(pieces)
    except Exception as exc:
        return f"[YOUTUBE ERROR] {exc}"
42
-
43
def transcribe_audio(file_path: str) -> str:
    """Transcribe the audio file at ``file_path`` with the Whisper "base" model.

    The model is loaded on every call. Any exception is turned into a string
    starting with ``[AUDIO ERROR]`` instead of being raised.
    """
    try:
        transcription = whisper.load_model("base").transcribe(file_path)
        return transcription["text"]
    except Exception as exc:
        return f"[AUDIO ERROR] {exc}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
def extract_excel_total_food_sales(file_path: str) -> str:
    """Sum the amounts of all "food" rows in the active sheet of a workbook.

    Rows are read from the second row onward and each one is unpacked into
    exactly three values; the second is treated as the category and the
    third as the amount. A row counts when its category is a string that
    contains "food" (case-insensitive). Empty amounts count as 0.

    Returns:
        The total formatted as ``$<amount>`` with two decimals, or a string
        starting with ``[EXCEL ERROR]`` if anything raises.
    """
    try:
        worksheet = openpyxl.load_workbook(file_path).active
        food_total = 0.0
        for row in worksheet.iter_rows(min_row=2, values_only=True):
            _, category, amount = row
            if not isinstance(category, str):
                continue
            if "food" not in category.lower():
                continue
            food_total += float(amount or 0)
        return f"${food_total:.2f}"
    except Exception as exc:
        return f"[EXCEL ERROR] {exc}"
62
-
63
# Wrap each helper function in a FunctionTool, keeping the listed order.
TOOLS = [
    FunctionTool.from_defaults(tool_fn)
    for tool_fn in (
        wikipedia_search,
        run_python_with_output,
        get_youtube_transcript,
        transcribe_audio,
        extract_excel_total_food_sales,
    )
]
71
-
72
# LLM and agent wiring: a GPT-4 backed ReAct agent that can use every tool in
# TOOLS. The prompt text below is passed as the ``system_prompt`` argument.
llm = OpenAI(model="gpt-4")
agent = ReActAgent.from_tools(
    tools=TOOLS,
    llm=llm,
    verbose=True,
    system_prompt=(
        "\n"
        "You are an expert AI assistant on the GAIA benchmark.\n"
        "\n"
        "Use available tools (Wikipedia, Python, YouTube transcript, audio, Excel).\n"
        "Output ONLY the final answer. No reasoning or commentary.\n"
        "Format exactly as requested (list, number, name, chess move, currency).\n"
        'If tool fails, output "Tool not available".\n'
    ),
)
87
-
88
def answer_question_sync(question: str) -> str:
    """Ask the module-level ``agent`` a question and return its stripped reply.

    When the reply has a ``response`` attribute that itself exposes
    ``content``, that content is returned; otherwise the reply's ``str()``
    form is used. Any exception is printed and returned as a string starting
    with ``[ERROR]``.
    """
    try:
        reply = agent.chat(question)
        inner = getattr(reply, "response", None)
        if hasattr(inner, "content"):
            return inner.content.strip()
        return str(reply).strip()
    except Exception as exc:
        print(" Agent exception:", exc)
        return f"[ERROR] {exc}"
97
-
98
async def answer_question(question: str) -> str:
    """Async entry point that delegates directly to ``answer_question_sync``.

    The synchronous call is made inline, so nothing is awaited here.
    """
    answer = answer_question_sync(question)
    return answer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py – restored benchmark layout with corrected agent invocation
2
 
3
  import os
4
+ import requests
5
+ import pandas as pd
6
+ import gradio as gr
7
  import asyncio
8
 
9
+ from agent import answer_question
 
 
10
 
11
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 
 
12
 
13
class GAIALlamaAgent:
    """Callable adapter giving the async ``answer_question`` a blocking interface."""

    def __call__(self, question: str) -> str:
        """Run ``answer_question(question)`` to completion and return its result."""
        pending = answer_question(question)
        return asyncio.run(pending)
16
 
17
def _collect_answers(agent, questions_data):
    """Run ``agent`` on each usable question and gather the outcomes.

    Items that are not dicts, lack a truthy ``task_id``, or have no
    ``question`` are skipped. An exception raised by ``agent`` is recorded as
    an ``[ERROR] ...`` answer so one failing question does not abort the run.

    Returns:
        ``(answers_payload, results_log)``: the list of
        ``{"task_id", "submitted_answer"}`` dicts to submit, and the list of
        per-question rows shown in the results table.
    """
    answers_payload = []
    results_log = []
    for item in questions_data:
        if not isinstance(item, dict):
            # A malformed entry must not crash the whole evaluation.
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            continue
        try:
            submitted_answer = agent(question_text)
        except Exception as e:
            submitted_answer = f"[ERROR] {e}"
        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append(
            {"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer}
        )
    return answers_payload, results_log


def _format_submission_status(result_data):
    """Build the status text shown after a successful submission."""
    return (
        f"Submission Successful!\n"
        f"User: {result_data.get('username')}\n"
        f"Overall Score: {result_data.get('score', 'N/A')}% "
        f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
        f"Message: {result_data.get('message', 'No message received.')}"
    )


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, answer them with the agent, and submit the answers.

    Args:
        profile: The logged-in user's profile, or ``None`` when nobody is
            logged in.

    Returns:
        ``(status_message, results)`` where ``results`` is a
        ``pandas.DataFrame`` of the questions and submitted answers, or
        ``None`` when the run stopped before any question was processed.
        Failures are reported through ``status_message``; exceptions from the
        HTTP calls and from the agent are not propagated.
    """
    if not profile or not profile.username:
        return "Please Login to Hugging Face with the button.", None

    username = profile.username.strip()
    space_id = os.getenv("SPACE_ID")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # Guard against an empty or unexpectedly shaped payload before iterating.
    if not isinstance(questions_data, list) or not questions_data:
        return "Fetched questions list is empty or invalid format.", None

    answers_payload, results_log = _collect_answers(GAIALlamaAgent(), questions_data)

    # Do not POST an empty submission to the scoring API.
    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}

    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        return _format_submission_status(result_data), pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission Failed: {e}", pd.DataFrame(results_log)
66
+
67
# --- Gradio Interface matching original benchmark ---
# Components are created in display order: title, instructions, login button,
# run button, status box, results table.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        "\n"
        "**Instructions:**\n"
        "1. Please clone this space and modify the agent logic.\n"
        "2. Log in to Hugging Face with the button.\n"
        "3. Click 'Run Evaluation & Submit All Answers' to run the full GAIA test.\n"
    )

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )
    results_table = gr.DataFrame(label="Questions and Agent Answers")

    # The handler's two return values feed the status box and the table.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )
84
+
85
if __name__ == "__main__":
    # Script entry point: print a startup banner, show the Space URL when
    # SPACE_ID is set, then start the Gradio app in debug mode.
    print("\n===== Application Startup =====")
    if space_id := os.getenv("SPACE_ID"):
        print(f"🔗 Space: https://huggingface.co/spaces/{space_id}")
    demo.launch(debug=True)