dawid-lorek committed on
Commit
48d9442
·
verified ·
1 Parent(s): 88fa1a5

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +82 -74
agent.py CHANGED
@@ -1,90 +1,98 @@
1
- # app.py: restored benchmark layout with a corrected agent call
2
 
3
  import os
4
- import requests
5
- import pandas as pd
6
- import gradio as gr
7
  import asyncio
 
 
 
8
 
9
- from agent import answer_question
 
 
 
10
 
11
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
12
 
13
class GAIALlamaAgent:
    """Callable adapter exposing the async ``answer_question`` coroutine as a plain function."""

    def __call__(self, question: str) -> str:
        """Run ``answer_question`` for *question* via ``asyncio.run`` and return its result."""
        pending = answer_question(question)
        return asyncio.run(pending)
 
 
16
 
17
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the benchmark questions, answer each with the agent, and submit the answers.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is ``None`` when the
        run stops before any question is processed, otherwise a
        ``pandas.DataFrame`` with one row per answered question.
    """
    space_id = os.getenv("SPACE_ID")
    if not profile or not profile.username:
        return "Please Login to Hugging Face with the button.", None

    username = profile.username.strip()
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # The loop below calls ``item.get``; anything other than a non-empty list
    # of dicts would otherwise crash or submit nothing useful.
    if not isinstance(questions_data, list) or not questions_data:
        return "Fetched questions list is empty or has an unexpected format.", None

    agent = GAIALlamaAgent()
    results_log = []
    answers_payload = []

    for item in questions_data:
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            continue
        try:
            submitted_answer = agent(question_text)
        except Exception as e:
            # Keep going: one failing question must not abort the whole run.
            submitted_answer = f"[ERROR] {e}"
        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append(
            {"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer}
        )

    # Nothing usable was answered, so there is nothing worth POSTing.
    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}

    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        return final_status, pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission Failed: {e}", pd.DataFrame(results_log)
66
-
67
- # --- Gradio Interface matching original benchmark ---
68
# Build the Gradio UI; the resulting Blocks app is bound to `demo`.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown("""
    **Instructions:**
    1. Please clone this space and modify the agent logic.
    2. Log in to Hugging Face with the button.
    3. Click 'Run Evaluation & Submit All Answers' to run the full GAIA test.
    """)

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")
    # Read-only text area for the status message returned by the run.
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers")

    # The handler's two return values map onto the two outputs in order.
    # NOTE(review): no `inputs` are given; presumably Gradio supplies the
    # `gr.OAuthProfile` argument from the login state — confirm against Gradio docs.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
if __name__ == "__main__":
    # Script entry point: print a startup banner, then start the Gradio app.
    print("\n===== Application Startup =====")
    # Only announce the Space URL when SPACE_ID is set in the environment.
    if space_id := os.getenv("SPACE_ID"):
        print(f"🔗 Space: https://huggingface.co/spaces/{space_id}")
    demo.launch(debug=True)
 
1
+ # agent.py final version without circular imports
2
 
3
  import os
 
 
 
4
  import asyncio
5
+ from llama_index.llms.openai import OpenAI
6
+ from llama_index.core.agent.react.base import ReActAgent
7
+ from llama_index.core.tools import FunctionTool
8
 
9
+ from langchain_community.tools.wikipedia.tool import WikipediaQueryRun
10
+ from langchain_community.utilities.wikipedia import WikipediaAPIWrapper
11
+ from langchain_experimental.tools.python.tool import PythonREPLTool
12
+ from langchain_community.document_loaders import YoutubeLoader
13
 
14
+ import whisper
15
+ import openpyxl
16
 
17
# Report at import time whether the OpenAI credentials are present in the environment.
print(
    "✅ Detected OPENAI_API_KEY"
    if os.getenv("OPENAI_API_KEY")
    else "⚠️ Missing OPENAI_API_KEY – LLM may fail"
)

# Shared Wikipedia client used by the search tool
# (configured with top_k_results=1 and doc_content_chars_max=1000).
api_wrapper = WikipediaAPIWrapper(doc_content_chars_max=1000, top_k_results=1)
 
 
25
 
26
def wikipedia_search(query: str) -> str:
    """Look up *query* on Wikipedia and return the tool's text result.

    Args:
        query: Search phrase handed to the Wikipedia tool.

    Returns:
        The text produced by ``WikipediaQueryRun``, or a string starting with
        ``"[WIKIPEDIA ERROR] "`` when the lookup raises.
    """
    try:
        return WikipediaQueryRun(api_wrapper=api_wrapper).run({"query": query})
    except Exception as e:
        # Same convention as the other tools in this module: report failures
        # as text so the agent can react instead of aborting the whole run.
        return "[WIKIPEDIA ERROR] " + str(e)
 
 
28
 
29
def run_python_with_output(code: str) -> str:
    """Execute *code* in the Python REPL tool and return what the tool reports.

    When the snippet contains no ``print(`` call, its trailing bare expression
    (if any) is wrapped in ``print(...)`` so that its value becomes visible.
    Snippets that end in a statement (assignment, import, loop, ...) are run
    unchanged; wrapping them wholesale in ``print(...)`` would only produce a
    syntax error.

    Args:
        code: Python source to execute.

    Returns:
        The string returned by ``PythonREPLTool().run``.
    """
    # SECURITY NOTE(review): this executes arbitrary, model-generated code in
    # the host process. Run the agent only in a sandboxed environment.
    import ast  # local import: only this tool needs it

    if "print(" not in code:
        try:
            tree = ast.parse(code.strip())
        except SyntaxError:
            tree = None

        if tree is None or not tree.body:
            # Not parseable as a module (or empty): keep the historical
            # behaviour and let the REPL report any error.
            code = f"print({code})"
        elif isinstance(tree.body[-1], ast.Expr):
            # Print only the final expression; earlier statements stay as-is.
            last = tree.body[-1]
            last.value = ast.Call(
                func=ast.Name(id="print", ctx=ast.Load()),
                args=[last.value],
                keywords=[],
            )
            code = ast.unparse(ast.fix_missing_locations(tree))
        # Otherwise the snippet ends in a statement: run it untouched.
    return PythonREPLTool().run(code)
33
+
34
def get_youtube_transcript(url: str) -> str:
    """Return the transcript text for the YouTube video at *url*.

    The page contents of every document loaded by ``YoutubeLoader`` are joined
    with single spaces. Any exception is reported as a string starting with
    ``"[YOUTUBE ERROR] "`` instead of being raised.
    """
    try:
        documents = YoutubeLoader.from_youtube_url(url, add_video_info=False).load()
        pieces = [doc.page_content for doc in documents]
        return " ".join(pieces)
    except Exception as err:
        return "[YOUTUBE ERROR] " + str(err)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
def transcribe_audio(file_path: str) -> str:
    """Transcribe the audio file at *file_path* with Whisper's ``"base"`` model.

    The model is loaded on first use and cached on the function object, so
    later calls do not reload it.

    Args:
        file_path: Path of the audio file to transcribe.

    Returns:
        The ``"text"`` entry of Whisper's result, or a string starting with
        ``"[AUDIO ERROR] "`` when loading or transcription raises.
    """
    try:
        model = getattr(transcribe_audio, "_model", None)
        if model is None:
            # Loading is the expensive step; do it once per process.
            model = whisper.load_model("base")
            transcribe_audio._model = model
        res = model.transcribe(file_path)
        return res["text"]
    except Exception as e:
        return "[AUDIO ERROR] " + str(e)
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
def extract_excel_total_food_sales(file_path: str) -> str:
    """Sum the amounts of all "food" rows in the active sheet of a workbook.

    Rows are read from the second row onward. The second column is treated as
    the category and the third as the amount; a row counts when its category
    is a string containing "food" (case-insensitive). Columns beyond the third
    are ignored, so wider sheets no longer fail.

    Args:
        file_path: Path of the Excel workbook to read.

    Returns:
        The total formatted as ``"$<amount>"`` with two decimals, or a string
        starting with ``"[EXCEL ERROR] "`` when the file cannot be processed.
    """
    try:
        wb = openpyxl.load_workbook(file_path)
        sheet = wb.active
        total = 0.0
        for row in sheet.iter_rows(min_row=2, values_only=True):
            if len(row) < 3:
                # Fail loudly rather than silently returning $0.00.
                raise ValueError(f"expected at least 3 columns, found {len(row)}")
            category, amount = row[1], row[2]
            if isinstance(category, str) and "food" in category.lower():
                # Empty cells come back as None and count as zero.
                total += float(amount or 0)
        return f"${total:.2f}"
    except Exception as e:
        return "[EXCEL ERROR] " + str(e)
61
+
62
+ # Assemble tools
63
# Tool registry handed to the agent: each plain function is wrapped in a
# FunctionTool, in the order listed here.
TOOLS = [
    FunctionTool.from_defaults(tool_fn)
    for tool_fn in (
        wikipedia_search,
        run_python_with_output,
        get_youtube_transcript,
        transcribe_audio,
        extract_excel_total_food_sales,
    )
]
70
+
71
+ # LLM and Agent
72
# Language model backing the agent (OpenAI model name "gpt-4").
llm = OpenAI(model="gpt-4")
# ReAct agent built from the module's TOOLS and the LLM above, with verbose output enabled.
# NOTE(review): confirm that this llama_index version's ReActAgent.from_tools
# honours `system_prompt`; if it does not, the instructions below never reach
# the model — TODO verify against the library documentation.
agent = ReActAgent.from_tools(
    tools=TOOLS,
    llm=llm,
    verbose=True,
    system_prompt="""
You are an expert AI assistant on the GAIA benchmark.

Use available tools (Wikipedia, Python, YouTube transcript, audio, Excel).
Output ONLY the final answer. No reasoning or commentary.
Format exactly as requested (list, number, name, chess move, currency).
If tool fails, output "Tool not available".
""",
)
86
+
87
def answer_question_sync(question: str) -> str:
    """Ask the module-level agent *question* and return its stripped answer text.

    Uses ``resp.response.content`` when the reply exposes it and ``str(resp)``
    otherwise. Any exception is printed and returned as a string starting
    with ``"[ERROR] "``.
    """
    try:
        reply = agent.chat(question)
        has_content = hasattr(reply, "response") and hasattr(reply.response, "content")
        text = reply.response.content if has_content else str(reply)
        return text.strip()
    except Exception as err:
        print("❌ Agent exception:", err)
        return "[ERROR] " + str(err)
96
 
97
async def answer_question(question: str) -> str:
    """Answer *question* without blocking the running event loop.

    The synchronous agent call is executed in the loop's default executor, so
    other tasks on the loop keep running while the agent works and the agent
    itself runs in a thread that has no active event loop.

    Args:
        question: The question text to pass to the agent.

    Returns:
        Whatever ``answer_question_sync`` returns for *question*.
    """
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, answer_question_sync, question)