# dawid-lorek's picture
# Update agent.py
# a1bd715 verified
# raw
# history blame
# 4.34 kB
# agent.py — GAIA-ready async agent with FunctionCallingAgent + run fix
import ast
import asyncio
import functools
import os

import openpyxl
import whisper
from langchain_community.document_loaders import YoutubeLoader
from langchain_community.utilities.wikipedia import WikipediaAPIWrapper
from langchain_experimental.tools.python.tool import PythonREPLTool
from llama_index.core.agent import FunctionCallingAgent
from llama_index.core.tools import FunctionTool
from llama_index.llms.openai import OpenAI
# Check OpenAI key.
# Runs at import time and only reports: a missing key is announced here so
# that a later LLM failure is easier to diagnose; nothing is raised.
if os.getenv("OPENAI_API_KEY"):
    print("✅ Detected OPENAI_API_KEY")
else:
    print("⚠️ Missing OPENAI_API_KEY – LLM may fail")
# Tool 1 — Wikipedia
# Shared wrapper used by search_wikipedia; configured with top_k_results=1
# and doc_content_chars_max=1000 to keep the tool output short.
wiki_api = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=1000)
def search_wikipedia(query: str) -> str:
    """Search Wikipedia for *query* and return the wrapper's summary text.

    Args:
        query: Free-text search terms.

    Returns:
        Whatever ``wiki_api.run`` produces for the query. On any failure a
        string starting with ``[WIKIPEDIA ERROR]`` is returned instead of
        raising, the same convention the other tool functions in this
        module follow, so the agent receives the problem as tool output.
    """
    try:
        return wiki_api.run(query)
    except Exception as e:  # tool boundary: report the failure, don't raise
        return f"[WIKIPEDIA ERROR] {e}"
# Tool 2 — Python with output
# Shared REPL tool instance that run_python_code delegates to.
python_tool = PythonREPLTool()
def _print_last_expression(code: str) -> str:
    """Return *code* with a trailing bare expression wrapped in ``print()``.

    The REPL tool only reports what is printed, so a snippet ending in a
    bare expression would otherwise produce no output. Rules:

    * Code that already contains ``print(`` is returned unchanged.
    * Code that parses and ends with an expression statement gets only that
      final expression wrapped; earlier statements are kept verbatim.
    * Code that parses but ends with a non-expression statement (assignment,
      loop, ...) is returned unchanged.
    * Code that does not parse keeps the legacy behaviour of wrapping the
      whole string, so inputs that worked before still work.
    """
    if "print(" in code:
        return code
    try:
        tree = ast.parse(code)
    except (SyntaxError, ValueError):
        # Legacy fallback: e.g. a leading-space expression or a bare
        # generator expression is only valid once inside print(...).
        return f"print({code})"
    if not tree.body:
        return code
    last = tree.body[-1]
    # A non-zero column means the expression shares its line with an earlier
    # statement (after ';'); that rare layout is left exactly as written.
    if not isinstance(last, ast.Expr) or last.col_offset != 0:
        return code
    expression = ast.get_source_segment(code, last.value)
    if expression is None:
        return code
    leading = code.split("\n")[: last.lineno - 1]
    return "\n".join([*leading, f"print({expression})"])


def run_python_code(code: str) -> str:
    """Run Python code through the shared REPL tool and return its output.

    Args:
        code: Python source. If it contains no ``print(`` call and ends with
            a bare expression, that expression is printed so its value shows
            up in the result.

    Returns:
        The string returned by ``python_tool.run``; on any exception a
        string starting with ``[PYTHON ERROR]``.
    """
    # SECURITY: this executes whatever source the model supplies. Run the
    # agent only in an environment where arbitrary code execution is
    # acceptable (sandbox/container).
    try:
        return python_tool.run(_print_last_expression(code))
    except Exception as e:
        return f"[PYTHON ERROR] {e}"
# Tool 3 — YouTube transcript
def get_youtube_transcript(url: str) -> str:
    """Fetch the transcript of a YouTube video as one string.

    Args:
        url: Video URL handed to ``YoutubeLoader.from_youtube_url``.

    Returns:
        The ``page_content`` of every loaded document joined with single
        spaces; on any exception a string starting with ``[YOUTUBE ERROR]``.
    """
    try:
        documents = YoutubeLoader.from_youtube_url(url, add_video_info=False).load()
        pieces = [doc.page_content for doc in documents]
        return " ".join(pieces)
    except Exception as err:
        return f"[YOUTUBE ERROR] {err}"
# Tool 4 — Whisper transcription
@functools.lru_cache(maxsize=1)
def _load_whisper_model():
    """Load the "base" Whisper model once and reuse it on later calls."""
    return whisper.load_model("base")


def transcribe_audio(file_path: str) -> str:
    """Transcribe an MP3 file to text using Whisper.

    The model is obtained through a cached loader, so it is loaded on the
    first call only instead of once per transcription. A failed load is not
    cached and is retried on the next call.

    Args:
        file_path: Path of the audio file passed to ``model.transcribe``.

    Returns:
        The ``"text"`` entry of the transcription result; on any exception
        (including a failed model load) a string starting with
        ``[AUDIO ERROR]``.
    """
    try:
        result = _load_whisper_model().transcribe(file_path)
        return result["text"]
    except Exception as e:
        return f"[AUDIO ERROR] {e}"
# Tool 5 — Excel parser
def extract_excel_total_food_sales(file_path: str) -> str:
    """Total the amounts of rows whose category mentions "food".

    Reads the active sheet of the workbook starting at row 2 (row 1 is
    presumably a header; confirm against the input files). Every row must
    unpack into exactly three cells: an ignored first cell, the category,
    and the amount. A row counts when its category is a string containing
    "food", case-insensitively; an empty amount counts as 0.

    Args:
        file_path: Path of the workbook to open with openpyxl.

    Returns:
        The total formatted as ``$<amount>`` with two decimals; on any
        exception a string starting with ``[EXCEL ERROR]``.
    """
    try:
        worksheet = openpyxl.load_workbook(file_path).active
        running_total = 0.0
        for row in worksheet.iter_rows(min_row=2, values_only=True):
            _, label, value = row
            if not isinstance(label, str):
                continue
            if "food" not in label.lower():
                continue
            running_total += float(value or 0)
        return f"${running_total:.2f}"
    except Exception as err:
        return f"[EXCEL ERROR] {err}"
# Tool registry: one (callable, exposed name, description) entry per tool.
# The exposed name is what the LLM sees; note that run_python_code is
# published as "run_python".
_TOOL_SPECS = (
    (search_wikipedia, "search_wikipedia", "Search Wikipedia for facts and lists."),
    (run_python_code, "run_python", "Run Python code for logic, math, or set processing."),
    (get_youtube_transcript, "get_youtube_transcript", "Fetch transcript from YouTube video by URL."),
    (transcribe_audio, "transcribe_audio", "Transcribe MP3 audio file using Whisper."),
    (
        extract_excel_total_food_sales,
        "extract_excel_total_food_sales",
        "Sum total sales from Excel where category is 'food'.",
    ),
)
TOOLS = [
    FunctionTool.from_defaults(func, name=tool_name, description=blurb)
    for func, tool_name, blurb in _TOOL_SPECS
]
# System prompt given to the agent: lists the available tools and the
# answer-format rules for the GAIA benchmark.
_SYSTEM_PROMPT = """
You are a highly capable AI agent taking the GAIA benchmark test.
You have access to the following tools:
- Wikipedia search for factual lookups
- Python runner for math, logic, or text analysis
- YouTube transcript fetcher (via URL)
- Audio transcriber (Whisper, MP3)
- Excel food sales analyzer
Rules:
1. Always try to use a tool if relevant.
2. Return ONLY the final answer in the requested format.
3. Do not guess. If a tool fails, say "Tool not available".
4. Follow formats strictly: comma-separated lists, numeric values, chess notation, names only, etc.
5. Avoid all explanation unless requested.
"""

# Module-level LLM client and agent, built once at import time from TOOLS.
llm = OpenAI(model="gpt-4")
agent = FunctionCallingAgent.from_tools(
    tools=TOOLS,
    llm=llm,
    verbose=True,
    system_prompt=_SYSTEM_PROMPT,
)
async def answer_question(question: str) -> str:
    """Ask the module-level agent a question and return its answer text.

    Uses the agent's asynchronous chat entry point (``achat``) and takes the
    string form of the chat response, which is the answer text.

    Args:
        question: The question to put to the agent.

    Returns:
        The agent's answer with surrounding whitespace stripped. On any
        exception the error is printed and a string starting with
        ``[ERROR] `` is returned instead of raising.
    """
    try:
        response = await agent.achat(question)
        return str(response).strip()
    except Exception as e:
        print("❌ Agent error:", e)
        return "[ERROR] " + str(e)