# agent.py — full GAIA-ready agent with tools for web, audio, Excel, Python
"""GAIA benchmark agent.

Builds a llama_index ReAct agent on top of an OpenAI LLM and exposes five
tools to it: Wikipedia search, a Python runner, YouTube transcripts, Whisper
audio transcription and an Excel "food sales" total.

Importing this module has side effects: it prints whether OPENAI_API_KEY is
set and constructs the module-level ``llm`` and ``agent`` objects.
"""

import ast
import asyncio  # noqa: F401  (kept: part of the module's original import surface)
import functools
import os

import openpyxl
from langchain_community.document_loaders import YoutubeLoader
from langchain_community.tools.wikipedia.tool import WikipediaQueryRun
from langchain_community.utilities.wikipedia import WikipediaAPIWrapper
from langchain_experimental.tools.python.tool import PythonREPLTool
from llama_index.core.agent.react.base import ReActAgent
from llama_index.core.tools import FunctionTool
from llama_index.llms.openai import OpenAI

# The `openai-whisper` distribution installs its module under the name
# `whisper`; the original `openai_whisper` spelling is kept as a fallback.
try:
    import whisper
except ImportError:
    import openai_whisper as whisper

# Confirm OpenAI API key
if os.getenv("OPENAI_API_KEY"):
    print("✅ Detected OPENAI_API_KEY in environment")
else:
    print("⚠️ Missing OPENAI_API_KEY — LLM may not work")


# --- Web tools ---
def wikipedia_search(query: str) -> str:
    """Search Wikipedia for `query` and return the text of the matching pages.

    Returns a string starting with "[WIKIPEDIA ERROR]" if the lookup fails.
    """
    try:
        return WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper()).run(query)
    except Exception as e:
        return f"[WIKIPEDIA ERROR] {e}"


# --- Python with output ---
def _print_final_expression(code: str) -> str:
    """Return `code` with its trailing bare expression wrapped in ``print()``.

    Code that does not parse, is empty, or does not end in an expression
    statement is returned unchanged so the REPL can run (or reject) it as-is.
    """
    try:
        tree = ast.parse(code)
    except (SyntaxError, ValueError):
        return code
    if not tree.body or not isinstance(tree.body[-1], ast.Expr):
        return code
    final = tree.body[-1]
    tree.body[-1] = ast.Expr(
        value=ast.Call(
            func=ast.Name(id="print", ctx=ast.Load()),
            args=[final.value],
            keywords=[],
        )
    )
    # ast.unparse needs Python 3.9+.
    return ast.unparse(ast.fix_missing_locations(tree))


def run_python_with_output(code: str) -> str:
    """Execute Python `code` and return whatever it prints.

    If the code contains no ``print(`` call, the value of its final expression
    is printed automatically, so ``"2 + 2"`` yields ``"4"``. Multi-statement
    snippets are supported; only the last expression is wrapped.

    Returns a string starting with "[PYTHON ERROR]" if the tool itself raises.
    """
    try:
        # Leave code that already prints alone, to avoid print(print(...)).
        if "print(" not in code:
            code = _print_final_expression(code)
        # SECURITY NOTE: this executes arbitrary, LLM-supplied code in-process.
        return PythonREPLTool().run(code)
    except Exception as e:
        return f"[PYTHON ERROR] {e}"


# --- YouTube (fallback placeholder) ---
def get_youtube_transcript(url: str) -> str:
    """Return the transcript of the YouTube video at `url` as a single string.

    Returns a string starting with "[YOUTUBE ERROR]" if loading fails.
    """
    try:
        loader = YoutubeLoader.from_youtube_url(url, add_video_info=False)
        docs = loader.load()
        return " ".join(doc.page_content for doc in docs)
    except Exception as e:
        return f"[YOUTUBE ERROR] {e}"


# --- Whisper transcription ---
@functools.lru_cache(maxsize=1)
def _whisper_model(name: str = "base"):
    """Load a Whisper model once and reuse it for later transcriptions."""
    return whisper.load_model(name)


def transcribe_audio(file_path: str) -> str:
    """Transcribe the audio file at `file_path` to text with Whisper ("base" model).

    Returns a string starting with "[AUDIO ERROR]" if transcription fails.
    """
    try:
        result = _whisper_model("base").transcribe(file_path)
        return result['text']
    except Exception as e:
        return f"[AUDIO ERROR] {e}"


# --- Excel sales extraction ---
def extract_excel_total_food_sales(file_path: str) -> str:
    """Sum the sales amounts of food rows in an Excel workbook.

    Reads the active sheet, skips the first (header) row, and for every row
    whose second column is a string containing "food" (case-insensitive) adds
    the third column to the total. Rows with fewer than three columns or an
    empty amount are skipped.

    Returns the total formatted like "$123.45", or a string starting with
    "[EXCEL ERROR]" if the workbook cannot be read or an amount is not numeric.
    """
    # NOTE(review): assumes category is in column B and amount in column C —
    # confirm against the spreadsheets this is used on.
    try:
        # data_only=True yields the cached values of formula cells rather than
        # the formula text; cells never calculated by Excel read as None.
        wb = openpyxl.load_workbook(file_path, data_only=True)
        try:
            sheet = wb.active
            total = 0.0
            for row in sheet.iter_rows(min_row=2, values_only=True):
                if len(row) < 3:
                    continue
                category, amount = row[1], row[2]
                if amount is None:
                    continue
                if isinstance(category, str) and 'food' in category.lower():
                    total += float(amount)
        finally:
            wb.close()
        return f"${total:.2f}"
    except Exception as e:
        return f"[EXCEL ERROR] {e}"


# --- Tool list ---
TOOLS = [
    FunctionTool.from_defaults(wikipedia_search),
    FunctionTool.from_defaults(run_python_with_output),
    FunctionTool.from_defaults(get_youtube_transcript),
    FunctionTool.from_defaults(transcribe_audio),
    FunctionTool.from_defaults(extract_excel_total_food_sales),
]

# --- LLM and Agent ---
llm = OpenAI(model="gpt-4")

# NOTE(review): confirm that the installed llama_index version's
# ReActAgent.from_tools actually honours `system_prompt`.
agent = ReActAgent.from_tools(
    tools=TOOLS,
    llm=llm,
    verbose=True,
    system_prompt="""
You are an expert AI assistant participating in the GAIA benchmark.
Your goal is to answer 20 diverse questions using available tools:
- Wikipedia search
- Python code runner
- YouTube transcript
- MP3 transcription (Whisper)
- Excel analysis

Rules:
1. Output only the FINAL answer. No explanations.
2. Format must match expected output exactly: comma-separated lists, plain names, numeric values, algebraic notation.
3. Use tools smartly. Don't guess when tools can help.
4. If tools fail (e.g., YouTube blocked), say clearly: "Tool not available".
"""
)


# --- Run function ---
def answer_question_sync(question: str) -> str:
    """Ask the agent `question` and return its answer with whitespace stripped.

    Never raises: any exception is printed and returned as "[ERROR] <message>".
    """
    try:
        response = agent.chat(question)
        inner = getattr(response, "response", None)
        if hasattr(inner, "content"):
            return inner.content.strip()
        if isinstance(response, str):
            return response.strip()
        # Strip here too so every branch returns a whitespace-trimmed answer.
        return str(response).strip()
    except Exception as e:
        print(f"❌ Exception while answering: {e}")
        return f"[ERROR] {e}"


async def answer_question(question: str) -> str:
    """Async wrapper around `answer_question_sync`.

    NOTE(review): this runs the blocking agent call directly on the event
    loop. It is left that way on purpose: offloading to a thread would let
    several questions use the shared module-level `agent` concurrently.
    """
    return answer_question_sync(question)