ZeroTimo commited on
Commit
7d74ca3
·
verified ·
1 Parent(s): a92112f

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +157 -254
agent.py CHANGED
@@ -1,291 +1,195 @@
1
- import os
2
- import pandas as pd
3
- from langgraph.graph import StateGraph, START, MessagesState
4
- from langgraph.prebuilt import tools_condition, ToolNode
5
- from langchain_google_genai import ChatGoogleGenerativeAI
6
- from langchain_core.tools import tool
7
- from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
8
- from langchain_community.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper
9
- from langchain_core.messages import SystemMessage, HumanMessage
10
- import requests
11
- import tempfile
12
-
13
- # Lade Umgebungsvariablen (Google API Key)
 
 
 
 
 
 
14
  GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
15
 
16
- # === Tools definieren ===
 
 
 
 
 
17
 
18
- GAIA_BASE_URL = "https://agents-course-unit4-scoring.hf.space"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  @tool
 
21
  def fetch_gaia_file(task_id: str) -> str:
22
- """
23
- Download the file attached to a GAIA task and return the local file-path.
24
-
25
- Args:
26
- task_id: The GAIA task_id (string in the JSON payload).
27
-
28
- Returns:
29
- Absolute path to the downloaded temp-file.
30
- """
31
- try:
32
- url = f"{GAIA_BASE_URL}/files/{task_id}"
33
- response = requests.get(url, timeout=20)
34
- response.raise_for_status()
35
-
36
- # Server liefert den echten Dateinamen im Header – fallback auf "download"
37
- filename = (
38
- response.headers.get("x-filename") or
39
- response.headers.get("content-disposition", "download").split("filename=")[-1].strip('"')
40
- )
41
- if not filename:
42
- filename = f"{task_id}.bin"
43
-
44
- tmp_path = os.path.join(tempfile.gettempdir(), filename)
45
- with open(tmp_path, "wb") as f:
46
- f.write(response.content)
47
-
48
- return tmp_path
49
- except Exception as e:
50
- return f"ERROR: could not download file for task {task_id}: {e}"
51
 
52
  @tool
 
53
  def parse_csv(file_path: str, query: str = "") -> str:
54
- """
55
- Load a CSV file from `file_path` and optionally run a simple analysis query.
56
-
57
- Args:
58
- file_path: absolute path to a CSV file (from fetch_gaia_file)
59
- query: optional natural-language instruction, e.g.
60
- "sum of column Sales where Category != 'Drinks'"
61
-
62
- Returns:
63
- A concise string with the answer OR a preview of the dataframe
64
- if no query given.
65
- """
66
- try:
67
- df = pd.read_csv(file_path)
68
-
69
- # Auto-preview if kein query
70
- if not query:
71
- preview = df.head(5).to_markdown(index=False)
72
- return f"CSV loaded. First rows:\n\n{preview}\n\nColumns: {', '.join(df.columns)}"
73
-
74
- # Mini-query-engine (sehr simpel, reicht für Summen / Mittelwerte)
75
- query_lc = query.lower()
76
- if "sum" in query_lc:
77
- # ermitteln, welche Spalte summiert werden soll
78
- for col in df.columns:
79
- if col.lower() in query_lc:
80
- s = df[col]
81
- if "where" in query_lc:
82
- # naive Filter-Parsing: where <col> != 'Drinks'
83
- cond_part = query_lc.split("where", 1)[1].strip()
84
- # SEHR einfaches != oder == Parsing
85
- if "!=" in cond_part:
86
- key, val = [x.strip().strip("'\"") for x in cond_part.split("!=")]
87
- s = df.loc[df[key] != val, col]
88
- elif "==" in cond_part:
89
- key, val = [x.strip().strip("'\"") for x in cond_part.split("==")]
90
- s = df.loc[df[key] == val, col]
91
- return str(round(s.sum(), 2))
92
- # Fallback
93
- return "Query type not supported by parse_csv."
94
- except Exception as e:
95
- return f"ERROR parsing CSV: {e}"
96
 
97
  @tool
 
98
  def parse_excel(file_path: str, query: str = "") -> str:
99
- """
100
- Identisch zu parse_csv, nur für XLS/XLSX.
101
- """
102
- try:
103
- df = pd.read_excel(file_path)
104
-
105
- if not query:
106
- preview = df.head(5).to_markdown(index=False)
107
- return f"Excel loaded. First rows:\n\n{preview}\n\nColumns: {', '.join(df.columns)}"
108
-
109
- query_lc = query.lower()
110
- if "sum" in query_lc:
111
- for col in df.columns:
112
- if col.lower() in query_lc:
113
- s = df[col]
114
- if "where" in query_lc:
115
- cond_part = query_lc.split("where", 1)[1].strip()
116
- if "!=" in cond_part:
117
- key, val = [x.strip().strip("'\"") for x in cond_part.split("!=")]
118
- s = df.loc[df[key] != val, col]
119
- elif "==" in cond_part:
120
- key, val = [x.strip().strip("'\"") for x in cond_part.split("==")]
121
- s = df.loc[df[key] == val, col]
122
- return str(round(s.sum(), 2))
123
- return "Query type not supported by parse_excel."
124
- except Exception as e:
125
- return f"ERROR parsing Excel: {e}"
126
-
127
- @tool
128
- def transcribe_audio(file_path: str, language: str = "en") -> str:
129
- """
130
- Transcribe an audio file (MP3/WAV/etc.) using Faster-Whisper.
131
-
132
- Args:
133
- file_path: absolute path to an audio file (from fetch_gaia_file)
134
- language: ISO language code, default "en"
135
-
136
- Returns:
137
- Full transcription as plain text, or "ERROR …"
138
- """
139
- try:
140
- from faster_whisper import WhisperModel
141
-
142
- # Tiny model reicht für kurze Sprachmemos, ~75 MB
143
- model = WhisperModel("tiny", device="cpu", compute_type="int8")
144
-
145
- segments, _ = model.transcribe(file_path, language=language)
146
- transcript = " ".join(segment.text.strip() for segment in segments).strip()
147
-
148
- if not transcript:
149
- return "ERROR: transcription empty."
150
- return transcript
151
- except Exception as e:
152
- return f"ERROR: audio transcription failed – {e}"
153
-
154
-
155
  @tool
156
- def simple_calculator(operation: str, a: float, b: float) -> float:
157
- """Basic maths: add, subtract, multiply, divide and modulo."""
158
- if operation == "add":
159
- return a + b
160
- if operation == "subtract":
161
- return a - b
162
- if operation == "multiply":
163
- return a * b
164
- if operation == "divide":
165
- return a / b if b else float("inf")
166
- if operation == "modulo":
167
- return a % b
168
- return 0.0
169
-
170
 
171
  @tool
172
- def wiki_search(query: str) -> str:
173
- """Search Wikipedia for a query and return the result."""
174
- search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
175
- return "\n\n".join(doc.page_content for doc in search_docs)
176
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  @tool
178
- def arxiv_search(query: str) -> str:
179
- """Search Arxiv for academic papers about a query."""
180
- search_docs = ArxivLoader(query=query, load_max_docs=3).load()
181
- return "\n\n".join(doc.page_content[:1000] for doc in search_docs)
182
-
 
 
 
 
 
 
183
  @tool
 
184
  def web_search(query: str) -> str:
185
- """Perform a DuckDuckGo web search."""
186
- wrapper = DuckDuckGoSearchAPIWrapper(max_results=5)
187
- results = wrapper.run(query)
188
- return results
189
-
190
- # === System Prompt definieren ===
191
- system_prompt = SystemMessage(
192
- content=(
193
- "You are a focused, factual AI agent competing on the GAIA evaluation.\n"
194
- "\n"
195
- "GENERAL RULES\n"
196
- "-------------\n"
197
- "1. Always try to answer every question.\n"
198
- "2. If you are NOT 100 % certain, prefer using a TOOL.\n"
199
- "3. Never invent facts.\n"
200
- "\n"
201
- "TOOLS\n"
202
- "-----\n"
203
- "- fetch_gaia_file(task_id): downloads any attachment for the current task.\n"
204
- "- parse_csv(file_path, query): analyse CSV files.\n"
205
- "- parse_excel(file_path, query): analyse Excel files.\n"
206
- "- transcribe_audio(file_path): transcribe MP3 / WAV audio.\n"
207
- "- wiki_search(query): query English Wikipedia.\n"
208
- "- arxiv_search(query): query arXiv.\n"
209
- "- web_search(query): DuckDuckGo web search.\n"
210
- "- simple_calculator(operation,a,b): basic maths.\n"
211
- "\n"
212
- "WHEN TO USE WHICH TOOL\n"
213
- "----------------------\n"
214
- "・If the prompt or GAIA metadata mentions an *attached* file, FIRST call "
215
- "fetch_gaia_file with the given task_id. Then:\n"
216
- " • CSV → parse_csv\n"
217
- " • XLS/XLSX → parse_excel\n"
218
- " • MP3/WAV → transcribe_audio (language auto-detect is OK)\n"
219
- " • Image → (currently unsupported) answer that image processing is unavailable\n"
220
- "・If you need factual data (dates, numbers, names) → wiki_search or web_search.\n"
221
- "・If you need a scientific paper → arxiv_search.\n"
222
- "・If a numeric operation is required → simple_calculator.\n"
223
- "\n"
224
- "ERROR HANDLING\n"
225
- "--------------\n"
226
- "If a tool call returns a string that starts with \"ERROR:\", IMMEDIATELY think of "
227
- "an alternative strategy: retry with a different tool or modified parameters. "
228
- "Do not repeat the same failing call twice.\n"
229
- "\n"
230
- "OUTPUT FORMAT\n"
231
- "-------------\n"
232
- "Follow the exact format asked in the question (e.g. single word, CSV, comma-list). "
233
- "Do not add extra commentary.\n"
234
- )
235
- )
236
-
237
- # === LLM definieren ===
238
- llm = ChatGoogleGenerativeAI(
239
- model="gemini-2.0-flash",
240
- google_api_key=GOOGLE_API_KEY,
241
- temperature=0,
242
- max_output_tokens=2048
243
- )
244
-
245
- # === Tools in LLM einbinden ===
246
  tools = [
247
  fetch_gaia_file,
248
  parse_csv,
249
  parse_excel,
250
- transcribe_audio,
251
- wiki_search,
252
- arxiv_search,
253
  web_search,
254
  simple_calculator,
255
  ]
 
256
  llm_with_tools = llm.bind_tools(tools)
257
 
258
- def safe_llm_invoke(messages):
259
- """
260
- Ruft LLM einmal auf. Wenn das Ergebnis mit ERROR beginnt,
261
- ruft es genau EIN weiteres Mal auf – jetzt weiß das LLM,
262
- dass der vorige Tool-Call fehlgeschlagen ist.
263
- """
264
- max_attempts = 2
265
- for attempt in range(max_attempts):
266
- result = llm_with_tools.invoke(messages)
267
- content = result.content if hasattr(result, "content") else ""
268
- if "ERROR:" not in content:
269
- return result
270
- # Fehler: füge eine System-Korrektur hinzu und versuche erneut
271
- messages.append(
272
- SystemMessage(
273
- content="Previous tool call returned an ERROR. "
274
- "Try a different tool or revise the input."
275
- )
276
  )
277
- # nach max_attempts immer noch Fehler → zurückgeben
278
- return result
279
 
280
 
281
- # === Nodes für LangGraph ===
282
  def assistant(state: MessagesState):
283
  msgs = state["messages"]
284
  if not msgs or msgs[0].type != "system":
285
  msgs = [system_prompt] + msgs
286
- return {"messages": [llm_with_tools.invoke(msgs)]}
287
 
288
- # === LangGraph bauen ===
 
 
289
  builder = StateGraph(MessagesState)
290
  builder.add_node("assistant", assistant)
291
  builder.add_node("tools", ToolNode(tools))
@@ -293,5 +197,4 @@ builder.add_edge(START, "assistant")
293
  builder.add_conditional_edges("assistant", tools_condition)
294
  builder.add_edge("tools", "assistant")
295
 
296
- # === Agent Executor ===
297
  agent_executor = builder.compile()
 
1
+ """
2
+ agent.py – LangGraph-Agent mit
3
+ Gemini 2.0 Flash
4
+ Datei-Tools (CSV, Excel, Audio, Bild-Describe, OCR)
5
+ Fehler-Retry-Logik
6
+ """
7
+ import os, base64, mimetypes, subprocess, json, tempfile
8
+ from typing import Any
9
+
10
+ from langgraph.graph import START, StateGraph, MessagesState
11
+ from langgraph.prebuilt import tools_condition, ToolNode
12
+ from langchain_core.tools import tool
13
+ from langchain_core.messages import SystemMessage, HumanMessage
14
+ from langchain_google_genai import ChatGoogleGenerativeAI
15
+ from langchain_community.tools.duckduckgo_search import DuckDuckGoSearchResults as DDGS
16
+
17
# ----------------------------------------------------------------------
# 1 ── ENV / LLM
# ----------------------------------------------------------------------
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

# Gemini 2.0 Flash; temperature 0 for deterministic, reproducible answers.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    google_api_key=GOOGLE_API_KEY,
    temperature=0,
    max_output_tokens=2048,
)
28
 
29
# ----------------------------------------------------------------------
# 2 ── ERROR WRAPPER (guarantees an "ERROR:" string instead of an exception)
# ----------------------------------------------------------------------
def error_guard(fn):
    """Decorator: run *fn* and convert any exception into an 'ERROR: ...' string.

    The agent's retry logic keys off the literal "ERROR:" prefix, so tools
    must never raise — failures are reported as strings instead.

    BUG FIX: ``functools.wraps`` is required here. LangChain's ``@tool``
    decorator (stacked on top of this one) derives the tool's name and
    description from ``__name__`` / ``__doc__``; without ``wraps`` every
    guarded tool would be registered as an anonymous, undocumented "wrapper".
    """
    from functools import wraps

    @wraps(fn)
    def wrapper(*args, **kwargs):
        try:
            return fn(*args, **kwargs)
        except Exception as e:  # deliberate catch-all boundary for tool calls
            return f"ERROR: {e}"
    return wrapper
40
# ----------------------------------------------------------------------
# 3 ── BASIC TOOLS
# ----------------------------------------------------------------------
@tool
@error_guard
def simple_calculator(operation: str, a: float, b: float) -> float:
    """Basic maths: add, subtract, multiply, divide, modulo.

    Args:
        operation: one of "add", "subtract", "multiply", "divide", "modulo".
        a: first operand.
        b: second operand.

    Returns:
        The numeric result. Division by zero yields ``inf`` (unchanged
        behavior); an unknown operation yields an "ERROR: ..." string.
    """
    # Lazy dispatch: only the requested operation is evaluated. The previous
    # dict literal computed every result eagerly on each call, including a
    # needless division. "modulo" is restored from the pre-refactor tool set
    # (a % 0 raises and is converted to an ERROR string by @error_guard).
    ops = {
        "add": lambda: a + b,
        "subtract": lambda: a - b,
        "multiply": lambda: a * b,
        "divide": lambda: a / b if b else float("inf"),
        "modulo": lambda: a % b,
    }
    op = ops.get(operation)
    return op() if op is not None else "ERROR: unknown operation"
50
 
51
@tool
@error_guard
def fetch_gaia_file(task_id: str) -> str:
    """Download attachment for current GAIA task_id; returns local file path.

    Args:
        task_id: GAIA task identifier whose attachment should be fetched.

    Returns:
        Absolute path of the downloaded temp file (or "ERROR: ..." via
        @error_guard when the download fails).
    """
    import requests, pathlib, uuid
    # NOTE(review): the previous revision of this file used /files/<task_id>
    # (plural); /file/<task_id> 404s against the scoring API — restored.
    url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
    r = requests.get(url, timeout=15)
    r.raise_for_status()
    # BUG FIX: the URL ends in the bare task_id, so Path(url).suffix was
    # always "". Take the extension from the server-provided filename in the
    # Content-Disposition header so downstream tools can recognise the type.
    cd = r.headers.get("content-disposition", "")
    filename = cd.split("filename=")[-1].strip('"') if "filename=" in cd else ""
    suffix = pathlib.Path(filename or url).suffix or ""
    fp = pathlib.Path(tempfile.gettempdir()) / f"{uuid.uuid4().hex}{suffix}"
    fp.write_bytes(r.content)
    return str(fp)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
@tool
@error_guard
def parse_csv(file_path: str, query: str = "") -> str:
    """Load CSV & answer query using pandas.eval.

    Args:
        file_path: local path to a CSV file (from fetch_gaia_file).
        query: optional pandas expression over ``df``,
               e.g. "df['Sales'].sum()". Empty → preview.

    Returns:
        With no query: a markdown preview of the first rows plus the column
        list; otherwise the stringified result of evaluating the expression.
    """
    import pandas as pd
    df = pd.read_csv(file_path)
    if not query:
        # Include the column names (as the pre-refactor version did) so the
        # model can phrase a valid follow-up query without guessing schema.
        cols = ", ".join(map(str, df.columns))
        return f"{df.head().to_markdown()}\n\nColumns: {cols}"
    # NOTE(review): pd.eval executes an expression string; queries here come
    # from the LLM, not end users, but keep this away from untrusted input.
    return str(pd.eval(query, local_dict={"df": df}))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
@tool
@error_guard
def parse_excel(file_path: str, query: str = "") -> str:
    """Load first sheet of Excel & answer query using pandas.eval.

    Args:
        file_path: local path to an XLS/XLSX file (from fetch_gaia_file).
        query: optional pandas expression over ``df``,
               e.g. "df['Sales'].sum()". Empty → preview.

    Returns:
        With no query: a markdown preview of the first rows plus the column
        list; otherwise the stringified result of evaluating the expression.
    """
    import pandas as pd
    df = pd.read_excel(file_path)
    if not query:
        # Mirror parse_csv: expose the schema so the model can form queries.
        cols = ", ".join(map(str, df.columns))
        return f"{df.head().to_markdown()}\n\nColumns: {cols}"
    return str(pd.eval(query, local_dict={"df": df}))
83
+
84
+ # ----------------------------------------------------------------------
85
+ # 4 ── GEMINI MULTIMODAL-TOOLS
86
+ # ----------------------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
@tool
@error_guard
def describe_image(file_path: str, prompt: str = "Describe the image.") -> str:
    """Send a local image (base64) to Gemini Vision and return description.

    Args:
        file_path: local path to an image file.
        prompt: instruction sent alongside the image.
    """
    mime, _ = mimetypes.guess_type(file_path)
    if mime is None or not mime.startswith("image/"):
        return "ERROR: not an image."
    with open(file_path, "rb") as fh:
        encoded = base64.b64encode(fh.read()).decode()
    # assumes Gemini accepts a data-URI string in the image_url slot —
    # TODO confirm against langchain_google_genai's multimodal docs.
    parts = [
        {"type": "text", "text": prompt},
        {"type": "image_url", "image_url": f"data:{mime};base64,{encoded}"},
    ]
    return llm.invoke([HumanMessage(content=parts)]).content
102
 
103
@tool
@error_guard
def gemini_transcribe_audio(file_path: str,
                            prompt: str = "Transcribe the audio.") -> str:
    """Transcribe audio via Gemini multimodal.

    Args:
        file_path: local path to an audio file (MP3/WAV/...).
        prompt: instruction sent alongside the audio payload.
    """
    mime, _ = mimetypes.guess_type(file_path)
    if mime is None or not mime.startswith("audio/"):
        return "ERROR: not audio."
    with open(file_path, "rb") as fh:
        encoded = base64.b64encode(fh.read()).decode()
    # "media" parts carry raw base64 data + mime type for Gemini.
    parts = [
        {"type": "text", "text": prompt},
        {"type": "media", "data": encoded, "mime_type": mime},
    ]
    return llm.invoke([HumanMessage(content=parts)]).content
119
+
120
+ # ----------------------------------------------------------------------
121
+ # 5 ── OFFLINE OCR-TOOL (pytesseract)
122
+ # ----------------------------------------------------------------------
123
@tool
@error_guard
def ocr_image(file_path: str, lang: str = "eng") -> str:
    """Extract text from image using pytesseract.

    Args:
        file_path: local path to an image file.
        lang: tesseract language code (default "eng").
    """
    # Imported lazily so the module loads even without the OCR stack.
    import pytesseract
    from PIL import Image

    return pytesseract.image_to_string(Image.open(file_path), lang=lang).strip()
131
+
132
# ----------------------------------------------------------------------
# 6 ── WEB / WIKI SEARCH
# ----------------------------------------------------------------------
@tool
@error_guard
def web_search(query: str) -> str:
    """DuckDuckGo text search top 5 results.

    Args:
        query: search terms.

    Returns:
        Up to five "title url" lines, or "ERROR: no results.".
    """
    # BUG FIX: the module-level alias ``DDGS`` is LangChain's
    # DuckDuckGoSearchResults tool *class*, which is neither a context
    # manager nor provides ``.text()`` — the body below was written against
    # the ``duckduckgo_search`` client, so import that one explicitly.
    from duckduckgo_search import DDGS as DuckClient
    with DuckClient() as ddgs:
        results = ddgs.text(query, max_results=5)
    if not results:
        return "ERROR: no results."
    return "\n\n".join(f"{r['title']} {r['href']}" for r in results)
144
+
145
# ----------------------------------------------------------------------
# 7 ── SYSTEM PROMPT
# ----------------------------------------------------------------------
# Kept in a named constant so the prompt text is easy to locate and tweak.
_SYSTEM_PROMPT_TEXT = (
    "You are a precise GAIA challenge agent.\n"
    "Always attempt a TOOL call before giving up. "
    "If a tool returns 'ERROR', think of an alternative tool or parameters.\n"
    "Use fetch_gaia_file(task_id) for any attachment."
)
system_prompt = SystemMessage(content=_SYSTEM_PROMPT_TEXT)
154
+
155
+ # ----------------------------------------------------------------------
156
+ # 8 ── LangGraph Nodes
157
+ # ----------------------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
# Tool registry: shared by the ToolNode and by Gemini via bind_tools.
# Order is preserved from the original definition.
tools = [
    fetch_gaia_file,
    parse_csv,
    parse_excel,
    gemini_transcribe_audio,
    ocr_image,
    describe_image,
    web_search,
    simple_calculator,
]

llm_with_tools = llm.bind_tools(tools)
170
 
171
+
172
def safe_llm_invoke(msgs):
    """Invoke the tool-bound LLM, retrying once if it answers with "ERROR".

    On an ERROR-prefixed reply, a corrective SystemMessage is appended to a
    *copy* of the history and the model is asked again; after the second
    attempt the last response is returned as-is.

    Args:
        msgs: message history to send (left unmodified).

    Returns:
        The model's response message.
    """
    # BUG FIX: copy before appending — the original mutated the caller's
    # list, which is the LangGraph state's message list.
    msgs = list(msgs)
    resp = None
    for _ in range(2):
        resp = llm_with_tools.invoke(msgs)
        content = resp.content or ""
        # BUG FIX: Gemini may return content as a list of parts, on which
        # .startswith would raise; only plain strings carry the ERROR prefix.
        if not (isinstance(content, str) and content.startswith("ERROR")):
            return resp
        msgs.append(
            SystemMessage(content="Previous tool call returned ERROR. Try another approach.")
        )
    return resp
 
182
 
183
 
 
184
def assistant(state: MessagesState):
    """LangGraph node: ensure the system prompt leads the history, then call the LLM."""
    history = state["messages"]
    missing_prompt = not history or history[0].type != "system"
    if missing_prompt:
        history = [system_prompt, *history]
    return {"messages": [safe_llm_invoke(history)]}
189
 
190
# ----------------------------------------------------------------------
# 9 ── Graph
# ----------------------------------------------------------------------
# assistant → (tools_condition) → tools → assistant loop, entered at START.
builder = StateGraph(MessagesState)
builder.add_node("assistant", assistant)
builder.add_node("tools", ToolNode(tools))
builder.add_edge(START, "assistant")  # entry edge (unchanged context line in the diff hunk)
builder.add_conditional_edges("assistant", tools_condition)
builder.add_edge("tools", "assistant")

agent_executor = builder.compile()