ZeroTimo committed on
Commit
7cc5531
·
verified ·
1 Parent(s): 0d276c6

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +182 -175
agent.py CHANGED
@@ -1,37 +1,29 @@
1
- """
2
- agent.py – LangGraph-Agent mit
3
- Gemini 2.0 Flash
4
- Datei-Tools (CSV, Excel, Audio, Bild-Describe, OCR)
5
- • Fehler-Retry-Logik
6
- """
7
- import os, base64, mimetypes, subprocess, json, tempfile
8
- import functools
9
- from typing import Any
10
 
11
  from langgraph.graph import START, StateGraph, MessagesState
12
- from langgraph.prebuilt import tools_condition, ToolNode
13
- from langchain_core.tools import tool
14
  from langchain_core.messages import SystemMessage, HumanMessage
 
 
15
  from langchain_google_genai import ChatGoogleGenerativeAI
16
  from langchain_community.tools.tavily_search import TavilySearchResults
17
 
18
- # ----------------------------------------------------------------------
19
- # 1 ── ENV / LLM
20
- # ----------------------------------------------------------------------
21
  GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
 
22
 
23
- llm = ChatGoogleGenerativeAI(
24
- model="gemini-2.0-flash",
25
- google_api_key=GOOGLE_API_KEY,
26
- temperature=0,
27
- max_output_tokens=2048,
28
- )
29
-
30
- # ----------------------------------------------------------------------
31
- # 2 ── ERROR-WRAPPER (garantiert "ERROR:"-String statt Exception)
32
- # ----------------------------------------------------------------------
33
  def error_guard(fn):
34
- @functools.wraps(fn) # ➜ übernimmt __doc__, __name__, …
35
  def wrapper(*args, **kwargs):
36
  try:
37
  return fn(*args, **kwargs)
@@ -39,198 +31,213 @@ def error_guard(fn):
39
  return f"ERROR: {e}"
40
  return wrapper
41
 
42
- # ----------------------------------------------------------------------
43
- # 3 ── BASIS-TOOLS
44
- # ----------------------------------------------------------------------
45
- @tool
46
- @error_guard
47
- def simple_calculator(operation: str, a: float, b: float) -> float:
48
- """Basic maths: add, subtract, multiply, divide."""
49
- ops = {"add": a + b, "subtract": a - b, "multiply": a * b,
50
- "divide": a / b if b else float("inf")}
51
- return ops.get(operation, "ERROR: unknown operation")
52
 
53
  @tool
54
  @error_guard
55
  def fetch_gaia_file(task_id: str) -> str:
56
- """Download attachment for current GAIA task_id; returns local file path."""
57
- import requests, pathlib, uuid
58
- url = f"https://agents-course-unit4-scoring.hf.space/file/{task_id}"
59
- r = requests.get(url, timeout=15)
60
- r.raise_for_status()
61
- suffix = pathlib.Path(url).suffix or ""
62
- fp = pathlib.Path(tempfile.gettempdir())/f"{uuid.uuid4().hex}{suffix}"
63
- fp.write_bytes(r.content)
64
- return str(fp)
 
 
 
 
 
 
 
 
65
 
66
  @tool
67
  @error_guard
68
  def parse_csv(file_path: str, query: str = "") -> str:
69
- """Load CSV & answer query using pandas.eval."""
70
- import pandas as pd
71
  df = pd.read_csv(file_path)
72
  if not query:
73
- return df.head().to_markdown()
74
- return str(pd.eval(query, local_dict={"df": df}))
75
-
 
 
 
 
 
 
 
76
  @tool
77
  @error_guard
78
  def parse_excel(file_path: str, query: str = "") -> str:
79
- """Load first sheet of Excel & answer query using pandas.eval."""
80
- import pandas as pd
81
  df = pd.read_excel(file_path)
82
  if not query:
83
- return df.head().to_markdown()
84
- return str(pd.eval(query, local_dict={"df": df}))
85
-
86
- # ----------------------------------------------------------------------
87
- # 4 ── GEMINI MULTIMODAL-TOOLS
88
- # ----------------------------------------------------------------------
 
 
 
 
89
  @tool
90
  @error_guard
91
- def describe_image(file_path: str, prompt: str = "Describe the image.") -> str:
92
- """Send a local image (base64) to Gemini Vision and return description."""
93
- mime, _ = mimetypes.guess_type(file_path)
94
- if not (mime and mime.startswith("image/")):
95
- return "ERROR: not an image."
96
  with open(file_path, "rb") as f:
97
  b64 = base64.b64encode(f.read()).decode()
98
- content = [
99
- {"type": "text", "text": prompt},
100
- {"type": "image_url", "image_url": f"data:{mime};base64,{b64}"},
101
- ]
102
- resp = llm.invoke([HumanMessage(content=content)])
103
- return resp.content
104
-
 
 
 
 
 
 
105
  @tool
106
  @error_guard
107
- def gemini_transcribe_audio(file_path: str,
108
- prompt: str = "Transcribe the audio.") -> str:
109
- """Transcribe audio via Gemini multimodal."""
110
- mime, _ = mimetypes.guess_type(file_path)
111
- if not (mime and mime.startswith("audio/")):
112
- return "ERROR: not audio."
113
- with open(file_path, "rb") as f:
114
- b64 = base64.b64encode(f.read()).decode()
115
- content = [
116
- {"type": "text", "text": prompt},
117
- {"type": "media", "data": b64, "mime_type": mime},
118
- ]
119
- resp = llm.invoke([HumanMessage(content=content)])
120
  return resp.content
121
 
122
- # ----------------------------------------------------------------------
123
- # 5 ── OFFLINE OCR-TOOL (pytesseract)
124
- # ----------------------------------------------------------------------
125
  @tool
126
  @error_guard
127
  def ocr_image(file_path: str, lang: str = "eng") -> str:
128
- """Extract text from image using pytesseract."""
129
- from PIL import Image
130
- import pytesseract
131
- img = Image.open(file_path)
132
- return pytesseract.image_to_string(img, lang=lang).strip()
133
-
134
- # ----------------------------------------------------------------------
135
- # 6 ── WEB / WIKI SEARCH
136
- # ----------------------------------------------------------------------
 
 
 
137
  @tool
138
  @error_guard
139
  def web_search(query: str, max_results: int = 5) -> str:
140
- """Tavily web search returns markdown list of results."""
141
- search = TavilySearchResults(max_results=max_results)
142
- hits = search.invoke(query)
143
  if not hits:
144
- return "ERROR: no results."
145
- return "\n\n".join(f"{hit['title']} – {hit['url']}" for hit in hits)
146
-
147
-
148
- # ----------------------------------------------------------------------
149
- # 7 ── SYSTEM-PROMPT
150
- # ----------------------------------------------------------------------
151
- system_prompt = SystemMessage(content=(
152
- """"
153
- You are GAIA-Assist, an accurate, tool-using agent.
154
-
155
- TOOLS YOU CAN CALL
156
- ------------------
157
- fetch_gaia_file(task_id) – download the current task’s attachment
158
- parse_csv(file_path, query="")
159
- • parse_excel(file_path, query="")
160
- gemini_transcribe_audio(file_path[, prompt])
161
- • describe_image(file_path[, prompt])
162
- ocr_image(file_path[, lang="eng"])
163
- web_search(query [, max_results=5])
164
- simple_calculator(operation, a, b)
165
-
166
- WORKFLOW RULES
167
- --------------
168
- 1. **If** the question mentions an attachment, first call
169
- fetch_gaia_file(task_id).
170
- – After it returns a path, choose exactly one specialised parser.
171
-
172
- 2. **Otherwise**, think whether a web_search or calculator is needed.
173
-
174
- 3. **NEVER** call the same tool twice in a row with the same input.
175
-
176
- ANSWER FORMAT
177
- -------------
178
- *If a tool is needed*
179
- Thought: Do I need to use a tool? **Yes**
180
- Action: <tool name>
181
- Action Input: <JSON-encoded arguments>
182
-
183
- *If no tool is needed*
184
- Thought: Do I need to use a tool? **No**
185
- Final Answer: <your concise answer here>
186
-
187
- Once you have written **Final Answer:** you are done – do **not** call any further tool.
188
- """
189
- ))
190
-
191
- # ----------------------------------------------------------------------
192
- # 8 ── LangGraph Nodes
193
- # ----------------------------------------------------------------------
194
- tools = [
195
- fetch_gaia_file,
196
- parse_csv,
197
- parse_excel,
198
- gemini_transcribe_audio,
199
- ocr_image,
200
- describe_image,
201
- web_search,
202
- simple_calculator,
203
- ]
204
 
205
- llm_with_tools = llm.bind_tools(tools)
 
 
206
 
 
207
 
208
- def safe_llm_invoke(msgs):
209
- for attempt in range(2):
210
- resp = llm_with_tools.invoke(msgs)
211
- content = resp.content or ""
212
- if not content.startswith("ERROR"):
213
- return resp
214
- msgs.append(
215
- SystemMessage(content="Previous tool call returned ERROR. Try another approach.")
216
- )
217
- return resp
218
 
 
 
 
219
 
 
 
 
 
 
 
220
  def assistant(state: MessagesState):
221
  msgs = state["messages"]
222
- if not msgs or msgs[0].type != "system":
223
  msgs = [system_prompt] + msgs
224
- return {"messages": [safe_llm_invoke(msgs)]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
 
226
- # ----------------------------------------------------------------------
227
- # 9 ── Graph
228
- # ----------------------------------------------------------------------
229
  builder = StateGraph(MessagesState)
230
  builder.add_node("assistant", assistant)
231
  builder.add_node("tools", ToolNode(tools))
232
  builder.add_edge(START, "assistant")
233
- builder.add_conditional_edges("assistant", tools_condition)
234
- builder.add_edge("tools", "assistant")
235
 
 
236
  agent_executor = builder.compile()
 
1
+ # agent.py – Gemini 2.0 Flash · LangGraph · Mehrere Tools
2
+ # =========================================================
3
import os, asyncio, base64, mimetypes, tempfile, functools, json
import requests
from typing import Dict, Any, List, Optional

from langgraph.graph import START, END, StateGraph, MessagesState
from langgraph.prebuilt import tools_condition, ToolNode

from langchain_core.messages import SystemMessage, HumanMessage
from langchain_core.tools import tool

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.tools.tavily_search import TavilySearchResults
14
 
15
+ # ---------------------------------------------------------------------
16
+ # Konstanten / API-Keys
17
+ # ---------------------------------------------------------------------
18
  GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
19
+ TAVILY_KEY = os.getenv("TAVILY_API_KEY")
20
 
21
+ # ---------------------------------------------------------------------
22
+ # Fehler-Wrapper – behält Doc-String dank wraps
23
+ # ---------------------------------------------------------------------
24
+ import functools
 
 
 
 
 
 
25
  def error_guard(fn):
26
+ @functools.wraps(fn)
27
  def wrapper(*args, **kwargs):
28
  try:
29
  return fn(*args, **kwargs)
 
31
  return f"ERROR: {e}"
32
  return wrapper
33
 
34
+
35
+ # ---------------------------------------------------------------------
36
+ # 1) fetch_gaia_file – Datei vom GAIA-Server holen
37
+ # ---------------------------------------------------------------------
38
+ GAIA_FILE_ENDPOINT = "https://agents-course-unit4-scoring.hf.space/file"
 
 
 
 
 
39
 
40
@tool
@error_guard
def fetch_gaia_file(task_id: str) -> str:
    """Download the attachment for the given GAIA task_id and return local path.

    Args:
        task_id: GAIA task identifier used to build the download URL.

    Returns:
        Local filesystem path of the saved attachment, or an "ERROR: ..."
        string on failure (per the agent's error convention).
    """
    url = f"{GAIA_FILE_ENDPOINT}/{task_id}"
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        # The server may suggest a filename via this header.  Strip any
        # directory components so a malicious header value (e.g. "../x")
        # cannot escape the temp directory.
        raw_name = response.headers.get("x-gaia-filename", f"{task_id}")
        file_name = os.path.basename(raw_name) or f"{task_id}"
        # os.path.join is platform-safe, unlike manual "/" concatenation.
        tmp_path = os.path.join(tempfile.gettempdir(), file_name)
        with open(tmp_path, "wb") as f:
            f.write(response.content)
        return tmp_path
    except Exception as e:
        return f"ERROR: could not fetch file – {e}"
55
+
56
+ # ---------------------------------------------------------------------
57
+ # 2) CSV-Parser
58
+ # ---------------------------------------------------------------------
59
+ import pandas as pd
60
 
61
@tool
@error_guard
def parse_csv(file_path: str, query: str = "") -> str:
    """Load a CSV file and answer a quick pandas query (optional).

    Args:
        file_path: Path to the CSV file.
        query: Optional ``DataFrame.query`` expression; when empty, a short
            summary of the frame (row/column counts and column names) is
            returned instead.

    Returns:
        Markdown/text rendering of the query result, the summary string, or
        an "ERROR ..." string when the query fails.
    """
    df = pd.read_csv(file_path)
    if not query:
        return f"Loaded CSV with {len(df)} rows and {len(df.columns)} cols.\nColumns: {list(df.columns)}"
    try:
        result = df.query(query)
        # to_markdown() requires the optional 'tabulate' package; fall back
        # to plain text instead of erroring when it is not installed.
        try:
            return result.to_markdown()
        except ImportError:
            return result.to_string()
    except Exception as e:
        return f"ERROR in pandas query: {e}"
73
+
74
+ # ---------------------------------------------------------------------
75
+ # 3) Excel-Parser
76
+ # ---------------------------------------------------------------------
77
@tool
@error_guard
def parse_excel(file_path: str, query: str = "") -> str:
    """Load an Excel file (first sheet) and answer a pandas query (optional).

    Args:
        file_path: Path to the Excel workbook (first sheet is read).
        query: Optional ``DataFrame.query`` expression; when empty, a short
            summary of the frame is returned instead.

    Returns:
        Markdown/text rendering of the query result, the summary string, or
        an "ERROR ..." string when the query fails.
    """
    df = pd.read_excel(file_path)
    if not query:
        return f"Loaded Excel with {len(df)} rows and {len(df.columns)} cols.\nColumns: {list(df.columns)}"
    try:
        result = df.query(query)
        # Same fallback as parse_csv: to_markdown() needs optional 'tabulate'.
        try:
            return result.to_markdown()
        except ImportError:
            return result.to_string()
    except Exception as e:
        return f"ERROR in pandas query: {e}"
89
+
90
+ # ---------------------------------------------------------------------
91
+ # 4) Gemini-Audio-Transkription
92
+ # ---------------------------------------------------------------------
93
@tool
@error_guard
def gemini_transcribe_audio(file_path: str, prompt: str = "Transcribe the audio.") -> str:
    """Use Gemini to transcribe an audio file.

    Args:
        file_path: Path to a local audio file.
        prompt: Instruction sent alongside the audio.

    Returns:
        The model's transcription text, or an "ERROR: ..." string via
        error_guard on failure.
    """
    with open(file_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    mime = mimetypes.guess_type(file_path)[0] or "audio/mpeg"
    message = HumanMessage(
        content=[
            {"type": "text", "text": prompt},
            {"type": "media", "data": b64, "mime_type": mime},
        ]
    )
    # Bug fix: .invoke() is synchronous and returns a message, not a
    # coroutine — asyncio.run() on it raises "a coroutine was expected".
    resp = gemini_llm.invoke([message])
    return resp.content if hasattr(resp, "content") else str(resp)
108
+
109
+ # ---------------------------------------------------------------------
110
+ # 5) Bild-Beschreibung
111
+ # ---------------------------------------------------------------------
112
@tool
@error_guard
def describe_image(file_path: str, prompt: str = "Describe this image.") -> str:
    """Describe a local image with Gemini vision.

    Args:
        file_path: Path to a local image file.
        prompt: Instruction sent alongside the image.

    Returns:
        The model's description text, or an "ERROR: ..." string via
        error_guard on failure.
    """
    mime = mimetypes.guess_type(file_path)[0] or "image/png"
    with open(file_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    message = HumanMessage(
        content=[
            {"type": "text", "text": prompt},
            # Bug fix: a raw PIL.Image object is not a valid content part for
            # LangChain messages; send the standard base64 data-URL form.
            {"type": "image_url", "image_url": f"data:{mime};base64,{b64}"},
        ]
    )
    # Bug fix: .invoke() is synchronous — do not wrap it in asyncio.run().
    resp = gemini_llm.invoke([message])
    return resp.content
126
 
127
+ # ---------------------------------------------------------------------
128
+ # 6) OCR-Tool
129
+ # ---------------------------------------------------------------------
130
@tool
@error_guard
def ocr_image(file_path: str, lang: str = "eng") -> str:
    """Extract text from an image via pytesseract.

    Args:
        file_path: Path to a local image file.
        lang: Tesseract language code (default "eng").

    Returns:
        The stripped OCR text, "No text found." for empty output, or an
        "ERROR: ..." string on failure.
    """
    try:
        from PIL import Image
        import pytesseract

        extracted = pytesseract.image_to_string(Image.open(file_path), lang=lang)
        stripped = extracted.strip()
        return stripped if stripped else "No text found."
    except Exception as e:
        return f"ERROR: {e}"
141
+
142
+ # ---------------------------------------------------------------------
143
+ # 7) Tavily-Web-Suche
144
+ # ---------------------------------------------------------------------
145
@tool
@error_guard
def web_search(query: str, max_results: int = 5) -> str:
    """Search the web via Tavily and return a markdown list of results.

    Args:
        query: Search query string.
        max_results: Maximum number of hits to return (default 5).

    Returns:
        A blank-line-separated list of "title – url" entries, or
        "No results." when the search comes back empty.
    """
    searcher = TavilySearchResults(max_results=max_results, api_key=TAVILY_KEY)
    hits = searcher.invoke(query)
    if not hits:
        return "No results."
    entries = [f"{h['title']} – {h['url']}" for h in hits]
    return "\n\n".join(entries)
153
+
154
+ # ---------------------------------------------------------------------
155
+ # 8) Kleiner Rechner
156
+ # ---------------------------------------------------------------------
157
@tool
@error_guard
def simple_calculator(operation: str, a: float, b: float) -> float:
    """Basic maths (add, subtract, multiply, divide).

    Args:
        operation: One of "add", "subtract", "multiply", "divide".
        a: First operand.
        b: Second operand.

    Returns:
        The numeric result (divide-by-zero yields inf, matching the original
        behavior), or an "ERROR: ..." string for an unknown operation.
    """
    # Lazy dispatch: the original dict literal eagerly computed all four
    # operations on every call; lambdas evaluate only the one requested.
    ops = {
        "add": lambda: a + b,
        "subtract": lambda: a - b,
        "multiply": lambda: a * b,
        "divide": lambda: a / b if b else float("inf"),
    }
    op = ops.get(operation)
    if op is None:
        return f"ERROR: unknown op '{operation}'"
    return op()
168
+
169
+ # ---------------------------------------------------------------------
170
+ # LLM + Semaphore-Throttle (Gemini 2.0 Flash)
171
+ # ---------------------------------------------------------------------
172
# Gemini 2.0 Flash, with every agent tool bound for function calling.
gemini_llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    google_api_key=GOOGLE_API_KEY,
    temperature=0,
    max_output_tokens=2048,
).bind_tools(
    [
        fetch_gaia_file,
        parse_csv,
        parse_excel,
        gemini_transcribe_audio,
        describe_image,
        ocr_image,
        web_search,
        simple_calculator,
    ]
)

# Throttle: at most 3 concurrent Gemini requests (≈ under 15/min).
LLM_SEMA = asyncio.Semaphore(3)
184
+
185
async def safe_invoke(msgs: List[Any]):
    """Call Gemini with at most 3 concurrent requests (LLM_SEMA throttle).

    Bug fix: the original called the blocking .invoke() inside ``async with``,
    which stalls the event loop and makes the semaphore pointless; use the
    async client so concurrent callers actually overlap and are throttled.
    """
    async with LLM_SEMA:
        return await gemini_llm.ainvoke(msgs)
188
+
189
+ # ---------------------------------------------------------------------
190
+ # System-Prompt
191
+ # ---------------------------------------------------------------------
192
+ system_prompt = SystemMessage(content="""
193
+ You are GAIA-Assist, a precise, tool-using agent.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
 
195
+ If a question mentions an attachment:
196
+ 1. Call fetch_gaia_file(task_id)
197
+ 2. Use exactly one specialised parser tool on the returned path.
198
 
199
+ Otherwise decide between web_search or simple_calculator.
200
 
201
+ Format for a tool call:
202
+ Thought: Do I need to use a tool? Yes
203
+ Action: <tool name>
204
+ Action Input: <JSON arguments>
 
 
 
 
 
 
205
 
206
+ Format for final answer:
207
+ Thought: Do I need to use a tool? No
208
+ Final Answer: <your answer>
209
 
210
+ Stop once you output "Final Answer:".
211
+ """)
212
+
213
+ # ---------------------------------------------------------------------
214
+ # LangGraph – Assistant-Node
215
+ # ---------------------------------------------------------------------
216
def assistant(state: MessagesState):
    """LLM node: prepend the system prompt once, call Gemini, flag completion.

    Returns a dict with the model reply appended to "messages" and a
    "should_end" flag consumed by route().
    """
    msgs = list(state["messages"])
    # Bug fix: guard the empty list — msgs[0] raised IndexError when the
    # state started with no messages.
    if not msgs or msgs[0].type != "system":
        msgs = [system_prompt] + msgs
    resp = asyncio.run(safe_invoke(msgs))
    # Bug fix: multimodal replies can carry list-typed content; normalize to
    # str before .lower(), and guard tool_calls with getattr.
    text = resp.content if isinstance(resp.content, str) else str(resp.content)
    finished = text.lstrip().lower().startswith("final answer") or not getattr(resp, "tool_calls", None)
    # NOTE(review): "should_end" is not declared on MessagesState, so LangGraph
    # may drop it from the graph state; route() defends with .get() — confirm
    # the state schema actually carries this key.
    return {"messages": [resp], "should_end": finished}
223
+
224
def route(state) -> str:
    """Conditional edge: "END" when the assistant flagged completion, else "tools".

    Bug fix: uses .get() because MessagesState does not declare "should_end",
    so the key may be absent from the state mapping — direct indexing raised
    KeyError in that case; absent now safely means "keep going".
    """
    return "END" if state.get("should_end", False) else "tools"
226
+
227
+ # ---------------------------------------------------------------------
228
+ # Tools-Liste & Graph
229
+ # ---------------------------------------------------------------------
230
# All tools exposed to the ToolNode (same set bound to gemini_llm above).
tools = [
    fetch_gaia_file, parse_csv, parse_excel,
    gemini_transcribe_audio, describe_image, ocr_image,
    web_search, simple_calculator,
]

# Assemble the graph: assistant decides, tools execute, loop until END.
builder = StateGraph(MessagesState)
builder.add_node("assistant", assistant)
builder.add_node("tools", ToolNode(tools))
builder.add_edge(START, "assistant")
builder.add_conditional_edges("assistant", route, {"tools": "tools", "END": END})
# Bug fix: without this edge the graph dead-ends after a tool call and the
# assistant never sees the tool output; loop back as in a ReAct agent.
builder.add_edge("tools", "assistant")

# Compile
agent_executor = builder.compile()