Final_Assignment_Template

Sleeping

App Files Files Community

Tesvia committed on May 27

Commit

4191a9b

verified ·

1 Parent(s): 6e06cc8

Upload 5 files

Browse files

Files changed (4) hide show

agent.py +97 -118
app.py +8 -21
requirements.txt +2 -4
tools.py +158 -130

agent.py CHANGED Viewed

@@ -1,150 +1,129 @@
-"""
-GAIA benchmark agent using the OpenAI Agents SDK.
-"""
-from __future__ import annotations
-import asyncio
 import os
-from typing import Any, Sequence, Callable, List
-from datetime import datetime
-from agents import RunHooks  # for lifecycle hooks
 from dotenv import load_dotenv
-from agents import Agent, Runner, FunctionTool, Tool
-# Import all function tools
 from tools import (
-    python_run,
-    load_spreadsheet,
-    youtube_transcript,
-    transcribe_audio,
-    image_ocr,
-    duckduckgo_search,
 )
 # ---------------------------------------------------------------------------
-# Load the added system prompt
 # ---------------------------------------------------------------------------
 ADDED_PROMPT_PATH = os.path.join(os.path.dirname(__file__), "added_prompt.txt")
 with open(ADDED_PROMPT_PATH, "r", encoding="utf-8") as f:
     ADDED_PROMPT = f.read().strip()
-load_dotenv()
-def _select_model() -> str:
-    """Return a model identifier appropriate for the Agents SDK based on environment settings."""
-    provider = os.getenv("MODEL_PROVIDER", "hf").lower()
-    if provider == "openai":
-        model_name = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
-        return f"openai/{model_name}"
     if provider == "hf":
-        hf_model_id = os.getenv("HF_MODEL", "Qwen/Qwen2.5-Coder-32B-Instruct")
-        return f"litellm/huggingface/{hf_model_id}"
     raise ValueError(
-        f"Unsupported MODEL_PROVIDER: {provider!r}. Expected 'openai' or 'hf'."
     )
-DEFAULT_TOOLS: List[FunctionTool] = [
-    python_run,
-    load_spreadsheet,
-    youtube_transcript,
-    transcribe_audio,
-    image_ocr,
-    duckduckgo_search,
 ]
-def _build_agent(extra_tools: Sequence[FunctionTool] | None = None) -> Agent:
-    """Construct the underlying Agents SDK `Agent` instance."""
-    instructions = (
-        "You are a helpful assistant tasked with answering questions using the available tools.\n\n"
-        + ADDED_PROMPT
-    )
-    tools: Sequence[Tool] = list(DEFAULT_TOOLS)
     if extra_tools:
-        tools = list(tools) + list(extra_tools)
-    return Agent(
-        name="GAIA Agent",
-        instructions=instructions,
-        tools=tools,
-        model=_select_model(),
-    )
-class LoggingHooks(RunHooks):
-    """RunHooks to log question start, model used, and each tool‐call step."""
-    def __init__(self):
-        self.step_counter = 0
-    async def on_agent_start(self, context, agent):
-        qnum = context.context.get("question_number")
-        qtext = context.context.get("question_text")
-        model = agent.model
-        ts = datetime.now().isoformat()
-        print(f"[{ts}] [Question {qnum}] Starting agent (model={model}) for question: '{qtext}'")
-    async def on_tool_start(self, context, agent, tool):
-        self.step_counter += 1
-        qnum = context.context.get("question_number")
-        ts = datetime.now().isoformat()
-        print(f"[{ts}] [Question {qnum}] Step {self.step_counter}: Invoking tool '{tool.name}'")
-    async def on_tool_end(self, context, agent, tool, result):
-        qnum = context.context.get("question_number")
-        ts = datetime.now().isoformat()
-        print(f"[{ts}] [Question {qnum}] Step {self.step_counter}: Tool '{tool.name}' completed")
-class GAIAAgent:
-    """Thin synchronous wrapper around an asynchronous Agents SDK agent."""
-    def __init__(self, *, extra_tools: Sequence[FunctionTool] | None = None):
-        self._agent = _build_agent(extra_tools=extra_tools)
-    async def _arun(self, question: str, context_data=None, hooks=None) -> str:
-        # Pass context and hooks to Runner.run if provided
-        if context_data is not None and hooks is not None:
-            result = await Runner.run(
-                self._agent,
-                question,
-                context=context_data,
-                hooks=hooks
-            )
-        else:
-            result = await Runner.run(self._agent, question)
-        return str(result.final_output).strip()
-    def __call__(self, question: str, question_number: int | None = None, **_kwargs) -> str:
-        # Prepare logging context if a question_number is given
-        context_data = None
-        hooks = None
-        if question_number is not None:
-            context_data = {
-                "question_number": question_number,
-                "question_text": question
-            }
-            hooks = LoggingHooks()
-        try:
-            loop = asyncio.get_running_loop()
-        except RuntimeError:
-            # No running loop: use asyncio.run
-            return asyncio.run(self._arun(question, context_data, hooks))
-        else:
-            return loop.run_until_complete(self._arun(question, context_data, hooks))
-def gaia_agent(*, extra_tools: Sequence[FunctionTool] | None = None) -> GAIAAgent:
-    """Factory returning a ready‑to‑use GAIAAgent instance."""
-    return GAIAAgent(extra_tools=extra_tools)
 __all__ = ["GAIAAgent", "gaia_agent"]

+"""GAIA benchmark agent using *smolagents*.
+This module exposes:
+* ``gaia_agent()`` – factory returning a ready‑to‑use agent instance.
+* ``GAIAAgent``  – subclass of ``smolagents.CodeAgent``.
+The LLM backend is chosen at runtime via the ``MODEL_PROVIDER``
+environment variable (``hf`` or ``openai``) exactly like *example.py*.
+"""
 import os
+from typing import Any, Sequence
 from dotenv import load_dotenv
+# SmolAgents Tools
+from smolagents import (
+    CodeAgent,
+    DuckDuckGoSearchTool,
+    Tool
+)
+# Custom Tools from tools.py
 from tools import (
+    PythonRunTool,
+    ExcelLoaderTool,
+    YouTubeTranscriptTool,
+    AudioTranscriptionTool,
+    SimpleOCRTool,
 )
 # ---------------------------------------------------------------------------
+# Load the added system prompt from added_prompt.txt (located in the same directory)
 # ---------------------------------------------------------------------------
 ADDED_PROMPT_PATH = os.path.join(os.path.dirname(__file__), "added_prompt.txt")
 with open(ADDED_PROMPT_PATH, "r", encoding="utf-8") as f:
     ADDED_PROMPT = f.read().strip()
+# ---------------------------------------------------------------------------
+# Model selection helper
+# ---------------------------------------------------------------------------
+load_dotenv()  # Make sure we read credentials from .env when running locally
+def _select_model():
+    """Return a smolagents *model* as configured by the ``MODEL_PROVIDER`` env.
+
+    ``MODEL_PROVIDER`` is lower-cased before comparison and defaults to
+    ``"hf"`` when unset.
+
+    * ``hf``     -> ``InferenceClientModel`` built from ``HF_MODEL`` (default
+      ``HuggingFaceH4/zephyr-7b-beta``) and the ``HF_API_KEY`` token.
+    * ``openai`` -> ``OpenAIServerModel`` built from ``OPENAI_MODEL`` (default
+      ``gpt-3.5-turbo``) and ``OPENAI_API_KEY``.
+
+    Raises:
+        ValueError: If ``MODEL_PROVIDER`` is neither ``hf`` nor ``openai``.
+    """
+    provider = os.getenv("MODEL_PROVIDER", "hf").lower()
     if provider == "hf":
+        # Imported inside the branch so only the selected backend class is loaded.
+        from smolagents import InferenceClientModel
+        hf_model_id = os.getenv("HF_MODEL", "HuggingFaceH4/zephyr-7b-beta")
+        # os.getenv returns None when HF_API_KEY is unset; it is passed through as-is.
+        hf_token = os.getenv("HF_API_KEY")
+        return InferenceClientModel(
+            model_id=hf_model_id,
+            token=hf_token
+        )
+    if provider == "openai":
+        from smolagents import OpenAIServerModel
+        openai_model_id = os.getenv("OPENAI_MODEL", "gpt-3.5-turbo")
+        # os.getenv returns None when OPENAI_API_KEY is unset; it is passed through as-is.
+        openai_token = os.getenv("OPENAI_API_KEY")
+        return OpenAIServerModel(
+            model_id=openai_model_id,
+            api_key=openai_token
+        )
     raise ValueError(
+        f"Unsupported MODEL_PROVIDER: {provider!r}. "
+        "Use 'hf' (default) or 'openai'."
     )
+# ---------------------------------------------------------------------------
+# Core Agent implementation
+# ---------------------------------------------------------------------------
+# Tool instances created once at import time. GAIAAgent falls back to this
+# list when it is given no tools, and gaia_agent() copies it before adding extras.
+DEFAULT_TOOLS = [
+    DuckDuckGoSearchTool(),
+    PythonRunTool(),
+    ExcelLoaderTool(),
+    YouTubeTranscriptTool(),
+    AudioTranscriptionTool(),
+    SimpleOCRTool(),
 ]
+class GAIAAgent(CodeAgent):
+    def __init__(
+        self,
+        tools=None
+    ):
+        super().__init__(
+            tools=tools or DEFAULT_TOOLS,
+            model=_select_model()
+        )
+        # Append the additional prompt to the existing system prompt
+        self.prompt_templates["system_prompt"] += f"\n\n{ADDED_PROMPT}"
+    # Convenience so the object itself can be *called* directly
+    def __call__(self, question: str, **kwargs: Any) -> str:
+        steps = self.run(question, **kwargs)
+        # If steps is a primitive, just return it
+        if isinstance(steps, (int, float, str)):
+            return str(steps).strip()
+        last_step = None
+        for step in steps:
+            last_step = step
+        # Defensive: handle int/float/str directly
+        if isinstance(last_step, (int, float, str)):
+            return str(last_step).strip()
+        answer = getattr(last_step, "answer", None)
+        if answer is not None:
+            return str(answer).strip()
+        return str(last_step).strip()
+# ---------------------------------------------------------------------------
+# Factory helpers expected by app.py
+# ---------------------------------------------------------------------------
+def gaia_agent(*, extra_tools: Sequence[Tool] | None = None) -> GAIAAgent:
+    # Compose the toolset: always include all default tools, plus any extras
+    toolset = list(DEFAULT_TOOLS)
     if extra_tools:
+        toolset.extend(extra_tools)
+    return GAIAAgent(tools=toolset)
 __all__ = ["GAIAAgent", "gaia_agent"]

app.py CHANGED Viewed

@@ -32,10 +32,10 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
-    # 1. Instantiate Agent (now using OpenAI Agents SDK)
     try:
         agent = gaia_agent()
-        print("OpenAI Agent instantiated successfully.")
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
@@ -70,16 +70,14 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
-    for idx, item in enumerate(questions_data, start=1):
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            # pass in question_number for logging hooks
-            submitted_answer = agent(question_text, question_number=idx)
             # --- DEBUG LOGGING ---
             if DEBUG:
                 print(f"[DEBUG] Task {task_id}: Answer type: {type(submitted_answer)}, Value: {repr(submitted_answer)}")
@@ -88,22 +86,11 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
             # Force string type here just in case (defensive)
             submitted_answer = str(submitted_answer).strip()
-            answers_payload.append({
-                "task_id": task_id,
-                "submitted_answer": submitted_answer
-            })
-            results_log.append({
-                "Task ID": task_id,
-                "Question": question_text,
-                "Submitted Answer": submitted_answer
-            })
         except Exception as e:
-            print(f"Error running agent on task {task_id}: {e}")
-            results_log.append({
-                "Task ID": task_id,
-                "Question": question_text,
-                "Submitted Answer": f"AGENT ERROR: {e}"
-            })
     if not answers_payload:
         print("Agent did not produce any answers to submit.")

     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
+    # 1. Instantiate Agent (now using smolagents)
     try:
         agent = gaia_agent()
+        print("SmolAgent instantiated successfully.")
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
+    for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
+            submitted_answer = agent(question_text)
             # --- DEBUG LOGGING ---
             if DEBUG:
                 print(f"[DEBUG] Task {task_id}: Answer type: {type(submitted_answer)}, Value: {repr(submitted_answer)}")
             # Force string type here just in case (defensive)
             submitted_answer = str(submitted_answer).strip()
+            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
+             print(f"Error running agent on task {task_id}: {e}")
+             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
     if not answers_payload:
         print("Agent did not produce any answers to submit.")

requirements.txt CHANGED Viewed

@@ -1,10 +1,8 @@
 gradio
 requests
 pandas
-openai-agents[litellm]
-openai>=1.3
 duckduckgo-search
 youtube-transcript-api
 pytesseract
-pillow
-python-dotenv

 gradio
 requests
 pandas
+smolagents[openai]
 duckduckgo-search
 youtube-transcript-api
 pytesseract
+pillow

tools.py CHANGED Viewed

@@ -1,142 +1,170 @@
-"""
-Custom function tools for OpenAI Agents SDK GAIA agent.
-"""
 from __future__ import annotations
 import contextlib
 import io
 import os
-from typing import List, Dict
-from agents import function_tool
-# 1. --------------------------------------------------------------------
-@function_tool
-def python_run(code: str) -> str:
-    """Execute trusted Python code and return the captured stdout together with
-    the repr() of the last expression (or `_result` variable if set).
-    Args:
-        code: Python code to execute.
     """
-    buf = io.StringIO()
-    ns: dict = {}
-    last = None
-    try:
-        with contextlib.redirect_stdout(buf):
-            exec(compile(code, "<agent-python>", "exec"), {}, ns)
-        last = ns.get("_result")
-    except Exception as e:
-        raise RuntimeError(f"python_run error: {e}") from e
-    out = buf.getvalue()
-    return (out + (repr(last) if last is not None else "")).strip()
-# 2. --------------------------------------------------------------------
-@function_tool
-def load_spreadsheet(path: str, sheet: str | int | None = None) -> list[Dict[str, str]]:
-    """Read .csv, .xls or .xlsx from disk and return rows as list of dictionaries.
-    Args:
-        path: Path to spreadsheet file.
-        sheet: Sheet name or index (for Excel files only).
     """
-    import pandas as pd
-    if not os.path.isfile(path):
-        raise FileNotFoundError(path)
-    ext = os.path.splitext(path)[1].lower()
-    if ext == ".csv":
-        df = pd.read_csv(path)
-        dfs = [df]
-    else:
-        sheets = pd.read_excel(path, sheet_name=sheet if sheet not in ("", None) else None)
-        if isinstance(sheets, dict):
-            dfs = sheets.values()
         else:
-            dfs = [sheets]
-    results = []
-    for df in dfs:
-        results.extend([{str(k): v for k, v in row.items()} for row in df.to_dict(orient="records")])
-    return results
-# 3. --------------------------------------------------------------------
-@function_tool
-def youtube_transcript(url: str, lang: str = "en") -> str:
-    """Fetch the subtitles of a YouTube video.
-    Args:
-        url: YouTube video URL.
-        lang: Preferred transcript language code (default "en").
-    """
-    from urllib.parse import urlparse, parse_qs
-    from youtube_transcript_api._api import YouTubeTranscriptApi
-    vid = parse_qs(urlparse(url).query).get("v", [None])[0] or url.split("/")[-1]
-    data = YouTubeTranscriptApi.get_transcript(
-        vid, languages=[lang, "en", "en-US", "en-GB"]
-    )
-    return " ".join(chunk["text"] for chunk in data).strip()
-# 4. --------------------------------------------------------------------
-@function_tool
-def transcribe_audio(path: str, model: str = "whisper-1") -> str:
-    """Transcribe an audio file using OpenAI Whisper.
-    Args:
-        path: Path to audio file (wav / mp3 / m4a / etc.).
-        model: Whisper model name (default "whisper-1").
     """
-    import openai
-    if not os.path.isfile(path):
-        raise FileNotFoundError(path)
-    client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-    with open(path, "rb") as fp:
-        transcript = client.audio.transcriptions.create(model=model, file=fp)
-    return transcript.text.strip()
-# 5. --------------------------------------------------------------------
-@function_tool
-def image_ocr(path: str) -> str:
-    """Perform OCR on an image using Tesseract.
-    Args:
-        path: Path to image file.
     """
-    from PIL import Image
-    import pytesseract
-    if not os.path.isfile(path):
-        raise FileNotFoundError(path)
-    return pytesseract.image_to_string(Image.open(path)).strip()
-# 6. --------------------------------------------------------------------
-@function_tool
-def duckduckgo_search(query: str, max_results: int = 5) -> List[Dict[str, str]]:
-    """Search DuckDuckGo and return a list of result dicts with title, href and body.
-    Args:
-        query: The search query.
-        max_results: Maximum results to return (default 5).
     """
-    from duckduckgo_search import DDGS
-    results = []
-    with DDGS() as ddgs:
-        for r in ddgs.text(query, max_results=max_results):
-            results.append(
-                {
-                    "title": r.get("title", ""),
-                    "href": r.get("href", ""),
-                    "body": r.get("body", ""),
-                }
-            )
-    return results

+# Custom tools for smolagents GAIA agent
 from __future__ import annotations
 import contextlib
 import io
 import os
+from typing import Any, Dict, List
+from smolagents import Tool
+# ---- 1. PythonRunTool ------------------------------------------------------
+class PythonRunTool(Tool):
+    name = "python_run"
+    description = """
+        Execute trusted Python code and return printed output + repr() of the last expression (or _result variable).
     """
+    inputs = {
+        "code": {
+            "type": "string",
+            "description": "Python code to execute",
+            "required": True
+        }
+    }
+    output_type = "string"
+    def forward(self, code: str) -> str:
+        buf, ns = io.StringIO(), {}
+        last = None
+        try:
+            with contextlib.redirect_stdout(buf):
+                exec(compile(code, "<agent-python>", "exec"), {}, ns)
+            last = ns.get("_result", None)
+        except Exception as e:
+            raise RuntimeError(f"PythonRunTool error: {e}") from e
+        out = buf.getvalue()
+        # Always return a string
+        result = (out + (repr(last) if last is not None else "")).strip()
+        return str(result)
+# ---- 2. ExcelLoaderTool ----------------------------------------------------
+class ExcelLoaderTool(Tool):
+    name = "load_spreadsheet"
+    description = """
+        Read .xlsx/.xls/.csv from disk and return rows as a list of dictionaries with string keys.
     """
+    inputs = {
+        "path": {
+            "type": "string",
+            "description": "Path to .csv/.xls/.xlsx file",
+            "required": True
+        },
+        "sheet": {
+            "type": "string",
+            "description": "Sheet name or index (optional, required for Excel files only)",
+            "required": False,
+            "default": "",
+            "nullable": True
+        }
+    }
+    output_type = "array"
+    def forward(self, path: str, sheet: str | int | None = None) -> str:
+        import pandas as pd
+        if not os.path.isfile(path):
+            raise FileNotFoundError(path)
+        ext = os.path.splitext(path)[1].lower()
+        if sheet == "":
+            sheet = None
+        if ext == ".csv":
+            df = pd.read_csv(path)
         else:
+            df = pd.read_excel(path, sheet_name=sheet)
+        if isinstance(df, dict):
+            # If user did not specify a sheet, use the first one found
+            first_sheet = next(iter(df))
+            df = df[first_sheet]
+        records = [{str(k): v for k, v in row.items()} for row in df.to_dict(orient="records")]
+        # Always return a string
+        return str(records)
+# ---- 3. YouTubeTranscriptTool ---------------------------------------------
+class YouTubeTranscriptTool(Tool):
+    name = "youtube_transcript"
+    description = """
+        Return the subtitles of a YouTube URL using youtube-transcript-api.
     """
+    inputs = {
+        "url": {
+            "type": "string",
+            "description": "YouTube URL",
+            "required": True
+        },
+        "lang": {
+            "type": "string",
+            "description": "Transcript language (default: en)",
+            "required": False,
+            "default": "en",
+            "nullable": True
+        }
+    }
+    output_type = "string"
+    def forward(self, url: str, lang: str = "en") -> str:
+        from urllib.parse import urlparse, parse_qs
+        from youtube_transcript_api._api import YouTubeTranscriptApi
+        vid = parse_qs(urlparse(url).query).get("v", [None])[0] or url.split("/")[-1]
+        data = YouTubeTranscriptApi.get_transcript(vid, languages=[lang, "en", "en-US", "en-GB"])
+        text = " ".join(d["text"] for d in data).strip()
+        return str(text)
+# ---- 4. AudioTranscriptionTool --------------------------------------------
+class AudioTranscriptionTool(Tool):
+    name = "transcribe_audio"
+    description = """
+        Transcribe an audio file with OpenAI Whisper, returns plain text."
     """
+    inputs = {
+        "path": {
+            "type": "string",
+            "description": "Path to audio file",
+            "required": True
+        },
+        "model": {
+            "type": "string",
+            "description": "Model name for transcription (default: whisper-1)",
+            "required": False,
+            "default": "whisper-1",
+            "nullable": True
+        }
+    }
+    output_type = "string"
+    def forward(self, path: str, model: str = "whisper-1") -> str:
+        import openai
+        if not os.path.isfile(path):
+            raise FileNotFoundError(path)
+        client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+        with open(path, "rb") as fp:
+            transcript = client.audio.transcriptions.create(model=model, file=fp)
+        return str(transcript.text.strip())
+# ---- 5. SimpleOCRTool ------------------------------------------------------
+class SimpleOCRTool(Tool):
+    name = "image_ocr"
+    description = """
+        Return any text spotted in an image via pytesseract OCR.
     """
+    inputs = {
+        "path": {
+            "type": "string",
+            "description": "Path to image file",
+            "required": True
+        }
+    }
+    output_type = "string"
+    def forward(self, path: str) -> str:
+        from PIL import Image
+        import pytesseract
+        if not os.path.isfile(path):
+            raise FileNotFoundError(path)
+        return str(pytesseract.image_to_string(Image.open(path)).strip())
+# ---------------------------------------------------------------------------
+__all__ = [
+    "PythonRunTool",
+    "ExcelLoaderTool",
+    "YouTubeTranscriptTool",
+    "AudioTranscriptionTool",
+    "SimpleOCRTool",
+]