Tesvia committed · Commit 73bb16b · verified · 1 Parent(s): db0abac

Upload 5 files

Files changed (4)
  1. agent.py +81 -66
  2. app.py +17 -10
  3. requirements.txt +1 -1
  4. tools.py +107 -142
agent.py CHANGED
@@ -1,36 +1,37 @@
- """agent.py – GAIA benchmark agent using *smolagents*.

  This module exposes:

  * ``gaia_agent()`` – factory returning a ready‑to‑use agent instance.
- * ``GAIAAgent`` – subclass of ``smolagents.CodeAgent``.

  The LLM backend is chosen at runtime via the ``MODEL_PROVIDER``
- environment variable (``hf`` or ``openai``) exactly like *example.py*.
  """

  import os
- from typing import Any, Sequence

  from dotenv import load_dotenv

- # SmolAgents Tools
- from smolagents import (
-     CodeAgent,
-     DuckDuckGoSearchTool,
-     Tool
- )

- # Custom Tools from tools.py
  from tools import (
-     PythonRunTool,
-     ExcelLoaderTool,
-     YouTubeTranscriptTool,
-     AudioTranscriptionTool,
-     SimpleOCRTool,
  )

-
  # ---------------------------------------------------------------------------
  # Load the added system prompt from system_prompt.txt (located in the same directory)
  # ---------------------------------------------------------------------------
@@ -38,34 +39,38 @@ ADDED_PROMPT_PATH = os.path.join(os.path.dirname(__file__), "added_prompt.txt")
  with open(ADDED_PROMPT_PATH, "r", encoding="utf-8") as f:
      ADDED_PROMPT = f.read().strip()

-
  # ---------------------------------------------------------------------------
  # Model selection helper
  # ---------------------------------------------------------------------------

- load_dotenv() # Make sure we read credentials from .env when running locally

- def _select_model():
-     """Return a smolagents *model* as configured by the ``MODEL_PROVIDER`` env."""

      provider = os.getenv("MODEL_PROVIDER", "hf").lower()

      if provider == "hf":
-         from smolagents import InferenceClientModel
-         hf_model_id = os.getenv("HF_MODEL", "HuggingFaceH4/zephyr-7b-beta")
-         hf_token = os.getenv("HF_API_KEY")
-         return InferenceClientModel(
-             model_id=hf_model_id,
-             token=hf_token
-         )

      if provider == "openai":
-         from smolagents import OpenAIServerModel
          openai_model_id = os.getenv("OPENAI_MODEL", "gpt-3.5-turbo")
-         openai_token = os.getenv("OPENAI_API_KEY")
-         return OpenAIServerModel(
-             model_id=openai_model_id,
-             api_key=openai_token
          )

      raise ValueError(
@@ -77,50 +82,60 @@ def _select_model():
  # Core Agent implementation
  # ---------------------------------------------------------------------------

- DEFAULT_TOOLS = [
-     DuckDuckGoSearchTool(),
-     PythonRunTool(),
-     ExcelLoaderTool(),
-     YouTubeTranscriptTool(),
-     AudioTranscriptionTool(),
-     SimpleOCRTool(),
  ]

- class GAIAAgent(CodeAgent):
      def __init__(
          self,
-         tools=None
      ):
-         super().__init__(
-             tools=tools or DEFAULT_TOOLS,
-             model=_select_model()
          )
-         # Append the additional prompt to the existing system prompt
-         self.prompt_templates["system_prompt"] += f"\n\n{ADDED_PROMPT}"
-
-     # Convenience so the object itself can be *called* directly
-     def __call__(self, question: str, **kwargs: Any) -> str:
-         steps = self.run(question, **kwargs)
-         # If steps is a primitive, just return it
-         if isinstance(steps, (int, float, str)):
-             return str(steps).strip()
-         last_step = None
-         for step in steps:
-             last_step = step
-         # Defensive: handle int/float/str directly
-         if isinstance(last_step, (int, float, str)):
-             return str(last_step).strip()
-         answer = getattr(last_step, "answer", None)
-         if answer is not None:
-             return str(answer).strip()
-         return str(last_step).strip()

  # ---------------------------------------------------------------------------
  # Factory helpers expected by app.py
  # ---------------------------------------------------------------------------

- def gaia_agent(*, extra_tools: Sequence[Tool] | None = None) -> GAIAAgent:
-     # Compose the toolset: always include all default tools, plus any extras
      toolset = list(DEFAULT_TOOLS)
      if extra_tools:
          toolset.extend(extra_tools)

+ """GAIA benchmark agent using OpenAI Agents SDK.

  This module exposes:

  * ``gaia_agent()`` – factory returning a ready‑to‑use agent instance.
+ * ``GAIAAgent`` – a class that wraps ``openai_agents.Agent``.

  The LLM backend is chosen at runtime via the ``MODEL_PROVIDER``
+ environment variable (``hf`` or ``openai``).
  """

  import os
+ import asyncio # Added for potential direct asyncio.run if needed, and for async def
+ from typing import Any, Sequence, Callable, Union # Added Callable and Union

  from dotenv import load_dotenv

+ # OpenAI Agents SDK imports
+ from openai_agents import Agent, Runner
+ from openai_agents.models.openai_chat_completions import OpenAIChatCompletionsModel
+ from openai_agents.extensions.models.litellm_model import LitellmModel
+ # FunctionToolType could be imported if it's a public type, for now using Callable
+ # from openai_agents import FunctionToolType # Example if such type exists

+ # Custom Tools from tools.py (now functions)
  from tools import (
+     python_run,
+     load_spreadsheet,
+     youtube_transcript,
+     transcribe_audio,
+     image_ocr,
+     duckduckgo_search, # Added the new tool
  )

  # ---------------------------------------------------------------------------
  # Load the added system prompt from system_prompt.txt (located in the same directory)
  # ---------------------------------------------------------------------------

  with open(ADDED_PROMPT_PATH, "r", encoding="utf-8") as f:
      ADDED_PROMPT = f.read().strip()

  # ---------------------------------------------------------------------------
  # Model selection helper
  # ---------------------------------------------------------------------------

+ load_dotenv() # Make sure we read credentials from .env

+ def _select_model() -> Union[OpenAIChatCompletionsModel, LitellmModel]:
+     """Return an OpenAI Agents SDK model instance as configured by env variables."""

      provider = os.getenv("MODEL_PROVIDER", "hf").lower()
+     # Ensure API keys are loaded if not directly passed to model constructors
+     # OpenAI API key is typically read by the library from OPENAI_API_KEY env var
+     # LiteLLM also often relies on environment variables for keys

      if provider == "hf":
+         hf_model_id = os.getenv("HF_MODEL", "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO") # Example, ensure this is a valid LiteLLM model ID
+         # LiteLLM typically requires a prefix for HuggingFace models
+         if not hf_model_id.startswith("huggingface/"):
+             hf_model_id = f"huggingface/{hf_model_id}"
+         hf_token = os.getenv("HF_API_KEY") # LiteLLM might use this or HUGGINGFACE_API_KEY
+         # For LiteLLM, api_key parameter might be used for specific providers,
+         # but often it relies on env vars like HUGGINGFACE_API_KEY.
+         # Passing token explicitly if LitellmModel supports it, or ensuring env var is set.
+         return LitellmModel(model=hf_model_id, api_key=hf_token if hf_token else None)
+

      if provider == "openai":
          openai_model_id = os.getenv("OPENAI_MODEL", "gpt-3.5-turbo")
+         openai_token = os.getenv("OPENAI_API_KEY") # OpenAIChatCompletionsModel will use this by default if set in env
+         return OpenAIChatCompletionsModel(
+             model=openai_model_id,
+             api_key=openai_token # Explicitly passing, though often picked from env
          )

      raise ValueError(

  # Core Agent implementation
  # ---------------------------------------------------------------------------

+ DEFAULT_TOOLS: Sequence[Callable] = [
+     duckduckgo_search,
+     python_run,
+     load_spreadsheet,
+     youtube_transcript,
+     transcribe_audio,
+     image_ocr,
  ]

+ class GAIAAgent:
      def __init__(
          self,
+         tools: Sequence[Callable] | None = None
      ):
+         self.model = _select_model()
+         self.tools = tools or DEFAULT_TOOLS
+
+         base_system_prompt = "You are a helpful assistant designed to answer questions and complete tasks. You have access to a variety of tools to help you."
+         full_system_prompt = f"{base_system_prompt}\n\n{ADDED_PROMPT}"
+
+         self.agent = Agent(
+             model=self.model,
+             tools=self.tools,
+             instructions=full_system_prompt,
+             name="GAIAAgent"
          )
+
+     async def __call__(self, question: str, **kwargs: Any) -> str:
+         """
+         Asynchronously processes a question using the agent and returns the final answer.
+         kwargs are passed to Runner.run if supported, currently ignored as per plan.
+         """
+         # As per plan, Runner.run(self.agent, question) is used.
+         # If session_id or other kwargs are needed by Runner.run, this might need adjustment.
+         response = await Runner.run(self.agent, question)
+
+         # Extract the final output. Assuming response.final_output is the way.
+         # The type of final_output needs to be handled (e.g. if it's a message object or just text)
+         final_answer = response.final_output
+         if hasattr(final_answer, 'content'): # Example if final_output is a message object
+             final_answer_text = str(final_answer.content)
+         else:
+             final_answer_text = str(final_answer)
+
+         return final_answer_text.strip()

  # ---------------------------------------------------------------------------
  # Factory helpers expected by app.py
  # ---------------------------------------------------------------------------

+ def gaia_agent(*, extra_tools: Sequence[Callable] | None = None) -> GAIAAgent:
+     """
+     Factory function to create a GAIAAgent instance with default and optional extra tools.
+     """
      toolset = list(DEFAULT_TOOLS)
      if extra_tools:
          toolset.extend(extra_tools)
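
Usage note: a minimal sketch of how the reworked agent would be driven from a script, assuming the ``openai_agents`` import path and the ``Runner.run(...)`` / ``final_output`` behaviour this commit relies on; the ``demo`` wrapper and sample question are illustrative only, not part of the commit.

# Illustrative driver, not part of the commit. Assumes MODEL_PROVIDER (and the
# matching API key) is set in the environment, as agent.py expects.
import asyncio

from agent import gaia_agent


async def demo() -> None:
    agent = gaia_agent()                    # GAIAAgent built with DEFAULT_TOOLS
    answer = await agent("What is 2 + 2?")  # __call__ is async, so it must be awaited
    print(answer)


if __name__ == "__main__":
    asyncio.run(demo())
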
app.py CHANGED
@@ -2,6 +2,7 @@ import os
  import gradio as gr
  import requests
  import pandas as pd

  # --- Our Agent ---
  from agent import gaia_agent
@@ -13,10 +14,11 @@ DEBUG = os.getenv("DEBUG", "0") == "1"
  # --- Constants ---
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

- def run_and_submit_all( profile: gr.OAuthProfile | None):
      """
-     Fetches all questions, runs the BasicAgent on them, submits all answers,
-     and displays the results.
      """
      # --- Determine HF Space Runtime URL and Repo URL ---
      space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
@@ -32,10 +34,10 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
      questions_url = f"{api_url}/questions"
      submit_url = f"{api_url}/submit"

-     # 1. Instantiate Agent (now using smolagents)
      try:
          agent = gaia_agent()
-         print("SmolAgent instantiated successfully.")
      except Exception as e:
          print(f"Error instantiating agent: {e}")
          return f"Error initializing agent: {e}", None
@@ -48,7 +50,9 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
      import json

      try:
-         response = requests.get(questions_url, timeout=15)
          response.raise_for_status()
          questions_data = response.json()
          if not questions_data:
@@ -57,7 +61,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
          print(f"Fetched {len(questions_data)} questions.")
      except json.JSONDecodeError as e:
          print(f"Error decoding JSON response from questions endpoint: {e}")
-         print(f"Response text: {response.text[:500]}")
          return f"Error decoding server response for questions: {e}", None
      except requests.exceptions.RequestException as e:
          print(f"Error fetching questions: {e}")
@@ -77,7 +81,8 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
              print(f"Skipping item with missing task_id or question: {item}")
              continue
          try:
-             submitted_answer = agent(question_text)
              # --- DEBUG LOGGING ---
              if DEBUG:
                  print(f"[DEBUG] Task {task_id}: Answer type: {type(submitted_answer)}, Value: {repr(submitted_answer)}")
@@ -104,7 +109,8 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
      # 5. Submit
      print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
      try:
-         response = requests.post(submit_url, json=submission_data, timeout=60)
          response.raise_for_status()
          result_data = response.json()
          final_status = (
@@ -122,7 +128,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
          try:
              error_json = e.response.json()
              error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
-         except requests.exceptions.JSONDecodeError:
              error_detail += f" Response: {e.response.text[:500]}"
          status_message = f"Submission Failed: {error_detail}"
          print(status_message)
@@ -170,6 +176,7 @@ with gr.Blocks() as demo:
      status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
      results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

      run_button.click(
          fn=run_and_submit_all,
          outputs=[status_output, results_table]

  import gradio as gr
  import requests
  import pandas as pd
+ import asyncio

  # --- Our Agent ---
  from agent import gaia_agent

  # --- Constants ---
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

+ # 2. Modified function definition to be async def
+ async def run_and_submit_all( profile: gr.OAuthProfile | None):
      """
+     Fetches all questions, runs the GAIAAgent on them, submits all answers,
+     and displays the results. Now an async function.
      """
      # --- Determine HF Space Runtime URL and Repo URL ---
      space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code

      questions_url = f"{api_url}/questions"
      submit_url = f"{api_url}/submit"

+     # 1. Instantiate Agent
      try:
          agent = gaia_agent()
+         print("GAIAAgent instantiated successfully.")
      except Exception as e:
          print(f"Error instantiating agent: {e}")
          return f"Error initializing agent: {e}", None

      import json

      try:
+         # Using asyncio.to_thread to run synchronous requests.get in a separate thread
+         # to avoid blocking the asyncio event loop.
+         response = await asyncio.to_thread(requests.get, questions_url, timeout=15)
          response.raise_for_status()
          questions_data = response.json()
          if not questions_data:

          print(f"Fetched {len(questions_data)} questions.")
      except json.JSONDecodeError as e:
          print(f"Error decoding JSON response from questions endpoint: {e}")
+         print(f"Response text: {response.text[:500]}") # type: ignore
          return f"Error decoding server response for questions: {e}", None
      except requests.exceptions.RequestException as e:
          print(f"Error fetching questions: {e}")

              print(f"Skipping item with missing task_id or question: {item}")
              continue
          try:
+             # 3. Changed agent invocation to await agent call
+             submitted_answer = await agent(question_text)
              # --- DEBUG LOGGING ---
              if DEBUG:
                  print(f"[DEBUG] Task {task_id}: Answer type: {type(submitted_answer)}, Value: {repr(submitted_answer)}")

      # 5. Submit
      print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
      try:
+         # Using asyncio.to_thread for synchronous requests.post
+         response = await asyncio.to_thread(requests.post, submit_url, json=submission_data, timeout=60)
          response.raise_for_status()
          result_data = response.json()
          final_status = (

          try:
              error_json = e.response.json()
              error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
+         except requests.exceptions.JSONDecodeError: # Changed from requests.JSONDecodeError
              error_detail += f" Response: {e.response.text[:500]}"
          status_message = f"Submission Failed: {error_detail}"
          print(status_message)

      status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
      results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

+     # 5. Gradio's click call remains the same, it should handle async functions.
      run_button.click(
          fn=run_and_submit_all,
          outputs=[status_output, results_table]
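
The asyncio.to_thread pattern used above generalises to any blocking call made from the async Gradio handler; a small self-contained sketch follows (the fetch_json helper is hypothetical, not part of the commit).

# Minimal illustration of the asyncio.to_thread pattern used in run_and_submit_all.
import asyncio

import requests


async def fetch_json(url: str, timeout: int = 15):
    # requests.get blocks, so run it in a worker thread to keep the event loop responsive
    response = await asyncio.to_thread(requests.get, url, timeout=timeout)
    response.raise_for_status()
    return response.json()


if __name__ == "__main__":
    data = asyncio.run(fetch_json("https://agents-course-unit4-scoring.hf.space/questions"))
    print(type(data), len(data))
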
requirements.txt CHANGED
@@ -1,7 +1,7 @@
  gradio
  requests
  pandas
- smolagents[openai]
  duckduckgo-search
  youtube-transcript-api
  pytesseract

  gradio
  requests
  pandas
+ openai-agents
  duckduckgo-search
  youtube-transcript-api
  pytesseract
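
Since the dependency swap changes the import surface, a quick local check of what the newly pinned package actually exposes can help; both candidate module names below are assumptions to probe, not a statement of the package's real layout.

# Probe which top-level module the installed agents SDK provides.
import importlib.util

for candidate in ("openai_agents", "agents"):
    spec = importlib.util.find_spec(candidate)
    print(f"{candidate}: {'available' if spec else 'not found'}")
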
tools.py CHANGED
@@ -1,166 +1,131 @@
- # Custom tools for smolagents GAIA agent
  from __future__ import annotations
  import contextlib
  import io
  import os
- from typing import Any, Dict, List

- from smolagents import Tool

- # ---- 1. PythonRunTool ------------------------------------------------------
- class PythonRunTool(Tool):
-     name = "python_run"
-     description = """
-     Execute trusted Python code and return printed output + repr() of the last expression (or _result variable).
      """
-     inputs = {
-         "code": {
-             "type": "string",
-             "description": "Python code to execute",
-             "required": True
-         }
-     }
-     output_type = "string"

-     def forward(self, code: str) -> str:
-         buf, ns = io.StringIO(), {}
-         last = None
-         try:
-             with contextlib.redirect_stdout(buf):
-                 exec(compile(code, "<agent-python>", "exec"), {}, ns)
-             last = ns.get("_result", None)
-         except Exception as e:
-             raise RuntimeError(f"PythonRunTool error: {e}") from e
-         out = buf.getvalue()
-         # Always return a string
-         result = (out + (repr(last) if last is not None else "")).strip()
-         return str(result)

- # ---- 2. ExcelLoaderTool ----------------------------------------------------
- class ExcelLoaderTool(Tool):
-     name = "load_spreadsheet"
-     description = """
-     Read .xlsx/.xls/.csv from disk and return rows as a list of dictionaries with string keys.
      """
-     inputs = {
-         "path": {
-             "type": "string",
-             "description": "Path to .csv/.xls/.xlsx file",
-             "required": True
-         },
-         "sheet": {
-             "type": "string",
-             "description": "Sheet name or index (optional, required for Excel files only)",
-             "required": False,
-             "default": "",
-             "nullable": True
-         }
-     }
-     output_type = "array"

-     def forward(self, path: str, sheet: str | int | None = None) -> str:
-         import pandas as pd
-         if not os.path.isfile(path):
-             raise FileNotFoundError(path)
-         ext = os.path.splitext(path)[1].lower()
-         if sheet == "":
-             sheet = None
-         if ext == ".csv":
-             df = pd.read_csv(path)
-         else:
-             df = pd.read_excel(path, sheet_name=sheet)
-         records = [{str(k): v for k, v in row.items()} for row in df.to_dict(orient="records")]
-         # Always return a string
-         return str(records)

- # ---- 3. YouTubeTranscriptTool ---------------------------------------------
- class YouTubeTranscriptTool(Tool):
-     name = "youtube_transcript"
-     description = """
-     Return the subtitles of a YouTube URL using youtube-transcript-api.
      """
-     inputs = {
-         "url": {
-             "type": "string",
-             "description": "YouTube URL",
-             "required": True
-         },
-         "lang": {
-             "type": "string",
-             "description": "Transcript language (default: en)",
-             "required": False,
-             "default": "en",
-             "nullable": True
-         }
-     }
-     output_type = "string"

-     def forward(self, url: str, lang: str = "en") -> str:
-         from urllib.parse import urlparse, parse_qs
-         from youtube_transcript_api._api import YouTubeTranscriptApi
-         vid = parse_qs(urlparse(url).query).get("v", [None])[0] or url.split("/")[-1]
-         data = YouTubeTranscriptApi.get_transcript(vid, languages=[lang, "en", "en-US", "en-GB"])
-         text = " ".join(d["text"] for d in data).strip()
-         return str(text)

- # ---- 4. AudioTranscriptionTool --------------------------------------------
- class AudioTranscriptionTool(Tool):
-     name = "transcribe_audio"
-     description = """
-     Transcribe an audio file with OpenAI Whisper, returns plain text."
      """
-     inputs = {
-         "path": {
-             "type": "string",
-             "description": "Path to audio file",
-             "required": True
-         },
-         "model": {
-             "type": "string",
-             "description": "Model name for transcription (default: whisper-1)",
-             "required": False,
-             "default": "whisper-1",
-             "nullable": True
-         }
-     }
-     output_type = "string"

-     def forward(self, path: str, model: str = "whisper-1") -> str:
-         import openai
-         if not os.path.isfile(path):
-             raise FileNotFoundError(path)
-         client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-         with open(path, "rb") as fp:
-             transcript = client.audio.transcriptions.create(model=model, file=fp)
-         return str(transcript.text.strip())

- # ---- 5. SimpleOCRTool ------------------------------------------------------
- class SimpleOCRTool(Tool):
-     name = "image_ocr"
-     description = """
-     Return any text spotted in an image via pytesseract OCR.
      """
-     inputs = {
-         "path": {
-             "type": "string",
-             "description": "Path to image file",
-             "required": True
-         }
-     }
-     output_type = "string"

-     def forward(self, path: str) -> str:
-         from PIL import Image
-         import pytesseract
-         if not os.path.isfile(path):
-             raise FileNotFoundError(path)
-         return str(pytesseract.image_to_string(Image.open(path)).strip())

  # ---------------------------------------------------------------------------
  __all__ = [
-     "PythonRunTool",
-     "ExcelLoaderTool",
-     "YouTubeTranscriptTool",
-     "AudioTranscriptionTool",
-     "SimpleOCRTool",
  ]

+ # Custom tools for OpenAI Agents
  from __future__ import annotations
+
  import contextlib
  import io
  import os
+ from typing import Any, List, Union
+
+ from openai_agents import function_tool # Using openai_agents
+ import pandas as pd
+ import openai
+ from PIL import Image
+ import pytesseract
+ from duckduckgo_search import DDGS
+ from urllib.parse import urlparse, parse_qs # For youtube_transcript
+ from youtube_transcript_api import YouTubeTranscriptApi # For youtube_transcript, corrected import

+ # ---- 1. PythonRunTool -> python_run function ----------------------------------
+ @function_tool
+ def python_run(code: str) -> str:
+     """
+     Execute trusted Python code and return printed output + repr() of the last expression (or _result variable).
+
+     Args:
+         code (str): Python code to execute.
+     """
+     buf, ns = io.StringIO(), {}
+     last = None
+     try:
+         with contextlib.redirect_stdout(buf):
+             exec(compile(code, "<agent-python>", "exec"), {}, ns)
+         last = ns.get("_result", None)
+     except Exception as e:
+         raise RuntimeError(f"PythonRunTool error: {e}") from e
+     out = buf.getvalue()
+     # Always return a string
+     result = (out + (repr(last) if last is not None else "")).strip()
+     return str(result)

+ # ---- 2. ExcelLoaderTool -> load_spreadsheet function --------------------------
+ @function_tool
+ def load_spreadsheet(path: str, sheet: Union[str, int, None] = None) -> str:
      """
+     Read .xlsx/.xls/.csv from disk and return rows as a list of dictionaries with string keys.

+     Args:
+         path (str): Path to .csv/.xls/.xlsx file.
+         sheet (Union[str, int, None], optional): Sheet name or index (optional, required for Excel files only). Defaults to None.
+     """
+     if not os.path.isfile(path):
+         raise FileNotFoundError(path)
+     ext = os.path.splitext(path)[1].lower()
+     if sheet == "": # Treat empty string as None for sheet name
+         sheet = None
+     if ext == ".csv":
+         df = pd.read_csv(path)
+     else:
+         df = pd.read_excel(path, sheet_name=sheet)
+     records = [{str(k): v for k, v in row.items()} for row in df.to_dict(orient="records")]
+     # Always return a string
+     return str(records)

+ # ---- 3. YouTubeTranscriptTool -> youtube_transcript function ------------------
+ @function_tool
+ def youtube_transcript(url: str, lang: str = "en") -> str:
      """
+     Return the subtitles of a YouTube URL using youtube-transcript-api.

+     Args:
+         url (str): YouTube URL.
+         lang (str, optional): Transcript language. Defaults to "en".
+     """
+     vid = parse_qs(urlparse(url).query).get("v", [None])[0] or url.split("/")[-1]
+     # Corrected import: from youtube_transcript_api import YouTubeTranscriptApi
+     data = YouTubeTranscriptApi.get_transcript(vid, languages=[lang, "en", "en-US", "en-GB"])
+     text = " ".join(d["text"] for d in data).strip()
+     return str(text)

+ # ---- 4. AudioTranscriptionTool -> transcribe_audio function -------------------
+ @function_tool
+ def transcribe_audio(path: str, model: str = "whisper-1") -> str:
      """
+     Transcribe an audio file with OpenAI Whisper, returns plain text.

+     Args:
+         path (str): Path to audio file.
+         model (str, optional): Model name for transcription. Defaults to "whisper-1".
+     """
+     if not os.path.isfile(path):
+         raise FileNotFoundError(path)
+     client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+     with open(path, "rb") as fp:
+         transcript_data = client.audio.transcriptions.create(model=model, file=fp) # Renamed to transcript_data
+     return str(transcript_data.text.strip())

+ # ---- 5. SimpleOCRTool -> image_ocr function ------------------------------------
+ @function_tool
+ def image_ocr(path: str) -> str:
      """
+     Return any text spotted in an image via pytesseract OCR.

+     Args:
+         path (str): Path to image file.
+     """
+     if not os.path.isfile(path):
+         raise FileNotFoundError(path)
+     return str(pytesseract.image_to_string(Image.open(path)).strip())

+ # ---- 6. New DuckDuckGo Search Tool ---------------------------------------------
+ @function_tool
+ def duckduckgo_search(query: str) -> str:
      """
+     Searches the web using DuckDuckGo and returns a summary of results.

+     Args:
+         query (str): The search query.
+     """
+     with DDGS() as ddgs:
+         results = ddgs.text(query, max_results=5) # Get top 5 results
+     summary = "\n".join([f"{r['title']}: {r['body']}" for r in results]) if results else "No results found."
+     return summary

  # ---------------------------------------------------------------------------
  __all__ = [
+     "python_run",
+     "load_spreadsheet",
+     "youtube_transcript",
+     "transcribe_audio",
+     "image_ocr",
+     "duckduckgo_search",
  ]
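
The _result convention in python_run is easy to miss; below is a standalone sketch of the same stdout-plus-_result capture, re-implemented rather than calling the decorated tool, since a @function_tool-wrapped object may not be directly callable.

# Standalone illustration of the capture pattern used inside python_run.
import contextlib
import io

code = "print('hello')\n_result = 6 * 7"

buf, ns = io.StringIO(), {}
with contextlib.redirect_stdout(buf):
    exec(compile(code, "<demo>", "exec"), {}, ns)

# Printed output comes from the buffer, the final value from the _result variable.
print((buf.getvalue() + repr(ns.get("_result"))).strip())  # prints "hello" then "42"
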