Tesvia committed on
Commit
365b711
·
verified ·
1 Parent(s): 75a272e

Upload 4 files

Browse files
Files changed (1) hide show
  1. tools.py +30 -12
tools.py CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
3
  import contextlib
4
  import io
5
  import os
6
- from typing import Any, Dict, List, Hashable
7
 
8
  from smolagents import Tool
9
 
@@ -14,8 +14,11 @@ class PythonRunTool(Tool):
14
  "Execute trusted Python code and return printed output "
15
  "+ repr() of the last expression (or _result variable)."
16
  )
 
 
 
17
 
18
- def forward(self, code: str) -> str: # type: ignore[override]
19
  buf, ns = io.StringIO(), {}
20
  last = None
21
  try:
@@ -34,8 +37,17 @@ class ExcelLoaderTool(Tool):
34
  "Read .xlsx/.xls/.csv from disk and return "
35
  "rows as a list of dictionaries with string keys."
36
  )
 
 
 
 
 
 
 
 
 
37
 
38
- def forward(self, path: str, sheet: str | int | None = None) -> List[Dict[str, Any]]: # type: ignore[override]
39
  import pandas as pd
40
  if not os.path.isfile(path):
41
  raise FileNotFoundError(path)
@@ -44,7 +56,6 @@ class ExcelLoaderTool(Tool):
44
  df = pd.read_csv(path)
45
  else:
46
  df = pd.read_excel(path, sheet_name=sheet)
47
- # Ensure all keys are str for type safety
48
  records = [{str(k): v for k, v in row.items()} for row in df.to_dict(orient="records")]
49
  return records
50
 
@@ -52,10 +63,13 @@ class ExcelLoaderTool(Tool):
52
  class YouTubeTranscriptTool(Tool):
53
  name = "youtube_transcript"
54
  description = "Return the subtitles of a YouTube URL using youtube-transcript-api."
 
 
 
 
55
 
56
- def forward(self, url: str, lang: str = "en") -> str: # type: ignore[override]
57
  from urllib.parse import urlparse, parse_qs
58
- # Per Pylance, import from private API
59
  from youtube_transcript_api._api import YouTubeTranscriptApi
60
  vid = parse_qs(urlparse(url).query).get("v", [None])[0] or url.split("/")[-1]
61
  data = YouTubeTranscriptApi.get_transcript(vid, languages=[lang, "en", "en-US", "en-GB"])
@@ -65,29 +79,33 @@ class YouTubeTranscriptTool(Tool):
65
  class AudioTranscriptionTool(Tool):
66
  name = "transcribe_audio"
67
  description = "Transcribe an audio file with OpenAI Whisper, returns plain text."
 
 
 
 
68
 
69
- def forward(self, path: str, model: str = "whisper-1") -> str: # type: ignore[override]
70
  import openai
71
- import os
72
  if not os.path.isfile(path):
73
  raise FileNotFoundError(path)
74
  openai.api_key = os.getenv("OPENAI_API_KEY")
75
- # Version/API guard for openai.Audio
76
  if not hasattr(openai, "Audio"):
77
  raise ImportError(
78
  "Your OpenAI package does not support Audio. "
79
  "Please upgrade it with: pip install --upgrade openai"
80
  )
81
  with open(path, "rb") as fp:
82
- # type: ignore[attr-defined]
83
  return openai.Audio.transcribe(model=model, file=fp)["text"].strip()
84
 
85
  # ---- 5. SimpleOCRTool ------------------------------------------------------
86
  class SimpleOCRTool(Tool):
87
  name = "image_ocr"
88
  description = "Return any text spotted in an image via pytesseract OCR."
 
 
 
89
 
90
- def forward(self, path: str) -> str: # type: ignore[override]
91
  from PIL import Image
92
  import pytesseract
93
  if not os.path.isfile(path):
@@ -101,4 +119,4 @@ __all__ = [
101
  "YouTubeTranscriptTool",
102
  "AudioTranscriptionTool",
103
  "SimpleOCRTool",
104
- ]
 
3
  import contextlib
4
  import io
5
  import os
6
+ from typing import Any, Dict, List
7
 
8
  from smolagents import Tool
9
 
 
14
  "Execute trusted Python code and return printed output "
15
  "+ repr() of the last expression (or _result variable)."
16
  )
17
+ inputs = {
18
+ "code": {"type": str, "description": "Python code to execute", "required": True}
19
+ }
20
 
21
+ def forward(self, code: str) -> str:
22
  buf, ns = io.StringIO(), {}
23
  last = None
24
  try:
 
37
  "Read .xlsx/.xls/.csv from disk and return "
38
  "rows as a list of dictionaries with string keys."
39
  )
40
+ inputs = {
41
+ "path": {"type": str, "description": "Path to .csv/.xls/.xlsx file", "required": True},
42
+ "sheet": {
43
+ "type": str,
44
+ "description": "Sheet name or index (optional, required for Excel files only)",
45
+ "required": False,
46
+ "default": None,
47
+ }
48
+ }
49
 
50
+ def forward(self, path: str, sheet: str | int | None = None) -> List[Dict[str, Any]]:
51
  import pandas as pd
52
  if not os.path.isfile(path):
53
  raise FileNotFoundError(path)
 
56
  df = pd.read_csv(path)
57
  else:
58
  df = pd.read_excel(path, sheet_name=sheet)
 
59
  records = [{str(k): v for k, v in row.items()} for row in df.to_dict(orient="records")]
60
  return records
61
 
 
63
  class YouTubeTranscriptTool(Tool):
64
  name = "youtube_transcript"
65
  description = "Return the subtitles of a YouTube URL using youtube-transcript-api."
66
+ inputs = {
67
+ "url": {"type": str, "description": "YouTube URL", "required": True},
68
+ "lang": {"type": str, "description": "Transcript language (default: en)", "required": False, "default": "en"}
69
+ }
70
 
71
+ def forward(self, url: str, lang: str = "en") -> str:
72
  from urllib.parse import urlparse, parse_qs
 
73
  from youtube_transcript_api._api import YouTubeTranscriptApi
74
  vid = parse_qs(urlparse(url).query).get("v", [None])[0] or url.split("/")[-1]
75
  data = YouTubeTranscriptApi.get_transcript(vid, languages=[lang, "en", "en-US", "en-GB"])
 
79
  class AudioTranscriptionTool(Tool):
80
  name = "transcribe_audio"
81
  description = "Transcribe an audio file with OpenAI Whisper, returns plain text."
82
+ inputs = {
83
+ "path": {"type": str, "description": "Path to audio file", "required": True},
84
+ "model": {"type": str, "description": "Model name for transcription (default: whisper-1)", "required": False, "default": "whisper-1"}
85
+ }
86
 
87
+ def forward(self, path: str, model: str = "whisper-1") -> str:
88
  import openai
 
89
  if not os.path.isfile(path):
90
  raise FileNotFoundError(path)
91
  openai.api_key = os.getenv("OPENAI_API_KEY")
 
92
  if not hasattr(openai, "Audio"):
93
  raise ImportError(
94
  "Your OpenAI package does not support Audio. "
95
  "Please upgrade it with: pip install --upgrade openai"
96
  )
97
  with open(path, "rb") as fp:
 
98
  return openai.Audio.transcribe(model=model, file=fp)["text"].strip()
99
 
100
  # ---- 5. SimpleOCRTool ------------------------------------------------------
101
  class SimpleOCRTool(Tool):
102
  name = "image_ocr"
103
  description = "Return any text spotted in an image via pytesseract OCR."
104
+ inputs = {
105
+ "path": {"type": str, "description": "Path to image file", "required": True}
106
+ }
107
 
108
+ def forward(self, path: str) -> str:
109
  from PIL import Image
110
  import pytesseract
111
  if not os.path.isfile(path):
 
119
  "YouTubeTranscriptTool",
120
  "AudioTranscriptionTool",
121
  "SimpleOCRTool",
122
+ ]