laverdes committed on
Commit
8ae2625
·
verified ·
1 Parent(s): bfd8491

feat: add audio transcription, youtube search and fileRead

Browse files
Files changed (1) hide show
  1. tools.py +63 -0
tools.py CHANGED
@@ -16,7 +16,10 @@ from langchain_tavily import TavilySearch, TavilyExtract
16
  from langchain_google_genai import ChatGoogleGenerativeAI
17
  from langchain_community.utilities.wikipedia import WikipediaAPIWrapper
18
  from langchain_community.tools.wikipedia.tool import WikipediaQueryRun
 
 
19
  from youtube_transcript_api import YouTubeTranscriptApi
 
20
 
21
  from basic_agent import print_conversation
22
 
@@ -111,6 +114,22 @@ def search_and_extract(query: str) -> list[dict]:
111
  return structured_results
112
 
113
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
  def extract_video_id(url: str) -> str:
116
  parsed = urlparse(url)
@@ -198,4 +217,48 @@ def search_and_extract_from_wikipedia(query: str) -> list:
198
  response = wiki_tool.invoke(query)
199
  if CUSTOM_DEBUG:
200
  print_tool_response(response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  return response
 
16
  from langchain_google_genai import ChatGoogleGenerativeAI
17
  from langchain_community.utilities.wikipedia import WikipediaAPIWrapper
18
  from langchain_community.tools.wikipedia.tool import WikipediaQueryRun
19
+ from langchain_google_community import SpeechToTextLoader
20
+ from langchain_community.tools import YouTubeSearchTool
21
  from youtube_transcript_api import YouTubeTranscriptApi
22
+ from langchain_community.tools.file_management.read import ReadFileTool
23
 
24
  from basic_agent import print_conversation
25
 
 
114
  return structured_results
115
 
116
 
117
# Shared search client, created once at import time and reused by every call.
youtube_search_api = YouTubeSearchTool()


@tool
def youtube_search_tool(query: str, number_of_results:int=3) -> list:
    """Search YouTube for a query and return the top number_of_results."""
    # The docstring above is kept verbatim: if `tool` is LangChain's decorator
    # it is presumably used as the tool description shown to the model.
    #
    # query: free-text search terms.
    # number_of_results: how many results to request (defaults to 3).
    # Returns whatever `youtube_search_api.run` returns, unchanged.
    # NOTE(review): the underlying tool appears to return a string rather than
    # a list -- confirm before relying on the `-> list` annotation.
    if CUSTOM_DEBUG:
        print_tool_call(
            youtube_search_tool,
            tool_name='youtube_search_tool',
            # Both keys are string literals; previously the *value* of
            # number_of_results was used as the second key (e.g. {3: 3}).
            args={'query': query, 'number_of_results': number_of_results},
        )
    # The search client is given a single "query,count" string. Assuming it
    # splits that string on commas (TODO confirm against the YouTubeSearchTool
    # docs), a comma inside the query would be mistaken for the count
    # separator, so commas in the query are replaced by spaces first.
    sanitized_query = query.replace(",", " ")
    response = youtube_search_api.run(f"{sanitized_query},{number_of_results}")
    if CUSTOM_DEBUG:
        print_tool_response(response)
    return response
132
+
133
 
134
  def extract_video_id(url: str) -> str:
135
  parsed = urlparse(url)
 
217
  response = wiki_tool.invoke(query)
218
  if CUSTOM_DEBUG:
219
  print_tool_response(response)
220
+ return response
221
+
222
+
223
@tool
def transcribe_audio(file_path: str) -> list:
    """Transcribe audio from a file using Google Speech-to-Text."""
    # file_path: location of the audio file handed to the loader.
    # Returns a list holding the `page_content` of every loaded document.
    if CUSTOM_DEBUG:
        print_tool_call(
            transcribe_audio,
            tool_name='transcribe_audio',
            args={'file_path': file_path},
        )

    # The Google Cloud project is read from the environment on every call.
    speech_loader = SpeechToTextLoader(
        project_id=os.getenv("GOOGLE_CLOUD_PROJECT_ID"),
        file_path=file_path,
        is_long=False,  # Set to True for long audio files
    )
    transcripts = [document.page_content for document in speech_loader.load()]

    if CUSTOM_DEBUG:
        print_tool_response(transcripts)
    return transcripts
245
+
246
+
247
# Shared file reader, created once at import time and reused by every call.
read_tool = ReadFileTool()


@tool
def read_file_tool(file_path: str) -> str:
    """Read the content of a file. Use this tool to read .py, .csv, .md, text files, PDFs, etc."""
    # The docstring above is kept verbatim: if `tool` is LangChain's decorator
    # it is presumably used as the tool description shown to the model.
    # NOTE(review): it advertises PDF support; confirm the underlying reader
    # can actually handle binary formats.
    #
    # file_path: path of the file to read.
    # Returns the reader's output, or "File not found: <path>" when the path
    # does not exist.
    if CUSTOM_DEBUG:
        print_tool_call(
            read_file_tool,
            tool_name='read_file_tool',
            args={'file_path': file_path},
        )
    # Check for the file first so the reader is never invoked on a missing
    # path (its result used to be computed and then thrown away).
    if os.path.exists(file_path):
        response = read_tool.invoke({"file_path": file_path})
    else:
        response = f"File not found: {file_path}"
    # Debug output is emitted once and only when CUSTOM_DEBUG is set, matching
    # the other tools in this file; it used to print unconditionally, and
    # twice on the missing-file path.
    if CUSTOM_DEBUG:
        print_tool_response(response)
    return response