Spaces:
Sleeping
Sleeping
feat: add audio transcription, youtube search and fileRead
Browse files
tools.py
CHANGED
@@ -16,7 +16,10 @@ from langchain_tavily import TavilySearch, TavilyExtract
|
|
16 |
from langchain_google_genai import ChatGoogleGenerativeAI
|
17 |
from langchain_community.utilities.wikipedia import WikipediaAPIWrapper
|
18 |
from langchain_community.tools.wikipedia.tool import WikipediaQueryRun
|
|
|
|
|
19 |
from youtube_transcript_api import YouTubeTranscriptApi
|
|
|
20 |
|
21 |
from basic_agent import print_conversation
|
22 |
|
@@ -111,6 +114,22 @@ def search_and_extract(query: str) -> list[dict]:
|
|
111 |
return structured_results
|
112 |
|
113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
|
115 |
def extract_video_id(url: str) -> str:
|
116 |
parsed = urlparse(url)
|
@@ -198,4 +217,48 @@ def search_and_extract_from_wikipedia(query: str) -> list:
|
|
198 |
response = wiki_tool.invoke(query)
|
199 |
if CUSTOM_DEBUG:
|
200 |
print_tool_response(response)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
return response
|
|
|
16 |
from langchain_google_genai import ChatGoogleGenerativeAI
|
17 |
from langchain_community.utilities.wikipedia import WikipediaAPIWrapper
|
18 |
from langchain_community.tools.wikipedia.tool import WikipediaQueryRun
|
19 |
+
from langchain_google_community import SpeechToTextLoader
|
20 |
+
from langchain_community.tools import YouTubeSearchTool
|
21 |
from youtube_transcript_api import YouTubeTranscriptApi
|
22 |
+
from langchain_community.tools.file_management.read import ReadFileTool
|
23 |
|
24 |
from basic_agent import print_conversation
|
25 |
|
|
|
114 |
return structured_results
|
115 |
|
116 |
|
117 |
+
# Shared search client, created once at import time and reused by every call.
youtube_search_api = YouTubeSearchTool()


# NOTE(review): the docstring is kept verbatim because `@tool` presumably
# exposes it to the model as the tool description — confirm before rewording.
# NOTE(review): the return annotation says `list`, but the value is whatever
# `youtube_search_api.run` returns, passed through unchanged — confirm its type.
@tool
def youtube_search_tool(query: str, number_of_results: int = 3) -> list:
    """Search YouTube for a query and return the top number_of_results."""
    if CUSTOM_DEBUG:
        print_tool_call(
            youtube_search_tool,
            tool_name='youtube_search_tool',
            # The key must be the string name; the original used the variable
            # itself, which logged the count as its own key (e.g. {3: 3}).
            args={'query': query, 'number_of_results': number_of_results},
        )

    # The search input is a single "query,count" string, so a comma inside the
    # query would be read as the start of the count field. Replace commas with
    # spaces to keep the delimiter unambiguous.
    safe_query = query.replace(",", " ")
    response = youtube_search_api.run(f"{safe_query},{number_of_results}")

    if CUSTOM_DEBUG:
        print_tool_response(response)
    return response
|
132 |
+
|
133 |
|
134 |
def extract_video_id(url: str) -> str:
|
135 |
parsed = urlparse(url)
|
|
|
217 |
response = wiki_tool.invoke(query)
|
218 |
if CUSTOM_DEBUG:
|
219 |
print_tool_response(response)
|
220 |
+
return response
|
221 |
+
|
222 |
+
|
223 |
+
# NOTE(review): the docstring is kept verbatim because `@tool` presumably
# exposes it to the model as the tool description — confirm before rewording.
@tool
def transcribe_audio(file_path: str) -> list:
    """Transcribe audio from a file using Google Speech-to-Text."""
    if CUSTOM_DEBUG:
        print_tool_call(
            transcribe_audio,
            tool_name='transcribe_audio',
            args={'file_path': file_path},
        )

    # The project is read from the environment on every call.
    # NOTE(review): os.getenv returns None when GOOGLE_CLOUD_PROJECT_ID is
    # unset; that None is handed to the loader as-is — confirm how it reacts.
    speech_loader = SpeechToTextLoader(
        project_id=os.getenv("GOOGLE_CLOUD_PROJECT_ID"),
        file_path=file_path,
        is_long=False,  # Set to True for long audio files
    )

    # Keep only the text of each loaded document.
    transcripts = []
    for document in speech_loader.load():
        transcripts.append(document.page_content)

    if CUSTOM_DEBUG:
        print_tool_response(transcripts)
    return transcripts
|
245 |
+
|
246 |
+
|
247 |
+
# Shared file reader, created once at import time and reused by every call.
read_tool = ReadFileTool()


# NOTE(review): the docstring is kept verbatim because `@tool` presumably
# exposes it to the model as the tool description — confirm before rewording.
# NOTE(review): the docstring advertises PDFs; confirm that `read_tool` can
# actually decode them, otherwise the description misleads the model.
@tool
def read_file_tool(file_path: str) -> str:
    """Read the content of a file. Use this tool to read .py, .csv, .md, text files, PDFs, etc."""
    if CUSTOM_DEBUG:
        print_tool_call(
            read_file_tool,
            tool_name='read_file_tool',
            args={'file_path': file_path},
        )

    # Check for the file before reading: the original read first and then
    # threw the result away when the path did not exist.
    if os.path.exists(file_path):
        response = read_tool.invoke({"file_path": file_path})
    else:
        response = f"File not found: {file_path}"

    # Print once and only in debug mode, matching the other tools in this
    # file; the original printed unconditionally.
    if CUSTOM_DEBUG:
        print_tool_response(response)
    return response
|