wt002 committed on
Commit
95010ac
·
verified ·
1 Parent(s): 8519f42

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +46 -0
agent.py CHANGED
@@ -30,6 +30,9 @@ from langchain_core.documents import Document
30
  from langchain_community.vectorstores import FAISS
31
  from langchain_community.embeddings import HuggingFaceEmbeddings
32
 
 
 
 
33
 
34
 
35
  load_dotenv()
@@ -128,6 +131,46 @@ def arvix_search(query: str) -> str:
128
  return {"arvix_results": formatted_search_docs}
129
 
130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
  # -----------------------------
133
  # Load configuration from YAML
@@ -165,6 +208,9 @@ tool_map = {
165
  "wiki_search": wiki_search,
166
  "web_search": web_search,
167
  "arvix_search": arvix_search,
 
 
 
168
  }
169
 
170
  tools = [tool_map[name] for name in enabled_tool_names]
 
30
  from langchain_community.vectorstores import FAISS
31
  from langchain_community.embeddings import HuggingFaceEmbeddings
32
 
33
+ from youtube_transcript_api import YouTubeTranscriptApi
34
+ from youtube_transcript_api._errors import TranscriptsDisabled, VideoUnavailable
35
+ import re
36
 
37
 
38
  load_dotenv()
 
131
  return {"arvix_results": formatted_search_docs}
132
 
133
 
134
@tool
def get_youtube_transcript(url: str) -> str:
    """
    Fetch transcript text from a YouTube video.

    Args:
        url (str): Full YouTube video URL.

    Returns:
        str: Transcript text as a single string.

    Raises:
        ValueError: If no transcript is available or URL is invalid.
    """
    try:
        # extract_video_id is decorated with @tool, so the name refers to a
        # tool object rather than a plain function. Go through invoke()
        # instead of calling it directly.
        # NOTE(review): assumes `tool` is LangChain's decorator, where direct
        # calls on tool objects are deprecated — confirm against the import.
        video_id = extract_video_id.invoke({"url": url})

        # NOTE(review): get_transcript is the static API of older
        # youtube-transcript-api releases — confirm the pinned version.
        transcript = YouTubeTranscriptApi.get_transcript(video_id)

        # Combine all transcript text into one space-separated string.
        return " ".join(entry["text"] for entry in transcript)

    except (TranscriptsDisabled, VideoUnavailable) as e:
        # Chain the cause so the original traceback is preserved.
        raise ValueError(f"Transcript not available: {e}") from e
    except Exception as e:
        # Also covers the ValueError raised for an unparseable URL.
        raise ValueError(f"Failed to fetch transcript: {e}") from e
161
+
162
@tool
def extract_video_id(url: str) -> str:
    """
    Extract the video ID from a YouTube URL.

    Args:
        url (str): YouTube URL in watch (``v=``), ``youtu.be/``, ``/embed/``,
            ``/shorts/`` or ``/live/`` form.

    Returns:
        str: The 11-character video ID.

    Raises:
        ValueError: If no video ID can be found in the URL.
    """
    # The ID is the 11 URL-safe characters that follow one of the known
    # markers. The embed/shorts/live forms carry the ID in the path, not in
    # a ``v=`` query parameter.
    match = re.search(
        r"(?:v=|youtu\.be/|/embed/|/shorts/|/live/)([A-Za-z0-9_-]{11})",
        url,
    )
    if not match:
        raise ValueError("Invalid YouTube URL")
    return match.group(1)
171
+
172
+
173
+
174
 
175
  # -----------------------------
176
  # Load configuration from YAML
 
208
  "wiki_search": wiki_search,
209
  "web_search": web_search,
210
  "arvix_search": arvix_search,
211
+ "get_youtube_transcript": get_youtube_transcript,
212
+ "extract_video_id": extract_video_id,
213
+
214
  }
215
 
216
  tools = [tool_map[name] for name in enabled_tool_names]