Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -11,6 +11,57 @@ from functools import lru_cache # Added: For caching search results
|
|
11 |
from youtube_transcript_api import YouTubeTranscriptApi
|
12 |
import re
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
# TOOLS
|
15 |
search_tool = DuckDuckGoSearchTool()
|
16 |
visit_webpage = VisitWebpageTool()
|
@@ -96,58 +147,8 @@ class BasicAgent:
|
|
96 |
|
97 |
def _get_fallback_answer(self, question):
|
98 |
return f"Based on the information available, I cannot provide a specific answer to your question about {question.split()[0:3]}..."
|
99 |
-
from youtube_transcript_api import YouTubeTranscriptApi
|
100 |
-
import re
|
101 |
|
102 |
-
|
103 |
-
def __init__(self):
|
104 |
-
self.name = "youtube_video_tool"
|
105 |
-
|
106 |
-
def __call__(self, query):
|
107 |
-
"""
|
108 |
-
Extract information from a YouTube video.
|
109 |
-
|
110 |
-
Args:
|
111 |
-
query: Either a YouTube URL or video ID
|
112 |
-
|
113 |
-
Returns:
|
114 |
-
String with the transcript of the video
|
115 |
-
"""
|
116 |
-
try:
|
117 |
-
# Extract video ID from URL if needed
|
118 |
-
video_id = self._extract_video_id(query)
|
119 |
-
if not video_id:
|
120 |
-
return "Could not extract a valid YouTube video ID"
|
121 |
-
|
122 |
-
# Get the transcript
|
123 |
-
transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
|
124 |
-
|
125 |
-
# Combine the transcript text
|
126 |
-
transcript_text = " ".join([item['text'] for item in transcript_list])
|
127 |
-
|
128 |
-
return f"Transcript from YouTube video {video_id}:\n{transcript_text}"
|
129 |
-
except Exception as e:
|
130 |
-
return f"Error processing YouTube video: {str(e)}"
|
131 |
-
|
132 |
-
def _extract_video_id(self, url_or_id):
|
133 |
-
"""Extract YouTube video ID from various URL formats or return the ID if already provided."""
|
134 |
-
# Handle direct video ID
|
135 |
-
if len(url_or_id) == 11 and re.match(r'^[A-Za-z0-9_-]{11}$', url_or_id):
|
136 |
-
return url_or_id
|
137 |
-
|
138 |
-
# Common YouTube URL patterns
|
139 |
-
patterns = [
|
140 |
-
r'(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/|youtube\.com\/v\/)([A-Za-z0-9_-]{11})',
|
141 |
-
r'youtube\.com\/watch\?.*v=([A-Za-z0-9_-]{11})',
|
142 |
-
r'youtube\.com\/shorts\/([A-Za-z0-9_-]{11})'
|
143 |
-
]
|
144 |
-
|
145 |
-
for pattern in patterns:
|
146 |
-
match = re.search(pattern, url_or_id)
|
147 |
-
if match:
|
148 |
-
return match.group(1)
|
149 |
-
|
150 |
-
return None
|
151 |
|
152 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
153 |
"""
|
|
|
11 |
from youtube_transcript_api import YouTubeTranscriptApi
|
12 |
import re
|
13 |
|
14 |
+
class YouTubeVideoTool:
    """Callable tool that returns the transcript text of a YouTube video.

    The tool accepts either a bare video ID or a YouTube URL. ``__call__``
    reports every failure as a plain string instead of raising.
    """

    # A video ID as matched here: 11 characters from letters, digits, "_" and "-".
    _VIDEO_ID = r'[A-Za-z0-9_-]{11}'

    # Compiled once when the class is created instead of on every lookup.
    _BARE_ID_RE = re.compile(_VIDEO_ID)
    _URL_PATTERNS = (
        # Path-style URLs: short links, embeds, legacy /v/, shorts and live.
        re.compile(
            r'(?:youtu\.be/|youtube(?:-nocookie)?\.com/(?:embed|v|shorts|live)/)'
            r'(' + _VIDEO_ID + r')'
        ),
        # Watch URLs: "v" must be a whole query parameter (directly after "?"
        # or after a "&"/";" separator) so that a parameter whose name merely
        # ends in "v" is not mistaken for the video ID.
        re.compile(r'youtube\.com/watch\?(?:.*[&;])?v=(' + _VIDEO_ID + r')'),
    )

    def __init__(self):
        self.name = "youtube_video_tool"

    def __call__(self, query):
        """
        Extract information from a YouTube video.

        Args:
            query: Either a YouTube URL or video ID

        Returns:
            String with the transcript of the video, or a human-readable
            error message when the ID cannot be determined or the
            transcript cannot be fetched.
        """
        video_id = self._extract_video_id(query)
        if not video_id:
            return "Could not extract a valid YouTube video ID"

        try:
            # NOTE(review): this is the static helper of youtube-transcript-api;
            # newer releases of that library appear to replace it with an
            # instance-level fetch() -- confirm against the pinned version.
            transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
            transcript_text = " ".join(item['text'] for item in transcript_list)
        except Exception as e:
            # Tool boundary: surface the failure as text rather than raising.
            return f"Error processing YouTube video: {str(e)}"

        return f"Transcript from YouTube video {video_id}:\n{transcript_text}"

    def _extract_video_id(self, url_or_id):
        """Extract YouTube video ID from various URL formats or return the ID if already provided.

        Args:
            url_or_id: A bare video ID or a URL containing one. Surrounding
                whitespace is ignored.

        Returns:
            The 11-character video ID, or ``None`` when the input is not a
            string or no ID can be found in it.
        """
        if not isinstance(url_or_id, str):
            return None

        candidate = url_or_id.strip()

        # Handle direct video ID
        if self._BARE_ID_RE.fullmatch(candidate):
            return candidate

        # Common YouTube URL patterns
        for pattern in self._URL_PATTERNS:
            match = pattern.search(candidate)
            if match:
                return match.group(1)

        return None
|
63 |
+
|
64 |
+
|
65 |
# TOOLS
|
66 |
search_tool = DuckDuckGoSearchTool()
|
67 |
visit_webpage = VisitWebpageTool()
|
|
|
147 |
|
148 |
def _get_fallback_answer(self, question):
|
149 |
return f"Based on the information available, I cannot provide a specific answer to your question about {question.split()[0:3]}..."
|
|
|
|
|
150 |
|
151 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
152 |
|
153 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
154 |
"""
|