dygoo committed on
Commit
3880120
·
verified ·
1 Parent(s): 77d27d6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -51
app.py CHANGED
@@ -11,6 +11,57 @@ from functools import lru_cache # Added: For caching search results
11
  from youtube_transcript_api import YouTubeTranscriptApi
12
  import re
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  # TOOLS
15
  search_tool = DuckDuckGoSearchTool()
16
  visit_webpage = VisitWebpageTool()
@@ -96,58 +147,8 @@ class BasicAgent:
96
 
97
  def _get_fallback_answer(self, question):
98
  return f"Based on the information available, I cannot provide a specific answer to your question about {question.split()[0:3]}..."
99
- from youtube_transcript_api import YouTubeTranscriptApi
100
- import re
101
 
102
- class YouTubeVideoTool:
103
- def __init__(self):
104
- self.name = "youtube_video_tool"
105
-
106
- def __call__(self, query):
107
- """
108
- Extract information from a YouTube video.
109
-
110
- Args:
111
- query: Either a YouTube URL or video ID
112
-
113
- Returns:
114
- String with the transcript of the video
115
- """
116
- try:
117
- # Extract video ID from URL if needed
118
- video_id = self._extract_video_id(query)
119
- if not video_id:
120
- return "Could not extract a valid YouTube video ID"
121
-
122
- # Get the transcript
123
- transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
124
-
125
- # Combine the transcript text
126
- transcript_text = " ".join([item['text'] for item in transcript_list])
127
-
128
- return f"Transcript from YouTube video {video_id}:\n{transcript_text}"
129
- except Exception as e:
130
- return f"Error processing YouTube video: {str(e)}"
131
-
132
- def _extract_video_id(self, url_or_id):
133
- """Extract YouTube video ID from various URL formats or return the ID if already provided."""
134
- # Handle direct video ID
135
- if len(url_or_id) == 11 and re.match(r'^[A-Za-z0-9_-]{11}$', url_or_id):
136
- return url_or_id
137
-
138
- # Common YouTube URL patterns
139
- patterns = [
140
- r'(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/|youtube\.com\/v\/)([A-Za-z0-9_-]{11})',
141
- r'youtube\.com\/watch\?.*v=([A-Za-z0-9_-]{11})',
142
- r'youtube\.com\/shorts\/([A-Za-z0-9_-]{11})'
143
- ]
144
-
145
- for pattern in patterns:
146
- match = re.search(pattern, url_or_id)
147
- if match:
148
- return match.group(1)
149
-
150
- return None
151
 
152
  def run_and_submit_all( profile: gr.OAuthProfile | None):
153
  """
 
11
  from youtube_transcript_api import YouTubeTranscriptApi
12
  import re
13
 
14
+ class YouTubeVideoTool:
15
+ def __init__(self):
16
+ self.name = "youtube_video_tool"
17
+
18
+ def __call__(self, query):
19
+ """
20
+ Extract information from a YouTube video.
21
+
22
+ Args:
23
+ query: Either a YouTube URL or video ID
24
+
25
+ Returns:
26
+ String with the transcript of the video
27
+ """
28
+ try:
29
+ # Extract video ID from URL if needed
30
+ video_id = self._extract_video_id(query)
31
+ if not video_id:
32
+ return "Could not extract a valid YouTube video ID"
33
+
34
+ # Get the transcript
35
+ transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
36
+
37
+ # Combine the transcript text
38
+ transcript_text = " ".join([item['text'] for item in transcript_list])
39
+
40
+ return f"Transcript from YouTube video {video_id}:\n{transcript_text}"
41
+ except Exception as e:
42
+ return f"Error processing YouTube video: {str(e)}"
43
+
44
+ def _extract_video_id(self, url_or_id):
45
+ """Extract YouTube video ID from various URL formats or return the ID if already provided."""
46
+ # Handle direct video ID
47
+ if len(url_or_id) == 11 and re.match(r'^[A-Za-z0-9_-]{11}$', url_or_id):
48
+ return url_or_id
49
+
50
+ # Common YouTube URL patterns
51
+ patterns = [
52
+ r'(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/|youtube\.com\/v\/)([A-Za-z0-9_-]{11})',
53
+ r'youtube\.com\/watch\?.*v=([A-Za-z0-9_-]{11})',
54
+ r'youtube\.com\/shorts\/([A-Za-z0-9_-]{11})'
55
+ ]
56
+
57
+ for pattern in patterns:
58
+ match = re.search(pattern, url_or_id)
59
+ if match:
60
+ return match.group(1)
61
+
62
+ return None
63
+
64
+
65
  # TOOLS
66
  search_tool = DuckDuckGoSearchTool()
67
  visit_webpage = VisitWebpageTool()
 
147
 
148
  def _get_fallback_answer(self, question):
149
  return f"Based on the information available, I cannot provide a specific answer to your question about {question.split()[0:3]}..."
 
 
150
 
151
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
153
  def run_and_submit_all( profile: gr.OAuthProfile | None):
154
  """