Deadmon committed
Commit 795c651 · verified · 1 Parent(s): 220eca8

Update app.py

Files changed (1)
  1. app.py +317 -11
app.py CHANGED
@@ -1,11 +1,317 @@
- 1. Initializing YouTube Service...
- Service Initialized.
- 2. Finding Channel ID for '@theAIsearch'...
- Found Channel ID: UCIgnGlGkVRhd4qNFcEwLL4A
- 3. Fetching Uploads Playlist ID...
- Found Playlist ID: UUIgnGlGkVRhd4qNFcEwLL4A
- 4. Fetching Video IDs with filters (last 90 days, max 50 videos)...
- Found 30 videos after filtering.
- 5. Scanning 30 videos for keywords: 3d, blender...
- Found mentions in: Recent Video Title - https://www.youtube.com/watch?v=abc123 (abc123)
- 6. Formatting Results...
+ import re
+ import gradio as gr
+ from googleapiclient.discovery import build
+ from googleapiclient.errors import HttpError
+ from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
+ import time
+ import traceback
+ import tempfile
+ from datetime import datetime, timedelta
+
+ # --- Constants ---
+ YOUTUBE_API_SERVICE_NAME = "youtube"
+ YOUTUBE_API_VERSION = "v3"
+ API_KEY = "YOUR_API_KEY_HERE"  # Replace with your actual YouTube Data API key
+ DEFAULT_KEYWORDS = "3d, blender, maya, 3ds max, cinema 4d, houdini, zbrush, unreal engine, unity, substance painter, substance designer, v-ray, arnold, rendering, texturing, rigging, vfx, cgi, autodesk, fusion 360"
+ DEFAULT_DAYS = 180  # Default to 6 months
+ DEFAULT_MAX_VIDEOS = 100  # Default to 100 videos
+
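+ # Note: instead of hard-coding API_KEY above, the key can be read from an
+ # environment variable (e.g. a Hugging Face Space secret), along these lines:
+ #     import os
+ #     API_KEY = os.environ.get("YOUTUBE_API_KEY", "YOUR_API_KEY_HERE")
+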
+ # --- YouTube API Helper Functions ---
+
+ def get_youtube_service():
+     """Initializes and returns the YouTube API service."""
+     try:
+         return build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=API_KEY, cache_discovery=False)
+     except HttpError as e:
+         raise ConnectionError(f"Could not connect to YouTube API: {e}")
+
+ def get_channel_id(service, handle):
+     """Gets the channel ID from a handle or ID."""
+     if not handle:
+         raise ValueError("Channel handle or ID is required.")
+     if handle.startswith("UC") and len(handle) == 24:
+         return handle
+     handle = handle if handle.startswith('@') else f"@{handle}"
+     try:
+         search_response = service.search().list(q=handle, part="id", type="channel", maxResults=1).execute()
+         if not search_response.get("items"):
+             raise ValueError(f"Channel '{handle}' not found.")
+         return search_response["items"][0]["id"]["channelId"]
+     except HttpError as e:
+         raise ConnectionError(f"API error finding channel ID: {e.content}")
+
+ def get_uploads_playlist_id(service, channel_id):
+     """Gets the uploads playlist ID."""
+     try:
+         response = service.channels().list(id=channel_id, part="contentDetails").execute()
+         if not response.get("items"):
+             raise ValueError(f"No channel details for ID '{channel_id}'.")
+         return response["items"][0]["contentDetails"]["relatedPlaylists"]["uploads"]
+     except HttpError as e:
+         raise ConnectionError(f"API error getting uploads playlist: {e.content}")
+
+ def get_all_video_ids(service, playlist_id, keywords_set, days_filter, max_videos):
+     """Fetches video IDs with pre-filtering by keywords, date, and max limit."""
+     video_ids = []
+     next_page_token = None
+     # Compare against UTC, since the API's publishedAt timestamps are UTC ("...Z")
+     cutoff_date = (datetime.utcnow() - timedelta(days=days_filter)).isoformat("T") + "Z"
+
+     while True:
+         try:
+             response = service.playlistItems().list(
+                 playlistId=playlist_id,
+                 part="snippet,contentDetails",
+                 maxResults=50,
+                 pageToken=next_page_token
+             ).execute()
+             for item in response.get("items", []):
+                 video_id = item["contentDetails"]["videoId"]
+                 snippet = item["snippet"]
+                 title = snippet["title"].lower()
+                 description = snippet.get("description", "").lower()
+                 published_at = snippet["publishedAt"]
+
+                 # Date filter
+                 if published_at < cutoff_date:
+                     continue
+
+                 # Keyword pre-filter (title or description)
+                 if any(keyword in title or keyword in description for keyword in keywords_set):
+                     video_ids.append(video_id)
+
+                 if len(video_ids) >= max_videos:
+                     return video_ids[:max_videos]
+
+             next_page_token = response.get("nextPageToken")
+             if not next_page_token:
+                 break
+         except HttpError as e:
+             print(f"API Error fetching video IDs: {e.content}")
+             break
+     return video_ids[:max_videos]
+
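+ # Example call (hypothetical values): collect at most 50 videos from the last 90 days
+ # whose title or description mentions "3d" or "blender":
+ #     get_all_video_ids(service, playlist_id, {"3d", "blender"}, 90, 50)
+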
+ def process_video(service, video_id, keywords_set):
+     """Processes a video for keyword mentions and links."""
+     video_url = f"https://www.youtube.com/watch?v={video_id}"
+     result = {
+         "video_id": video_id,
+         "video_url": video_url,
+         "title": f"Video ID: {video_id}",
+         "transcript_mentions": set(),
+         "description_mentions": set(),
+         "description_links": []
+     }
+
+     try:
+         video_response = service.videos().list(id=video_id, part="snippet").execute()
+         if video_response.get("items"):
+             snippet = video_response["items"][0]["snippet"]
+             result["title"] = snippet.get("title", f"Video ID: {video_id}")
+             description = snippet.get("description", "").lower()
+             for keyword in keywords_set:
+                 if keyword in description:
+                     result["description_mentions"].add(keyword)
+             result["description_links"] = re.findall(r'https?://\S+', snippet.get("description", ""))
+     except HttpError as e:
+         print(f"API error getting details for {video_id}: {e.resp.status}")
+
+     if not result["description_mentions"]:
+         try:
+             transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
+             transcript = transcript_list.find_transcript(['en', 'en-US', 'en-GB'])
+             if transcript:
+                 full_transcript = transcript.fetch()
+                 transcript_text = " ".join(segment['text'] for segment in full_transcript).lower()
+                 for keyword in keywords_set:
+                     if keyword in transcript_text:
+                         result["transcript_mentions"].add(keyword)
+         except (TranscriptsDisabled, NoTranscriptFound) as e:
+             print(f"No transcript available for {video_id}: {type(e).__name__}")
+         except Exception as e:
+             print(f"Error fetching transcript for {video_id}: {type(e).__name__}")
+
+     if result["transcript_mentions"] or result["description_mentions"]:
+         return result
+     return None
+
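+ # Design note: the transcript is only requested when the description produced no
+ # keyword hits, which avoids one transcript fetch per video that already matched.
+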
135
+ # --- Main Function ---
136
+
137
+ def scan_channel_videos(channel_handle, keywords_str, days_filter, max_videos, progress=gr.Progress(track_tqdm=True)):
138
+ """Scans a YouTube channel for keyword mentions and links with user-defined filters."""
139
+ start_time = time.time()
140
+ status_log = []
141
+ results = []
142
+
143
+ def log_status(message):
144
+ print(message)
145
+ status_log.append(message)
146
+ yield "\n".join(status_log), gr.Markdown("### Processing..."), None
147
+
148
+ try:
149
+ yield from log_status("1. Initializing YouTube Service...")
150
+ service = get_youtube_service()
151
+
152
+ yield from log_status(f"2. Finding Channel ID for '{channel_handle}'...")
153
+ channel_id = get_channel_id(service, channel_handle)
154
+ yield from log_status(f" Found Channel ID: {channel_id}")
155
+
156
+ yield from log_status(f"3. Fetching Uploads Playlist ID...")
157
+ playlist_id = get_uploads_playlist_id(service, channel_id)
158
+ yield from log_status(f" Found Playlist ID: {playlist_id}")
159
+
160
+ keywords_list = [k.strip().lower() for k in keywords_str.split(',') if k.strip()]
161
+ if not keywords_list:
162
+ raise ValueError("At least one keyword is required.")
163
+ keywords_set = set(keywords_list)
164
+
165
+ # Validate user inputs
166
+ days_filter = int(days_filter) if days_filter else DEFAULT_DAYS
167
+ max_videos = int(max_videos) if max_videos else DEFAULT_MAX_VIDEOS
168
+ if days_filter < 1:
169
+ raise ValueError("Days filter must be at least 1.")
170
+ if max_videos < 1:
171
+ raise ValueError("Max videos must be at least 1.")
172
+
173
+ yield from log_status(f"4. Fetching Video IDs with filters (last {days_filter} days, max {max_videos} videos)...")
174
+ video_ids = get_all_video_ids(service, playlist_id, keywords_set, days_filter, max_videos)
175
+ if not video_ids:
176
+ yield from log_status(" No videos found matching filters.")
177
+ yield "\n".join(status_log), gr.Markdown("### Error\nNo videos found matching filters."), None
178
+ return
179
+ yield from log_status(f" Found {len(video_ids)} videos after filtering.")
180
+
+         yield from log_status(f"5. Scanning {len(video_ids)} videos for keywords: {', '.join(keywords_list)}...")
+         for video_id in progress.tqdm(video_ids, desc="Scanning Videos"):
+             result = process_video(service, video_id, keywords_set)
+             if result:
+                 results.append(result)
+                 yield from log_status(f" Found mentions in: {result['title']} - {result['video_url']} ({video_id})")
+
+         yield from log_status("\n6. Formatting Results...")
+         final_md = f"""
+ ## Scan Results for {channel_handle}
+
+ **Searched Keywords**: {', '.join(keywords_list)}
+ **Videos Found**: {len(results)} out of {len(video_ids)} scanned (filtered from channel total)
+ **Scan Duration**: {time.time() - start_time:.2f} seconds
+ **Filters Applied**: Last {days_filter} days, max {max_videos} videos
+
+ ---
+
+ """
+         final_text = f"Scan Results for {channel_handle}\n\n"
+         final_text += f"Searched Keywords: {', '.join(keywords_list)}\n"
+         final_text += f"Videos Found: {len(results)} out of {len(video_ids)} scanned (filtered from channel total)\n"
+         final_text += f"Scan Duration: {time.time() - start_time:.2f} seconds\n"
+         final_text += f"Filters Applied: Last {days_filter} days, max {max_videos} videos\n\n"
+
+         if not results:
+             final_md += "\n**No mentions found for the specified keywords.**"
+             final_text += "No mentions found for the specified keywords.\n"
+         else:
+             for res in sorted(results, key=lambda x: x['title']):
+                 final_md += f"""
+ ### {res['title']}
+
+ - **Video URL**: [{res['video_url']}]({res['video_url']})
+ """
+                 final_text += f"Video: {res['title']}\n"
+                 final_text += f"Video URL: {res['video_url']}\n"
+
+                 if res['transcript_mentions']:
+                     mentions = ', '.join(sorted(res['transcript_mentions']))
+                     final_md += f"- **Transcript Mentions**: {mentions}\n"
+                     final_text += f"Transcript Mentions: {mentions}\n"
+                 if res['description_mentions']:
+                     mentions = ', '.join(sorted(res['description_mentions']))
+                     final_md += f"- **Description Mentions**: {mentions}\n"
+                     final_text += f"Description Mentions: {mentions}\n"
+                 if res['description_links']:
+                     final_md += "- **Links in Description**:\n"
+                     final_text += "Links in Description:\n"
+                     for link in res['description_links']:
+                         final_md += f" - [{link}]({link})\n"
+                         final_text += f" - {link}\n"
+                 final_md += "\n---\n"
+                 final_text += "\n---\n"
+
+         with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.txt') as temp_file:
+             temp_file.write(final_text)
+             temp_file_path = temp_file.name
+
+         # Show the results file in the (initially hidden) download component
+         yield "\n".join(status_log), gr.Markdown(final_md), gr.File(value=temp_file_path, label="Download Results", visible=True)
+
+     except ValueError as ve:
+         yield from log_status(f"Error: {ve}")
+         yield "\n".join(status_log), gr.Markdown(f"### Error\n**Input Error:** {ve}"), None
+     except ConnectionError as ce:
+         yield from log_status(f"Error: {ce}")
+         yield "\n".join(status_log), gr.Markdown(f"### Error\n**API Connection Error:** {ce}"), None
+     except Exception as e:
+         traceback.print_exc()
+         yield from log_status(f"Error: {e}")
+         yield "\n".join(status_log), gr.Markdown(f"### Error\n**Unexpected Error:** {e}"), None
+
+ # --- Gradio Interface ---
+
+ with gr.Blocks(theme=gr.themes.Soft()) as app:
+     gr.Markdown("# YouTube Keyword Scanner")
+     gr.Markdown("Search for keywords in YouTube video transcripts and descriptions, with customizable filters and downloadable results.")
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             gr.Markdown("## Settings")
+             channel_input = gr.Textbox(
+                 label="Channel Handle or ID",
+                 placeholder="e.g., @theAIsearch or UCxxxxxxxxxxxxxx",
+                 value="@theAIsearch"
+             )
+             keywords_input = gr.Textbox(
+                 label="Keywords (comma-separated)",
+                 placeholder="e.g., 3d, blender, maya",
+                 value=DEFAULT_KEYWORDS
+             )
+             days_filter_input = gr.Number(
+                 label="Days to Look Back",
+                 value=DEFAULT_DAYS,
+                 minimum=1,
+                 precision=0,
+                 info="Filter videos from the last X days"
+             )
+             max_videos_input = gr.Number(
+                 label="Max Videos to Scan",
+                 value=DEFAULT_MAX_VIDEOS,
+                 minimum=1,
+                 precision=0,
+                 info="Limit the number of videos scanned"
+             )
+             submit_button = gr.Button("Submit", variant="primary")
+             clear_button = gr.Button("Clear")
+
+         with gr.Column(scale=2):
+             gr.Markdown("## Status & Logs")
+             status_output = gr.Textbox(
+                 label="Scan Progress",
+                 lines=10,
+                 max_lines=20,
+                 interactive=False,
+                 autoscroll=True
+             )
+             gr.Markdown("## Results")
+             results_output = gr.Markdown(value="Results will appear here.")
+             download_output = gr.File(label="Download Results", visible=False)
+
+     submit_button.click(
+         fn=scan_channel_videos,
+         inputs=[channel_input, keywords_input, days_filter_input, max_videos_input],
+         outputs=[status_output, results_output, download_output]
+     )
+     clear_button.click(
+         fn=lambda: ("", "Results cleared.", "", DEFAULT_KEYWORDS, DEFAULT_DAYS, DEFAULT_MAX_VIDEOS, None),
+         inputs=[],
+         outputs=[status_output, results_output, channel_input, keywords_input, days_filter_input, max_videos_input, download_output]
+     )
+
+     gr.Markdown("**Note:** Requires a valid YouTube Data API key. Filters help optimize performance.")
+
+ # --- Run the App ---
+ if __name__ == "__main__":
+     app.launch(debug=False)