Deadmon committed on
Commit
a2c17c5
·
verified ·
1 Parent(s): 55b5913

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -187
app.py CHANGED
@@ -1,4 +1,3 @@
1
- import os
2
  import re
3
  import gradio as gr
4
  from googleapiclient.discovery import build
@@ -10,115 +9,69 @@ import traceback
10
  # --- Constants ---
11
  YOUTUBE_API_SERVICE_NAME = "youtube"
12
  YOUTUBE_API_VERSION = "v3"
13
- DEFAULT_KEYWORDS = "3d, texturing, rigging, vfx, cgi"
14
  API_KEY = "AIzaSyCcxSkhTp6aowcyowuBkHIFTSrl_HJ79J0" # Replace with your actual YouTube Data API key
 
 
 
 
 
 
 
15
 
16
  # --- YouTube API Helper Functions ---
17
 
18
  def get_youtube_service():
19
- """Initializes and returns the YouTube API service with hardcoded API key."""
20
- if not API_KEY:
21
- raise ValueError("API Key is missing.")
22
  try:
23
- service = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION,
24
- developerKey=API_KEY, cache_discovery=False)
25
- return service
26
  except HttpError as e:
27
- if e.resp.status == 400:
28
- raise ValueError(f"Invalid API Key or API not enabled. Error: {e.content}")
29
- elif e.resp.status == 403:
30
- raise ValueError(f"API Key valid, but Quota Exceeded or Forbidden. Error: {e.content}")
31
- else:
32
- raise ConnectionError(f"Could not connect to YouTube API: {e}")
33
- except Exception as e:
34
- raise ConnectionError(f"Error building YouTube service: {e}")
35
 
36
- def get_channel_id(service, channel_identifier):
37
- """Gets the channel ID using the channel handle or ID."""
38
- if not channel_identifier:
39
- raise ValueError("Channel Handle or ID is missing.")
40
- if channel_identifier.startswith("UC") and len(channel_identifier) == 24:
41
- print(f"Assuming '{channel_identifier}' is a Channel ID.")
42
- return channel_identifier
43
- if channel_identifier.startswith('@'):
44
- handle = channel_identifier
45
- print(f"Attempting to find Channel ID for handle: {handle}")
46
- try:
47
- search_response = service.search().list(
48
- q=handle,
49
- part="id",
50
- type="channel",
51
- maxResults=1
52
- ).execute()
53
- if not search_response.get("items"):
54
- raise ValueError(f"Could not find channel for handle '{handle}'. Check the handle.")
55
- channel_id = search_response["items"][0]["id"]["channelId"]
56
- print(f"Found Channel ID: {channel_id}")
57
- return channel_id
58
- except HttpError as e:
59
- raise ConnectionError(f"API error finding channel ID for handle '{handle}': {e.content}")
60
- except Exception as e:
61
- raise Exception(f"An error occurred finding channel ID for handle '{handle}': {e}")
62
- else:
63
- raise ValueError("Invalid Channel Identifier. Use '@handle' or 'UC...' Channel ID.")
64
 
65
- def get_channel_uploads_playlist_id(service, channel_id):
66
- """Gets the uploads playlist ID for a given channel ID."""
67
  try:
68
- channels_response = service.channels().list(
69
- id=channel_id,
70
- part="contentDetails"
71
- ).execute()
72
- if not channels_response.get("items"):
73
- raise ValueError(f"Could not find channel details for ID '{channel_id}'.")
74
- playlist_id = channels_response["items"][0]["contentDetails"]["relatedPlaylists"]["uploads"]
75
- if not playlist_id:
76
- raise ValueError(f"Could not find uploads playlist for channel {channel_id}.")
77
- return playlist_id
78
  except HttpError as e:
79
- raise ConnectionError(f"API error getting uploads playlist for {channel_id}: {e.content}")
80
- except Exception as e:
81
- raise Exception(f"Error getting uploads playlist for {channel_id}: {e}")
82
 
83
  def get_all_video_ids(service, playlist_id):
84
- """Gets all video IDs from a playlist, yielding status updates."""
85
  video_ids = []
86
  next_page_token = None
87
- fetched_count = 0
88
- yield f"Fetching video IDs from playlist: {playlist_id}..."
89
  while True:
90
  try:
91
- playlist_items_response = service.playlistItems().list(
92
  playlistId=playlist_id,
93
  part="contentDetails",
94
  maxResults=50,
95
  pageToken=next_page_token
96
  ).execute()
97
- current_page_ids = [item["contentDetails"]["videoId"]
98
- for item in playlist_items_response.get("items", [])]
99
- video_ids.extend(current_page_ids)
100
- fetched_count += len(current_page_ids)
101
- next_page_token = playlist_items_response.get("nextPageToken")
102
- yield f"Fetched {fetched_count} video IDs so far..."
103
  if not next_page_token:
104
  break
105
  except HttpError as e:
106
- if e.resp.status == 403:
107
- yield f"API Quota Error: {e.content}. Stopping early."
108
- break
109
- else:
110
- yield f"API Error fetching video list: {e.content}. Stopping early."
111
- break
112
- except Exception as e:
113
- yield f"Error fetching video list: {e}. Stopping early."
114
  break
115
- yield f"Finished fetching. Total unique video IDs found: {len(video_ids)}"
116
  return video_ids
117
 
118
  def process_video(service, video_id, keywords_set):
119
- """Fetches video details and transcript, searches for keywords."""
120
  video_url = f"https://www.youtube.com/watch?v={video_id}"
121
- found_data = {
122
  "video_id": video_id,
123
  "video_url": video_url,
124
  "title": f"Video ID: {video_id}",
@@ -126,52 +79,45 @@ def process_video(service, video_id, keywords_set):
126
  "description_mentions": set(),
127
  "description_links": []
128
  }
129
- has_mention = False
130
- status_updates = []
131
 
 
132
  try:
133
- video_response = service.videos().list(
134
- id=video_id,
135
- part="snippet"
136
- ).execute()
137
  if video_response.get("items"):
138
  snippet = video_response["items"][0]["snippet"]
139
- found_data["title"] = snippet.get("title", f"Video ID: {video_id}")
140
  description = snippet.get("description", "").lower()
141
  for keyword in keywords_set:
142
  if keyword in description:
143
- found_data["description_mentions"].add(keyword)
144
- has_mention = True
145
- found_data["description_links"] = re.findall(r'https?://\S+', snippet.get("description", ""))
146
- else:
147
- status_updates.append(f" - Could not retrieve details for video {video_id}")
148
  except HttpError as e:
149
- status_updates.append(f" - API error getting details for {video_id}: {e.resp.status}")
150
- except Exception as e:
151
- status_updates.append(f" - Error getting details for {video_id}: {e}")
152
 
153
- transcript_text = ""
154
  try:
155
  transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
156
  transcript = transcript_list.find_transcript(['en', 'en-US', 'en-GB'])
157
  if transcript:
158
  full_transcript = transcript.fetch()
159
- transcript_text = " ".join([segment['text'] for segment in full_transcript]).lower()
160
  for keyword in keywords_set:
161
  if keyword in transcript_text:
162
- found_data["transcript_mentions"].add(keyword)
163
- has_mention = True
164
  except (TranscriptsDisabled, NoTranscriptFound):
165
- status_updates.append(f" - No transcript available for {video_id}")
166
  except Exception as e:
167
- status_updates.append(f" - Error fetching transcript for {video_id}: {type(e).__name__}")
168
 
169
- return found_data if has_mention else None, status_updates
 
 
 
170
 
171
- # --- Gradio Main Function ---
172
 
173
- def scan_channel_videos(channel_identifier, keywords_str, progress=gr.Progress(track_tqdm=True)):
174
- """Main function called by Gradio interface with hardcoded API key."""
175
  start_time = time.time()
176
  status_log = []
177
  results = []
@@ -184,103 +130,72 @@ def scan_channel_videos(channel_identifier, keywords_str, progress=gr.Progress(t
184
  try:
185
  yield from log_status("1. Initializing YouTube Service...")
186
  service = get_youtube_service()
187
- yield from log_status(" Service Initialized.")
188
 
189
- yield from log_status(f"2. Finding Channel ID for '{channel_identifier}'...")
190
- channel_id = get_channel_id(service, channel_identifier)
191
  yield from log_status(f" Found Channel ID: {channel_id}")
192
 
193
- yield from log_status(f"3. Finding Uploads Playlist ID for {channel_id}...")
194
- uploads_playlist_id = get_channel_uploads_playlist_id(service, channel_id)
195
- yield from log_status(f" Found Uploads Playlist ID: {uploads_playlist_id}")
196
 
197
- yield from log_status("4. Fetching all Video IDs...")
198
- video_ids_generator = get_all_video_ids(service, uploads_playlist_id)
199
- video_ids = None
200
- for item in video_ids_generator:
201
- if isinstance(item, str):
202
- yield from log_status(f" {item}")
203
- else:
204
- video_ids = item # Capture the final returned list
205
-
206
- if not video_ids or len(video_ids) == 0:
207
- yield from log_status(" No video IDs found or fetching failed.")
208
- yield "\n".join(status_log), gr.Markdown("### Scan Stopped\nNo videos found or fetching failed.")
209
  return
 
210
 
211
- keywords_list = [k.strip().lower() for k in keywords_str.split(',') if k.strip()]
212
- if not keywords_list:
213
- raise ValueError("Please provide at least one keyword.")
214
- keywords_set = set(keywords_list)
215
- yield from log_status(f"5. Scanning {len(video_ids)} videos for keywords: {', '.join(keywords_list)}")
216
 
217
- total_videos = len(video_ids)
218
  for video_id in progress.tqdm(video_ids, desc="Scanning Videos"):
219
- video_result, video_statuses = process_video(service, video_id, keywords_set)
220
- if video_result:
221
- results.append(video_result)
222
- yield from log_status(f" Found mention in: {video_result['title']} ({video_id})")
223
-
224
- yield from log_status("\n6. Scan Complete. Formatting results...")
225
- final_md = f"## Scan Results for {channel_identifier}\n\n"
226
- final_md += f"Searched for keywords: `{', '.join(keywords_list)}`\n"
227
- final_md += f"Found mentions in **{len(results)}** out of **{total_videos}** videos scanned.\n"
228
  final_md += f"Total time: {time.time() - start_time:.2f} seconds.\n"
229
  final_md += "---\n"
230
 
231
  if not results:
232
- final_md += "\n**No mentions found for the specified keywords.**"
233
  else:
234
- for res in results:
235
  final_md += f"\n### [{res['title']}]({res['video_url']})\n"
236
- final_md += f"*Video URL: <{res['video_url']}>*\n\n"
237
  if res['transcript_mentions']:
238
- mentions = ", ".join(sorted(res['transcript_mentions']))
239
- final_md += f"**Transcript Mentions:** `{mentions}`\n"
240
  if res['description_mentions']:
241
- mentions = ", ".join(sorted(res['description_mentions']))
242
- final_md += f"**Description Mentions:** `{mentions}`\n"
243
  if res['description_links']:
244
- final_md += f"\n**Links in Description:**\n"
245
  for link in res['description_links']:
246
  final_md += f"- <{link}>\n"
247
  final_md += "\n---\n"
248
 
249
  yield "\n".join(status_log), gr.Markdown(final_md)
250
 
251
- except ValueError as ve:
252
- yield from log_status(f"Configuration Error: {ve}")
253
- yield "\n".join(status_log), gr.Markdown(f"### Error\n**Input Error:** {ve}")
254
- except ConnectionError as ce:
255
- yield from log_status(f"API Connection Error: {ce}")
256
- yield "\n".join(status_log), gr.Markdown(f"### Error\n**API Connection Error:** {ce}")
257
- except HttpError as he:
258
- yield from log_status(f"API HTTP Error: {he.resp.status} - {he.content}")
259
- yield "\n".join(status_log), gr.Markdown(f"### Error\n**API HTTP Error:** Status {he.resp.status}\n{he.content}")
260
  except Exception as e:
261
  traceback.print_exc()
262
- yield from log_status(f"An unexpected error occurred: {e}")
263
  yield "\n".join(status_log), gr.Markdown(f"### Error\n**Unexpected Error:** {e}")
264
 
265
- # --- Gradio Interface Definition ---
266
 
267
  with gr.Blocks(theme=gr.themes.Soft()) as app:
268
- gr.Markdown("# YouTube Channel 3D Software Scanner")
269
- gr.Markdown("Find mentions of 3D software in video transcripts and descriptions.")
270
 
271
  with gr.Row():
272
  with gr.Column(scale=1):
273
- gr.Markdown("## Settings")
274
- channel_input = gr.Textbox(
275
- label="Channel Handle or ID",
276
- placeholder="e.g., @theAIsearch or UCxxxxxxxxxxxxxx"
277
- )
278
- keywords_input = gr.Textbox(
279
- label="Keywords to Search (comma-separated)",
280
- value=DEFAULT_KEYWORDS
281
- )
282
- scan_button = gr.Button("Scan Channel", variant="primary")
283
- clear_button = gr.Button("Clear All")
284
 
285
  with gr.Column(scale=2):
286
  gr.Markdown("## Status & Logs")
@@ -292,25 +207,12 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
292
  autoscroll=True
293
  )
294
  gr.Markdown("## Results")
295
- results_output = gr.Markdown(value="Results will be displayed here after scanning.")
296
-
297
- scan_button.click(
298
- fn=scan_channel_videos,
299
- inputs=[channel_input, keywords_input],
300
- outputs=[status_output, results_output]
301
- )
302
-
303
- def clear_outputs():
304
- return "", "Results cleared.", "", DEFAULT_KEYWORDS
305
 
306
- clear_button.click(
307
- fn=clear_outputs,
308
- inputs=[],
309
- outputs=[status_output, results_output, channel_input, keywords_input]
310
- )
311
 
312
- gr.Markdown("---")
313
- gr.Markdown("**Note:** Scans may take time depending on video count and API quotas.")
314
 
315
  # --- Run the App ---
316
  if __name__ == "__main__":
 
 
1
  import re
2
  import gradio as gr
3
  from googleapiclient.discovery import build
 
9
  # --- Constants ---
10
  YOUTUBE_API_SERVICE_NAME = "youtube"
11
  YOUTUBE_API_VERSION = "v3"
 
12
  API_KEY = "AIzaSyCcxSkhTp6aowcyowuBkHIFTSrl_HJ79J0" # Replace with your actual YouTube Data API key
13
+ CHANNEL_HANDLE = "@theAIsearch"
14
+ # Common 3D software keywords
15
+ KEYWORDS = [
16
+ "3d", "blender", "maya", "3ds max", "cinema 4d", "houdini", "zbrush", "unreal engine",
17
+ "unity", "substance painter", "substance designer", "v-ray", "arnold", "rendering",
18
+ "texturing", "rigging", "vfx", "cgi", "autodesk", "fusion 360"
19
+ ]
20
 
21
  # --- YouTube API Helper Functions ---
22
 
23
  def get_youtube_service():
24
+ """Initializes and returns the YouTube API service."""
 
 
25
  try:
26
+ return build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=API_KEY, cache_discovery=False)
 
 
27
  except HttpError as e:
28
+ raise ConnectionError(f"Could not connect to YouTube API: {e}")
 
 
 
 
 
 
 
29
 
30
+ def get_channel_id(service, handle):
31
+ """Gets the channel ID from a handle."""
32
+ try:
33
+ search_response = service.search().list(q=handle, part="id", type="channel", maxResults=1).execute()
34
+ if not search_response.get("items"):
35
+ raise ValueError(f"Channel '{handle}' not found.")
36
+ return search_response["items"][0]["id"]["channelId"]
37
+ except HttpError as e:
38
+ raise ConnectionError(f"API error finding channel ID: {e.content}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
+ def get_uploads_playlist_id(service, channel_id):
41
+ """Gets the uploads playlist ID."""
42
  try:
43
+ response = service.channels().list(id=channel_id, part="contentDetails").execute()
44
+ if not response.get("items"):
45
+ raise ValueError(f"No channel details for ID '{channel_id}'.")
46
+ return response["items"][0]["contentDetails"]["relatedPlaylists"]["uploads"]
 
 
 
 
 
 
47
  except HttpError as e:
48
+ raise ConnectionError(f"API error getting uploads playlist: {e.content}")
 
 
49
 
50
  def get_all_video_ids(service, playlist_id):
51
+ """Fetches all video IDs from the uploads playlist."""
52
  video_ids = []
53
  next_page_token = None
 
 
54
  while True:
55
  try:
56
+ response = service.playlistItems().list(
57
  playlistId=playlist_id,
58
  part="contentDetails",
59
  maxResults=50,
60
  pageToken=next_page_token
61
  ).execute()
62
+ video_ids.extend(item["contentDetails"]["videoId"] for item in response.get("items", []))
63
+ next_page_token = response.get("nextPageToken")
 
 
 
 
64
  if not next_page_token:
65
  break
66
  except HttpError as e:
67
+ print(f"API Error fetching video IDs: {e.content}")
 
 
 
 
 
 
 
68
  break
 
69
  return video_ids
70
 
71
  def process_video(service, video_id, keywords_set):
72
+ """Processes a video for 3D software mentions and links."""
73
  video_url = f"https://www.youtube.com/watch?v={video_id}"
74
+ result = {
75
  "video_id": video_id,
76
  "video_url": video_url,
77
  "title": f"Video ID: {video_id}",
 
79
  "description_mentions": set(),
80
  "description_links": []
81
  }
 
 
82
 
83
+ # Fetch video details
84
  try:
85
+ video_response = service.videos().list(id=video_id, part="snippet").execute()
 
 
 
86
  if video_response.get("items"):
87
  snippet = video_response["items"][0]["snippet"]
88
+ result["title"] = snippet.get("title", f"Video ID: {video_id}")
89
  description = snippet.get("description", "").lower()
90
  for keyword in keywords_set:
91
  if keyword in description:
92
+ result["description_mentions"].add(keyword)
93
+ result["description_links"] = re.findall(r'https?://\S+', snippet.get("description", ""))
 
 
 
94
  except HttpError as e:
95
+ print(f"API error getting details for {video_id}: {e.resp.status}")
 
 
96
 
97
+ # Fetch transcript
98
  try:
99
  transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
100
  transcript = transcript_list.find_transcript(['en', 'en-US', 'en-GB'])
101
  if transcript:
102
  full_transcript = transcript.fetch()
103
+ transcript_text = " ".join(segment['text'] for segment in full_transcript).lower()
104
  for keyword in keywords_set:
105
  if keyword in transcript_text:
106
+ result["transcript_mentions"].add(keyword)
 
107
  except (TranscriptsDisabled, NoTranscriptFound):
108
+ pass # Skip silently if no transcript
109
  except Exception as e:
110
+ print(f"Error fetching transcript for {video_id}: {type(e).__name__}")
111
 
112
+ # Return result only if there are mentions
113
+ if result["transcript_mentions"] or result["description_mentions"]:
114
+ return result
115
+ return None
116
 
117
+ # --- Main Function ---
118
 
119
+ def scan_channel_videos(progress=gr.Progress(track_tqdm=True)):
120
+ """Scans @theAIsearch for 3D software mentions and links."""
121
  start_time = time.time()
122
  status_log = []
123
  results = []
 
130
  try:
131
  yield from log_status("1. Initializing YouTube Service...")
132
  service = get_youtube_service()
 
133
 
134
+ yield from log_status(f"2. Finding Channel ID for '{CHANNEL_HANDLE}'...")
135
+ channel_id = get_channel_id(service, CHANNEL_HANDLE)
136
  yield from log_status(f" Found Channel ID: {channel_id}")
137
 
138
+ yield from log_status(f"3. Fetching Uploads Playlist ID...")
139
+ playlist_id = get_uploads_playlist_id(service, channel_id)
140
+ yield from log_status(f" Found Playlist ID: {playlist_id}")
141
 
142
+ yield from log_status("4. Fetching Video IDs...")
143
+ video_ids = get_all_video_ids(service, playlist_id)
144
+ if not video_ids:
145
+ yield from log_status(" No videos found.")
146
+ yield "\n".join(status_log), gr.Markdown("### Error\nNo videos found.")
 
 
 
 
 
 
 
147
  return
148
+ yield from log_status(f" Found {len(video_ids)} videos.")
149
 
150
+ keywords_set = set(k.lower() for k in KEYWORDS)
151
+ yield from log_status(f"5. Scanning {len(video_ids)} videos for 3D software mentions...")
 
 
 
152
 
 
153
  for video_id in progress.tqdm(video_ids, desc="Scanning Videos"):
154
+ result = process_video(service, video_id, keywords_set)
155
+ if result:
156
+ results.append(result)
157
+ yield from log_status(f" Found mentions in: {result['title']} ({video_id})")
158
+
159
+ yield from log_status("\n6. Formatting Results...")
160
+ final_md = f"## Scan Results for {CHANNEL_HANDLE}\n\n"
161
+ final_md += f"Searched for 3D software keywords: `{', '.join(KEYWORDS)}`\n"
162
+ final_md += f"Found mentions in **{len(results)}** out of **{len(video_ids)}** videos.\n"
163
  final_md += f"Total time: {time.time() - start_time:.2f} seconds.\n"
164
  final_md += "---\n"
165
 
166
  if not results:
167
+ final_md += "\n**No mentions of 3D software found.**"
168
  else:
169
+ for res in sorted(results, key=lambda x: x['title']):
170
  final_md += f"\n### [{res['title']}]({res['video_url']})\n"
171
+ final_md += f"*Video URL: <{res['video_url']}>*\n"
172
  if res['transcript_mentions']:
173
+ final_md += f"**Transcript Mentions:** `{', '.join(sorted(res['transcript_mentions']))}`\n"
 
174
  if res['description_mentions']:
175
+ final_md += f"**Description Mentions:** `{', '.join(sorted(res['description_mentions']))}`\n"
 
176
  if res['description_links']:
177
+ final_md += f"**Links in Description:**\n"
178
  for link in res['description_links']:
179
  final_md += f"- <{link}>\n"
180
  final_md += "\n---\n"
181
 
182
  yield "\n".join(status_log), gr.Markdown(final_md)
183
 
 
 
 
 
 
 
 
 
 
184
  except Exception as e:
185
  traceback.print_exc()
186
+ yield from log_status(f"Error: {e}")
187
  yield "\n".join(status_log), gr.Markdown(f"### Error\n**Unexpected Error:** {e}")
188
 
189
+ # --- Gradio Interface ---
190
 
191
  with gr.Blocks(theme=gr.themes.Soft()) as app:
192
+ gr.Markdown("# YouTube 3D Software Scanner for @theAIsearch")
193
+ gr.Markdown("Finds mentions of 3D software in transcripts and links from video descriptions.")
194
 
195
  with gr.Row():
196
  with gr.Column(scale=1):
197
+ scan_button = gr.Button("Scan @theAIsearch", variant="primary")
198
+ clear_button = gr.Button("Clear")
 
 
 
 
 
 
 
 
 
199
 
200
  with gr.Column(scale=2):
201
  gr.Markdown("## Status & Logs")
 
207
  autoscroll=True
208
  )
209
  gr.Markdown("## Results")
210
+ results_output = gr.Markdown(value="Results will appear here.")
 
 
 
 
 
 
 
 
 
211
 
212
+ scan_button.click(fn=scan_channel_videos, inputs=[], outputs=[status_output, results_output])
213
+ clear_button.click(fn=lambda: ("", "Results cleared."), inputs=[], outputs=[status_output, results_output])
 
 
 
214
 
215
+ gr.Markdown("**Note:** Requires a valid YouTube Data API key.")
 
216
 
217
  # --- Run the App ---
218
  if __name__ == "__main__":