Spaces:

Deadmon
/

search-youtuber

Sleeping

App Files Files Community

Deadmon committed on Apr 8

Commit

a2c17c5

verified ·

1 Parent(s): 55b5913

Update app.py

Browse files

Files changed (1) hide show

app.py +89 -187

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import os
 import re
 import gradio as gr
 from googleapiclient.discovery import build
@@ -10,115 +9,69 @@ import traceback
 # --- Constants ---
 YOUTUBE_API_SERVICE_NAME = "youtube"
 YOUTUBE_API_VERSION = "v3"
-DEFAULT_KEYWORDS = "3d, texturing, rigging, vfx, cgi"
 API_KEY = "AIzaSyCcxSkhTp6aowcyowuBkHIFTSrl_HJ79J0"  # Replace with your actual YouTube Data API key
 # --- YouTube API Helper Functions ---
 def get_youtube_service():
-    """Initializes and returns the YouTube API service with hardcoded API key."""
-    if not API_KEY:
-        raise ValueError("API Key is missing.")
     try:
-        service = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION,
-                       developerKey=API_KEY, cache_discovery=False)
-        return service
     except HttpError as e:
-        if e.resp.status == 400:
-            raise ValueError(f"Invalid API Key or API not enabled. Error: {e.content}")
-        elif e.resp.status == 403:
-            raise ValueError(f"API Key valid, but Quota Exceeded or Forbidden. Error: {e.content}")
-        else:
-            raise ConnectionError(f"Could not connect to YouTube API: {e}")
-    except Exception as e:
-        raise ConnectionError(f"Error building YouTube service: {e}")
-def get_channel_id(service, channel_identifier):
-    """Gets the channel ID using the channel handle or ID."""
-    if not channel_identifier:
-        raise ValueError("Channel Handle or ID is missing.")
-    if channel_identifier.startswith("UC") and len(channel_identifier) == 24:
-        print(f"Assuming '{channel_identifier}' is a Channel ID.")
-        return channel_identifier
-    if channel_identifier.startswith('@'):
-        handle = channel_identifier
-        print(f"Attempting to find Channel ID for handle: {handle}")
-        try:
-            search_response = service.search().list(
-                q=handle,
-                part="id",
-                type="channel",
-                maxResults=1
-            ).execute()
-            if not search_response.get("items"):
-                raise ValueError(f"Could not find channel for handle '{handle}'. Check the handle.")
-            channel_id = search_response["items"][0]["id"]["channelId"]
-            print(f"Found Channel ID: {channel_id}")
-            return channel_id
-        except HttpError as e:
-            raise ConnectionError(f"API error finding channel ID for handle '{handle}': {e.content}")
-        except Exception as e:
-            raise Exception(f"An error occurred finding channel ID for handle '{handle}': {e}")
-    else:
-        raise ValueError("Invalid Channel Identifier. Use '@handle' or 'UC...' Channel ID.")
-def get_channel_uploads_playlist_id(service, channel_id):
-    """Gets the uploads playlist ID for a given channel ID."""
     try:
-        channels_response = service.channels().list(
-            id=channel_id,
-            part="contentDetails"
-        ).execute()
-        if not channels_response.get("items"):
-            raise ValueError(f"Could not find channel details for ID '{channel_id}'.")
-        playlist_id = channels_response["items"][0]["contentDetails"]["relatedPlaylists"]["uploads"]
-        if not playlist_id:
-            raise ValueError(f"Could not find uploads playlist for channel {channel_id}.")
-        return playlist_id
     except HttpError as e:
-        raise ConnectionError(f"API error getting uploads playlist for {channel_id}: {e.content}")
-    except Exception as e:
-        raise Exception(f"Error getting uploads playlist for {channel_id}: {e}")
 def get_all_video_ids(service, playlist_id):
-    """Gets all video IDs from a playlist, yielding status updates."""
     video_ids = []
     next_page_token = None
-    fetched_count = 0
-    yield f"Fetching video IDs from playlist: {playlist_id}..."
     while True:
         try:
-            playlist_items_response = service.playlistItems().list(
                 playlistId=playlist_id,
                 part="contentDetails",
                 maxResults=50,
                 pageToken=next_page_token
             ).execute()
-            current_page_ids = [item["contentDetails"]["videoId"]
-                              for item in playlist_items_response.get("items", [])]
-            video_ids.extend(current_page_ids)
-            fetched_count += len(current_page_ids)
-            next_page_token = playlist_items_response.get("nextPageToken")
-            yield f"Fetched {fetched_count} video IDs so far..."
             if not next_page_token:
                 break
         except HttpError as e:
-            if e.resp.status == 403:
-                yield f"API Quota Error: {e.content}. Stopping early."
-                break
-            else:
-                yield f"API Error fetching video list: {e.content}. Stopping early."
-                break
-        except Exception as e:
-            yield f"Error fetching video list: {e}. Stopping early."
             break
-    yield f"Finished fetching. Total unique video IDs found: {len(video_ids)}"
     return video_ids
 def process_video(service, video_id, keywords_set):
-    """Fetches video details and transcript, searches for keywords."""
     video_url = f"https://www.youtube.com/watch?v={video_id}"
-    found_data = {
         "video_id": video_id,
         "video_url": video_url,
         "title": f"Video ID: {video_id}",
@@ -126,52 +79,45 @@ def process_video(service, video_id, keywords_set):
         "description_mentions": set(),
         "description_links": []
     }
-    has_mention = False
-    status_updates = []
     try:
-        video_response = service.videos().list(
-            id=video_id,
-            part="snippet"
-        ).execute()
         if video_response.get("items"):
             snippet = video_response["items"][0]["snippet"]
-            found_data["title"] = snippet.get("title", f"Video ID: {video_id}")
             description = snippet.get("description", "").lower()
             for keyword in keywords_set:
                 if keyword in description:
-                    found_data["description_mentions"].add(keyword)
-                    has_mention = True
-            found_data["description_links"] = re.findall(r'https?://\S+', snippet.get("description", ""))
-        else:
-            status_updates.append(f"  - Could not retrieve details for video {video_id}")
     except HttpError as e:
-        status_updates.append(f"  - API error getting details for {video_id}: {e.resp.status}")
-    except Exception as e:
-        status_updates.append(f"  - Error getting details for {video_id}: {e}")
-    transcript_text = ""
     try:
         transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
         transcript = transcript_list.find_transcript(['en', 'en-US', 'en-GB'])
         if transcript:
             full_transcript = transcript.fetch()
-            transcript_text = " ".join([segment['text'] for segment in full_transcript]).lower()
             for keyword in keywords_set:
                 if keyword in transcript_text:
-                    found_data["transcript_mentions"].add(keyword)
-                    has_mention = True
     except (TranscriptsDisabled, NoTranscriptFound):
-        status_updates.append(f"  - No transcript available for {video_id}")
     except Exception as e:
-        status_updates.append(f"  - Error fetching transcript for {video_id}: {type(e).__name__}")
-    return found_data if has_mention else None, status_updates
-# --- Gradio Main Function ---
-def scan_channel_videos(channel_identifier, keywords_str, progress=gr.Progress(track_tqdm=True)):
-    """Main function called by Gradio interface with hardcoded API key."""
     start_time = time.time()
     status_log = []
     results = []
@@ -184,103 +130,72 @@ def scan_channel_videos(channel_identifier, keywords_str, progress=gr.Progress(t
     try:
         yield from log_status("1. Initializing YouTube Service...")
         service = get_youtube_service()
-        yield from log_status("   Service Initialized.")
-        yield from log_status(f"2. Finding Channel ID for '{channel_identifier}'...")
-        channel_id = get_channel_id(service, channel_identifier)
         yield from log_status(f"   Found Channel ID: {channel_id}")
-        yield from log_status(f"3. Finding Uploads Playlist ID for {channel_id}...")
-        uploads_playlist_id = get_channel_uploads_playlist_id(service, channel_id)
-        yield from log_status(f"   Found Uploads Playlist ID: {uploads_playlist_id}")
-        yield from log_status("4. Fetching all Video IDs...")
-        video_ids_generator = get_all_video_ids(service, uploads_playlist_id)
-        video_ids = None
-        for item in video_ids_generator:
-            if isinstance(item, str):
-                yield from log_status(f"   {item}")
-            else:
-                video_ids = item  # Capture the final returned list
-        if not video_ids or len(video_ids) == 0:
-            yield from log_status("   No video IDs found or fetching failed.")
-            yield "\n".join(status_log), gr.Markdown("### Scan Stopped\nNo videos found or fetching failed.")
             return
-        keywords_list = [k.strip().lower() for k in keywords_str.split(',') if k.strip()]
-        if not keywords_list:
-            raise ValueError("Please provide at least one keyword.")
-        keywords_set = set(keywords_list)
-        yield from log_status(f"5. Scanning {len(video_ids)} videos for keywords: {', '.join(keywords_list)}")
-        total_videos = len(video_ids)
         for video_id in progress.tqdm(video_ids, desc="Scanning Videos"):
-            video_result, video_statuses = process_video(service, video_id, keywords_set)
-            if video_result:
-                results.append(video_result)
-                yield from log_status(f"   Found mention in: {video_result['title']} ({video_id})")
-        yield from log_status("\n6. Scan Complete. Formatting results...")
-        final_md = f"## Scan Results for {channel_identifier}\n\n"
-        final_md += f"Searched for keywords: `{', '.join(keywords_list)}`\n"
-        final_md += f"Found mentions in **{len(results)}** out of **{total_videos}** videos scanned.\n"
         final_md += f"Total time: {time.time() - start_time:.2f} seconds.\n"
         final_md += "---\n"
         if not results:
-            final_md += "\n**No mentions found for the specified keywords.**"
         else:
-            for res in results:
                 final_md += f"\n### [{res['title']}]({res['video_url']})\n"
-                final_md += f"*Video URL: <{res['video_url']}>*\n\n"
                 if res['transcript_mentions']:
-                    mentions = ", ".join(sorted(res['transcript_mentions']))
-                    final_md += f"**Transcript Mentions:** `{mentions}`\n"
                 if res['description_mentions']:
-                    mentions = ", ".join(sorted(res['description_mentions']))
-                    final_md += f"**Description Mentions:** `{mentions}`\n"
                 if res['description_links']:
-                    final_md += f"\n**Links in Description:**\n"
                     for link in res['description_links']:
                         final_md += f"- <{link}>\n"
                 final_md += "\n---\n"
         yield "\n".join(status_log), gr.Markdown(final_md)
-    except ValueError as ve:
-        yield from log_status(f"Configuration Error: {ve}")
-        yield "\n".join(status_log), gr.Markdown(f"### Error\n**Input Error:** {ve}")
-    except ConnectionError as ce:
-        yield from log_status(f"API Connection Error: {ce}")
-        yield "\n".join(status_log), gr.Markdown(f"### Error\n**API Connection Error:** {ce}")
-    except HttpError as he:
-        yield from log_status(f"API HTTP Error: {he.resp.status} - {he.content}")
-        yield "\n".join(status_log), gr.Markdown(f"### Error\n**API HTTP Error:** Status {he.resp.status}\n{he.content}")
     except Exception as e:
         traceback.print_exc()
-        yield from log_status(f"An unexpected error occurred: {e}")
         yield "\n".join(status_log), gr.Markdown(f"### Error\n**Unexpected Error:** {e}")
-# --- Gradio Interface Definition ---
 with gr.Blocks(theme=gr.themes.Soft()) as app:
-    gr.Markdown("# YouTube Channel 3D Software Scanner")
-    gr.Markdown("Find mentions of 3D software in video transcripts and descriptions.")
     with gr.Row():
         with gr.Column(scale=1):
-            gr.Markdown("## Settings")
-            channel_input = gr.Textbox(
-                label="Channel Handle or ID",
-                placeholder="e.g., @theAIsearch or UCxxxxxxxxxxxxxx"
-            )
-            keywords_input = gr.Textbox(
-                label="Keywords to Search (comma-separated)",
-                value=DEFAULT_KEYWORDS
-            )
-            scan_button = gr.Button("Scan Channel", variant="primary")
-            clear_button = gr.Button("Clear All")
         with gr.Column(scale=2):
             gr.Markdown("## Status & Logs")
@@ -292,25 +207,12 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
                 autoscroll=True
             )
             gr.Markdown("## Results")
-            results_output = gr.Markdown(value="Results will be displayed here after scanning.")
-    scan_button.click(
-        fn=scan_channel_videos,
-        inputs=[channel_input, keywords_input],
-        outputs=[status_output, results_output]
-    )
-    def clear_outputs():
-        return "", "Results cleared.", "", DEFAULT_KEYWORDS
-    clear_button.click(
-        fn=clear_outputs,
-        inputs=[],
-        outputs=[status_output, results_output, channel_input, keywords_input]
-    )
-    gr.Markdown("---")
-    gr.Markdown("**Note:** Scans may take time depending on video count and API quotas.")
 # --- Run the App ---
 if __name__ == "__main__":

import os
import re

import gradio as gr
from googleapiclient.discovery import build
 # --- Constants ---
 YOUTUBE_API_SERVICE_NAME = "youtube"
 YOUTUBE_API_VERSION = "v3"
 API_KEY = "AIzaSyCcxSkhTp6aowcyowuBkHIFTSrl_HJ79J0"  # Replace with your actual YouTube Data API key
+CHANNEL_HANDLE = "@theAIsearch"
+# Common 3D software keywords
+KEYWORDS = [
+    "3d", "blender", "maya", "3ds max", "cinema 4d", "houdini", "zbrush", "unreal engine",
+    "unity", "substance painter", "substance designer", "v-ray", "arnold", "rendering",
+    "texturing", "rigging", "vfx", "cgi", "autodesk", "fusion 360"
+]
 # --- YouTube API Helper Functions ---
 def get_youtube_service():
+    """Initializes and returns the YouTube API service."""
     try:
+        return build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=API_KEY, cache_discovery=False)
     except HttpError as e:
+        raise ConnectionError(f"Could not connect to YouTube API: {e}")
def get_channel_id(service, handle):
    """Resolve a channel handle (or channel ID) to a channel ID.

    Args:
        service: YouTube Data API client exposing ``search().list(...)``.
        handle: Channel handle such as ``"@name"``. A 24-character identifier
            starting with ``"UC"`` is treated as a channel ID and returned
            unchanged without calling the API.

    Returns:
        The ``channelId`` of the first channel search result.

    Raises:
        ValueError: If ``handle`` is empty or the search returns no items.
        ConnectionError: If the search request raises ``HttpError``.
    """
    if not handle:
        raise ValueError("Channel Handle or ID is missing.")
    # Already a channel ID: skip the search request entirely.
    if handle.startswith("UC") and len(handle) == 24:
        return handle
    # Only the network call lives in the try block, so the "not found" error
    # below can never be mistaken for a transport failure.
    try:
        search_response = service.search().list(
            q=handle, part="id", type="channel", maxResults=1
        ).execute()
    except HttpError as e:
        raise ConnectionError(f"API error finding channel ID: {e.content}") from e
    items = search_response.get("items")
    if not items:
        raise ValueError(f"Channel '{handle}' not found.")
    return items[0]["id"]["channelId"]
def get_uploads_playlist_id(service, channel_id):
    """Return the ID of the uploads playlist for a channel.

    Args:
        service: YouTube Data API client exposing ``channels().list(...)``.
        channel_id: Channel ID to look up.

    Returns:
        The value of ``contentDetails.relatedPlaylists.uploads`` from the
        first returned channel item.

    Raises:
        ValueError: If no channel item is returned, or the item carries no
            uploads playlist.
        ConnectionError: If the request raises ``HttpError``.
    """
    try:
        response = service.channels().list(id=channel_id, part="contentDetails").execute()
    except HttpError as e:
        raise ConnectionError(f"API error getting uploads playlist: {e.content}") from e
    items = response.get("items")
    if not items:
        raise ValueError(f"No channel details for ID '{channel_id}'.")
    # Walk the nested structure defensively so a missing key becomes a clear
    # ValueError rather than a bare KeyError.
    playlist_id = (
        items[0].get("contentDetails", {}).get("relatedPlaylists", {}).get("uploads")
    )
    if not playlist_id:
        raise ValueError(f"Could not find uploads playlist for channel {channel_id}.")
    return playlist_id
 def get_all_video_ids(service, playlist_id):
+    """Fetches all video IDs from the uploads playlist."""
     video_ids = []
     next_page_token = None
     while True:
         try:
+            response = service.playlistItems().list(
                 playlistId=playlist_id,
                 part="contentDetails",
                 maxResults=50,
                 pageToken=next_page_token
             ).execute()
+            video_ids.extend(item["contentDetails"]["videoId"] for item in response.get("items", []))
+            next_page_token = response.get("nextPageToken")
             if not next_page_token:
                 break
         except HttpError as e:
+            print(f"API Error fetching video IDs: {e.content}")
             break
     return video_ids
 def process_video(service, video_id, keywords_set):
+    """Processes a video for 3D software mentions and links."""
     video_url = f"https://www.youtube.com/watch?v={video_id}"
+    result = {
         "video_id": video_id,
         "video_url": video_url,
         "title": f"Video ID: {video_id}",
         "description_mentions": set(),
         "description_links": []
     }
+    # Fetch video details
     try:
+        video_response = service.videos().list(id=video_id, part="snippet").execute()
         if video_response.get("items"):
             snippet = video_response["items"][0]["snippet"]
+            result["title"] = snippet.get("title", f"Video ID: {video_id}")
             description = snippet.get("description", "").lower()
             for keyword in keywords_set:
                 if keyword in description:
+                    result["description_mentions"].add(keyword)
+            result["description_links"] = re.findall(r'https?://\S+', snippet.get("description", ""))
     except HttpError as e:
+        print(f"API error getting details for {video_id}: {e.resp.status}")
+    # Fetch transcript
     try:
         transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
         transcript = transcript_list.find_transcript(['en', 'en-US', 'en-GB'])
         if transcript:
             full_transcript = transcript.fetch()
+            transcript_text = " ".join(segment['text'] for segment in full_transcript).lower()
             for keyword in keywords_set:
                 if keyword in transcript_text:
+                    result["transcript_mentions"].add(keyword)
     except (TranscriptsDisabled, NoTranscriptFound):
+        pass  # Skip silently if no transcript
     except Exception as e:
+        print(f"Error fetching transcript for {video_id}: {type(e).__name__}")
+    # Return result only if there are mentions
+    if result["transcript_mentions"] or result["description_mentions"]:
+        return result
+    return None
+# --- Main Function ---
+def scan_channel_videos(progress=gr.Progress(track_tqdm=True)):
+    """Scans @theAIsearch for 3D software mentions and links."""
     start_time = time.time()
     status_log = []
     results = []
     try:
         yield from log_status("1. Initializing YouTube Service...")
         service = get_youtube_service()
+        yield from log_status(f"2. Finding Channel ID for '{CHANNEL_HANDLE}'...")
+        channel_id = get_channel_id(service, CHANNEL_HANDLE)
         yield from log_status(f"   Found Channel ID: {channel_id}")
+        yield from log_status(f"3. Fetching Uploads Playlist ID...")
+        playlist_id = get_uploads_playlist_id(service, channel_id)
+        yield from log_status(f"   Found Playlist ID: {playlist_id}")
+        yield from log_status("4. Fetching Video IDs...")
+        video_ids = get_all_video_ids(service, playlist_id)
+        if not video_ids:
+            yield from log_status("   No videos found.")
+            yield "\n".join(status_log), gr.Markdown("### Error\nNo videos found.")
             return
+        yield from log_status(f"   Found {len(video_ids)} videos.")
+        keywords_set = set(k.lower() for k in KEYWORDS)
+        yield from log_status(f"5. Scanning {len(video_ids)} videos for 3D software mentions...")
         for video_id in progress.tqdm(video_ids, desc="Scanning Videos"):
+            result = process_video(service, video_id, keywords_set)
+            if result:
+                results.append(result)
+                yield from log_status(f"   Found mentions in: {result['title']} ({video_id})")
+        yield from log_status("\n6. Formatting Results...")
+        final_md = f"## Scan Results for {CHANNEL_HANDLE}\n\n"
+        final_md += f"Searched for 3D software keywords: `{', '.join(KEYWORDS)}`\n"
+        final_md += f"Found mentions in **{len(results)}** out of **{len(video_ids)}** videos.\n"
         final_md += f"Total time: {time.time() - start_time:.2f} seconds.\n"
         final_md += "---\n"
         if not results:
+            final_md += "\n**No mentions of 3D software found.**"
         else:
+            for res in sorted(results, key=lambda x: x['title']):
                 final_md += f"\n### [{res['title']}]({res['video_url']})\n"
+                final_md += f"*Video URL: <{res['video_url']}>*\n"
                 if res['transcript_mentions']:
+                    final_md += f"**Transcript Mentions:** `{', '.join(sorted(res['transcript_mentions']))}`\n"
                 if res['description_mentions']:
+                    final_md += f"**Description Mentions:** `{', '.join(sorted(res['description_mentions']))}`\n"
                 if res['description_links']:
+                    final_md += f"**Links in Description:**\n"
                     for link in res['description_links']:
                         final_md += f"- <{link}>\n"
                 final_md += "\n---\n"
         yield "\n".join(status_log), gr.Markdown(final_md)
     except Exception as e:
         traceback.print_exc()
+        yield from log_status(f"Error: {e}")
         yield "\n".join(status_log), gr.Markdown(f"### Error\n**Unexpected Error:** {e}")
+# --- Gradio Interface ---
 with gr.Blocks(theme=gr.themes.Soft()) as app:
+    gr.Markdown("# YouTube 3D Software Scanner for @theAIsearch")
+    gr.Markdown("Finds mentions of 3D software in transcripts and links from video descriptions.")
     with gr.Row():
         with gr.Column(scale=1):
+            scan_button = gr.Button("Scan @theAIsearch", variant="primary")
+            clear_button = gr.Button("Clear")
         with gr.Column(scale=2):
             gr.Markdown("## Status & Logs")
                 autoscroll=True
             )
             gr.Markdown("## Results")
+            results_output = gr.Markdown(value="Results will appear here.")
+    scan_button.click(fn=scan_channel_videos, inputs=[], outputs=[status_output, results_output])
+    clear_button.click(fn=lambda: ("", "Results cleared."), inputs=[], outputs=[status_output, results_output])
+    gr.Markdown("**Note:** Requires a valid YouTube Data API key.")
 # --- Run the App ---
 if __name__ == "__main__":