Spaces:
Sleeping
Sleeping
File size: 9,313 Bytes
81d650f ed38ff8 81d650f 55b5913 a2c17c5 81d650f ed38ff8 81d650f ed38ff8 a2c17c5 81d650f a2c17c5 81d650f a2c17c5 81d650f a2c17c5 81d650f a2c17c5 81d650f a2c17c5 81d650f a2c17c5 81d650f a2c17c5 81d650f a2c17c5 81d650f a2c17c5 81d650f a2c17c5 81d650f a2c17c5 81d650f a2c17c5 81d650f ed38ff8 81d650f a2c17c5 81d650f a2c17c5 81d650f ed38ff8 a2c17c5 81d650f ed38ff8 a2c17c5 81d650f a2c17c5 81d650f a2c17c5 81d650f ed38ff8 81d650f a2c17c5 81d650f ed38ff8 a2c17c5 ed38ff8 a2c17c5 81d650f a2c17c5 81d650f a2c17c5 81d650f a2c17c5 81d650f a2c17c5 81d650f ed38ff8 81d650f ed38ff8 81d650f ed38ff8 81d650f a2c17c5 81d650f a2c17c5 81d650f a2c17c5 81d650f a2c17c5 81d650f a2c17c5 81d650f a2c17c5 81d650f a2c17c5 81d650f a2c17c5 ed38ff8 a2c17c5 81d650f a2c17c5 81d650f a2c17c5 81d650f a2c17c5 ed38ff8 81d650f ed38ff8 81d650f ed38ff8 a2c17c5 ed38ff8 81d650f a2c17c5 81d650f a2c17c5 81d650f a2c17c5 81d650f 55b5913 81d650f ed38ff8 81d650f a2c17c5 81d650f a2c17c5 81d650f a2c17c5 81d650f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 |
import os
import re
import time
import traceback

import gradio as gr
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
# --- Constants ---
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
# SECURITY: a key committed to source control is readable by anyone who can
# see this file. Supply the key through the YOUTUBE_API_KEY environment
# variable instead; the embedded value is kept only as a backward-compatible
# fallback and should be rotated and removed.
API_KEY = os.environ.get("YOUTUBE_API_KEY") or "AIzaSyCcxSkhTp6aowcyowuBkHIFTSrl_HJ79J0"
CHANNEL_HANDLE = "@theAIsearch"
# Common 3D software keywords (compared against lower-cased text by the scanner)
KEYWORDS = [
    "3d", "blender", "maya", "3ds max", "cinema 4d", "houdini", "zbrush", "unreal engine",
    "unity", "substance painter", "substance designer", "v-ray", "arnold", "rendering",
    "texturing", "rigging", "vfx", "cgi", "autodesk", "fusion 360",
]
# --- YouTube API Helper Functions ---
def get_youtube_service():
    """Build and return a YouTube Data API client.

    The client is created from the module-level service name, API version
    and API key, with discovery-document caching turned off.

    Raises:
        ConnectionError: if building the client raises ``HttpError``. The
            original error is attached as ``__cause__``.
    """
    try:
        return build(
            YOUTUBE_API_SERVICE_NAME,
            YOUTUBE_API_VERSION,
            developerKey=API_KEY,
            cache_discovery=False,
        )
    except HttpError as e:
        # Chain explicitly so the underlying HTTP failure stays in the traceback.
        raise ConnectionError(f"Could not connect to YouTube API: {e}") from e
def get_channel_id(service, handle):
    """Resolve a channel handle (e.g. ``"@name"``) to its channel ID.

    An exact ``channels.list(forHandle=...)`` lookup is tried first. If the
    client rejects the ``forHandle`` parameter, or the lookup returns no
    items, the function falls back to a ``search.list`` query for the handle
    and uses the top channel hit.

    Args:
        service: YouTube Data API client.
        handle: Channel handle or search text identifying the channel.

    Returns:
        The channel ID string.

    Raises:
        ValueError: if neither lookup finds a channel.
        ConnectionError: if an API request fails with ``HttpError``.
    """
    try:
        try:
            exact_request = service.channels().list(forHandle=handle, part="id")
        except TypeError:
            # A client whose discovery document predates ``forHandle`` rejects
            # the unknown parameter before any request is sent.
            exact_request = None

        exact_items = exact_request.execute().get("items") if exact_request is not None else None
        if exact_items:
            return exact_items[0]["id"]

        # Fallback: fuzzy search, which may return a similarly named channel.
        search_response = service.search().list(
            q=handle, part="id", type="channel", maxResults=1
        ).execute()
        if not search_response.get("items"):
            raise ValueError(f"Channel '{handle}' not found.")
        return search_response["items"][0]["id"]["channelId"]
    except HttpError as e:
        raise ConnectionError(f"API error finding channel ID: {e.content}") from e
def get_uploads_playlist_id(service, channel_id):
    """Return the ID of the channel's "uploads" playlist.

    Args:
        service: YouTube Data API client.
        channel_id: ID of the channel to look up.

    Returns:
        The playlist ID found under
        ``contentDetails.relatedPlaylists.uploads`` of the first item.

    Raises:
        ValueError: if the API returns no items for ``channel_id``.
        ConnectionError: if the request fails with ``HttpError``. The
            original error is attached as ``__cause__``.
    """
    try:
        response = service.channels().list(id=channel_id, part="contentDetails").execute()
        if not response.get("items"):
            raise ValueError(f"No channel details for ID '{channel_id}'.")
        return response["items"][0]["contentDetails"]["relatedPlaylists"]["uploads"]
    except HttpError as e:
        raise ConnectionError(f"API error getting uploads playlist: {e.content}") from e
def get_all_video_ids(service, playlist_id):
    """Collect the video IDs of every item in a playlist.

    Pages through ``playlistItems.list`` 50 items at a time, following
    ``nextPageToken`` until the API stops returning one. If a request fails
    with ``HttpError`` the error is printed and the IDs gathered so far are
    returned.
    """
    collected = []
    page_token = None
    more_pages = True
    while more_pages:
        try:
            request = service.playlistItems().list(
                playlistId=playlist_id,
                part="contentDetails",
                maxResults=50,
                pageToken=page_token,
            )
            page = request.execute()
            for entry in page.get("items", []):
                collected.append(entry["contentDetails"]["videoId"])
            page_token = page.get("nextPageToken")
            # A missing or empty token means the last page was just read.
            more_pages = bool(page_token)
        except HttpError as e:
            print(f"API Error fetching video IDs: {e.content}")
            more_pages = False
    return collected
def _find_keywords(text, keywords):
    """Return the keywords that occur in *text* as whole terms.

    A keyword matches only when it is not directly preceded or followed by
    a word character, so "unity" does not match inside "community".
    """
    found = set()
    for keyword in keywords:
        pattern = r"(?<!\w)" + re.escape(keyword) + r"(?!\w)"
        if re.search(pattern, text):
            found.add(keyword)
    return found


def process_video(service, video_id, keywords_set):
    """Scan one video's description and English transcript for keywords.

    Args:
        service: YouTube Data API client used to fetch the video snippet.
        video_id: ID of the video to inspect.
        keywords_set: Lower-case keywords to look for; both texts are
            lower-cased before matching.

    Returns:
        A dict with keys ``video_id``, ``video_url``, ``title``,
        ``transcript_mentions`` (set), ``description_mentions`` (set) and
        ``description_links`` (list of URLs found in the description), or
        ``None`` when no keyword was found in either text.

    API and transcript errors are printed and that source is skipped, so a
    single failing video does not abort the caller.
    """
    video_url = f"https://www.youtube.com/watch?v={video_id}"
    result = {
        "video_id": video_id,
        "video_url": video_url,
        "title": f"Video ID: {video_id}",
        "transcript_mentions": set(),
        "description_mentions": set(),
        "description_links": [],
    }

    # Fetch video details
    try:
        video_response = service.videos().list(id=video_id, part="snippet").execute()
        items = video_response.get("items")
        if items:
            snippet = items[0]["snippet"]
            result["title"] = snippet.get("title", f"Video ID: {video_id}")
            description = snippet.get("description", "")
            result["description_mentions"] = _find_keywords(description.lower(), keywords_set)
            # Links are taken from the original-case text so URLs stay intact.
            result["description_links"] = re.findall(r'https?://\S+', description)
    except HttpError as e:
        print(f"API error getting details for {video_id}: {e.resp.status}")

    # Fetch transcript
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        transcript = transcript_list.find_transcript(['en', 'en-US', 'en-GB'])
        if transcript:
            full_transcript = transcript.fetch()
            transcript_text = " ".join(segment['text'] for segment in full_transcript).lower()
            result["transcript_mentions"] = _find_keywords(transcript_text, keywords_set)
    except (TranscriptsDisabled, NoTranscriptFound):
        pass  # Skip silently if no transcript
    except Exception as e:
        # Best effort: any other transcript failure is reported, not raised.
        print(f"Error fetching transcript for {video_id}: {type(e).__name__}")

    # Return result only if there are mentions
    if result["transcript_mentions"] or result["description_mentions"]:
        return result
    return None
# --- Main Function ---
def _format_scan_results(results, total_videos, elapsed_seconds):
    """Render the scan outcome as a Markdown report.

    Every element is emitted as its own paragraph (separated by a blank
    line) so that consecutive lines are not merged into one paragraph and a
    ``---`` rule is never read as a setext heading underline for the line
    above it.
    """
    blocks = [
        f"## Scan Results for {CHANNEL_HANDLE}",
        f"Searched for 3D software keywords: `{', '.join(KEYWORDS)}`",
        f"Found mentions in **{len(results)}** out of **{total_videos}** videos.",
        f"Total time: {elapsed_seconds:.2f} seconds.",
        "---",
    ]
    if not results:
        blocks.append("**No mentions of 3D software found.**")

    for res in sorted(results, key=lambda item: item["title"]):
        # Escape square brackets so a title cannot break the link syntax.
        link_text = res["title"].replace("[", "\\[").replace("]", "\\]")
        blocks.append(f"### [{link_text}]({res['video_url']})")
        blocks.append(f"*Video URL: <{res['video_url']}>*")
        if res["transcript_mentions"]:
            mentions = ", ".join(sorted(res["transcript_mentions"]))
            blocks.append(f"**Transcript Mentions:** `{mentions}`")
        if res["description_mentions"]:
            mentions = ", ".join(sorted(res["description_mentions"]))
            blocks.append(f"**Description Mentions:** `{mentions}`")
        if res["description_links"]:
            bullet_list = "\n".join(f"- <{link}>" for link in res["description_links"])
            blocks.append(f"**Links in Description:**\n\n{bullet_list}")
        blocks.append("---")

    return "\n\n".join(blocks)


def scan_channel_videos(progress=gr.Progress(track_tqdm=True)):
    """Scan the configured channel for 3D software mentions and links.

    This is a generator used as a Gradio event handler. Each yielded value
    is a ``(status_text, results_markdown)`` pair: the accumulated status
    log, and either a "Processing..." placeholder, the final report, or an
    error message.

    Args:
        progress: Gradio progress tracker; wraps the per-video loop.
    """
    start_time = time.time()
    status_log = []
    results = []

    def log_status(message):
        # Print, record, and push the updated log to the UI in one step.
        print(message)
        status_log.append(message)
        yield "\n".join(status_log), gr.Markdown("### Processing...")

    try:
        yield from log_status("1. Initializing YouTube Service...")
        service = get_youtube_service()

        yield from log_status(f"2. Finding Channel ID for '{CHANNEL_HANDLE}'...")
        channel_id = get_channel_id(service, CHANNEL_HANDLE)
        yield from log_status(f" Found Channel ID: {channel_id}")

        yield from log_status("3. Fetching Uploads Playlist ID...")
        playlist_id = get_uploads_playlist_id(service, channel_id)
        yield from log_status(f" Found Playlist ID: {playlist_id}")

        yield from log_status("4. Fetching Video IDs...")
        video_ids = get_all_video_ids(service, playlist_id)
        if not video_ids:
            yield from log_status(" No videos found.")
            yield "\n".join(status_log), gr.Markdown("### Error\nNo videos found.")
            return
        yield from log_status(f" Found {len(video_ids)} videos.")

        keywords_set = {k.lower() for k in KEYWORDS}
        yield from log_status(f"5. Scanning {len(video_ids)} videos for 3D software mentions...")
        for video_id in progress.tqdm(video_ids, desc="Scanning Videos"):
            result = process_video(service, video_id, keywords_set)
            if result:
                results.append(result)
                yield from log_status(f" Found mentions in: {result['title']} ({video_id})")

        yield from log_status("\n6. Formatting Results...")
        final_md = _format_scan_results(results, len(video_ids), time.time() - start_time)
        yield "\n".join(status_log), gr.Markdown(final_md)
    except Exception as e:
        # Top-level boundary of the UI handler: report the failure to the
        # user instead of letting the event crash.
        traceback.print_exc()
        yield from log_status(f"Error: {e}")
        yield "\n".join(status_log), gr.Markdown(f"### Error\n**Unexpected Error:** {e}")
# --- Gradio Interface ---
with gr.Blocks(theme=gr.themes.Soft()) as app:
    # Page header
    gr.Markdown("# YouTube 3D Software Scanner for @theAIsearch")
    gr.Markdown("Finds mentions of 3D software in transcripts and links from video descriptions.")

    # Controls on the left, live status log on the right.
    # NOTE(review): nesting below is reconstructed from a flattened source;
    # confirm the intended layout.
    with gr.Row():
        with gr.Column(scale=1):
            scan_button = gr.Button("Scan @theAIsearch", variant="primary")
            clear_button = gr.Button("Clear")
        with gr.Column(scale=2):
            gr.Markdown("## Status & Logs")
            status_output = gr.Textbox(
                label="Scan Progress",
                lines=10,
                max_lines=20,
                interactive=False,
                autoscroll=True,
            )

    gr.Markdown("## Results")
    results_output = gr.Markdown(value="Results will appear here.")

    # Both buttons write to the same pair of components: (status, results).
    scan_button.click(
        fn=scan_channel_videos,
        inputs=[],
        outputs=[status_output, results_output],
    )
    clear_button.click(
        fn=lambda: ("", "Results cleared."),
        inputs=[],
        outputs=[status_output, results_output],
    )

    gr.Markdown("**Note:** Requires a valid YouTube Data API key.")

# --- Run the App ---
if __name__ == "__main__":
    app.launch(debug=False)