Update app.py
Browse files
app.py
CHANGED
@@ -1,1137 +1,6 @@
|
|
1 |
#!/usr/bin/env python3
|
2 |
"""
|
3 |
YouTube Video Analyzer & Downloader Pro
|
4 |
-
(쿠키 자동 처리 버전)
|
5 |
-
|
6 |
-
· `www.youtube.com_cookies.txt` 파일이 **app.py**와 같은 폴더에 있으면
|
7 |
-
자동으로 사용합니다.
|
8 |
-
· Gradio UI에서 쿠키 파일을 업로드하면, 업로드된 파일이 **우선** 적용됩니다.
|
9 |
-
"""
|
10 |
-
|
11 |
-
# ──────────────────────────────────────────────────────────────
|
12 |
-
# 표준 라이브러리
|
13 |
-
# ──────────────────────────────────────────────────────────────
|
14 |
-
import os
|
15 |
-
import re
|
16 |
-
import json
|
17 |
-
import uuid
|
18 |
-
import shutil
|
19 |
-
import tempfile
|
20 |
-
from datetime import datetime
|
21 |
-
from pathlib import Path
|
22 |
-
|
23 |
-
# ──────────────────────────────────────────────────────────────
|
24 |
-
# 외부 라이브러리
|
25 |
-
# ──────────────────────────────────────────────────────────────
|
26 |
-
import gradio as gr
|
27 |
-
import yt_dlp
|
28 |
-
import google.generativeai as genai
|
29 |
-
# ───────── transcript_utils.py ─────────
|
30 |
-
from youtube_transcript_api import YouTubeTranscriptApi
|
31 |
-
from datetime import timedelta
|
32 |
-
|
33 |
-
def fetch_transcript(video_id, lang_pref=("ko", "en")):
    """Fetch a video's transcript and render it as timestamped Markdown lines.

    The languages in ``lang_pref`` are tried in order and the first one that
    can be fetched is used.

    Args:
        video_id: YouTube video ID handed to ``YouTubeTranscriptApi``.
        lang_pref: Language codes to try, most preferred first.

    Returns:
        One ``**[MM:SS]** text`` line per segment, joined with newlines.
        Segments starting at or after one hour are stamped ``H:MM:SS``.

    Raises:
        RuntimeError: If no language in ``lang_pref`` yields a transcript.
    """
    transcript = None
    for lang in lang_pref:
        try:
            transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[lang])
            break
        except Exception:
            # Best effort: a failure for one language just moves on to the next.
            continue
    if transcript is None:
        raise RuntimeError("자막을 찾을 수 없습니다.")

    lines = []
    for seg in transcript:
        minutes, seconds = divmod(int(seg["start"]), 60)
        hours, minutes = divmod(minutes, 60)
        # Keep the hour field when it is non-zero; slicing the last two fields
        # of "H:MM:SS" made 1:01:23 indistinguishable from 0:01:23.
        if hours:
            stamp = f"{hours}:{minutes:02d}:{seconds:02d}"
        else:
            stamp = f"{minutes:02d}:{seconds:02d}"
        lines.append(f"**[{stamp}]** {seg['text']}")
    return "\n".join(lines)
|
50 |
-
# ──────────────────────────────────────────────────────────────
|
51 |
-
# 기본 쿠키 파일 경로 ― 파일명이 동일하면 자동 사용
|
52 |
-
# ──────────────────────────────────────────────────────────────
|
53 |
-
# Cookie file looked up next to this script; used when no cookie file is supplied by the caller.
DEFAULT_COOKIE_FILE = Path(__file__).with_name("www.youtube.com_cookies.txt")
|
54 |
-
|
55 |
-
|
56 |
-
# =================================================================
|
57 |
-
# Main Class
|
58 |
-
# =================================================================
|
59 |
-
class YouTubeDownloader:
|
60 |
-
def __init__(self):
    """Create the working directories and start with no Gemini model configured."""
    # Two scratch directories; downloads are written to `temp_downloads`.
    self.download_dir = tempfile.mkdtemp()
    self.temp_downloads = tempfile.mkdtemp(prefix="youtube_downloads_")

    # Persistent destination: ~/Downloads/YouTube_Downloads (created on demand).
    home_dir = os.path.expanduser("~")
    self.downloads_folder = os.path.join(home_dir, "Downloads", "YouTube_Downloads")
    os.makedirs(self.downloads_folder, exist_ok=True)

    # Set by configure_gemini(); None means "use the template-based analysis".
    self.gemini_model = None
|
72 |
-
|
73 |
-
# ---------------------------------------------------------
|
74 |
-
# Google Gemini API
|
75 |
-
# ---------------------------------------------------------
|
76 |
-
def configure_gemini(self, api_key):
    """Configure the Gemini client and store the model on the instance.

    Args:
        api_key: Google API key passed to ``genai.configure``.

    Returns:
        ``(ok, message)`` where ``ok`` is True on success and ``message`` is a
        user-facing status string.
    """
    try:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(model_name="gemini-1.5-flash-latest")
        self.gemini_model = model
    except Exception as e:
        return False, f"❌ Failed to configure Gemini API: {e}"
    return True, "✅ Gemini API configured successfully!"
|
85 |
-
|
86 |
-
# ---------------------------------------------------------
|
87 |
-
# 임시 디렉터리 정리
|
88 |
-
# ---------------------------------------------------------
|
89 |
-
def cleanup(self):
    """Remove the temporary directories held in ``download_dir`` and ``temp_downloads``.

    Each directory is removed independently, so a failure on one does not
    leave the other behind. Failures are reported on stdout and never raised.
    """
    for attr in ("download_dir", "temp_downloads"):
        path = getattr(self, attr, None)
        if not path or not os.path.exists(path):
            continue
        try:
            shutil.rmtree(path)
        except Exception as e:
            print(f"⚠️ Warning: Could not clean up temporary directory: {e}")
|
97 |
-
|
98 |
-
# ---------------------------------------------------------
|
99 |
-
# 유튜브 URL 검증
|
100 |
-
# ---------------------------------------------------------
|
101 |
-
def is_valid_youtube_url(self, url):
    """Return True when ``url`` looks like a YouTube video URL.

    Surrounding whitespace is ignored, the scheme is optional, and the
    ``www.``, ``m.`` and ``music.`` host prefixes are accepted. The check is
    anchored at the start of the string only, so trailing query parameters
    are allowed.

    Args:
        url: Candidate URL; any non-string value is treated as invalid.

    Returns:
        bool: True if the pattern matches, False otherwise.
    """
    if not isinstance(url, str):
        return False
    youtube_regex = re.compile(
        r"(?:https?://)?(?:(?:www|m|music)\.)?"
        r"(?:youtube|youtu|youtube-nocookie)\.(?:com|be)/"
        r"(?:watch\?v=|embed/|v/|.+\?v=)?(?:[^&=%\?]{11})"
    )
    return youtube_regex.match(url.strip()) is not None
|
108 |
-
|
109 |
-
# ---------------------------------------------------------
|
110 |
-
# Gemini-AI 장면 분석
|
111 |
-
# ---------------------------------------------------------
|
112 |
-
def generate_scene_breakdown_gemini(self, video_info):
    """Ask Gemini for a timestamped scene breakdown, with a template fallback.

    Only the title, duration and the first 1500 characters of the description
    are sent to the model. The reply is split into scenes on lines that start
    with ``**[`` and contain ``]**:``; following lines are appended to the
    current scene.

    Args:
        video_info: Metadata dict; reads ``duration``, ``title`` and
            ``description``.

    Returns:
        list[str]: Scene strings. Falls back to
        ``generate_scene_breakdown_fallback`` when no model is configured,
        the API call fails, or no scene line can be parsed. A single
        "Duration Unknown" entry is returned when the duration is missing.
    """
    if not self.gemini_model:
        return self.generate_scene_breakdown_fallback(video_info)

    # A key that is present with a None value bypasses dict.get()'s default,
    # so normalise with `or` before slicing or formatting.
    duration = video_info.get("duration") or 0
    title = video_info.get("title") or ""
    description = (video_info.get("description") or "")[:1500]

    if not duration:
        return [
            "**[Duration Unknown]**: Unable to generate timestamped breakdown - "
            "video duration not available"
        ]

    prompt = f"""
        Analyze this YouTube video and create a highly detailed, scene-by-scene breakdown
        with precise timestamps and specific descriptions:

        Title: {title}
        Duration: {duration} seconds
        Description: {description}

        IMPORTANT INSTRUCTIONS:
        1. Create detailed scene descriptions that include:
           - Physical appearance of people (age, gender, clothing, hair, etc.)
           - Exact actions being performed
           - Dialogue or speech (include actual lines if audible, or infer probable spoken
             lines based on actions and setting; format them as "Character: line…")
           - Setting and environment details
           - Props, objects, or products being shown
           - Visual effects, text overlays, or graphics
           - Mood, tone, and atmosphere
           - Camera movements or angles (if apparent)
        2. Dialogue Emphasis:
           - Include short dialogue lines in **every scene** wherever plausible.
           - Write lines like: Character: "Actual or inferred line…"
           - If dialogue is not available, intelligently infer probable phrases
             (e.g., "Welcome!", "Try this now!", "It feels amazing!").
        3. Timestamp Guidelines:
           - For videos under 1 minute: 2-3 second segments
           - For videos 1-5 minutes: 3-5 second segments
           - For videos 5-15 minutes: 5-10 second segments
           - For videos over 15 minutes: 10-15 second segments
           - Maximum 20 scenes total for longer videos
        4. Format each scene EXACTLY like this:
           **[MM:SS-MM:SS]**: Detailed description…
        5. Write descriptions as if you're watching the video in real-time,
           noting everything visible and audible.
        """

    try:
        response = self.gemini_model.generate_content(prompt)
        reply = response.text if response else ""
    except Exception as e:
        print(f"Gemini API error: {e}")
        return self.generate_scene_breakdown_fallback(video_info)

    scenes = []
    current_scene = ""
    for line in (reply or "").split("\n"):
        line = line.strip()
        if line.startswith("**[") and "]**:" in line:
            # A new scene header closes the previous scene.
            if current_scene:
                scenes.append(current_scene.strip())
            current_scene = line
        elif current_scene:
            current_scene += "\n" + line
    if current_scene:
        scenes.append(current_scene.strip())

    return scenes if scenes else self.generate_scene_breakdown_fallback(video_info)
|
190 |
-
|
191 |
-
# ---------------------------------------------------------
|
192 |
-
# Fallback 장면 분석
|
193 |
-
# ---------------------------------------------------------
|
194 |
-
def generate_scene_breakdown_fallback(self, video_info):
    """Build a template-based scene breakdown without calling any AI service.

    The segment length depends on the duration (3/5/10/15 seconds). At most
    20 scenes are produced; when the cap applies, the segment length is
    stretched so the scenes span the whole video instead of only its start.

    Args:
        video_info: Metadata dict; reads ``duration``, ``title``,
            ``description`` and ``uploader``. Missing or ``None`` values are
            tolerated, and a fractional duration is truncated to seconds.

    Returns:
        list[str]: ``**[M:SS-M:SS]**: description`` entries, or a single
        "Duration Unknown" entry when the duration is missing or zero.
    """
    max_scenes = 20

    # `or` also covers keys that are present with a None value.
    duration = int(video_info.get("duration") or 0)
    title = (video_info.get("title") or "").lower()
    description = (video_info.get("description") or "").lower()
    uploader = video_info.get("uploader") or "Content creator"

    if duration <= 0:
        return ["**[Duration Unknown]**: Unable to generate timestamped breakdown"]

    if duration <= 60:
        segment_length = 3
    elif duration <= 300:
        segment_length = 5
    elif duration <= 900:
        segment_length = 10
    else:
        segment_length = 15

    num_segments = duration // segment_length + 1
    if num_segments > max_scenes:
        num_segments = max_scenes
        # Ceiling division: widen segments so the last scene reaches the end.
        segment_length = -(-duration // max_scenes)

    video_type = self.detect_video_type_detailed(title, description)

    scenes = []
    for i in range(num_segments):
        start_time = i * segment_length
        end_time = min(start_time + segment_length - 1, duration)
        start_fmt = f"{start_time // 60}:{start_time % 60:02d}"
        end_fmt = f"{end_time // 60}:{end_time % 60:02d}"
        desc = self.generate_contextual_description(
            i, num_segments, video_type, uploader, title
        )
        scenes.append(f"**[{start_fmt}-{end_fmt}]**: {desc}")
    return scenes
|
228 |
-
|
229 |
-
# ---------------------------------------------------------
|
230 |
-
# 비디오 유형 감지(상세)
|
231 |
-
# ---------------------------------------------------------
|
232 |
-
def detect_video_type_detailed(self, title, description):
    """Classify a video into a coarse content type from its title and description.

    Keywords must start at a word boundary, and keywords of three letters or
    fewer must match a whole word (an optional plural "s" is allowed). This
    stops "test" firing on "latest" and "vs" firing on "tvs".

    Args:
        title: Video title (``None`` is treated as empty).
        description: Video description (``None`` is treated as empty).

    Returns:
        str: One of "tutorial", "review", "vlog", "music", "entertainment",
        "news", "cooking", "fitness" or "general". The first matching
        category in that order wins.
    """
    text = f"{title or ''} {description or ''}".lower()

    def mentions(keywords):
        for keyword in keywords:
            tail = r"s?\b" if len(keyword) <= 3 else ""
            if re.search(r"\b" + re.escape(keyword) + tail, text):
                return True
        return False

    rules = (
        ("tutorial", ("tutorial", "how to", "guide", "learn", "diy")),
        ("review", ("review", "unboxing", "test", "comparison", "vs")),
        ("vlog", ("vlog", "daily", "routine", "day in")),
        ("music", ("music", "song", "cover", "lyrics")),
        ("entertainment", ("comedy", "funny", "prank", "challenge")),
        ("news", ("news", "breaking", "update", "report")),
        ("cooking", ("cooking", "recipe", "food", "kitchen")),
        ("fitness", ("workout", "fitness", "exercise", "yoga")),
    )
    for label, keywords in rules:
        if mentions(keywords):
            return label
    return "general"
|
251 |
-
|
252 |
-
# ---------------------------------------------------------
|
253 |
-
# 장면별 설명 생성
|
254 |
-
# ---------------------------------------------------------
|
255 |
-
def generate_contextual_description(
    self, scene_index, total_scenes, video_type, uploader, title
):
    """Return a canned description for one scene of the template breakdown.

    The sentence depends on the scene's position (first, last, or in between)
    and on ``video_type``. The subject is "A woman" or "A man" when the title
    contains one of the whole words woman/girl or man/guy (case-insensitive),
    and "The content creator" otherwise. Whole-word matching keeps titles
    such as "manual" or "human" from being read as "man".

    Args:
        scene_index: Zero-based index of the scene.
        total_scenes: Total number of scenes in the breakdown.
        video_type: Label as produced by ``detect_video_type_detailed``.
        uploader: Accepted for interface compatibility; not used.
        title: Video title (``None`` is treated as empty).

    Returns:
        str: One descriptive sentence (two for tutorial openings).
    """
    title_words = set(re.findall(r"[a-z]+", (title or "").lower()))
    if title_words & {"woman", "girl"}:
        presenter_desc = "A woman"
    elif title_words & {"man", "guy"}:
        presenter_desc = "A man"
    else:
        presenter_desc = "The content creator"

    openings = {
        "tutorial": (
            "appears on screen, introducing themselves and the "
            "topic. They are in a well-lit workspace, wearing casual clothes."
        ),
        "vlog": (
            "greets the camera cheerfully, perhaps waving, and "
            "explains what today's vlog is about."
        ),
        "review": (
            "holds up the product to be reviewed, giving a brief "
            "overview of its features."
        ),
    }
    closings = {
        "tutorial": (
            "shows the final result, thanks viewers, and "
            "encourages them to like and subscribe."
        ),
        "vlog": "wraps up the day, sharing final thoughts and bidding farewell.",
    }
    middles = {
        "tutorial": (
            "demonstrates the next step, providing clear "
            "instructions with close-up shots."
        ),
        "review": (
            "examines a specific feature of the product, showing "
            "it in use and commenting on performance."
        ),
        "vlog": (
            "continues the day's activities, sharing candid "
            "moments and personal reflections."
        ),
        "cooking": (
            "prepares ingredients, chopping and mixing while "
            "explaining each step."
        ),
        "fitness": (
            "performs an exercise set, demonstrating proper form "
            "and offering tips."
        ),
    }

    # The first scene takes precedence over the last one for single-scene videos.
    if scene_index == 0:
        action = openings.get(
            video_type,
            "starts the video with an engaging introduction to "
            "capture viewers' attention.",
        )
    elif scene_index == total_scenes - 1:
        action = closings.get(
            video_type,
            "concludes, summarizing key points and prompting "
            "engagement through likes and comments.",
        )
    else:
        action = middles.get(
            video_type,
            "proceeds with the main content, engaging viewers through "
            "clear explanations.",
        )
    return f"{presenter_desc} {action}"
|
330 |
-
|
331 |
-
# ---------------------------------------------------------
|
332 |
-
# 비디오 유형 (간략)
|
333 |
-
# ---------------------------------------------------------
|
334 |
-
def detect_video_type(self, title, description):
    """Return a display label for the kind of video, based on keywords.

    Keywords must start at a word boundary, and keywords of three letters or
    fewer must match a whole word (an optional plural "s" is allowed).
    Plain substring matching made "ad" fire on words such as "made" or
    "download". "advert" is listed so "advertisement" is still recognised.

    Args:
        title: Video title (``None`` is treated as empty).
        description: Video description (``None`` is treated as empty).

    Returns:
        str: An emoji-prefixed category label. The first matching category
        wins; "🎬 General Content" is returned when nothing matches.
    """
    text = f"{title or ''} {description or ''}".lower()

    def mentions(keywords):
        for keyword in keywords:
            tail = r"s?\b" if len(keyword) <= 3 else ""
            if re.search(r"\b" + re.escape(keyword) + tail, text):
                return True
        return False

    rules = (
        ("🎵 Music Video", ("music", "song", "album", "artist", "band", "lyrics")),
        ("📚 Tutorial/Educational", ("tutorial", "how to", "guide", "learn")),
        ("🎭 Entertainment/Comedy", ("funny", "comedy", "entertainment", "vlog")),
        ("📰 News/Information", ("news", "breaking", "report", "update")),
        ("⭐ Review/Unboxing", ("review", "unboxing", "test", "comparison")),
        ("📺 Commercial/Advertisement", ("commercial", "ad", "advert", "brand", "product")),
    )
    for label, keywords in rules:
        if mentions(keywords):
            return label
    return "🎬 General Content"
|
349 |
-
|
350 |
-
# ---------------------------------------------------------
|
351 |
-
# 배경 음악 추정
|
352 |
-
# ---------------------------------------------------------
|
353 |
-
def detect_background_music(self, video_info):
    """Guess a background-music label from keywords in the video title.

    This is a title heuristic only; no audio is inspected. Keywords must
    start at a word boundary, and keywords of three letters or fewer must
    match a whole word, so "ad" no longer fires on "road" or "made".

    Args:
        video_info: Metadata dict; only ``title`` is read (``None`` allowed).

    Returns:
        str: An emoji-prefixed label; "🎵 Background Music" by default.
    """
    title = (video_info.get("title") or "").lower()

    def mentions(keywords):
        for keyword in keywords:
            tail = r"s?\b" if len(keyword) <= 3 else ""
            if re.search(r"\b" + re.escape(keyword) + tail, title):
                return True
        return False

    rules = (
        ("🎵 Original Music/Soundtrack", ("music", "song")),
        ("🎶 Upbeat Commercial Music", ("commercial", "ad")),
        ("🔇 Minimal/No Background Music", ("tutorial", "how to")),
        ("🎼 Ambient Background Music", ("vlog", "daily")),
    )
    for label, keywords in rules:
        if mentions(keywords):
            return label
    return "🎵 Background Music"
|
364 |
-
|
365 |
-
# ---------------------------------------------------------
|
366 |
-
# 인플루언서 규모 추정
|
367 |
-
# ---------------------------------------------------------
|
368 |
-
def detect_influencer_status(self, video_info):
    """Label the creator's reach from follower and view counts.

    The follower count is read from ``channel_follower_count`` (the field
    name documented by yt-dlp), with the legacy ``channel_followers`` key
    still honoured. Missing or ``None`` counts are treated as zero.

    Args:
        video_info: Metadata dict.

    Returns:
        str: A follower-tier label, "🔥 Viral Content Creator" when the
        follower tiers do not apply but views exceed 100,000, otherwise
        "👤 Regular Content Creator".
    """
    subs = (
        video_info.get("channel_follower_count")
        or video_info.get("channel_followers")
        or 0
    )
    views = video_info.get("view_count") or 0

    tiers = (
        (10_000_000, "🌟 Mega Influencer (10M+)"),
        (1_000_000, "⭐ Major Influencer (1M+)"),
        (100_000, "🎯 Mid-tier Influencer (100K+)"),
        (10_000, "📈 Micro Influencer (10K+)"),
    )
    for threshold, label in tiers:
        if subs > threshold:
            return label
    if views > 100_000:
        return "🔥 Viral Content Creator"
    return "👤 Regular Content Creator"
|
382 |
-
|
383 |
-
# ---------------------------------------------------------
|
384 |
-
# 숫자 포맷터
|
385 |
-
# ---------------------------------------------------------
|
386 |
-
def format_number(self, num):
    """Abbreviate a count with a K/M/B suffix and one decimal place.

    Args:
        num: Number to format; any falsy value (0, None) yields "0".

    Returns:
        str: e.g. "1.5K", "2.5M", "3.0B", or the plain number below 1000.
    """
    if not num:
        return "0"
    # Largest unit first so the first threshold reached decides the suffix.
    for threshold, suffix in ((1_000_000_000, "B"), (1_000_000, "M"), (1_000, "K")):
        if num >= threshold:
            return f"{num / threshold:.1f}{suffix}"
    return str(num)
|
396 |
-
|
397 |
-
# ---------------------------------------------------------
|
398 |
-
# 최종 리포트 생성
|
399 |
-
# ---------------------------------------------------------
|
400 |
-
def format_video_info(self, video_info):
    """Render the full analysis report for one video as Markdown-flavoured text.

    Counts, duration, upload date and description are normalised first, so
    that fields which are missing or present with a ``None`` value no longer
    crash the report. A fractional duration is truncated to whole seconds.

    Args:
        video_info: Metadata dict; reads ``title``, ``uploader``,
            ``duration``, ``view_count``, ``like_count``, ``comment_count``,
            ``upload_date``, ``description`` and ``id``.

    Returns:
        str: The report, or an error message when ``video_info`` is empty.
    """
    if not video_info:
        return "❌ No video information available."

    def as_int(key):
        # `or 0` also covers keys that are present with a None value.
        try:
            return int(video_info.get(key) or 0)
        except (TypeError, ValueError):
            return 0

    title = video_info.get("title") or "Unknown"
    uploader = video_info.get("uploader") or "Unknown"
    description = video_info.get("description") or ""
    duration = as_int("duration")
    views = as_int("view_count")
    likes = as_int("like_count")
    comments = as_int("comment_count")

    dur_str = f"{duration // 60}:{duration % 60:02d}" if duration else "Unknown"

    # An eight-digit date is shown as YYYY-MM-DD.
    upload_date = str(video_info.get("upload_date") or "Unknown")
    if len(upload_date) == 8 and upload_date.isdigit():
        upload_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:8]}"

    scenes = self.generate_scene_breakdown_gemini(video_info)
    vtype = self.detect_video_type(title, description)
    bgm = self.detect_background_music(video_info)
    creator = self.detect_influencer_status(video_info)
    engagement = (likes / views) * 100 if views else 0

    rule = "─" * 25
    lines = [
        "🎬 YOUTUBE VIDEO ANALYSIS REPORT",
        "=" * 50,
        "",
        "📋 BASIC INFORMATION",
        rule,
        f"📹 **Title:** {title}",
        f"👤 **Uploader:** {uploader}",
        f"📅 **Upload Date:** {upload_date}",
        f"⏱️ **Duration:** {dur_str}",
        f"🆔 **Video ID:** {video_info.get('id') or 'Unknown'}",
        "",
        "📊 PERFORMANCE METRICS",
        rule,
        f"👀 **Views:** {self.format_number(views)} ({views:,})",
        f"👍 **Likes:** {self.format_number(likes)} ({likes:,})",
        f"💬 **Comments:** {self.format_number(comments)} ({comments:,})",
        f"📈 **Engagement Rate:** {engagement:.2f}%",
        "",
        "🎯 CONTENT ANALYSIS",
        rule,
        f"📂 **Video Type:** {vtype}",
        f"🎵 **Background Music:** {bgm}",
        f"👑 **Creator Status:** {creator}",
        "",
        "🎬 DETAILED SCENE BREAKDOWN",
        "─" * 30,
        "\n".join(scenes),
        "",
        "📝 DESCRIPTION PREVIEW",
        rule,
        description[:500] or "No description available",
        "...(truncated)" if len(description) > 500 else "",
        "",
        "=" * 50,
        f"📊 **Analysis completed:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        f"🤖 **AI Enhancement:** {'Gemini AI' if self.gemini_model else 'Standard Analysis'}",
    ]
    return "\n".join(lines).strip()
|
461 |
-
|
462 |
-
# ---------------------------------------------------------
|
463 |
-
# 메타데이터 추출
|
464 |
-
# ---------------------------------------------------------
|
465 |
-
def get_video_info(self, url, progress=gr.Progress(), cookiefile=None):
    """Extract a video's metadata with yt-dlp without downloading it.

    Surrounding whitespace on ``url`` is removed before validation and
    before the URL is handed to yt-dlp.

    Cookie precedence: an existing ``cookiefile`` path, then
    ``DEFAULT_COOKIE_FILE`` if that file exists, otherwise no cookies.

    Args:
        url: YouTube video URL.
        progress: Gradio progress callback, called as ``progress(fraction, desc=...)``.
        cookiefile: Optional path to a cookies file.

    Returns:
        ``(info, message)``: the metadata dict and a success message, or
        ``(None, error_message)`` on invalid input or extraction failure.
    """
    if not url or not url.strip():
        return None, "❌ Please enter a YouTube URL"
    url = url.strip()
    if not self.is_valid_youtube_url(url):
        return None, "❌ Invalid YouTube URL format"

    if not (cookiefile and os.path.exists(cookiefile)):
        cookiefile = str(DEFAULT_COOKIE_FILE) if DEFAULT_COOKIE_FILE.exists() else None

    try:
        progress(0.1, desc="Initializing YouTube extractor…")
        ydl_opts = {"noplaylist": True, "extract_flat": False}
        if cookiefile:
            ydl_opts["cookiefile"] = cookiefile

        progress(0.5, desc="Extracting video metadata…")
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)

        progress(1.0, desc="✅ Analysis complete!")
        return info, "✅ Video information extracted successfully"
    except Exception as e:
        # Boundary for the UI: report any extractor failure as a message.
        return None, f"❌ Error: {e}"
|
494 |
-
|
495 |
-
# ---------------------------------------------------------
|
496 |
-
# 다운로드
|
497 |
-
# ---------------------------------------------------------
|
498 |
-
def download_video(
    self,
    url,
    quality="best",
    audio_only=False,
    progress=gr.Progress(),
    cookiefile=None,
):
    """Download a video (or its audio as MP3) and copy it to the Downloads folder.

    The file is first written to ``self.temp_downloads`` with a timestamp in
    its name, then copied to ``self.downloads_folder``. A failed copy is
    reported in the status message but does not fail the download.

    Args:
        url: YouTube video URL; surrounding whitespace is ignored.
        quality: "720p", "480p", or anything else for the 1080p-capped default.
        audio_only: When True, extract the best audio stream as 192 kbps MP3.
        progress: Gradio progress callback, called as ``progress(fraction, desc=...)``.
        cookiefile: Optional cookies file; falls back to ``DEFAULT_COOKIE_FILE``
            when it exists.

    Returns:
        ``(path_to_temp_file, status_message)`` on success, or
        ``(None, error_message)`` on failure.
    """
    if not url or not url.strip():
        return None, "❌ Please enter a YouTube URL"
    url = url.strip()
    if not self.is_valid_youtube_url(url):
        return None, "❌ Invalid YouTube URL format"

    # Cookie precedence: explicit existing file -> default file -> none.
    if not (cookiefile and os.path.exists(cookiefile)):
        cookiefile = str(DEFAULT_COOKIE_FILE) if DEFAULT_COOKIE_FILE.exists() else None

    try:
        progress(0.1, desc="Preparing download…")
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        ydl_opts = {
            "outtmpl": os.path.join(
                self.temp_downloads, f"%(title)s_{timestamp}.%(ext)s"
            ),
            "noplaylist": True,
        }

        if audio_only:
            ydl_opts["format"] = "bestaudio/best"
            ydl_opts["postprocessors"] = [
                {
                    "key": "FFmpegExtractAudio",
                    "preferredcodec": "mp3",
                    "preferredquality": "192",
                }
            ]
        elif quality == "720p":
            ydl_opts["format"] = "best[height<=720]"
        elif quality == "480p":
            ydl_opts["format"] = "best[height<=480]"
        else:  # "best"
            ydl_opts["format"] = "best[height<=1080]"

        if cookiefile:
            ydl_opts["cookiefile"] = cookiefile

        def hook(d):
            status = d.get("status")
            if status == "downloading":
                # The total may be absent or None; fall back to the estimate
                # rather than dividing by a missing value.
                total = d.get("total_bytes") or d.get("total_bytes_estimate")
                done = d.get("downloaded_bytes") or 0
                if total:
                    pct = min(done / total * 100, 100.0)
                    progress(0.1 + pct / 100 * 0.7, desc=f"Downloading… {pct:.1f}%")
                else:
                    progress(0.5, desc="Downloading…")
            elif status == "finished":
                progress(0.8, desc="Processing download…")

        ydl_opts["progress_hooks"] = [hook]

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.extract_info(url, download=True)

        progress(0.9, desc="Copying to Downloads folder…")

        # The output name embeds the timestamp, so use it to find the file.
        downloaded_temp = None
        for name in os.listdir(self.temp_downloads):
            if timestamp in name:
                downloaded_temp = os.path.join(self.temp_downloads, name)
                break

        if not downloaded_temp:
            return None, "❌ Downloaded file not found"

        final_path = os.path.join(
            self.downloads_folder, os.path.basename(downloaded_temp)
        )
        try:
            shutil.copy2(downloaded_temp, final_path)
            saved = True
        except Exception as e:
            print(f"Copy warning: {e}")
            saved = False

        progress(1.0, desc="✅ Download complete!")

        msg = (
            "✅ Download successful!\n"
            f"📁 Temp file: {os.path.basename(downloaded_temp)}\n"
            f"📁 Saved to: {final_path if saved else 'Copy failed'}\n"
            f"🎯 Size: {os.path.getsize(downloaded_temp)/(1024*1024):.1f} MB"
        )
        return downloaded_temp, msg

    except Exception as e:
        # Boundary for the UI: report any download failure as a message.
        return None, f"❌ Download failed: {e}"
|
601 |
-
|
602 |
-
|
603 |
-
# =================================================================
|
604 |
-
# Helper functions for Gradio
|
605 |
-
# =================================================================
|
606 |
-
# Module-level instance shared by the Gradio callback helpers in this file.
downloader = YouTubeDownloader()
|
607 |
-
|
608 |
-
|
609 |
-
def configure_api_key(api_key):
    """Gradio callback: configure Gemini on the shared downloader.

    Args:
        api_key: Key text entered by the user; blank input is rejected.

    Returns:
        ``(status_message, gr.update(visible=...))``; the update is visible
        only when configuration succeeded.
    """
    key = (api_key or "").strip()
    if not key:
        return "❌ Please enter a valid Google API key", gr.update(visible=False)
    ok, msg = downloader.configure_gemini(key)
    return msg, gr.update(visible=ok)
|
614 |
-
|
615 |
-
|
616 |
-
def analyze_with_cookies(url, cookies_file, progress=gr.Progress()):
    """Gradio callback: fetch metadata for ``url`` and return the formatted report.

    Args:
        url: YouTube URL entered by the user.
        cookies_file: Optional cookies file from the UI; falsy means none.
        progress: Gradio progress callback.

    Returns:
        str: The analysis report, or an error message on failure.
    """
    try:
        progress(0.05, desc="Starting analysis…")
        info, msg = downloader.get_video_info(
            url, progress=progress, cookiefile=cookies_file or None
        )
        if not info:
            return f"❌ Analysis Failed: {msg}"
        progress(0.95, desc="Generating report…")
        return downloader.format_video_info(info)
    except Exception as e:
        return f"❌ System Error: {e}"
|
629 |
-
|
630 |
-
|
631 |
-
def download_with_cookies(url, quality, audio_only, cookies_file, progress=gr.Progress()):
    """Gradio callback: download ``url`` through the shared downloader.

    Args:
        url: YouTube URL entered by the user.
        quality: Quality choice forwarded to ``download_video``.
        audio_only: Whether to extract audio only.
        cookies_file: Optional cookies file from the UI; falsy means none.
        progress: Gradio progress callback.

    Returns:
        ``(file_path, status)``; ``file_path`` is None when the download failed.
    """
    try:
        progress(0.05, desc="Preparing download…")
        file_path, status = downloader.download_video(
            url,
            quality,
            audio_only,
            progress=progress,
            cookiefile=cookies_file or None,
        )
        if file_path:
            return file_path, status
        return None, status
    except Exception as e:
        return None, f"❌ System Error: {e}"
|
641 |
-
|
642 |
-
|
643 |
-
# =================================================================
|
644 |
-
# Gradio UI
|
645 |
-
# =================================================================
|
646 |
-
#!/usr/bin/env python3
|
647 |
-
"""
|
648 |
-
YouTube Video Analyzer & Downloader Pro
|
649 |
-
───────────────────────────────────────
|
650 |
-
• `www.youtube.com_cookies.txt` 가 app.py 와 같은 폴더에 있으면 자동으로 사용
|
651 |
-
• UI에서 쿠키를 업로드하면 그 파일이 *우선* 적용
|
652 |
-
• “Transcript” 탭을 추가해 **전체 자막 + MM:SS 타임스탬프** 출력
|
653 |
-
"""
|
654 |
-
|
655 |
-
# ── 표준 라이브러리 ───────────────────────────────────────────
|
656 |
-
import os, re, json, shutil, tempfile
|
657 |
-
from datetime import datetime, timedelta
|
658 |
-
from pathlib import Path
|
659 |
-
|
660 |
-
# ── 서드파티 ──────────────────────────────────────────────────
|
661 |
-
import gradio as gr
|
662 |
-
import yt_dlp
|
663 |
-
import google.generativeai as genai
|
664 |
-
from youtube_transcript_api import YouTubeTranscriptApi # NEW
|
665 |
-
|
666 |
-
# ── 상수 ──────────────────────────────────────────────────────
|
667 |
-
# Cookie file looked up next to this script; used when no cookie file is supplied by the caller.
DEFAULT_COOKIE_FILE = Path(__file__).with_name("www.youtube.com_cookies.txt")
|
668 |
-
|
669 |
-
# YouTube URL 정규식(캡처 그룹 6이 영상 ID)
|
670 |
-
# Matches from the start of the string only; capture group 6 is the
# 11-character video ID.
_YT_REGEX = re.compile(
    "".join(
        (
            r"(https?://)?",                           # 1: optional scheme
            r"(www\.)?",                               # 2: optional "www."
            r"(youtube|youtu|youtube-nocookie)\.",     # 3: host name
            r"(com|be)/",                              # 4: top-level domain
            r"(watch\?v=|embed/|v/|.+\?v=)?",          # 5: optional path/query prefix
            r"([^&=%\?]{11})",                         # 6: video ID
        )
    )
)
|
675 |
-
|
676 |
-
|
677 |
-
# =================================================================
|
678 |
-
# Helper : video-ID 추출 + 자막 가져오기
|
679 |
-
# =================================================================
|
680 |
-
def extract_video_id(url: str) -> str | None:
|
681 |
-
"""유튜브 URL에서 11-글자 비디오 ID 반환(없으면 None)"""
|
682 |
-
m = _YT_REGEX.match(url)
|
683 |
-
return m.group(6) if m else None
|
684 |
-
|
685 |
-
|
686 |
-
def fetch_transcript(video_id: str, pref_lang=("ko", "en")) -> str:
    """Fetch a video's transcript and render it as timestamped Markdown lines.

    The languages in ``pref_lang`` are tried in order. If none is available,
    the first transcript listed for the video is used, whatever its language.

    Args:
        video_id: YouTube video ID handed to ``YouTubeTranscriptApi``.
        pref_lang: Language codes to try, most preferred first.

    Returns:
        One ``**[MM:SS]** text`` line per segment, joined with newlines.
        Segments starting at or after one hour are stamped ``H:MM:SS``.

    Raises:
        Whatever ``YouTubeTranscriptApi`` raises when the video has no
        transcript at all.
    """
    transcript = None
    for lang in pref_lang:
        try:
            transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[lang])
            break
        except Exception:
            # Best effort: a failure for one language just moves on to the next.
            continue

    if transcript is None:
        # get_transcript() without `languages` asks for the library's default
        # language only, so enumerate what actually exists and take the first.
        for available in YouTubeTranscriptApi.list_transcripts(video_id):
            transcript = available.fetch()
            break

    if transcript is None:
        # Nothing listed: let the library raise its own error, as before.
        transcript = YouTubeTranscriptApi.get_transcript(video_id)

    lines = []
    for seg in transcript:
        minutes, seconds = divmod(int(seg["start"]), 60)
        hours, minutes = divmod(minutes, 60)
        # Keep the hour field when it is non-zero; slicing the last two fields
        # of "H:MM:SS" made 1:01:23 indistinguishable from 0:01:23.
        if hours:
            stamp = f"{hours}:{minutes:02d}:{seconds:02d}"
        else:
            stamp = f"{minutes:02d}:{seconds:02d}"
        lines.append(f"**[{stamp}]** {seg['text']}")
    return "\n".join(lines)
|
709 |
-
|
710 |
-
|
711 |
-
# =================================================================
|
712 |
-
# 메인 클래스
|
713 |
-
# =================================================================
|
714 |
-
class YouTubeDownloader:
|
715 |
-
def __init__(self):
    """Create the working directories and start with no Gemini model configured."""
    # Two scratch directories; downloads are written to `temp_downloads`.
    self.download_dir = tempfile.mkdtemp()
    self.temp_downloads = tempfile.mkdtemp(prefix="youtube_downloads_")

    # Persistent destination: ~/Downloads/YouTube_Downloads (created on demand).
    home_dir = os.path.expanduser("~")
    self.downloads_folder = os.path.join(home_dir, "Downloads", "YouTube_Downloads")
    os.makedirs(self.downloads_folder, exist_ok=True)

    # Set by configure_gemini(); None means "use the template-based analysis".
    self.gemini_model = None
|
723 |
-
|
724 |
-
# ───────── Gemini ─────────
|
725 |
-
def configure_gemini(self, api_key):
    """Configure the Gemini client and store the model on the instance.

    Args:
        api_key: Google API key passed to ``genai.configure``.

    Returns:
        ``(ok, message)`` where ``ok`` is True on success and ``message`` is a
        user-facing status string.
    """
    try:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(model_name="gemini-1.5-flash-latest")
        self.gemini_model = model
    except Exception as e:
        return False, f"❌ Failed to configure Gemini API: {e}"
    return True, "✅ Gemini API configured successfully!"
|
734 |
-
|
735 |
-
# ───────── 정리 ─────────
|
736 |
-
def cleanup(self):
    """Remove the temporary directories held in ``download_dir`` and ``temp_downloads``.

    Each directory is removed independently, so a failure on one does not
    leave the other behind. Failures are reported on stdout instead of being
    swallowed silently; nothing is raised.
    """
    for attr in ("download_dir", "temp_downloads"):
        path = getattr(self, attr, None)
        if not path or not os.path.exists(path):
            continue
        try:
            shutil.rmtree(path)
        except Exception as e:
            print(f"⚠️ Warning: Could not clean up temporary directory: {e}")
|
744 |
-
|
745 |
-
# ───────── URL 검증 ──────
|
746 |
-
def is_valid_youtube_url(self, url):
    """Return True when ``url`` matches the module-level ``_YT_REGEX`` pattern.

    Surrounding whitespace is ignored, and any non-string value is treated as
    invalid instead of raising ``TypeError``.

    Args:
        url: Candidate URL.

    Returns:
        bool: True if the pattern matches at the start of the stripped URL.
    """
    if not isinstance(url, str):
        return False
    return _YT_REGEX.match(url.strip()) is not None
|
748 |
-
|
749 |
-
# ───────── Gemini scene breakdown (생략 없이 전체 구현) ──────
|
750 |
-
def generate_scene_breakdown_gemini(self, video_info):
    """Ask Gemini for a timestamped scene breakdown, with a template fallback.

    Only the title, duration and the first 1500 characters of the description
    are sent to the model. The prompt asks for ``**[MM:SS-MM:SS]**:`` headers,
    which is the form the parser looks for. Headers without the trailing
    colon are accepted too.

    Args:
        video_info: Metadata dict; reads ``duration``, ``title`` and
            ``description``. ``None`` values are treated as missing.

    Returns:
        list[str]: Scene strings. Falls back to
        ``generate_scene_breakdown_fallback`` when no model is configured,
        the API call fails, or no scene header can be parsed. A single
        "Duration Unknown" entry is returned when the duration is missing.
    """
    if not self.gemini_model:
        return self.generate_scene_breakdown_fallback(video_info)

    # A key that is present with a None value bypasses dict.get()'s default.
    duration = video_info.get("duration") or 0
    title = video_info.get("title") or ""
    description = (video_info.get("description") or "")[:1500]

    if not duration:
        return [
            "**[Duration Unknown]**: Unable to generate timestamped breakdown – "
            "video duration not available"
        ]

    prompt = f"""
        Analyze this YouTube video and create a highly detailed, scene-by-scene breakdown
        with precise timestamps and specific descriptions:

        Title: {title}
        Duration: {duration} seconds
        Description: {description}

        IMPORTANT INSTRUCTIONS:
        1. Create detailed scene descriptions that include:
           - Physical appearance of people (age, gender, clothing, hair, etc.)
           - Exact actions being performed
           - Dialogue or speech (include actual lines if audible, or infer probable spoken
             lines based on actions and setting; format them as "Character: line…")
           - Setting and environment details
           - Props, objects, or products being shown
           - Visual effects, text overlays, or graphics
           - Mood, tone, and atmosphere
           - Camera movements or angles (if apparent)
        2. Dialogue Emphasis:
           - Include short dialogue lines in **every scene** wherever plausible.
           - Write lines like: Character: "Actual or inferred line…"
           - If dialogue is not available, intelligently infer probable phrases
        3. Timestamp Guidelines:
           - <1 min : 2–3 s | 1–5 min : 3–5 s | 5–15 min : 5–10 s | >15 min : 10–15 s
           - Max 20 scenes
        4. Format: **[MM:SS-MM:SS]**: description
        """

    try:
        resp = self.gemini_model.generate_content(prompt)
        reply = resp.text if resp else ""
    except Exception as e:
        print(f"Gemini API error: {e}")
        return self.generate_scene_breakdown_fallback(video_info)

    scenes, cur = [], ""
    for line in (reply or "").splitlines():
        line = line.strip()
        # Accept "**[..]**" with or without the trailing colon.
        if line.startswith("**[") and "]**" in line:
            if cur:
                scenes.append(cur.strip())
            cur = line
        elif cur:
            cur += "\n" + line
    if cur:
        scenes.append(cur.strip())

    return scenes if scenes else self.generate_scene_breakdown_fallback(video_info)
|
809 |
-
|
810 |
-
# ───────── fallback breakdown ──────
|
811 |
-
def generate_scene_breakdown_fallback(self, video_info):
    """Build a generic, evenly spaced scene list without calling Gemini.

    Args:
        video_info: Metadata dict; ``duration`` (seconds), ``title`` and
            ``description`` are read.

    Returns:
        A list of at most 20 strings shaped like
        ``**[MM:SS-MM:SS]**: description``. When the duration is missing,
        zero or unusable, a single placeholder entry is returned instead.
    """
    max_scenes = 20

    # The duration may be absent, None or a float; normalise it to an int so
    # the ``:02d`` formatting below cannot raise.
    try:
        duration = int(video_info.get("duration") or 0)
    except (TypeError, ValueError):
        duration = 0
    if duration <= 0:
        return ["**[Duration Unknown]**: Unable to generate timestamped breakdown"]

    # Preferred segment length (seconds) by video length.
    if duration <= 60:
        seg = 3
    elif duration <= 300:
        seg = 5
    elif duration <= 900:
        seg = 10
    else:
        seg = 15

    # With a fixed segment length the 20-scene cap would only cover the start
    # of a long video, so widen the segments until they span all of it.
    seg = max(seg, -(-duration // max_scenes))  # ceil(duration / max_scenes)
    # Ceiling division avoids a zero-length trailing scene when the duration
    # is an exact multiple of the segment length.
    total = min(-(-duration // seg), max_scenes)

    title = video_info.get("title") or ""
    vtype = self.detect_video_type_detailed(title, video_info.get("description") or "")

    scenes = []
    for i in range(total):
        s = i * seg
        # The last scene always runs to the end of the video.
        e = duration if i == total - 1 else s + seg - 1
        description = self.generate_contextual_description(i, total, vtype, "", title)
        scenes.append(
            f"**[{s // 60:02d}:{s % 60:02d}-{e // 60:02d}:{e % 60:02d}]**: {description}"
        )
    return scenes
|
837 |
-
|
838 |
-
# ───────── detect helpers (detailed) ──────
|
839 |
-
def detect_video_type_detailed(self, title, desc):
    """Classify a video into a coarse category by keyword matching.

    The title and description are concatenated, lower-cased and searched for
    substrings; the first rule that matches wins, so rule order matters.

    Args:
        title: Video title; ``None`` is treated as an empty string.
        desc: Video description; ``None`` is treated as an empty string.

    Returns:
        One of ``"tutorial"``, ``"review"``, ``"vlog"``, ``"music"``,
        ``"entertainment"``, ``"news"``, ``"cooking"``, ``"fitness"`` or
        ``"general"`` when nothing matches.
    """
    # Metadata fields may be None; plain ``title + " " + desc`` would raise.
    haystack = f"{title or ''} {desc or ''}".lower()

    rules = (
        ("tutorial", ("tutorial", "how to", "guide", "diy")),
        ("review", ("review", "unboxing", "comparison")),
        ("vlog", ("vlog", "daily", "routine")),
        ("music", ("music", "song", "cover")),
        ("entertainment", ("comedy", "prank", "challenge")),
        ("news", ("news", "update", "report")),
        ("cooking", ("cooking", "recipe", "food")),
        ("fitness", ("workout", "fitness", "yoga")),
    )
    for label, keywords in rules:
        if any(keyword in haystack for keyword in keywords):
            return label
    return "general"
|
858 |
-
|
859 |
-
def generate_contextual_description(self, idx, total, vtype, uploader, title):
    """Return a canned description for scene ``idx`` out of ``total`` scenes.

    Only the position matters: the first scene gets an intro line, the last
    scene an outro line, everything else a generic filler. ``vtype``,
    ``uploader`` and ``title`` are accepted but not used by this body.
    """
    intro = "The creator greets viewers and introduces the video."
    outro = "The creator wraps up and thanks viewers."
    filler = "Content continues according to the video type."
    # The first-scene check comes first, so a single-scene list gets the intro.
    return intro if idx == 0 else (outro if idx == total - 1 else filler)
|
867 |
-
|
868 |
-
# ───────── quick-detect helpers (summary) ──────
|
869 |
-
def detect_video_type(self, title, desc):
    """Return a short emoji-labelled category for a video.

    The title and description are concatenated, lower-cased and searched for
    substrings; the first rule that matches wins.

    Args:
        title: Video title; ``None`` is treated as an empty string.
        desc: Video description; ``None`` is treated as an empty string.

    Returns:
        A display label such as ``"🎵 Music"``, or ``"🎬 General"`` when no
        keyword matches.
    """
    # Metadata fields may be None; plain ``title + " " + desc`` would raise.
    haystack = f"{title or ''} {desc or ''}".lower()

    rules = (
        ("🎵 Music", ("music", "song", "album")),
        ("📚 Tutorial", ("tutorial", "guide")),
        ("🎭 Entertainment", ("comedy", "vlog")),
        ("📰 News", ("news", "report")),
        ("⭐ Review", ("review", "unboxing")),
    )
    for label, keywords in rules:
        if any(keyword in haystack for keyword in keywords):
            return label
    return "🎬 General"
|
882 |
-
|
883 |
-
def detect_background_music(self, video_info):
    """Guess a background-music label from keywords in the video title.

    Only ``video_info["title"]`` is inspected (case-insensitively); "music"
    is checked before "tutorial", and a generic label is the fallback.
    """
    lowered_title = video_info.get("title", "").lower()
    for needle, label in (
        ("music", "🎵 Original music"),
        ("tutorial", "🔇 Minimal music"),
    ):
        if needle in lowered_title:
            return label
    return "🎼 Background music"
|
890 |
-
|
891 |
-
def detect_influencer_status(self, video_info):
    """Map the channel's follower count to an influencer-tier label.

    Args:
        video_info: Metadata dict. The follower count is read from
            ``channel_follower_count`` (the field name yt-dlp documents) and,
            for backward compatibility, from ``channel_followers``.

    Returns:
        ``"🌟 Mega (10 M+)"``, ``"⭐ Major (1 M+)"``, ``"🎯 Mid (100 K+)"`` or
        ``"👤"`` when the count is lower, missing or None.
    """
    # Prefer the yt-dlp field; keep the legacy key so existing callers that
    # populate ``channel_followers`` themselves still work. ``or 0`` also
    # covers an explicit None, which would otherwise break the comparisons.
    subs = (
        video_info.get("channel_follower_count")
        or video_info.get("channel_followers")
        or 0
    )

    tiers = (
        (10_000_000, "🌟 Mega (10 M+)"),
        (1_000_000, "⭐ Major (1 M+)"),
        (100_000, "🎯 Mid (100 K+)"),
    )
    for threshold, label in tiers:
        if subs > threshold:
            return label
    return "👤"
|
900 |
-
|
901 |
-
@staticmethod
|
902 |
-
def format_number(n):
|
903 |
-
if n >= 1_000_000:
|
904 |
-
return f"{n/1_000_000:.1f} M"
|
905 |
-
if n >= 1_000:
|
906 |
-
return f"{n/1_000:.1f} K"
|
907 |
-
return str(n)
|
908 |
-
|
909 |
-
# ───────── report ──────
|
910 |
-
def format_video_info(self, info):
    """Render the analysis report for one video as a single string.

    Args:
        info: Metadata dict; ``title``, ``uploader``, ``duration``,
            ``view_count``, ``like_count`` and ``comment_count`` are read,
            and the whole dict is passed to
            ``generate_scene_breakdown_gemini`` for the scene list.

    Returns:
        A multi-line report: header, statistics, a 48-dash rule and one
        scene per line.
    """
    title = info.get("title") or ""
    uploader = info.get("uploader") or ""

    # The duration may be None or a float; ``:02d`` needs an int.
    total_seconds = int(info.get("duration") or 0)
    dur = f"{total_seconds // 60}:{total_seconds % 60:02d}"

    # A statistic that is present but None is shown as 0 instead of crashing.
    stats = " / ".join(
        self.format_number(info.get(key) or 0)
        for key in ("view_count", "like_count", "comment_count")
    )

    scenes = self.generate_scene_breakdown_gemini(info)
    # Put each scene on its own line; joining with "" ran them together.
    scene_text = "\n".join(scenes)

    return (
        "\n"
        f"🎬 **{title}**\n"
        f"Uploader: {uploader} Duration: {dur}\n"
        "\n"
        f"Views / Likes / Comments: {stats}\n"
        "\n"
        f"{'-' * 48}\n"
        f"{scene_text}\n"
    )
|
929 |
-
|
930 |
-
# ───────── metadata extraction ──────
|
931 |
-
def get_video_info(self, url, progress=gr.Progress(), cookiefile=None):
    """Fetch video metadata with yt-dlp without downloading the media.

    Args:
        url: Video URL; rejected unless ``is_valid_youtube_url`` accepts it.
        progress: Gradio progress handle; accepted but not used in this body.
        cookiefile: Optional path to a cookies file. It is used when it
            exists on disk; otherwise ``DEFAULT_COOKIE_FILE`` is used when
            that exists; otherwise no cookies are passed.

    Returns:
        ``(info, "OK")`` on success, or ``(None, message)`` when the URL is
        invalid or yt-dlp raises.
    """
    if not self.is_valid_youtube_url(url):
        return None, "❌ Invalid URL"

    # An explicit, existing cookie file wins; otherwise fall back to the default file.
    has_explicit_cookies = bool(cookiefile) and os.path.exists(cookiefile)
    if not has_explicit_cookies:
        cookiefile = str(DEFAULT_COOKIE_FILE) if DEFAULT_COOKIE_FILE.exists() else None

    options = {"noplaylist": True, "quiet": True}
    if cookiefile:
        options["cookiefile"] = cookiefile

    try:
        with yt_dlp.YoutubeDL(options) as ydl:
            return ydl.extract_info(url, download=False), "OK"
    except Exception as exc:
        # Any yt-dlp failure is reported to the caller as text, not raised.
        return None, f"yt-dlp error: {exc}"
|
951 |
-
|
952 |
-
# ───────── download ──────
|
953 |
-
def download_video(
    self, url, quality="best", audio_only=False, progress=gr.Progress(), cookiefile=None
):
    """Download a video (or its audio as MP3) with yt-dlp.

    Args:
        url: Video URL; rejected unless ``is_valid_youtube_url`` accepts it.
        quality: ``"720p"`` or ``"480p"`` cap the height accordingly; any
            other value caps it at 1080. Ignored when ``audio_only`` is set.
        audio_only: When true, fetch the best audio and convert it to
            192 kbps MP3 through the ``FFmpegExtractAudio`` post-processor.
        progress: Gradio progress handle; accepted but not used in this body.
        cookiefile: Optional cookies path; resolved the same way as in
            ``get_video_info`` (explicit file, then ``DEFAULT_COOKIE_FILE``).

    Returns:
        ``(temp_path, status_message)`` on success, ``(None, message)`` on
        an invalid URL, a missing output file or a download error.
    """
    if not self.is_valid_youtube_url(url):
        return None, "❌ Invalid URL"

    # An explicit, existing cookie file wins; otherwise fall back to the default file.
    has_explicit_cookies = bool(cookiefile) and os.path.exists(cookiefile)
    if not has_explicit_cookies:
        cookiefile = str(DEFAULT_COOKIE_FILE) if DEFAULT_COOKIE_FILE.exists() else None

    # The timestamp is embedded in the file name so the output can be found afterwards.
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    options = {
        "outtmpl": os.path.join(self.temp_downloads, f"%(title)s_{stamp}.%(ext)s"),
        "noplaylist": True,
    }
    if audio_only:
        options["format"] = "bestaudio/best"
        options["postprocessors"] = [
            {"key": "FFmpegExtractAudio", "preferredcodec": "mp3", "preferredquality": "192"}
        ]
    else:
        max_height = 720 if quality == "720p" else 480 if quality == "480p" else 1080
        options["format"] = f"best[height<={max_height}]"
    if cookiefile:
        options["cookiefile"] = cookiefile

    try:
        with yt_dlp.YoutubeDL(options) as ydl:
            ydl.extract_info(url, download=True)

        # Take the first file in the temp folder whose name carries this run's timestamp.
        for name in os.listdir(self.temp_downloads):
            if stamp not in name:
                continue
            temp_path = os.path.join(self.temp_downloads, name)
            saved_path = os.path.join(self.downloads_folder, name)
            try:
                shutil.copy2(temp_path, saved_path)
            except Exception:
                # Best-effort copy: if it fails, report the temp location instead.
                saved_path = temp_path
            return temp_path, f"✅ Saved: {saved_path}"
        return None, "❌ Downloaded file not found"
    except Exception as exc:
        return None, f"❌ Download failed: {exc}"
|
1002 |
-
|
1003 |
-
|
1004 |
-
# =================================================================
|
1005 |
-
# Gradio helper functions
|
1006 |
-
# =================================================================
|
1007 |
-
# Module-level downloader instance shared by the Gradio callback helpers below.
downloader = YouTubeDownloader()
|
1008 |
-
|
1009 |
-
|
1010 |
-
def configure_api_key(api_key):
    """Configure Gemini on the shared downloader and report the outcome.

    Args:
        api_key: Key text from the UI; a falsy value is rejected without
            calling the downloader, otherwise the key is stripped first.

    Returns:
        ``(status_message, gr.update(visible=ok))``.
    """
    # NOTE(review): two values are returned, but the click wiring in
    # create_interface lists a single output (api_status) - confirm whether
    # the visibility update is still meant to target a component.
    if api_key:
        ok, msg = downloader.configure_gemini(api_key.strip())
    else:
        ok, msg = False, "❌ API key required"
    return msg, gr.update(visible=ok)
|
1013 |
-
|
1014 |
-
|
1015 |
-
def analyze_with_cookies(url, cookies_file, progress=gr.Progress()):
    """Gradio callback: fetch metadata for ``url`` and return the text report.

    Args:
        url: Video URL entered in the UI.
        cookies_file: Optional path of an uploaded cookies file.
        progress: Gradio progress handle, forwarded to ``get_video_info``.

    Returns:
        The formatted report, or an error message prefixed with "❌".
    """
    info, err = downloader.get_video_info(url, progress, cookies_file)
    if info:
        return downloader.format_video_info(info)
    # get_video_info already prefixes some messages (e.g. "❌ Invalid URL");
    # do not add a second marker in that case.
    message = str(err)
    return message if message.startswith("❌") else f"❌ {message}"
|
1018 |
-
|
1019 |
-
|
1020 |
-
def download_with_cookies(url, qual, audio, cookies_file, progress=gr.Progress()):
    """Gradio callback: forward a download request to the shared downloader.

    Returns the ``(file_path, status_message)`` pair produced by
    ``downloader.download_video``.
    """
    file_path, status = downloader.download_video(
        url, qual, audio, progress, cookies_file
    )
    return file_path, status
|
1023 |
-
|
1024 |
-
|
1025 |
-
def get_transcript(url, cookies_file):
    """Gradio callback: return the transcript text for ``url``.

    ``cookies_file`` is part of the callback signature but is not used in
    this body. Failures are returned as a "❌ ..." string rather than raised.
    """
    video_id = extract_video_id(url)
    if video_id:
        try:
            return fetch_transcript(video_id)
        except Exception as exc:
            return f"❌ {exc}"
    return "❌ Invalid YouTube URL"
|
1033 |
-
|
1034 |
-
|
1035 |
-
# =================================================================
|
1036 |
-
# UI
|
1037 |
-
# =================================================================
|
1038 |
-
def create_interface():
    """Build and return the Gradio Blocks UI.

    The layout has a Gemini API-key section, a shared URL / cookies input
    row, and three tabs: video analysis, video download and transcript.

    Returns:
        The ``gr.Blocks`` instance, not yet launched.
    """

    def _api_status_message(api_key):
        # configure_api_key returns (message, visibility update), but only the
        # status textbox is wired as an output; pass just the message so the
        # handler returns exactly as many values as there are outputs.
        return configure_api_key(api_key)[0]

    def wrapped_dl(u, q, a, c, prog=gr.Progress()):
        # Reveal the file component only when a downloaded file actually exists.
        fp, st = download_with_cookies(u, q, a, c, prog)
        if fp and os.path.exists(fp):
            return st, gr.update(value=fp, visible=True)
        return st, gr.update(visible=False)

    with gr.Blocks(
        theme=gr.themes.Soft(), title="🎥 YouTube Video Analyzer & Downloader Pro"
    ) as iface:
        gr.HTML("<h1>🎥 YouTube Video Analyzer & Downloader Pro</h1>")

        # API configuration
        with gr.Group():
            gr.HTML("<h3>🔑 Google Gemini API Configuration</h3>")
            with gr.Row():
                api_key_in = gr.Textbox(
                    label="🔑 Google API Key", type="password", placeholder="Paste your Google API key…"
                )
                api_btn = gr.Button("🔧 Configure API", variant="secondary")
            api_status = gr.Textbox(
                label="API Status",
                value="❌ Gemini API not configured – Using fallback analysis",
                interactive=False,
                lines=1,
            )

        # Inputs shared by every tab
        with gr.Row():
            url_in = gr.Textbox(label="🔗 YouTube URL", placeholder="Paste YouTube video URL…")
            cookies_in = gr.File(
                label="🍪 Upload cookies.txt (optional)", file_types=[".txt"], type="filepath"
            )

        with gr.Tabs():
            # Analysis tab
            with gr.TabItem("📊 Video Analysis"):
                analyze_btn = gr.Button("🔍 Analyze Video", variant="primary")
                analysis_out = gr.Textbox(label="📊 Analysis Report", lines=30, show_copy_button=True)
                analyze_btn.click(
                    analyze_with_cookies, inputs=[url_in, cookies_in], outputs=analysis_out, show_progress=True
                )

            # Download tab
            with gr.TabItem("⬇️ Video Download"):
                with gr.Row():
                    quality_dd = gr.Dropdown(
                        choices=["best", "720p", "480p"], value="best", label="📺 Quality"
                    )
                    audio_cb = gr.Checkbox(label="🎵 Audio only (MP3)")
                download_btn = gr.Button("⬇️ Download Video", variant="primary")
                dl_status = gr.Textbox(label="📥 Download Status", lines=5, show_copy_button=True)
                dl_file = gr.File(label="📁 Downloaded File", visible=False)

                download_btn.click(
                    wrapped_dl,
                    inputs=[url_in, quality_dd, audio_cb, cookies_in],
                    outputs=[dl_status, dl_file],
                    show_progress=True,
                )

            # Transcript tab
            with gr.TabItem("🗒️ Transcript"):
                tr_btn = gr.Button("📜 Get Full Transcript", variant="primary")
                tr_out = gr.Textbox(
                    label="🗒️ Transcript (full)", lines=30, show_copy_button=True
                )
                tr_btn.click(
                    get_transcript, inputs=[url_in, cookies_in], outputs=tr_out, show_progress=True
                )

        # API button
        api_btn.click(_api_status_message, inputs=[api_key_in], outputs=[api_status])

        gr.HTML(
            """
            <div style="margin-top:20px;padding:15px;background:#f0f8ff;border-left:5px solid #4285f4;border-radius:10px;">
            <h3>💡 Tip: 쿠키 파일 자동 사용</h3>
            <p><code>www.youtube.com_cookies.txt</code> 파일을 <strong>app.py</strong>와 같은
            폴더에 두면 업로드 없이 자동 사용됩니다.</p>
            </div>
            """
        )
    return iface
|
1121 |
-
|
1122 |
-
|
1123 |
-
# =================================================================
|
1124 |
-
# Entry point
|
1125 |
-
# =================================================================
|
1126 |
-
if __name__ == "__main__":
    # Build the UI, then register the downloader's cleanup hook so it runs
    # when the interpreter exits, and finally start the Gradio server.
    demo = create_interface()
    import atexit

    atexit.register(downloader.cleanup)
    demo.launch(debug=True, show_error=True)
|
1132 |
-
#!/usr/bin/env python3
|
1133 |
-
"""
|
1134 |
-
YouTube Video Analyzer & Downloader Pro
|
1135 |
───────────────────────────────────────
|
1136 |
• `www.youtube.com_cookies.txt` 가 app.py 와 같은 폴더에 있으면 자동으로 사용
|
1137 |
• UI에서 쿠키를 업로드하면 그 파일이 *우선* 적용
|
|
|
1 |
#!/usr/bin/env python3
|
2 |
"""
|
3 |
YouTube Video Analyzer & Downloader Pro
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
───────────────────────────────────────
|
5 |
• `www.youtube.com_cookies.txt` 가 app.py 와 같은 폴더에 있으면 자동으로 사용
|
6 |
• UI에서 쿠키를 업로드하면 그 파일이 *우선* 적용
|