Update app.py
Browse files
app.py
CHANGED
@@ -1,27 +1,37 @@
|
|
1 |
#!/usr/bin/env python3
|
|
|
2 |
"""
|
3 |
YouTube Video Analyzer & Downloader Pro
|
4 |
βββββββββββββββββββββββββββββββββββββββ
|
5 |
-
β’ `www.youtube.com_cookies.txt`
|
6 |
β’ UIμμ μΏ ν€λ₯Ό μ
λ‘λνλ©΄ κ·Έ νμΌμ΄ *μ°μ * μ μ©
|
7 |
-
β’ βTranscriptβ
|
8 |
"""
|
9 |
|
10 |
-
#
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
12 |
from datetime import datetime, timedelta
|
13 |
from pathlib import Path
|
14 |
|
15 |
-
#
|
|
|
|
|
16 |
import gradio as gr
|
17 |
import yt_dlp
|
18 |
import google.generativeai as genai
|
19 |
-
from youtube_transcript_api import YouTubeTranscriptApi
|
20 |
|
21 |
-
#
|
|
|
|
|
22 |
DEFAULT_COOKIE_FILE = Path(__file__).with_name("www.youtube.com_cookies.txt")
|
23 |
|
24 |
-
#
|
25 |
_YT_REGEX = re.compile(
|
26 |
r"(https?://)?(www\.)?"
|
27 |
r"(youtube|youtu|youtube-nocookie)\.(com|be)/"
|
@@ -29,128 +39,104 @@ _YT_REGEX = re.compile(
|
|
29 |
)
|
30 |
|
31 |
|
32 |
-
#
|
33 |
-
# Helper :
|
34 |
-
#
|
35 |
def extract_video_id(url: str) -> str | None:
|
36 |
-
"""μ νλΈ URLμμ 11-κΈμ λΉλμ€ ID λ°ν(μμΌλ©΄ None)"""
|
37 |
m = _YT_REGEX.match(url)
|
38 |
return m.group(6) if m else None
|
39 |
|
40 |
|
41 |
def fetch_transcript(video_id: str, pref_lang=("ko", "en")) -> str:
|
42 |
-
"""
|
43 |
-
μ νλΈ μλ§μ κ°μ Έμ
|
44 |
-
**[MM:SS]** line νμμΌλ‘ κ²°ν©ν λ€ λ¬Έμμ΄λ‘ λ°ν.
|
45 |
-
"""
|
46 |
-
transcript = None
|
47 |
# μΈμ΄ μ°μ μμλλ‘ μλ
|
|
|
48 |
for lang in pref_lang:
|
49 |
try:
|
50 |
-
|
51 |
break
|
52 |
except Exception:
|
53 |
continue
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
t_mmss = ":".join(t.split(":")[-2:]) # MM:SS
|
62 |
lines.append(f"**[{t_mmss}]** {seg['text']}")
|
63 |
return "\n".join(lines)
|
64 |
|
65 |
|
66 |
-
#
|
67 |
# λ©μΈ ν΄λμ€
|
68 |
-
#
|
69 |
class YouTubeDownloader:
|
70 |
def __init__(self):
|
|
|
71 |
self.download_dir = tempfile.mkdtemp()
|
72 |
self.temp_downloads = tempfile.mkdtemp(prefix="youtube_downloads_")
|
|
|
73 |
self.downloads_folder = os.path.join(
|
74 |
os.path.expanduser("~"), "Downloads", "YouTube_Downloads"
|
75 |
)
|
76 |
os.makedirs(self.downloads_folder, exist_ok=True)
|
77 |
-
self.gemini_model = None
|
78 |
|
79 |
-
|
80 |
-
|
|
|
|
|
81 |
try:
|
82 |
genai.configure(api_key=api_key)
|
83 |
-
self.gemini_model = genai.GenerativeModel(
|
84 |
-
model_name="gemini-1.5-flash-latest"
|
85 |
-
)
|
86 |
return True, "β
Gemini API configured successfully!"
|
87 |
except Exception as e:
|
88 |
return False, f"β Failed to configure Gemini API: {e}"
|
89 |
|
90 |
# βββββββββ μ 리 βββββββββ
|
91 |
def cleanup(self):
|
92 |
-
|
93 |
-
|
94 |
-
shutil.rmtree(
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
# βββββββββ URL κ²μ¦ ββββββ
|
101 |
-
def is_valid_youtube_url(self, url):
|
102 |
return _YT_REGEX.match(url) is not None
|
103 |
|
104 |
-
# βββββββββ
|
105 |
-
def generate_scene_breakdown_gemini(self,
|
106 |
if not self.gemini_model:
|
107 |
-
return self.generate_scene_breakdown_fallback(
|
|
|
108 |
try:
|
109 |
-
duration =
|
110 |
-
title =
|
111 |
-
description =
|
112 |
-
if not duration:
|
113 |
-
return [
|
114 |
-
"**[Duration Unknown]**: Unable to generate timestamped breakdown β "
|
115 |
-
"video duration not available"
|
116 |
-
]
|
117 |
|
118 |
prompt = f"""
|
119 |
Analyze this YouTube video and create a highly detailed, scene-by-scene breakdown
|
120 |
-
with precise timestamps and specific descriptions:
|
121 |
-
|
122 |
-
Title: {title}
|
123 |
-
Duration: {duration}
|
124 |
-
Description: {description}
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
- Setting and environment details
|
133 |
-
- Props, objects, or products being shown
|
134 |
-
- Visual effects, text overlays, or graphics
|
135 |
-
- Mood, tone, and atmosphere
|
136 |
-
- Camera movements or angles (if apparent)
|
137 |
-
2. Dialogue Emphasis:
|
138 |
-
- Include short dialogue lines in **every scene** wherever plausible.
|
139 |
-
- Write lines like: Character: "Actual or inferred lineβ¦"
|
140 |
-
- If dialogue is not available, intelligently infer probable phrases
|
141 |
-
3. Timestamp Guidelines:
|
142 |
-
- <1 min : 2β3 sβ| 1β5 min : 3β5 s | 5β15 min : 5β10 s | >15 min : 10β15 s
|
143 |
-
- Max 20 scenes
|
144 |
-
4. Format: **[MM:SS-MM:SS]** description
|
145 |
"""
|
146 |
resp = self.gemini_model.generate_content(prompt)
|
147 |
if not resp or not resp.text:
|
148 |
-
return self.generate_scene_breakdown_fallback(
|
149 |
|
150 |
scenes, cur = [], ""
|
151 |
for line in resp.text.splitlines():
|
152 |
line = line.strip()
|
153 |
-
if line.startswith("**[") and "]
|
154 |
if cur:
|
155 |
scenes.append(cur.strip())
|
156 |
cur = line
|
@@ -158,170 +144,93 @@ IMPORTANT INSTRUCTIONS:
|
|
158 |
cur += "\n" + line
|
159 |
if cur:
|
160 |
scenes.append(cur.strip())
|
161 |
-
return scenes
|
|
|
162 |
except Exception:
|
163 |
-
return self.generate_scene_breakdown_fallback(
|
164 |
|
165 |
-
# βββββββββ
|
166 |
-
def generate_scene_breakdown_fallback(self,
|
167 |
-
duration =
|
168 |
if not duration:
|
169 |
-
return ["**[
|
170 |
-
|
171 |
-
if duration <= 60:
|
172 |
-
seg = 3
|
173 |
-
elif duration <= 300:
|
174 |
-
seg = 5
|
175 |
-
elif duration <= 900:
|
176 |
-
seg = 10
|
177 |
-
else:
|
178 |
-
seg = 15
|
179 |
|
|
|
180 |
total = min(duration // seg + 1, 20)
|
181 |
-
|
182 |
-
video_info.get("title", ""), video_info.get("description", "")
|
183 |
-
)
|
184 |
-
scenes = []
|
185 |
for i in range(total):
|
186 |
s, e = i * seg, min(i * seg + seg - 1, duration)
|
187 |
scenes.append(
|
188 |
-
f"**[{s//60:02d}:{s%60:02d}-{e//60:02d}:{e%60:02d}]
|
189 |
-
|
190 |
)
|
191 |
return scenes
|
192 |
|
193 |
-
# βββββββββ
|
194 |
-
def detect_video_type_detailed(self, title, desc):
|
195 |
-
t = (title + " " + desc).lower()
|
196 |
-
if any(x in t for x in ["tutorial", "how to", "guide", "diy"]):
|
197 |
-
return "tutorial"
|
198 |
-
if any(x in t for x in ["review", "unboxing", "comparison"]):
|
199 |
-
return "review"
|
200 |
-
if any(x in t for x in ["vlog", "daily", "routine"]):
|
201 |
-
return "vlog"
|
202 |
-
if any(x in t for x in ["music", "song", "cover"]):
|
203 |
-
return "music"
|
204 |
-
if any(x in t for x in ["comedy", "prank", "challenge"]):
|
205 |
-
return "entertainment"
|
206 |
-
if any(x in t for x in ["news", "update", "report"]):
|
207 |
-
return "news"
|
208 |
-
if any(x in t for x in ["cooking", "recipe", "food"]):
|
209 |
-
return "cooking"
|
210 |
-
if any(x in t for x in ["workout", "fitness", "yoga"]):
|
211 |
-
return "fitness"
|
212 |
-
return "general"
|
213 |
-
|
214 |
-
def generate_contextual_description(
|
215 |
-
self, idx, total, vtype, uploader, title
|
216 |
-
):
|
217 |
-
if idx == 0:
|
218 |
-
return "The creator greets viewers and introduces the video."
|
219 |
-
if idx == total - 1:
|
220 |
-
return "The creator wraps up and thanks viewers."
|
221 |
-
return "Content continues according to the video type."
|
222 |
-
|
223 |
-
# βββββββββ quick-detect helpers (μμ½) ββββββ
|
224 |
-
def detect_video_type(self, title, desc):
|
225 |
-
t = (title + " " + desc).lower()
|
226 |
-
if any(x in t for x in ["music", "song", "album"]):
|
227 |
-
return "π΅ Music"
|
228 |
-
if any(x in t for x in ["tutorial", "guide"]):
|
229 |
-
return "π Tutorial"
|
230 |
-
if any(x in t for x in ["comedy", "vlog"]):
|
231 |
-
return "π Entertainment"
|
232 |
-
if any(x in t for x in ["news", "report"]):
|
233 |
-
return "π° News"
|
234 |
-
if any(x in t for x in ["review", "unboxing"]):
|
235 |
-
return "β Review"
|
236 |
-
return "π¬ General"
|
237 |
-
|
238 |
-
def detect_background_music(self, video_info):
|
239 |
-
title = video_info.get("title", "").lower()
|
240 |
-
if "music" in title:
|
241 |
-
return "π΅ Original music"
|
242 |
-
if "tutorial" in title:
|
243 |
-
return "π Minimal music"
|
244 |
-
return "πΌ Background music"
|
245 |
-
|
246 |
-
def detect_influencer_status(self, video_info):
|
247 |
-
subs = video_info.get("channel_followers", 0)
|
248 |
-
if subs > 10_000_000:
|
249 |
-
return "π Mega (10 M+)"
|
250 |
-
if subs > 1_000_000:
|
251 |
-
return "β Major (1 M+)"
|
252 |
-
if subs > 100_000:
|
253 |
-
return "π― Mid (100 K+)"
|
254 |
-
return "π€"
|
255 |
-
|
256 |
@staticmethod
|
257 |
-
def
|
258 |
if n >= 1_000_000:
|
259 |
return f"{n/1_000_000:.1f} M"
|
260 |
if n >= 1_000:
|
261 |
return f"{n/1_000:.1f} K"
|
262 |
return str(n)
|
263 |
|
264 |
-
#
|
265 |
-
def format_video_info(self, info):
|
266 |
-
title = info.get("title", "")
|
267 |
-
uploader = info.get("uploader", "")
|
268 |
duration = info.get("duration", 0)
|
269 |
dur = f"{duration//60}:{duration%60:02d}"
|
270 |
-
views = info.get("view_count", 0)
|
271 |
-
likes = info.get("like_count", 0)
|
272 |
-
comments = info.get("comment_count", 0)
|
273 |
-
scenes = self.generate_scene_breakdown_gemini(info)
|
274 |
|
275 |
-
return f"""
|
276 |
π¬ **{title}**
|
277 |
-
Uploader: {uploader}
|
278 |
-
|
279 |
-
Views / Likes / Comments: {self.format_number(views)} / {self.format_number(likes)} / {self.format_number(comments)}
|
280 |
|
281 |
{'-'*48}
|
282 |
-
{
|
283 |
"""
|
284 |
|
285 |
-
# βββββββββ λ©νλ°μ΄ν°
|
286 |
-
def get_video_info(self, url
|
287 |
-
if not self.is_valid_youtube_url(url):
|
288 |
-
return None, "β Invalid URL"
|
289 |
-
|
290 |
if cookiefile and os.path.exists(cookiefile):
|
291 |
-
|
292 |
elif DEFAULT_COOKIE_FILE.exists():
|
293 |
-
|
294 |
else:
|
295 |
-
|
296 |
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
info = ydl.extract_info(url, download=False)
|
303 |
-
return info, "OK"
|
304 |
-
except Exception as e:
|
305 |
-
return None, f"yt-dlp error: {e}"
|
306 |
|
307 |
-
# βββββββββ λ€μ΄λ‘λ
|
308 |
def download_video(
|
309 |
-
self,
|
|
|
|
|
|
|
|
|
310 |
):
|
311 |
-
if not self.is_valid_youtube_url(url):
|
312 |
-
return None, "β Invalid URL"
|
313 |
if cookiefile and os.path.exists(cookiefile):
|
314 |
-
|
315 |
elif DEFAULT_COOKIE_FILE.exists():
|
316 |
-
|
317 |
else:
|
318 |
-
|
319 |
|
320 |
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
|
321 |
-
ydl_opts = {
|
322 |
"outtmpl": os.path.join(self.temp_downloads, f"%(title)s_{ts}.%(ext)s"),
|
323 |
"noplaylist": True,
|
324 |
}
|
|
|
325 |
if audio_only:
|
326 |
ydl_opts["format"] = "bestaudio/best"
|
327 |
ydl_opts["postprocessors"] = [
|
@@ -334,31 +243,29 @@ Views / Likes / Comments: {self.format_number(views)} / {self.format_number(like
|
|
334 |
ydl_opts["format"] = "best[height<=480]"
|
335 |
else:
|
336 |
ydl_opts["format"] = "best[height<=1080]"
|
337 |
-
if cookiefile:
|
338 |
-
ydl_opts["cookiefile"] = cookiefile
|
339 |
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
return
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
|
358 |
|
359 |
-
#
|
360 |
-
# Gradio Helper
|
361 |
-
#
|
362 |
downloader = YouTubeDownloader()
|
363 |
|
364 |
|
@@ -367,117 +274,104 @@ def configure_api_key(api_key):
|
|
367 |
return msg, gr.update(visible=ok)
|
368 |
|
369 |
|
370 |
-
def
|
371 |
-
|
372 |
-
|
|
|
|
|
|
|
373 |
|
374 |
|
375 |
-
def
|
376 |
-
fp, msg = downloader.download_video(url, qual, audio,
|
377 |
-
return
|
378 |
|
379 |
|
380 |
-
def
|
381 |
vid = extract_video_id(url)
|
382 |
if not vid:
|
383 |
-
return "β Invalid
|
384 |
try:
|
385 |
return fetch_transcript(vid)
|
386 |
except Exception as e:
|
387 |
return f"β {e}"
|
388 |
|
389 |
|
390 |
-
#
|
391 |
# UI
|
392 |
-
#
|
393 |
def create_interface():
|
394 |
-
with gr.Blocks(
|
395 |
-
|
396 |
-
) as iface:
|
397 |
gr.HTML("<h1>π₯ YouTube Video Analyzer & Downloader Pro</h1>")
|
398 |
|
399 |
# API μ€μ
|
400 |
with gr.Group():
|
401 |
gr.HTML("<h3>π Google Gemini API Configuration</h3>")
|
402 |
with gr.Row():
|
403 |
-
api_key_in = gr.Textbox(
|
404 |
-
label="π Google API Key", type="password", placeholder="Paste your Google API keyβ¦"
|
405 |
-
)
|
406 |
api_btn = gr.Button("π§ Configure API", variant="secondary")
|
407 |
api_status = gr.Textbox(
|
408 |
-
label="API Status",
|
409 |
value="β Gemini API not configured β Using fallback analysis",
|
410 |
interactive=False,
|
411 |
lines=1,
|
|
|
412 |
)
|
413 |
|
414 |
# κ³΅ν΅ μ
λ ₯
|
415 |
with gr.Row():
|
416 |
url_in = gr.Textbox(label="π YouTube URL", placeholder="Paste YouTube video URLβ¦")
|
417 |
-
|
418 |
-
label="πͺ Upload cookies.txt (optional)", file_types=[".txt"], type="filepath"
|
419 |
-
)
|
420 |
|
421 |
with gr.Tabs():
|
422 |
-
# λΆμ
|
423 |
with gr.TabItem("π Video Analysis"):
|
424 |
-
analyze_btn = gr.Button("π Analyze
|
425 |
analysis_out = gr.Textbox(label="π Analysis Report", lines=30, show_copy_button=True)
|
426 |
-
analyze_btn.click(
|
427 |
-
|
428 |
-
|
429 |
-
# λ€μ΄λ‘λ ν
|
430 |
with gr.TabItem("β¬οΈ Video Download"):
|
431 |
with gr.Row():
|
432 |
-
quality_dd = gr.Dropdown(
|
433 |
-
choices=["best", "720p", "480p"], value="best", label="πΊ Quality"
|
434 |
-
)
|
435 |
audio_cb = gr.Checkbox(label="π΅ Audio only (MP3)")
|
436 |
-
download_btn = gr.Button("β¬οΈ Download
|
437 |
-
dl_status = gr.Textbox(label="π₯
|
438 |
-
dl_file = gr.File(label="π
|
439 |
-
|
440 |
-
def
|
441 |
-
|
442 |
-
return (
|
443 |
-
|
444 |
-
gr.update(visible=False),
|
445 |
)
|
446 |
|
447 |
-
download_btn.click(
|
448 |
-
|
449 |
-
|
450 |
-
outputs=[dl_status, dl_file],
|
451 |
-
show_progress=True,
|
452 |
-
)
|
453 |
-
# μλ§ ν NEW
|
454 |
with gr.TabItem("ποΈ Transcript"):
|
455 |
-
tr_btn = gr.Button("π Get
|
456 |
-
tr_out = gr.Textbox(
|
457 |
-
|
458 |
-
|
459 |
-
tr_btn.click(
|
460 |
-
get_transcript, inputs=[url_in, cookies_in], outputs=tr_out, show_progress=True
|
461 |
-
)
|
462 |
-
|
463 |
-
# API λ²νΌ
|
464 |
api_btn.click(configure_api_key, inputs=[api_key_in], outputs=[api_status])
|
465 |
|
466 |
gr.HTML(
|
467 |
"""
|
468 |
<div style="margin-top:20px;padding:15px;background:#f0f8ff;border-left:5px solid #4285f4;border-radius:10px;">
|
469 |
-
<h3>π‘ Tip
|
470 |
-
<p><code>www.youtube.com_cookies.txt</code> νμΌμ <
|
471 |
ν΄λμ λλ©΄ μ
λ‘λ μμ΄ μλ μ¬μ©λ©λλ€.</p>
|
472 |
</div>
|
473 |
"""
|
474 |
)
|
|
|
475 |
return iface
|
476 |
|
477 |
|
478 |
-
#
|
479 |
-
#
|
480 |
-
#
|
481 |
if __name__ == "__main__":
|
482 |
demo = create_interface()
|
483 |
import atexit
|
|
|
1 |
#!/usr/bin/env python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
"""
|
4 |
YouTube Video Analyzer & Downloader Pro
|
5 |
βββββββββββββββββββββββββββββββββββββββ
|
6 |
+
β’ `www.youtube.com_cookies.txt` νμΌμ΄ app.pyμ κ°μ ν΄λμ μμΌλ©΄ μλ μ¬μ©
|
7 |
β’ UIμμ μΏ ν€λ₯Ό μ
λ‘λνλ©΄ κ·Έ νμΌμ΄ *μ°μ * μ μ©
|
8 |
+
β’ βTranscriptβ νμμ **μ 체 μλ§ + MM:SS νμμ€ν¬ν** μ 곡
|
9 |
"""
|
10 |
|
11 |
+
# ββββββββββββββββββββββββββββββββββββββ
|
12 |
+
# Standard library
|
13 |
+
# ββββββββββββββββββββββββββββββββββββββ
|
14 |
+
import os
|
15 |
+
import re
|
16 |
+
import shutil
|
17 |
+
import tempfile
|
18 |
from datetime import datetime, timedelta
|
19 |
from pathlib import Path
|
20 |
|
21 |
+
# ββββββββββββββββββββββββββββββββββββββ
|
22 |
+
# Third-party libraries
|
23 |
+
# ββββββββββββββββββββββββββββββββββββββ
|
24 |
import gradio as gr
|
25 |
import yt_dlp
|
26 |
import google.generativeai as genai
|
27 |
+
from youtube_transcript_api import YouTubeTranscriptApi
|
28 |
|
29 |
+
# ββββββββββββββββββββββββββββββββββββββ
|
30 |
+
# Constants
|
31 |
+
# ββββββββββββββββββββββββββββββββββββββ
|
32 |
DEFAULT_COOKIE_FILE = Path(__file__).with_name("www.youtube.com_cookies.txt")
|
33 |
|
34 |
+
# μ νλΈ URL μ κ·μ
|
35 |
_YT_REGEX = re.compile(
|
36 |
r"(https?://)?(www\.)?"
|
37 |
r"(youtube|youtu|youtube-nocookie)\.(com|be)/"
|
|
|
39 |
)
|
40 |
|
41 |
|
42 |
+
# ββββββββββββββββββββββββββββββββββββββ
|
43 |
+
# Helper : video-ID μΆμΆ / μλ§ κ°μ Έμ€κΈ°
|
44 |
+
# ββββββββββββββββββββββββββββββββββββββ
|
45 |
def extract_video_id(url: str) -> str | None:
    """Return capture group 6 of ``_YT_REGEX`` for *url*, or ``None`` if it does not match.

    The pattern is anchored at the start of the string (``re.match``), so
    *url* must begin with a recognised YouTube URL form.
    """
    match = _YT_REGEX.match(url)
    if match is None:
        return None
    return match.group(6)
|
48 |
|
49 |
|
50 |
def fetch_transcript(video_id: str, pref_lang=("ko", "en")) -> str:
    """Fetch a video's transcript and render it as timestamped Markdown lines.

    Each language in *pref_lang* is tried in order with
    ``YouTubeTranscriptApi.get_transcript(video_id, languages=[lang])``; the
    first one that succeeds wins. If none succeeds, one last request is made
    without a language filter, and any exception from that call propagates
    to the caller.

    Args:
        video_id: YouTube video ID to look up.
        pref_lang: Language codes in priority order.

    Returns:
        One line per transcript segment, joined with newlines, formatted as
        ``**[MM:SS]** text``. Segments starting at or after one hour are
        rendered as ``**[H:MM:SS]** text`` so the hour is not lost.
    """
    segments = None
    for lang in pref_lang:
        try:
            segments = YouTubeTranscriptApi.get_transcript(video_id, languages=[lang])
            break
        except Exception:
            # Best effort: this language is unavailable, try the next one.
            continue
    if segments is None:
        # Last attempt with the library's default language selection.
        segments = YouTubeTranscriptApi.get_transcript(video_id)

    lines: list[str] = []
    for seg in segments:
        minutes, seconds = divmod(int(seg["start"]), 60)
        hours, minutes = divmod(minutes, 60)
        if hours:
            # Keep the hour field; slicing it off made 1:02:03 read as 02:03.
            stamp = f"{hours}:{minutes:02d}:{seconds:02d}"
        else:
            stamp = f"{minutes:02d}:{seconds:02d}"
        lines.append(f"**[{stamp}]** {seg['text']}")
    return "\n".join(lines)
|
68 |
|
69 |
|
70 |
+
# ββββββββββββββββββββββββββββββββββββββ
|
71 |
# Main class
|
72 |
+
# ββββββββββββββββββββββββββββββββββββββ
|
73 |
class YouTubeDownloader:
|
74 |
def __init__(self):
    """Create the working directories and start with no Gemini model.

    Two temporary directories are created with ``tempfile.mkdtemp`` and a
    ``YouTube_Downloads`` folder is ensured under the user's ``Downloads``
    directory.
    """
    # Gemini model handle; None until a model is configured.
    self.gemini_model = None

    # Temporary directories.
    self.download_dir = tempfile.mkdtemp()
    self.temp_downloads = tempfile.mkdtemp(prefix="youtube_downloads_")

    # Persistent folder under ~/Downloads.
    home = os.path.expanduser("~")
    self.downloads_folder = os.path.join(home, "Downloads", "YouTube_Downloads")
    os.makedirs(self.downloads_folder, exist_ok=True)
|
85 |
+
|
86 |
+
# βββββββββ Gemini μ€μ βββββββββ
|
87 |
+
def configure_gemini(self, api_key: str):
|
88 |
try:
|
89 |
genai.configure(api_key=api_key)
|
90 |
+
self.gemini_model = genai.GenerativeModel("gemini-1.5-flash-latest")
|
|
|
|
|
91 |
return True, "β
Gemini API configured successfully!"
|
92 |
except Exception as e:
|
93 |
return False, f"β Failed to configure Gemini API: {e}"
|
94 |
|
95 |
# βββββββββ μ 리 βββββββββ
|
96 |
def cleanup(self):
    """Delete both temporary directories, ignoring every failure."""
    temp_dirs = (self.download_dir, self.temp_downloads)
    for directory in temp_dirs:
        try:
            shutil.rmtree(directory, ignore_errors=True)
        except Exception:
            # Deliberate best effort: a failed removal must not stop cleanup.
            continue
|
102 |
+
|
103 |
+
# βββββββββ μ ν¨μ± κ²μ¬ βββββββββ
|
104 |
+
def is_valid_youtube_url(self, url: str) -> bool:
    """Report whether *url* matches the module-level ``_YT_REGEX`` pattern."""
    matched = _YT_REGEX.match(url)
    return matched is not None
|
106 |
|
107 |
+
# βββββββββ μ₯λ©΄ λΆμ : Gemini βββββββββ
|
108 |
+
def generate_scene_breakdown_gemini(self, info: dict) -> list[str]:
|
109 |
if not self.gemini_model:
|
110 |
+
return self.generate_scene_breakdown_fallback(info)
|
111 |
+
|
112 |
try:
|
113 |
+
duration = info.get("duration", 0)
|
114 |
+
title = info.get("title", "")
|
115 |
+
description = info.get("description", "")[:1500]
|
|
|
|
|
|
|
|
|
|
|
116 |
|
117 |
prompt = f"""
|
118 |
Analyze this YouTube video and create a highly detailed, scene-by-scene breakdown
|
119 |
+
with precise timestamps (MM:SS-MM:SS) and specific descriptions:
|
120 |
+
|
121 |
+
Title : {title}
|
122 |
+
Duration : {duration} s
|
123 |
+
Description : {description}
|
124 |
+
|
125 |
+
Follow these rules:
|
126 |
+
β’ Include visual details, actions, inferred dialogue, setting, props, graphics
|
127 |
+
β’ Dialogue : short lines for **every** scene if plausible
|
128 |
+
β’ Timestamp : 2-3 s (<1 min) / 3-5 s (1-5 min) / 5-10 s (5-15 min) / 10-15 s (>15 min)
|
129 |
+
β’ β€ 20 scenes total
|
130 |
+
β’ Formatting β **[MM:SS-MM:SS]** Description
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
"""
|
132 |
resp = self.gemini_model.generate_content(prompt)
|
133 |
if not resp or not resp.text:
|
134 |
+
return self.generate_scene_breakdown_fallback(info)
|
135 |
|
136 |
scenes, cur = [], ""
|
137 |
for line in resp.text.splitlines():
|
138 |
line = line.strip()
|
139 |
+
if line.startswith("**[") and "]**" in line:
|
140 |
if cur:
|
141 |
scenes.append(cur.strip())
|
142 |
cur = line
|
|
|
144 |
cur += "\n" + line
|
145 |
if cur:
|
146 |
scenes.append(cur.strip())
|
147 |
+
return scenes or self.generate_scene_breakdown_fallback(info)
|
148 |
+
|
149 |
except Exception:
|
150 |
+
return self.generate_scene_breakdown_fallback(info)
|
151 |
|
152 |
+
# βββββββββ μ₯λ©΄ λΆμ : Fallback βββββββββ
|
153 |
+
def generate_scene_breakdown_fallback(self, info: dict) -> list[str]:
    """Build a placeholder scene list from the video duration alone.

    The segment length depends on the duration: 3 s up to one minute, 5 s up
    to five minutes, 10 s up to fifteen minutes, 15 s beyond that. At most
    20 scenes are produced, so for long videos the list covers only the
    first ``20 * segment`` seconds.

    Args:
        info: Video metadata; only the ``"duration"`` key (seconds) is read.

    Returns:
        Lines of the form ``**[MM:SS-MM:SS]** Content continues...`` (with a
        real ellipsis character), or a single "unable to determine" line
        when the duration is missing, ``None`` or zero.
    """
    # The duration may arrive as a float or as None; integer arithmetic is
    # required below by range() and the ``:02d`` format specs.
    duration = int(info.get("duration") or 0)
    if not duration:
        return ["**[00:00]** Unable to determine duration"]

    if duration <= 60:
        seg = 3
    elif duration <= 300:
        seg = 5
    elif duration <= 900:
        seg = 10
    else:
        seg = 15

    total = min(duration // seg + 1, 20)
    scenes: list[str] = []
    for i in range(total):
        start = i * seg
        end = min(start + seg - 1, duration)  # never run past the video end
        scenes.append(
            f"**[{start // 60:02d}:{start % 60:02d}-{end // 60:02d}:{end % 60:02d}]** "
            "Content continues\u2026"
        )
    return scenes
|
168 |
|
169 |
+
# βββββββββ μ«μ ν¬λ§· βββββββββ
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
170 |
@staticmethod
|
171 |
+
def fmt_num(n: int) -> str:
|
172 |
if n >= 1_000_000:
|
173 |
return f"{n/1_000_000:.1f} M"
|
174 |
if n >= 1_000:
|
175 |
return f"{n/1_000:.1f} K"
|
176 |
return str(n)
|
177 |
|
178 |
+
# βββββββββ κ²°κ³Ό 리ν¬νΈ βββββββββ
|
179 |
+
def format_video_info(self, info: dict) -> str:
|
180 |
+
title = info.get("title", "Unknown")
|
181 |
+
uploader = info.get("uploader", "Unknown")
|
182 |
duration = info.get("duration", 0)
|
183 |
dur = f"{duration//60}:{duration%60:02d}"
|
184 |
+
views = self.fmt_num(info.get("view_count", 0))
|
185 |
+
likes = self.fmt_num(info.get("like_count", 0))
|
186 |
+
comments = self.fmt_num(info.get("comment_count", 0))
|
187 |
+
scenes = "\n".join(self.generate_scene_breakdown_gemini(info))
|
188 |
|
189 |
+
return f"""\
|
190 |
π¬ **{title}**
|
191 |
+
Uploader : {uploader}
|
192 |
+
Duration : {dur}βViews / Likes / Comments : {views} / {likes} / {comments}
|
|
|
193 |
|
194 |
{'-'*48}
|
195 |
+
{scenes}
|
196 |
"""
|
197 |
|
198 |
+
# βββββββββ λ©νλ°μ΄ν° βββββββββ
|
199 |
+
def get_video_info(self, url: str, cookiefile: str | None = None):
    """Return yt-dlp's metadata for *url* without downloading anything.

    Cookie selection: *cookiefile* is used when it is given and exists on
    disk; otherwise ``DEFAULT_COOKIE_FILE`` is used when it exists;
    otherwise no cookie file is passed. Exceptions raised by yt-dlp are not
    caught here.
    """
    options = {"noplaylist": True, "quiet": True}
    if cookiefile and os.path.exists(cookiefile):
        options["cookiefile"] = cookiefile
    elif DEFAULT_COOKIE_FILE.exists():
        options["cookiefile"] = str(DEFAULT_COOKIE_FILE)

    with yt_dlp.YoutubeDL(options) as ydl:
        return ydl.extract_info(url, download=False)
|
|
|
|
|
|
|
|
|
212 |
|
213 |
+
# βββββββββ λ€μ΄λ‘λ βββββββββ
|
214 |
def download_video(
|
215 |
+
self,
|
216 |
+
url: str,
|
217 |
+
quality: str = "best",
|
218 |
+
audio_only: bool = False,
|
219 |
+
cookiefile: str | None = None,
|
220 |
):
|
|
|
|
|
221 |
if cookiefile and os.path.exists(cookiefile):
|
222 |
+
ck = cookiefile
|
223 |
elif DEFAULT_COOKIE_FILE.exists():
|
224 |
+
ck = str(DEFAULT_COOKIE_FILE)
|
225 |
else:
|
226 |
+
ck = None
|
227 |
|
228 |
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
|
229 |
+
ydl_opts: dict = {
|
230 |
"outtmpl": os.path.join(self.temp_downloads, f"%(title)s_{ts}.%(ext)s"),
|
231 |
"noplaylist": True,
|
232 |
}
|
233 |
+
|
234 |
if audio_only:
|
235 |
ydl_opts["format"] = "bestaudio/best"
|
236 |
ydl_opts["postprocessors"] = [
|
|
|
243 |
ydl_opts["format"] = "best[height<=480]"
|
244 |
else:
|
245 |
ydl_opts["format"] = "best[height<=1080]"
|
|
|
|
|
246 |
|
247 |
+
if ck:
|
248 |
+
ydl_opts["cookiefile"] = ck
|
249 |
+
|
250 |
+
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
251 |
+
ydl.extract_info(url, download=True)
|
252 |
+
|
253 |
+
# temp νμΌ μ°ΎοΏ½οΏ½οΏ½
|
254 |
+
for f in os.listdir(self.temp_downloads):
|
255 |
+
if ts in f:
|
256 |
+
src = os.path.join(self.temp_downloads, f)
|
257 |
+
dst = os.path.join(self.downloads_folder, f)
|
258 |
+
try:
|
259 |
+
shutil.copy2(src, dst)
|
260 |
+
return dst, "β
Saved to Downloads"
|
261 |
+
except Exception:
|
262 |
+
return src, "β
Saved to temp (copy failed)"
|
263 |
+
return None, "β File not found"
|
264 |
|
265 |
|
266 |
+
# ββββββββββββββββββββββββββββββββββββββ
|
267 |
+
# Gradio Helper
|
268 |
+
# ββββββββββββββββββββββββββββββββββββββ
|
269 |
downloader = YouTubeDownloader()
|
270 |
|
271 |
|
|
|
274 |
return msg, gr.update(visible=ok)
|
275 |
|
276 |
|
277 |
+
def analyze_fn(url, cookie):
|
278 |
+
try:
|
279 |
+
info = downloader.get_video_info(url, cookie)
|
280 |
+
return downloader.format_video_info(info)
|
281 |
+
except Exception as e:
|
282 |
+
return f"β {e}"
|
283 |
|
284 |
|
285 |
+
def download_fn(url, qual, audio, cookie):
    """Run ``downloader.download_video`` and return ``(message, file_path)``.

    ``download_video`` yields ``(file_path, message)``; the pair is swapped
    here.
    """
    file_path, message = downloader.download_video(url, qual, audio, cookie)
    result = (message, file_path)
    return result
|
288 |
|
289 |
|
290 |
+
def transcript_fn(url, _cookie):
|
291 |
vid = extract_video_id(url)
|
292 |
if not vid:
|
293 |
+
return "β Invalid URL"
|
294 |
try:
|
295 |
return fetch_transcript(vid)
|
296 |
except Exception as e:
|
297 |
return f"β {e}"
|
298 |
|
299 |
|
300 |
+
# ββββββββββββββββββββββββββββββββββββββ
|
301 |
# UI
|
302 |
+
# ββββββββββββββββββββββββββββββββββββββ
|
303 |
def create_interface():
|
304 |
+
with gr.Blocks(theme=gr.themes.Soft(), title="π₯ YouTube Video Analyzer & Downloader Pro") as iface:
|
305 |
+
|
|
|
306 |
gr.HTML("<h1>π₯ YouTube Video Analyzer & Downloader Pro</h1>")
|
307 |
|
308 |
# API μ€μ
|
309 |
with gr.Group():
|
310 |
gr.HTML("<h3>π Google Gemini API Configuration</h3>")
|
311 |
with gr.Row():
|
312 |
+
api_key_in = gr.Textbox(label="π Google API Key", type="password")
|
|
|
|
|
313 |
api_btn = gr.Button("π§ Configure API", variant="secondary")
|
314 |
api_status = gr.Textbox(
|
|
|
315 |
value="β Gemini API not configured β Using fallback analysis",
|
316 |
interactive=False,
|
317 |
lines=1,
|
318 |
+
label="API Status",
|
319 |
)
|
320 |
|
321 |
# κ³΅ν΅ μ
λ ₯
|
322 |
with gr.Row():
|
323 |
url_in = gr.Textbox(label="π YouTube URL", placeholder="Paste YouTube video URLβ¦")
|
324 |
+
cookie_in = gr.File(label="πͺ Upload cookies.txt (optional)", type="filepath", file_types=[".txt"])
|
|
|
|
|
325 |
|
326 |
with gr.Tabs():
|
327 |
+
# λΆμ
|
328 |
with gr.TabItem("π Video Analysis"):
|
329 |
+
analyze_btn = gr.Button("π Analyze", variant="primary")
|
330 |
analysis_out = gr.Textbox(label="π Analysis Report", lines=30, show_copy_button=True)
|
331 |
+
analyze_btn.click(analyze_fn, [url_in, cookie_in], analysis_out, show_progress=True)
|
332 |
+
|
333 |
+
# λ€μ΄λ‘λ
|
|
|
334 |
with gr.TabItem("β¬οΈ Video Download"):
|
335 |
with gr.Row():
|
336 |
+
quality_dd = gr.Dropdown(["best", "720p", "480p"], value="best", label="πΊ Quality")
|
|
|
|
|
337 |
audio_cb = gr.Checkbox(label="π΅ Audio only (MP3)")
|
338 |
+
download_btn = gr.Button("β¬οΈ Download", variant="primary")
|
339 |
+
dl_status = gr.Textbox(label="π₯ Status", lines=5, show_copy_button=True)
|
340 |
+
dl_file = gr.File(label="π File", visible=False)
|
341 |
+
|
342 |
+
def wrap_dl(u, q, a, c):
|
343 |
+
msg, fp = download_fn(u, q, a, c)
|
344 |
+
return (
|
345 |
+
msg,
|
346 |
+
gr.update(value=fp, visible=True) if fp and os.path.exists(fp) else gr.update(visible=False),
|
347 |
)
|
348 |
|
349 |
+
download_btn.click(wrap_dl, [url_in, quality_dd, audio_cb, cookie_in], [dl_status, dl_file], show_progress=True)
|
350 |
+
|
351 |
+
# μλ§
|
|
|
|
|
|
|
|
|
352 |
with gr.TabItem("ποΈ Transcript"):
|
353 |
+
tr_btn = gr.Button("π Get Transcript", variant="primary")
|
354 |
+
tr_out = gr.Textbox(label="Transcript", lines=30, show_copy_button=True)
|
355 |
+
tr_btn.click(transcript_fn, [url_in, cookie_in], tr_out, show_progress=True)
|
356 |
+
|
|
|
|
|
|
|
|
|
|
|
357 |
api_btn.click(configure_api_key, inputs=[api_key_in], outputs=[api_status])
|
358 |
|
359 |
gr.HTML(
|
360 |
"""
|
361 |
<div style="margin-top:20px;padding:15px;background:#f0f8ff;border-left:5px solid #4285f4;border-radius:10px;">
|
362 |
+
<h3>π‘ Tip</h3>
|
363 |
+
<p><code>www.youtube.com_cookies.txt</code> νμΌμ <b>app.py</b>μ κ°μ
|
364 |
ν΄λμ λλ©΄ μ
λ‘λ μμ΄ μλ μ¬μ©λ©λλ€.</p>
|
365 |
</div>
|
366 |
"""
|
367 |
)
|
368 |
+
|
369 |
return iface
|
370 |
|
371 |
|
372 |
+
# ββββββββββββββββββββββββββββββββββββββ
|
373 |
+
# Entrypoint
|
374 |
+
# ββββββββββββββββββββββββββββββββββββββ
|
375 |
if __name__ == "__main__":
|
376 |
demo = create_interface()
|
377 |
import atexit
|