fantaxy committed on
Commit
592f28f
·
verified ·
1 Parent(s): 86335a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +180 -286
app.py CHANGED
@@ -1,27 +1,37 @@
1
  #!/usr/bin/env python3
 
2
  """
3
  YouTube Video Analyzer & Downloader Pro
4
  ───────────────────────────────────────
5
- β€’ `www.youtube.com_cookies.txt` κ°€ app.py 와 같은 폴더에 있으면 μžλ™μœΌλ‘œ μ‚¬μš©
6
  β€’ UIμ—μ„œ μΏ ν‚€λ₯Ό μ—…λ‘œλ“œν•˜λ©΄ κ·Έ 파일이 *μš°μ„ * 적용
7
- β€’ β€œTranscript” 탭을 μΆ”κ°€ν•΄ **전체 μžλ§‰ + MM:SS νƒ€μž„μŠ€νƒ¬ν”„** 좜λ ₯
8
  """
9
 
10
- # ── ν‘œμ€€ 라이브러리 ───────────────────────────────────────────
11
- import os, re, json, shutil, tempfile
 
 
 
 
 
12
  from datetime import datetime, timedelta
13
  from pathlib import Path
14
 
15
- # ── μ„œλ“œνŒŒν‹° ──────────────────────────────────────────────────
 
 
16
  import gradio as gr
17
  import yt_dlp
18
  import google.generativeai as genai
19
- from youtube_transcript_api import YouTubeTranscriptApi # NEW
20
 
21
- # ── μƒμˆ˜ ──────────────────────────────────────────────────────
 
 
22
  DEFAULT_COOKIE_FILE = Path(__file__).with_name("www.youtube.com_cookies.txt")
23
 
24
- # YouTube URL μ •κ·œμ‹(캑처 κ·Έλ£Ή 6이 μ˜μƒ ID)
25
  _YT_REGEX = re.compile(
26
  r"(https?://)?(www\.)?"
27
  r"(youtube|youtu|youtube-nocookie)\.(com|be)/"
@@ -29,128 +39,104 @@ _YT_REGEX = re.compile(
29
  )
30
 
31
 
32
- # =================================================================
33
- # Helper : video-ID μΆ”μΆœ + μžλ§‰ κ°€μ Έμ˜€κΈ°
34
- # =================================================================
35
  def extract_video_id(url: str) -> str | None:
36
- """유튜브 URLμ—μ„œ 11-κΈ€μž λΉ„λ””μ˜€ ID λ°˜ν™˜(μ—†μœΌλ©΄ None)"""
37
  m = _YT_REGEX.match(url)
38
  return m.group(6) if m else None
39
 
40
 
41
  def fetch_transcript(video_id: str, pref_lang=("ko", "en")) -> str:
42
- """
43
- 유튜브 μžλ§‰μ„ 가져와
44
- **[MM:SS]** line ν˜•μ‹μœΌλ‘œ κ²°ν•©ν•œ λ’€ λ¬Έμžμ—΄λ‘œ λ°˜ν™˜.
45
- """
46
- transcript = None
47
  # μ–Έμ–΄ μš°μ„ μˆœμœ„λŒ€λ‘œ μ‹œλ„
 
48
  for lang in pref_lang:
49
  try:
50
- transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[lang])
51
  break
52
  except Exception:
53
  continue
54
- # κ·Έλž˜λ„ μ‹€νŒ¨ν•˜λ©΄ μž„μ˜ μ–Έμ–΄
55
- if transcript is None:
56
- transcript = YouTubeTranscriptApi.get_transcript(video_id)
57
-
58
- lines = []
59
- for seg in transcript:
60
- t = str(timedelta(seconds=int(seg["start"]))) # H:MM:SS
61
- t_mmss = ":".join(t.split(":")[-2:]) # MM:SS
62
  lines.append(f"**[{t_mmss}]** {seg['text']}")
63
  return "\n".join(lines)
64
 
65
 
66
- # =================================================================
67
  # 메인 클래슀
68
- # =================================================================
69
  class YouTubeDownloader:
70
  def __init__(self):
 
71
  self.download_dir = tempfile.mkdtemp()
72
  self.temp_downloads = tempfile.mkdtemp(prefix="youtube_downloads_")
 
73
  self.downloads_folder = os.path.join(
74
  os.path.expanduser("~"), "Downloads", "YouTube_Downloads"
75
  )
76
  os.makedirs(self.downloads_folder, exist_ok=True)
77
- self.gemini_model = None
78
 
79
- # ───────── Gemini ─────────
80
- def configure_gemini(self, api_key):
 
 
81
  try:
82
  genai.configure(api_key=api_key)
83
- self.gemini_model = genai.GenerativeModel(
84
- model_name="gemini-1.5-flash-latest"
85
- )
86
  return True, "βœ… Gemini API configured successfully!"
87
  except Exception as e:
88
  return False, f"❌ Failed to configure Gemini API: {e}"
89
 
90
  # ───────── 정리 ─────────
91
  def cleanup(self):
92
- try:
93
- if os.path.exists(self.download_dir):
94
- shutil.rmtree(self.download_dir)
95
- if os.path.exists(self.temp_downloads):
96
- shutil.rmtree(self.temp_downloads)
97
- except Exception:
98
- pass
99
-
100
- # ───────── URL 검증 ──────
101
- def is_valid_youtube_url(self, url):
102
  return _YT_REGEX.match(url) is not None
103
 
104
- # ───────── Gemini scene breakdown (μƒλž΅ 없이 전체 κ΅¬ν˜„) ──────
105
- def generate_scene_breakdown_gemini(self, video_info):
106
  if not self.gemini_model:
107
- return self.generate_scene_breakdown_fallback(video_info)
 
108
  try:
109
- duration = video_info.get("duration", 0)
110
- title = video_info.get("title", "")
111
- description = video_info.get("description", "")[:1500]
112
- if not duration:
113
- return [
114
- "**[Duration Unknown]**: Unable to generate timestamped breakdown – "
115
- "video duration not available"
116
- ]
117
 
118
  prompt = f"""
119
  Analyze this YouTube video and create a highly detailed, scene-by-scene breakdown
120
- with precise timestamps and specific descriptions:
121
-
122
- Title: {title}
123
- Duration: {duration} seconds
124
- Description: {description}
125
-
126
- IMPORTANT INSTRUCTIONS:
127
- 1. Create detailed scene descriptions that include:
128
- - Physical appearance of people (age, gender, clothing, hair, etc.)
129
- - Exact actions being performed
130
- - Dialogue or speech (include actual lines if audible, or infer probable spoken
131
- lines based on actions and setting; format them as "Character: line…")
132
- - Setting and environment details
133
- - Props, objects, or products being shown
134
- - Visual effects, text overlays, or graphics
135
- - Mood, tone, and atmosphere
136
- - Camera movements or angles (if apparent)
137
- 2. Dialogue Emphasis:
138
- - Include short dialogue lines in **every scene** wherever plausible.
139
- - Write lines like: Character: "Actual or inferred line…"
140
- - If dialogue is not available, intelligently infer probable phrases
141
- 3. Timestamp Guidelines:
142
- - <1 min : 2–3 s | 1–5 min : 3–5 s | 5–15 min : 5–10 s | >15 min : 10–15 s
143
- - Max 20 scenes
144
- 4. Format: **[MM:SS-MM:SS]** description
145
  """
146
  resp = self.gemini_model.generate_content(prompt)
147
  if not resp or not resp.text:
148
- return self.generate_scene_breakdown_fallback(video_info)
149
 
150
  scenes, cur = [], ""
151
  for line in resp.text.splitlines():
152
  line = line.strip()
153
- if line.startswith("**[") and "]**:" in line:
154
  if cur:
155
  scenes.append(cur.strip())
156
  cur = line
@@ -158,170 +144,93 @@ IMPORTANT INSTRUCTIONS:
158
  cur += "\n" + line
159
  if cur:
160
  scenes.append(cur.strip())
161
- return scenes if scenes else self.generate_scene_breakdown_fallback(video_info)
 
162
  except Exception:
163
- return self.generate_scene_breakdown_fallback(video_info)
164
 
165
- # ───────── fallback breakdown ──────
166
- def generate_scene_breakdown_fallback(self, video_info):
167
- duration = video_info.get("duration", 0)
168
  if not duration:
169
- return ["**[Duration Unknown]**: Unable to generate timestamped breakdown"]
170
-
171
- if duration <= 60:
172
- seg = 3
173
- elif duration <= 300:
174
- seg = 5
175
- elif duration <= 900:
176
- seg = 10
177
- else:
178
- seg = 15
179
 
 
180
  total = min(duration // seg + 1, 20)
181
- vtype = self.detect_video_type_detailed(
182
- video_info.get("title", ""), video_info.get("description", "")
183
- )
184
- scenes = []
185
  for i in range(total):
186
  s, e = i * seg, min(i * seg + seg - 1, duration)
187
  scenes.append(
188
- f"**[{s//60:02d}:{s%60:02d}-{e//60:02d}:{e%60:02d}]**: "
189
- f"{self.generate_contextual_description(i, total, vtype, '', video_info.get('title',''))}"
190
  )
191
  return scenes
192
 
193
- # ───────── detect helpers (상세) ──────
194
- def detect_video_type_detailed(self, title, desc):
195
- t = (title + " " + desc).lower()
196
- if any(x in t for x in ["tutorial", "how to", "guide", "diy"]):
197
- return "tutorial"
198
- if any(x in t for x in ["review", "unboxing", "comparison"]):
199
- return "review"
200
- if any(x in t for x in ["vlog", "daily", "routine"]):
201
- return "vlog"
202
- if any(x in t for x in ["music", "song", "cover"]):
203
- return "music"
204
- if any(x in t for x in ["comedy", "prank", "challenge"]):
205
- return "entertainment"
206
- if any(x in t for x in ["news", "update", "report"]):
207
- return "news"
208
- if any(x in t for x in ["cooking", "recipe", "food"]):
209
- return "cooking"
210
- if any(x in t for x in ["workout", "fitness", "yoga"]):
211
- return "fitness"
212
- return "general"
213
-
214
- def generate_contextual_description(
215
- self, idx, total, vtype, uploader, title
216
- ):
217
- if idx == 0:
218
- return "The creator greets viewers and introduces the video."
219
- if idx == total - 1:
220
- return "The creator wraps up and thanks viewers."
221
- return "Content continues according to the video type."
222
-
223
- # ───────── quick-detect helpers (μš”μ•½) ──────
224
- def detect_video_type(self, title, desc):
225
- t = (title + " " + desc).lower()
226
- if any(x in t for x in ["music", "song", "album"]):
227
- return "🎡 Music"
228
- if any(x in t for x in ["tutorial", "guide"]):
229
- return "πŸ“š Tutorial"
230
- if any(x in t for x in ["comedy", "vlog"]):
231
- return "🎭 Entertainment"
232
- if any(x in t for x in ["news", "report"]):
233
- return "πŸ“° News"
234
- if any(x in t for x in ["review", "unboxing"]):
235
- return "⭐ Review"
236
- return "🎬 General"
237
-
238
- def detect_background_music(self, video_info):
239
- title = video_info.get("title", "").lower()
240
- if "music" in title:
241
- return "🎡 Original music"
242
- if "tutorial" in title:
243
- return "πŸ”‡ Minimal music"
244
- return "🎼 Background music"
245
-
246
- def detect_influencer_status(self, video_info):
247
- subs = video_info.get("channel_followers", 0)
248
- if subs > 10_000_000:
249
- return "🌟 Mega (10 M+)"
250
- if subs > 1_000_000:
251
- return "⭐ Major (1 M+)"
252
- if subs > 100_000:
253
- return "🎯 Mid (100 K+)"
254
- return "πŸ‘€"
255
-
256
  @staticmethod
257
- def format_number(n):
258
  if n >= 1_000_000:
259
  return f"{n/1_000_000:.1f} M"
260
  if n >= 1_000:
261
  return f"{n/1_000:.1f} K"
262
  return str(n)
263
 
264
- # ───���───── 리포트 ──────
265
- def format_video_info(self, info):
266
- title = info.get("title", "")
267
- uploader = info.get("uploader", "")
268
  duration = info.get("duration", 0)
269
  dur = f"{duration//60}:{duration%60:02d}"
270
- views = info.get("view_count", 0)
271
- likes = info.get("like_count", 0)
272
- comments = info.get("comment_count", 0)
273
- scenes = self.generate_scene_breakdown_gemini(info)
274
 
275
- return f"""
276
  🎬 **{title}**
277
- Uploader: {uploader} Duration: {dur}
278
-
279
- Views / Likes / Comments: {self.format_number(views)} / {self.format_number(likes)} / {self.format_number(comments)}
280
 
281
  {'-'*48}
282
- {"".join(scenes)}
283
  """
284
 
285
- # ───────── 메타데이터 μΆ”μΆœ ──────
286
- def get_video_info(self, url, progress=gr.Progress(), cookiefile=None):
287
- if not self.is_valid_youtube_url(url):
288
- return None, "❌ Invalid URL"
289
-
290
  if cookiefile and os.path.exists(cookiefile):
291
- cookiefile = cookiefile
292
  elif DEFAULT_COOKIE_FILE.exists():
293
- cookiefile = str(DEFAULT_COOKIE_FILE)
294
  else:
295
- cookiefile = None
296
 
297
- try:
298
- ydl_opts = {"noplaylist": True, "quiet": True}
299
- if cookiefile:
300
- ydl_opts["cookiefile"] = cookiefile
301
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
302
- info = ydl.extract_info(url, download=False)
303
- return info, "OK"
304
- except Exception as e:
305
- return None, f"yt-dlp error: {e}"
306
 
307
- # ───────── λ‹€μš΄λ‘œλ“œ ──────
308
  def download_video(
309
- self, url, quality="best", audio_only=False, progress=gr.Progress(), cookiefile=None
 
 
 
 
310
  ):
311
- if not self.is_valid_youtube_url(url):
312
- return None, "❌ Invalid URL"
313
  if cookiefile and os.path.exists(cookiefile):
314
- cookiefile = cookiefile
315
  elif DEFAULT_COOKIE_FILE.exists():
316
- cookiefile = str(DEFAULT_COOKIE_FILE)
317
  else:
318
- cookiefile = None
319
 
320
  ts = datetime.now().strftime("%Y%m%d_%H%M%S")
321
- ydl_opts = {
322
  "outtmpl": os.path.join(self.temp_downloads, f"%(title)s_{ts}.%(ext)s"),
323
  "noplaylist": True,
324
  }
 
325
  if audio_only:
326
  ydl_opts["format"] = "bestaudio/best"
327
  ydl_opts["postprocessors"] = [
@@ -334,31 +243,29 @@ Views / Likes / Comments: {self.format_number(views)} / {self.format_number(like
334
  ydl_opts["format"] = "best[height<=480]"
335
  else:
336
  ydl_opts["format"] = "best[height<=1080]"
337
- if cookiefile:
338
- ydl_opts["cookiefile"] = cookiefile
339
 
340
- try:
341
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
342
- ydl.extract_info(url, download=True)
343
- # 첫 파일 찾기
344
- for f in os.listdir(self.temp_downloads):
345
- if ts in f:
346
- temp_fp = os.path.join(self.temp_downloads, f)
347
- final_fp = os.path.join(self.downloads_folder, f)
348
- try:
349
- shutil.copy2(temp_fp, final_fp)
350
- saved = final_fp
351
- except Exception:
352
- saved = temp_fp
353
- return temp_fp, f"βœ… Saved: {saved}"
354
- return None, "❌ Downloaded file not found"
355
- except Exception as e:
356
- return None, f"❌ Download failed: {e}"
357
 
358
 
359
- # =================================================================
360
- # Gradio Helper ν•¨μˆ˜
361
- # =================================================================
362
  downloader = YouTubeDownloader()
363
 
364
 
@@ -367,117 +274,104 @@ def configure_api_key(api_key):
367
  return msg, gr.update(visible=ok)
368
 
369
 
370
- def analyze_with_cookies(url, cookies_file, progress=gr.Progress()):
371
- info, err = downloader.get_video_info(url, progress, cookies_file)
372
- return downloader.format_video_info(info) if info else f"❌ {err}"
 
 
 
373
 
374
 
375
- def download_with_cookies(url, qual, audio, cookies_file, progress=gr.Progress()):
376
- fp, msg = downloader.download_video(url, qual, audio, progress, cookies_file)
377
- return fp, msg
378
 
379
 
380
- def get_transcript(url, cookies_file):
381
  vid = extract_video_id(url)
382
  if not vid:
383
- return "❌ Invalid YouTube URL"
384
  try:
385
  return fetch_transcript(vid)
386
  except Exception as e:
387
  return f"❌ {e}"
388
 
389
 
390
- # =================================================================
391
  # UI
392
- # =================================================================
393
  def create_interface():
394
- with gr.Blocks(
395
- theme=gr.themes.Soft(), title="πŸŽ₯ YouTube Video Analyzer & Downloader Pro"
396
- ) as iface:
397
  gr.HTML("<h1>πŸŽ₯ YouTube Video Analyzer & Downloader Pro</h1>")
398
 
399
  # API μ„€μ •
400
  with gr.Group():
401
  gr.HTML("<h3>πŸ”‘ Google Gemini API Configuration</h3>")
402
  with gr.Row():
403
- api_key_in = gr.Textbox(
404
- label="πŸ”‘ Google API Key", type="password", placeholder="Paste your Google API key…"
405
- )
406
  api_btn = gr.Button("πŸ”§ Configure API", variant="secondary")
407
  api_status = gr.Textbox(
408
- label="API Status",
409
  value="❌ Gemini API not configured – Using fallback analysis",
410
  interactive=False,
411
  lines=1,
 
412
  )
413
 
414
  # 곡톡 μž…λ ₯
415
  with gr.Row():
416
  url_in = gr.Textbox(label="πŸ”— YouTube URL", placeholder="Paste YouTube video URL…")
417
- cookies_in = gr.File(
418
- label="πŸͺ Upload cookies.txt (optional)", file_types=[".txt"], type="filepath"
419
- )
420
 
421
  with gr.Tabs():
422
- # 뢄석 νƒ­
423
  with gr.TabItem("πŸ“Š Video Analysis"):
424
- analyze_btn = gr.Button("πŸ” Analyze Video", variant="primary")
425
  analysis_out = gr.Textbox(label="πŸ“Š Analysis Report", lines=30, show_copy_button=True)
426
- analyze_btn.click(
427
- analyze_with_cookies, inputs=[url_in, cookies_in], outputs=analysis_out, show_progress=True
428
- )
429
- # λ‹€μš΄λ‘œλ“œ νƒ­
430
  with gr.TabItem("⬇️ Video Download"):
431
  with gr.Row():
432
- quality_dd = gr.Dropdown(
433
- choices=["best", "720p", "480p"], value="best", label="πŸ“Ί Quality"
434
- )
435
  audio_cb = gr.Checkbox(label="🎡 Audio only (MP3)")
436
- download_btn = gr.Button("⬇️ Download Video", variant="primary")
437
- dl_status = gr.Textbox(label="πŸ“₯ Download Status", lines=5, show_copy_button=True)
438
- dl_file = gr.File(label="πŸ“ Downloaded File", visible=False)
439
-
440
- def wrapped_dl(u, q, a, c, prog=gr.Progress()):
441
- fp, st = download_with_cookies(u, q, a, c, prog)
442
- return (st, gr.update(value=fp, visible=True)) if fp and os.path.exists(fp) else (
443
- st,
444
- gr.update(visible=False),
445
  )
446
 
447
- download_btn.click(
448
- wrapped_dl,
449
- inputs=[url_in, quality_dd, audio_cb, cookies_in],
450
- outputs=[dl_status, dl_file],
451
- show_progress=True,
452
- )
453
- # μžλ§‰ νƒ­ NEW
454
  with gr.TabItem("πŸ—’οΈ Transcript"):
455
- tr_btn = gr.Button("πŸ“œ Get Full Transcript", variant="primary")
456
- tr_out = gr.Textbox(
457
- label="πŸ—’οΈ Transcript (full)", lines=30, show_copy_button=True
458
- )
459
- tr_btn.click(
460
- get_transcript, inputs=[url_in, cookies_in], outputs=tr_out, show_progress=True
461
- )
462
-
463
- # API λ²„νŠΌ
464
  api_btn.click(configure_api_key, inputs=[api_key_in], outputs=[api_status])
465
 
466
  gr.HTML(
467
  """
468
  <div style="margin-top:20px;padding:15px;background:#f0f8ff;border-left:5px solid #4285f4;border-radius:10px;">
469
- <h3>πŸ’‘ Tip: μΏ ν‚€ 파일 μžλ™ μ‚¬μš©</h3>
470
- <p><code>www.youtube.com_cookies.txt</code> νŒŒμΌμ„ <strong>app.py</strong>와 같은
471
  폴더에 두면 μ—…λ‘œλ“œ 없이 μžλ™ μ‚¬μš©λ©λ‹ˆλ‹€.</p>
472
  </div>
473
  """
474
  )
 
475
  return iface
476
 
477
 
478
- # =================================================================
479
- # μ‹€ν–‰
480
- # =================================================================
481
  if __name__ == "__main__":
482
  demo = create_interface()
483
  import atexit
 
1
  #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
  """
4
  YouTube Video Analyzer & Downloader Pro
5
  ───────────────────────────────────────
6
+ • `www.youtube.com_cookies.txt` 파일이 app.py와 같은 폴더에 있으면 자동 사용
7
  • UI에서 쿠키를 업로드하면 그 파일이 *우선* 적용
8
+ • “Transcript” 탭에서 **전체 자막 + MM:SS 타임스탬프** 제공
9
  """
10
 
11
+ # ──────────────────────────────────────
12
+ # ν‘œμ€€ 라이브러리
13
+ # ──────────────────────────────────────
14
+ import os
15
+ import re
16
+ import shutil
17
+ import tempfile
18
  from datetime import datetime, timedelta
19
  from pathlib import Path
20
 
21
+ # ──────────────────────────────────────
22
+ # μ„œλ“œνŒŒν‹° 라이브러리
23
+ # ──────────────────────────────────────
24
  import gradio as gr
25
  import yt_dlp
26
  import google.generativeai as genai
27
+ from youtube_transcript_api import YouTubeTranscriptApi
28
 
29
+ # ──────────────────────────────────────
30
+ # μƒμˆ˜
31
+ # ──────────────────────────────────────
32
  DEFAULT_COOKIE_FILE = Path(__file__).with_name("www.youtube.com_cookies.txt")
33
 
34
+ # 유튜브 URL μ •κ·œμ‹
35
  _YT_REGEX = re.compile(
36
  r"(https?://)?(www\.)?"
37
  r"(youtube|youtu|youtube-nocookie)\.(com|be)/"
 
39
  )
40
 
41
 
42
+ # ──────────────────────────────────────
43
+ # Helper : video-ID μΆ”μΆœ / μžλ§‰ κ°€μ Έμ˜€κΈ°
44
+ # ──────────────────────────────────────
45
  def extract_video_id(url: str) -> str | None:
 
46
  m = _YT_REGEX.match(url)
47
  return m.group(6) if m else None
48
 
49
 
50
def fetch_transcript(video_id: str, pref_lang=("ko", "en")) -> str:
    """Fetch a video's transcript and render it as timestamped Markdown lines.

    Languages in ``pref_lang`` are tried in order. If none of them can be
    fetched, one last request is made without a language filter, and any
    exception raised by that final request propagates to the caller.

    Args:
        video_id: YouTube video ID to look up.
        pref_lang: Language codes to try, most preferred first.

    Returns:
        One ``**[MM:SS]** text`` line per transcript segment, joined with
        newlines. Segments starting at or after the one-hour mark are
        stamped ``**[H:MM:SS]**`` so their timestamps stay unambiguous.
    """
    tr = None
    for lang in pref_lang:
        try:
            tr = YouTubeTranscriptApi.get_transcript(video_id, languages=[lang])
            break
        except Exception:
            # Best effort: failing for one language just means "try the next".
            continue
    if tr is None:
        # Last attempt without a language filter; errors surface to the caller.
        tr = YouTubeTranscriptApi.get_transcript(video_id)

    lines: list[str] = []
    for seg in tr:
        minutes, seconds = divmod(int(seg["start"]), 60)
        hours, minutes = divmod(minutes, 60)
        # Keeping only the last two fields of H:MM:SS would silently drop the
        # hour, making 1:02:03 indistinguishable from 0:02:03.
        if hours:
            stamp = f"{hours}:{minutes:02d}:{seconds:02d}"
        else:
            stamp = f"{minutes:02d}:{seconds:02d}"
        lines.append(f"**[{stamp}]** {seg['text']}")
    return "\n".join(lines)
68
 
69
 
70
+ # ──────────────────────────────────────
71
  # 메인 클래슀
72
+ # ──────────────────────────────────────
73
  class YouTubeDownloader:
74
  def __init__(self):
75
+ # μž„μ‹œ 디렉터리
76
  self.download_dir = tempfile.mkdtemp()
77
  self.temp_downloads = tempfile.mkdtemp(prefix="youtube_downloads_")
78
+ # Downloads 폴더
79
  self.downloads_folder = os.path.join(
80
  os.path.expanduser("~"), "Downloads", "YouTube_Downloads"
81
  )
82
  os.makedirs(self.downloads_folder, exist_ok=True)
 
83
 
84
+ self.gemini_model = None # Gemini λͺ¨λΈ ν•Έλ“€
85
+
86
+ # ───────── Gemini μ„€μ • ─────────
87
+ def configure_gemini(self, api_key: str):
88
  try:
89
  genai.configure(api_key=api_key)
90
+ self.gemini_model = genai.GenerativeModel("gemini-1.5-flash-latest")
 
 
91
  return True, "βœ… Gemini API configured successfully!"
92
  except Exception as e:
93
  return False, f"❌ Failed to configure Gemini API: {e}"
94
 
95
  # ───────── 정리 ─────────
96
  def cleanup(self):
97
+ for p in (self.download_dir, self.temp_downloads):
98
+ try:
99
+ shutil.rmtree(p, ignore_errors=True)
100
+ except Exception:
101
+ pass
102
+
103
+ # ───────── μœ νš¨μ„± 검사 ─────────
104
+ def is_valid_youtube_url(self, url: str) -> bool:
 
 
105
  return _YT_REGEX.match(url) is not None
106
 
107
+ # ───────── μž₯λ©΄ 뢄석 : Gemini ─────────
108
+ def generate_scene_breakdown_gemini(self, info: dict) -> list[str]:
109
  if not self.gemini_model:
110
+ return self.generate_scene_breakdown_fallback(info)
111
+
112
  try:
113
+ duration = info.get("duration", 0)
114
+ title = info.get("title", "")
115
+ description = info.get("description", "")[:1500]
 
 
 
 
 
116
 
117
  prompt = f"""
118
  Analyze this YouTube video and create a highly detailed, scene-by-scene breakdown
119
+ with precise timestamps (MM:SS-MM:SS) and specific descriptions:
120
+
121
+ Title : {title}
122
+ Duration : {duration} s
123
+ Description : {description}
124
+
125
+ Follow these rules:
126
+ β€’ Include visual details, actions, inferred dialogue, setting, props, graphics
127
+ β€’ Dialogue : short lines for **every** scene if plausible
128
+ β€’ Timestamp : 2-3 s (<1 min) / 3-5 s (1-5 min) / 5-10 s (5-15 min) / 10-15 s (>15 min)
129
+ β€’ ≀ 20 scenes total
130
+ β€’ Formatting β†’ **[MM:SS-MM:SS]** Description
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  """
132
  resp = self.gemini_model.generate_content(prompt)
133
  if not resp or not resp.text:
134
+ return self.generate_scene_breakdown_fallback(info)
135
 
136
  scenes, cur = [], ""
137
  for line in resp.text.splitlines():
138
  line = line.strip()
139
+ if line.startswith("**[") and "]**" in line:
140
  if cur:
141
  scenes.append(cur.strip())
142
  cur = line
 
144
  cur += "\n" + line
145
  if cur:
146
  scenes.append(cur.strip())
147
+ return scenes or self.generate_scene_breakdown_fallback(info)
148
+
149
  except Exception:
150
+ return self.generate_scene_breakdown_fallback(info)
151
 
152
+ # ───────── μž₯λ©΄ 뢄석 : Fallback ─────────
153
+ def generate_scene_breakdown_fallback(self, info: dict) -> list[str]:
154
+ duration = info.get("duration", 0)
155
  if not duration:
156
+ return ["**[00:00]** Unable to determine duration"]
 
 
 
 
 
 
 
 
 
157
 
158
+ seg = 3 if duration <= 60 else 5 if duration <= 300 else 10 if duration <= 900 else 15
159
  total = min(duration // seg + 1, 20)
160
+ scenes: list[str] = []
 
 
 
161
  for i in range(total):
162
  s, e = i * seg, min(i * seg + seg - 1, duration)
163
  scenes.append(
164
+ f"**[{s//60:02d}:{s%60:02d}-{e//60:02d}:{e%60:02d}]** "
165
+ "Content continues…"
166
  )
167
  return scenes
168
 
169
+ # ───────── 숫자 포맷 ─────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  @staticmethod
171
+ def fmt_num(n: int) -> str:
172
  if n >= 1_000_000:
173
  return f"{n/1_000_000:.1f} M"
174
  if n >= 1_000:
175
  return f"{n/1_000:.1f} K"
176
  return str(n)
177
 
178
+ # ───────── κ²°κ³Ό 리포트 ─────────
179
+ def format_video_info(self, info: dict) -> str:
180
+ title = info.get("title", "Unknown")
181
+ uploader = info.get("uploader", "Unknown")
182
  duration = info.get("duration", 0)
183
  dur = f"{duration//60}:{duration%60:02d}"
184
+ views = self.fmt_num(info.get("view_count", 0))
185
+ likes = self.fmt_num(info.get("like_count", 0))
186
+ comments = self.fmt_num(info.get("comment_count", 0))
187
+ scenes = "\n".join(self.generate_scene_breakdown_gemini(info))
188
 
189
+ return f"""\
190
  🎬 **{title}**
191
+ Uploader : {uploader}
192
+ Duration : {dur} Views / Likes / Comments : {views} / {likes} / {comments}
 
193
 
194
  {'-'*48}
195
+ {scenes}
196
  """
197
 
198
+ # ───────── 메타데이터 ─────────
199
+ def get_video_info(self, url: str, cookiefile: str | None = None):
 
 
 
200
  if cookiefile and os.path.exists(cookiefile):
201
+ ck = cookiefile
202
  elif DEFAULT_COOKIE_FILE.exists():
203
+ ck = str(DEFAULT_COOKIE_FILE)
204
  else:
205
+ ck = None
206
 
207
+ ydl_opts = {"noplaylist": True, "quiet": True}
208
+ if ck:
209
+ ydl_opts["cookiefile"] = ck
210
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
211
+ return ydl.extract_info(url, download=False)
 
 
 
 
212
 
213
+ # ───────── λ‹€μš΄λ‘œλ“œ ─────────
214
  def download_video(
215
+ self,
216
+ url: str,
217
+ quality: str = "best",
218
+ audio_only: bool = False,
219
+ cookiefile: str | None = None,
220
  ):
 
 
221
  if cookiefile and os.path.exists(cookiefile):
222
+ ck = cookiefile
223
  elif DEFAULT_COOKIE_FILE.exists():
224
+ ck = str(DEFAULT_COOKIE_FILE)
225
  else:
226
+ ck = None
227
 
228
  ts = datetime.now().strftime("%Y%m%d_%H%M%S")
229
+ ydl_opts: dict = {
230
  "outtmpl": os.path.join(self.temp_downloads, f"%(title)s_{ts}.%(ext)s"),
231
  "noplaylist": True,
232
  }
233
+
234
  if audio_only:
235
  ydl_opts["format"] = "bestaudio/best"
236
  ydl_opts["postprocessors"] = [
 
243
  ydl_opts["format"] = "best[height<=480]"
244
  else:
245
  ydl_opts["format"] = "best[height<=1080]"
 
 
246
 
247
+ if ck:
248
+ ydl_opts["cookiefile"] = ck
249
+
250
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
251
+ ydl.extract_info(url, download=True)
252
+
253
+ # temp 파일 찾���
254
+ for f in os.listdir(self.temp_downloads):
255
+ if ts in f:
256
+ src = os.path.join(self.temp_downloads, f)
257
+ dst = os.path.join(self.downloads_folder, f)
258
+ try:
259
+ shutil.copy2(src, dst)
260
+ return dst, "βœ… Saved to Downloads"
261
+ except Exception:
262
+ return src, "βœ… Saved to temp (copy failed)"
263
+ return None, "❌ File not found"
264
 
265
 
266
+ # ──────────────────────────────────────
267
+ # Gradio Helper
268
+ # ──────────────────────────────────────
269
  downloader = YouTubeDownloader()
270
 
271
 
 
274
  return msg, gr.update(visible=ok)
275
 
276
 
277
def analyze_fn(url, cookie):
    """Return the formatted analysis report for ``url``, or an error message.

    Any exception raised while fetching or formatting is turned into the
    returned text instead of propagating.
    """
    try:
        report = downloader.format_video_info(
            downloader.get_video_info(url, cookie)
        )
    except Exception as exc:
        return f"❌ {exc}"
    return report
283
 
284
 
285
def download_fn(url, qual, audio, cookie):
    """Download a video and return ``(status message, file path)``.

    Exceptions raised by the downloader are converted into a status message
    with a ``None`` path, so this handler reports failures as text the same
    way ``analyze_fn`` and ``transcript_fn`` do.

    Args:
        url: Video URL.
        qual: Quality choice forwarded to ``download_video``.
        audio: Whether to request audio only.
        cookie: Optional path to a cookies file.

    Returns:
        ``(message, path)``; ``path`` is ``None`` when nothing was saved.
    """
    try:
        fp, msg = downloader.download_video(url, qual, audio, cookie)
    except Exception as e:
        # "\u274c" is the cross-mark emoji, escaped to keep this line ASCII.
        return f"\u274c Download failed: {e}", None
    return msg, fp
288
 
289
 
290
def transcript_fn(url, _cookie):
    """Return the transcript text for ``url``, or an error message.

    The second argument is accepted but not used.
    """
    video_id = extract_video_id(url)
    if video_id:
        try:
            return fetch_transcript(video_id)
        except Exception as exc:
            return f"❌ {exc}"
    return "❌ Invalid URL"
298
 
299
 
300
+ # ──────────────────────────────────────
301
  # UI
302
+ # ──────────────────────────────────────
303
  def create_interface():
304
+ with gr.Blocks(theme=gr.themes.Soft(), title="πŸŽ₯ YouTube Video Analyzer & Downloader Pro") as iface:
305
+
 
306
  gr.HTML("<h1>πŸŽ₯ YouTube Video Analyzer & Downloader Pro</h1>")
307
 
308
  # API μ„€μ •
309
  with gr.Group():
310
  gr.HTML("<h3>πŸ”‘ Google Gemini API Configuration</h3>")
311
  with gr.Row():
312
+ api_key_in = gr.Textbox(label="πŸ”‘ Google API Key", type="password")
 
 
313
  api_btn = gr.Button("πŸ”§ Configure API", variant="secondary")
314
  api_status = gr.Textbox(
 
315
  value="❌ Gemini API not configured – Using fallback analysis",
316
  interactive=False,
317
  lines=1,
318
+ label="API Status",
319
  )
320
 
321
  # 곡톡 μž…λ ₯
322
  with gr.Row():
323
  url_in = gr.Textbox(label="πŸ”— YouTube URL", placeholder="Paste YouTube video URL…")
324
+ cookie_in = gr.File(label="πŸͺ Upload cookies.txt (optional)", type="filepath", file_types=[".txt"])
 
 
325
 
326
  with gr.Tabs():
327
+ # 뢄석
328
  with gr.TabItem("πŸ“Š Video Analysis"):
329
+ analyze_btn = gr.Button("πŸ” Analyze", variant="primary")
330
  analysis_out = gr.Textbox(label="πŸ“Š Analysis Report", lines=30, show_copy_button=True)
331
+ analyze_btn.click(analyze_fn, [url_in, cookie_in], analysis_out, show_progress=True)
332
+
333
+ # λ‹€μš΄λ‘œλ“œ
 
334
  with gr.TabItem("⬇️ Video Download"):
335
  with gr.Row():
336
+ quality_dd = gr.Dropdown(["best", "720p", "480p"], value="best", label="πŸ“Ί Quality")
 
 
337
  audio_cb = gr.Checkbox(label="🎡 Audio only (MP3)")
338
+ download_btn = gr.Button("⬇️ Download", variant="primary")
339
+ dl_status = gr.Textbox(label="πŸ“₯ Status", lines=5, show_copy_button=True)
340
+ dl_file = gr.File(label="πŸ“ File", visible=False)
341
+
342
+ def wrap_dl(u, q, a, c):
343
+ msg, fp = download_fn(u, q, a, c)
344
+ return (
345
+ msg,
346
+ gr.update(value=fp, visible=True) if fp and os.path.exists(fp) else gr.update(visible=False),
347
  )
348
 
349
+ download_btn.click(wrap_dl, [url_in, quality_dd, audio_cb, cookie_in], [dl_status, dl_file], show_progress=True)
350
+
351
+ # μžλ§‰
 
 
 
 
352
  with gr.TabItem("πŸ—’οΈ Transcript"):
353
+ tr_btn = gr.Button("πŸ“œ Get Transcript", variant="primary")
354
+ tr_out = gr.Textbox(label="Transcript", lines=30, show_copy_button=True)
355
+ tr_btn.click(transcript_fn, [url_in, cookie_in], tr_out, show_progress=True)
356
+
 
 
 
 
 
357
  api_btn.click(configure_api_key, inputs=[api_key_in], outputs=[api_status])
358
 
359
  gr.HTML(
360
  """
361
  <div style="margin-top:20px;padding:15px;background:#f0f8ff;border-left:5px solid #4285f4;border-radius:10px;">
362
+ <h3>πŸ’‘ Tip</h3>
363
+ <p><code>www.youtube.com_cookies.txt</code> νŒŒμΌμ„ <b>app.py</b>와 같은
364
  폴더에 두면 μ—…λ‘œλ“œ 없이 μžλ™ μ‚¬μš©λ©λ‹ˆλ‹€.</p>
365
  </div>
366
  """
367
  )
368
+
369
  return iface
370
 
371
 
372
+ # ──────────────────────────────────────
373
+ # Entrypoint
374
+ # ──────────────────────────────────────
375
  if __name__ == "__main__":
376
  demo = create_interface()
377
  import atexit