fantaxy committed on
Commit
11bafec
·
verified ·
1 Parent(s): 184adfb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +666 -206
app.py CHANGED
@@ -1,268 +1,728 @@
1
  #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
  """
4
  YouTube Video Analyzer & Downloader Pro
5
- ───────────────────────────────────────
6
- · app.py 와 같은 폴더에 𝐰𝐰𝐰.𝐲𝐨𝐮𝐭𝐮𝐛𝐞.𝐜𝐨𝐨𝐤𝐢𝐞𝐬.𝐭𝐱𝐭 이 있으면 자동 사용
7
- · Gradio UI에서 쿠키 파일 업로드 시 → 업로드 파일이 우선
8
- · Gemini-1.5 Flash 장면 분석(선택적), 전체 Transcript, 다운로드 지원
 
9
  """
10
 
11
- # ───────────────────────── 표준 라이브러리 ─────────────────────────
12
- import os, re, shutil, tempfile
13
- from datetime import datetime, timedelta
 
 
 
 
 
 
 
14
  from pathlib import Path
15
 
16
- # ───────────────────────── 서드파티 라이브러리 ────────────────────
 
 
17
  import gradio as gr
18
  import yt_dlp
19
  import google.generativeai as genai
20
- from youtube_transcript_api import YouTubeTranscriptApi
21
-
22
- # ───────────────────────── 상수 (절대경로) ─────────────────────────
23
- DEFAULT_COOKIE_FILE = (Path(__file__).resolve().parent / "www.youtube.com_cookies.txt").resolve()
24
-
25
- # 유튜브 URL 정규식
26
- _YT_RE = re.compile(
27
- r"(https?://)?(www\.)?"
28
- r"(youtube|youtu|youtube-nocookie)\.(com|be)/"
29
- r"(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})"
30
- )
31
-
32
- # ───────────────────────── Helper (ID, 자막) ─────────────────────
33
- def extract_video_id(url: str) -> str | None:
34
- m = _YT_RE.match(url)
35
- return m.group(6) if m else None
36
-
37
- def fetch_transcript(video_id: str, pref=("ko", "en")) -> str:
38
- tr = None
39
- for lang in pref:
40
- try:
41
- tr = YouTubeTranscriptApi.get_transcript(video_id, languages=[lang])
42
- break
43
- except Exception:
44
- continue
45
- if tr is None:
46
- tr = YouTubeTranscriptApi.get_transcript(video_id)
47
- lines = []
48
- for seg in tr:
49
- t = str(timedelta(seconds=int(seg["start"])))
50
- mmss = ":".join(t.split(":")[-2:])
51
- lines.append(f"**[{mmss}]** {seg['text']}")
52
- return "\n".join(lines)
53
 
54
  # =================================================================
55
  # Main Class
56
  # =================================================================
57
  class YouTubeDownloader:
58
  def __init__(self):
59
- # temp dirs
60
- self.temp_dir = tempfile.mkdtemp(prefix="yt_tmp_")
61
- self.temp_downloads = tempfile.mkdtemp(prefix="yt_dl_")
62
- # Downloads target
63
- self.dl_folder = Path.home() / "Downloads" / "YouTube_Downloads"
64
- self.dl_folder.mkdir(parents=True, exist_ok=True)
65
- # Gemini
 
 
 
66
  self.gemini_model = None
67
 
68
- # ───────── Gemini 설정 ─────────
69
- def configure_gemini(self, api_key: str):
 
 
70
  try:
71
  genai.configure(api_key=api_key)
72
- self.gemini_model = genai.GenerativeModel("gemini-1.5-flash-latest")
73
- return True, "✅ Gemini API configured!"
 
 
74
  except Exception as e:
75
- return False, f"❌ Gemini 설정 실패: {e}"
76
 
77
- # ───────── 쿠키 선택 + 디버그 ─────────
78
- @staticmethod
79
- def choose_cookie(ui_cookie: str | None):
80
- if ui_cookie and os.path.exists(ui_cookie):
81
- ck = ui_cookie
82
- elif DEFAULT_COOKIE_FILE.exists():
83
- ck = str(DEFAULT_COOKIE_FILE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  else:
85
- ck = None
86
- print(">>> COOKIE =", ck, "EXISTS?", os.path.exists(ck) if ck else None)
87
- return ck
88
-
89
- # ───────── URL 검증 ─────────
90
- @staticmethod
91
- def valid_url(url: str) -> bool:
92
- return bool(_YT_RE.match(url))
93
-
94
- # ───────── 숫자 포맷 ─────────
95
- @staticmethod
96
- def fmt(n: int) -> str:
97
- if n >= 1_000_000: return f"{n/1_000_000:.1f} M"
98
- if n >= 1_000: return f"{n/1_000:.1f} K"
99
- return str(n)
100
-
101
- # ───────── Scene 분석 (fallback만 간단 구현) ─────────
102
- def scene_breakdown(self, info: dict):
103
- dur = info.get("duration", 0)
104
- seg = 3 if dur <= 60 else 5 if dur <= 300 else 10
105
  scenes = []
106
- for s in range(0, dur, seg):
107
- e = min(s + seg - 1, dur)
108
- scenes.append(f"**[{s//60:02d}:{s%60:02d}-{e//60:02d}:{e%60:02d}]** …")
109
- return "\n".join(scenes[:20])
110
-
111
- # ───────── Video Info ─────────
112
- def get_info(self, url: str, ui_cookie: str | None, progress):
113
- if not self.valid_url(url):
114
- return None, "❌ URL 오류"
115
- ck = self.choose_cookie(ui_cookie)
116
- if ck is None:
117
- return None, "❌ 쿠키 파일을 찾을 수 없습니다"
118
- ydl_opts = {"quiet": True, "noplaylist": True, "cookiefile": ck}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  try:
120
- progress(0.1, desc="metadata…")
 
 
 
 
 
121
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
122
  info = ydl.extract_info(url, download=False)
123
- progress(1.0)
124
- return info, "OK"
 
 
125
  except Exception as e:
126
- return None, f"yt-dlp 오류: {e}"
127
 
128
- # ───────── Download ─────────
129
- def download(
130
- self, url: str, qual: str, audio: bool, ui_cookie: str | None, progress
 
 
 
 
 
 
 
131
  ):
132
- if not self.valid_url(url):
133
- return None, "❌ URL 오류"
134
- ck = self.choose_cookie(ui_cookie)
135
- if ck is None:
136
- return None, "❌ 쿠키 파일을 찾을 수 없습니다"
137
- ts = datetime.now().strftime("%Y%m%d_%H%M%S")
138
- ydl_opts: dict = {
139
- "outtmpl": str(Path(self.temp_downloads) / f"%(title)s_{ts}.%(ext)s"),
140
- "quiet": True,
141
- "noplaylist": True,
142
- "cookiefile": ck,
143
- }
144
- if audio:
145
- ydl_opts["format"] = "bestaudio/best"
146
- ydl_opts["postprocessors"] = [
147
- {"key": "FFmpegExtractAudio", "preferredcodec": "mp3", "preferredquality": "192"}
148
- ]
149
  else:
150
- ydl_opts["format"] = (
151
- "best[height<=720]" if qual == "720p"
152
- else "best[height<=480]" if qual == "480p"
153
- else "best[height<=1080]"
154
- )
155
  try:
156
- progress(0.1, desc="Downloading…")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
158
  ydl.extract_info(url, download=True)
159
- # temp → Downloads
160
- for f in Path(self.temp_downloads).iterdir():
161
- if ts in f.name:
162
- dest = self.dl_folder / f.name
163
- try:
164
- shutil.copy2(f, dest)
165
- saved = dest
166
- except Exception:
167
- saved = f
168
- progress(1.0)
169
- return saved, f"✅ 저장 위치: {saved}"
170
- return None, "❌ 파일을 찾을 없습니다"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  except Exception as e:
172
- return None, f"❌ 다운로드 실패: {e}"
 
 
 
 
 
 
173
 
174
- # ───────────────────────── 인스턴스 ─────────────────────────
175
- yt_dl = YouTubeDownloader()
176
 
177
- # ───────────────────────── Gradio 함수 ──────────────────────
178
- def api_config(key):
179
- ok, msg = yt_dl.configure_gemini(key.strip()) if key else (False, " API 키 필요")
 
180
  return msg, gr.update(visible=ok)
181
 
182
- def analyze(url, cfile, progress=gr.Progress()):
183
- info, err = yt_dl.get_info(url, cfile, progress)
184
- if not info:
185
- return err
186
- report = (
187
- f"**Title** : {info.get('title')}\n"
188
- f"**Duration** : {info.get('duration',0)//60}:{info.get('duration',0)%60:02d}\n"
189
- f"**Views / Likes** : {yt_dl.fmt(info.get('view_count',0))} / {yt_dl.fmt(info.get('like_count',0))}\n"
190
- f"{'-'*40}\n"
191
- f"{yt_dl.scene_breakdown(info)}"
192
- )
193
- return report
194
-
195
- def download(url, q, a, cfile, progress=gr.Progress()):
196
- path, msg = yt_dl.download(url, q, a, cfile, progress)
197
- return msg, path
198
-
199
- def transcript(url, _cfile):
200
- vid = extract_video_id(url)
201
- if not vid:
202
- return "❌ URL 오류"
203
  try:
204
- return fetch_transcript(vid)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
  except Exception as e:
206
- return f"❌ 자막 오류: {e}"
207
 
208
- # ───────────────────────── UI ───────────────────────────────
209
- def create_ui():
210
- with gr.Blocks(theme=gr.themes.Soft(), title="🎥 YouTube Video Analyzer & Downloader") as ui:
211
- gr.HTML("<h1>🎥 YouTube Video Analyzer & Downloader</h1>")
212
 
213
- # API
 
 
 
 
 
 
 
 
 
214
  with gr.Group():
 
215
  with gr.Row():
216
- api_key = gr.Textbox(type="password", label="Gemini API Key")
217
- api_btn = gr.Button("Configure")
218
- api_stat = gr.Textbox(label="API Status", value="❌ Gemini 미설정", interactive=False)
219
- api_btn.click(api_config, [api_key], [api_stat])
 
 
 
 
 
 
 
 
220
 
221
- # 입력
222
  with gr.Row():
223
- url = gr.Textbox(label="🔗 YouTube URL")
224
- cookie = gr.File(label="🍪 cookies.txt (선택)", type="filepath", file_types=[".txt"])
 
 
 
 
 
 
 
225
 
226
  with gr.Tabs():
 
 
 
 
 
 
 
 
 
 
 
227
 
228
- with gr.TabItem("📊 Analyze"):
229
- a_btn = gr.Button("Analyze", variant="primary")
230
- a_out = gr.Textbox(lines=20, label="Analysis", show_copy_button=True)
231
- a_btn.click(analyze, [url, cookie], a_out, show_progress=True)
232
-
233
- with gr.TabItem("⬇️ Download"):
234
  with gr.Row():
235
- q_dd = gr.Dropdown(["best", "720p", "480p"], value="best", label="Quality")
236
- a_cb = gr.Checkbox(label="Audio only (MP3)")
237
- d_btn = gr.Button("Download", variant="primary")
238
- d_stat = gr.Textbox(lines=5, label="Status")
239
- d_file = gr.File(label="File", visible=False)
240
-
241
- def wrap_dl(u,q,a,c,pg=gr.Progress()):
242
- msg, fp = download(u,q,a,c,pg)
243
- return msg, gr.update(value=fp, visible=bool(fp and os.path.exists(fp)))
244
- d_btn.click(wrap_dl, [url,q_dd,a_cb,cookie], [d_stat,d_file], show_progress=True)
245
-
246
- with gr.TabItem("🗒️ Transcript"):
247
- t_btn = gr.Button("Get Transcript", variant="primary")
248
- t_out = gr.Textbox(lines=30, label="Transcript", show_copy_button=True)
249
- t_btn.click(transcript, [url, cookie], t_out, show_progress=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
 
251
  gr.HTML(
252
- f"""
253
  <div style="margin-top:20px;padding:15px;background:#f0f8ff;border-left:5px solid #4285f4;border-radius:10px;">
254
- <b>Tip</b> : <code>{DEFAULT_COOKIE_FILE.name}</code> 파일을 <b>app.py</b> 옆에 두면
255
- 자동으로 쿠키가 적용됩니다.
 
256
  </div>
257
  """
258
  )
259
- return ui
260
 
261
- # ───────────────────────── Entrypoint ───────────────────────
 
 
 
262
  if __name__ == "__main__":
263
- demo = create_ui()
264
  import atexit
265
 
266
- atexit.register(lambda: shutil.rmtree(yt_dl.temp_dir, ignore_errors=True))
267
- atexit.register(lambda: shutil.rmtree(yt_dl.temp_downloads, ignore_errors=True))
268
  demo.launch(debug=True, show_error=True)
 
1
  #!/usr/bin/env python3
 
2
  """
3
  YouTube Video Analyzer & Downloader Pro
4
+ (쿠키 자동 처리 버전)
5
+
6
+ · `www.youtube.com_cookies.txt` 파일이 **app.py**와 같은 폴더에 있으면
7
+ 자동으로 사용합니다.
8
+ · Gradio UI에서 쿠키 파일을 업로드하면, 업로드된 파일이 **우선** 적용됩니다.
9
  """
10
 
11
+ # ──────────────────────────────────────────────────────────────
12
+ # 표준 라이브러리
13
+ # ──────────────────────────────────────────────────────────────
14
+ import os
15
+ import re
16
+ import json
17
+ import uuid
18
+ import shutil
19
+ import tempfile
20
+ from datetime import datetime
21
  from pathlib import Path
22
 
23
+ # ──────────────────────────────────────────────────────────────
24
+ # 외부 라이브러리
25
+ # ──────────────────────────────────────────────────────────────
26
  import gradio as gr
27
  import yt_dlp
28
  import google.generativeai as genai
29
+
30
+ # ──────────────────────────────────────────────────────────────
31
+ # 기본 쿠키 파일 경로 ― 파일명이 동일하면 자동 사용
32
+ # ──────────────────────────────────────────────────────────────
33
# Default cookie file: looked up next to this script under a fixed file name.
# The script path is resolved first so the result is absolute and does not
# depend on the working directory in effect when the path is later checked.
DEFAULT_COOKIE_FILE = Path(__file__).resolve().with_name("www.youtube.com_cookies.txt")
34
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  # =================================================================
37
  # Main Class
38
  # =================================================================
39
  class YouTubeDownloader:
40
  def __init__(self):
41
+ # 임시 디렉터리 (Gradio 호환)
42
+ self.download_dir = tempfile.mkdtemp()
43
+ self.temp_downloads = tempfile.mkdtemp(prefix="youtube_downloads_")
44
+
45
+ # 사용자 Downloads 하위 폴더
46
+ self.downloads_folder = os.path.join(
47
+ os.path.expanduser("~"), "Downloads", "YouTube_Downloads"
48
+ )
49
+ os.makedirs(self.downloads_folder, exist_ok=True)
50
+
51
  self.gemini_model = None
52
 
53
+ # ---------------------------------------------------------
54
+ # Google Gemini API
55
+ # ---------------------------------------------------------
56
def configure_gemini(self, api_key):
    """Configure the Gemini client and remember the model on this instance.

    Args:
        api_key: Key passed to ``genai.configure``.

    Returns:
        ``(True, message)`` on success, ``(False, message)`` when any
        exception is raised while configuring the client or building the
        model. The failure message includes the exception text.
    """
    try:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(model_name="gemini-1.5-flash-latest")
    except Exception as e:
        return False, f"❌ Failed to configure Gemini API: {e}"

    self.gemini_model = model
    return True, "✅ Gemini API configured successfully!"
65
 
66
+ # ---------------------------------------------------------
67
+ # 임시 디렉터리 정리
68
+ # ---------------------------------------------------------
69
def cleanup(self):
    """Remove the temporary directories created by ``__init__``.

    Each directory is handled independently: a failure to remove one is
    reported with a warning and does not prevent the other from being
    removed. Missing attributes or already-deleted paths are skipped.
    """
    for attr in ("download_dir", "temp_downloads"):
        path = getattr(self, attr, None)
        if not path or not os.path.exists(path):
            continue
        try:
            shutil.rmtree(path)
        except Exception as e:
            # Best-effort cleanup: report and carry on with the next path.
            print(f"⚠️ Warning: Could not clean up temporary directory: {e}")
77
+
78
+ # ---------------------------------------------------------
79
+ # 유튜브 URL 검증
80
+ # ---------------------------------------------------------
81
def is_valid_youtube_url(self, url):
    """Return True when *url* looks like a YouTube video URL.

    Surrounding whitespace is ignored, and non-string input (for example
    ``None``) yields False instead of raising. In addition to ``www.``,
    the ``m.`` and ``music.`` host prefixes are accepted.
    """
    if not isinstance(url, str):
        return False

    youtube_regex = re.compile(
        r"(https?://)?((www|m|music)\.)?"
        r"(youtube|youtu|youtube-nocookie)\.(com|be)/"
        r"(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})"
    )
    # match() anchors at the start, so strip pasted whitespace first.
    return youtube_regex.match(url.strip()) is not None
88
+
89
+ # ---------------------------------------------------------
90
+ # Gemini-AI 장면 분석
91
+ # ---------------------------------------------------------
92
def generate_scene_breakdown_gemini(self, video_info):
    """Ask the configured Gemini model for a timestamped scene breakdown.

    Args:
        video_info: Metadata mapping; ``duration``, ``title`` and
            ``description`` are read. Values that are missing or ``None``
            are treated as empty.

    Returns:
        A list of scene strings, each starting with a ``**[..]**:`` header
        line. Falls back to ``generate_scene_breakdown_fallback`` when no
        model is configured, when the reply contains no scene headers, or
        when the model call raises.
    """
    if not self.gemini_model:
        return self.generate_scene_breakdown_fallback(video_info)

    try:
        # "or" guards against keys that are present but set to None.
        duration = video_info.get("duration") or 0
        title = video_info.get("title") or ""
        description = (video_info.get("description") or "")[:1500]

        if not duration:
            return [
                "**[Duration Unknown]**: Unable to generate timestamped breakdown - "
                "video duration not available"
            ]

        prompt = f"""
        Analyze this YouTube video and create a highly detailed, scene-by-scene breakdown
        with precise timestamps and specific descriptions:

        Title: {title}
        Duration: {duration} seconds
        Description: {description}

        IMPORTANT INSTRUCTIONS:
        1. Create detailed scene descriptions that include:
           - Physical appearance of people (age, gender, clothing, hair, etc.)
           - Exact actions being performed
           - Dialogue or speech (include actual lines if audible, or infer probable spoken
             lines based on actions and setting; format them as "Character: line…")
           - Setting and environment details
           - Props, objects, or products being shown
           - Visual effects, text overlays, or graphics
           - Mood, tone, and atmosphere
           - Camera movements or angles (if apparent)
        2. Dialogue Emphasis:
           - Include short dialogue lines in **every scene** wherever plausible.
           - Write lines like: Character: "Actual or inferred line…"
           - If dialogue is not available, intelligently infer probable phrases
             (e.g., "Welcome!", "Try this now!", "It feels amazing!").
        3. Timestamp Guidelines:
           - For videos under 1 minute: 2-3 second segments
           - For videos 1-5 minutes: 3-5 second segments
           - For videos 5-15 minutes: 5-10 second segments
           - For videos over 15 minutes: 10-15 second segments
           - Maximum 20 scenes total for longer videos
        4. Format each scene EXACTLY like this:
           **[MM:SS-MM:SS]**: Detailed description…
        5. Write descriptions as if you're watching the video in real-time,
           noting everything visible and audible.
        """
        response = self.gemini_model.generate_content(prompt)
        reply = response.text if response else ""

        # Group the reply into scenes: a header line opens a scene and every
        # following non-header line is appended to it. Text before the first
        # header is ignored.
        scenes = []
        current_scene = ""
        for raw_line in (reply or "").split("\n"):
            line = raw_line.strip()
            if line.startswith("**[") and "]**:" in line:
                if current_scene:
                    scenes.append(current_scene.strip())
                current_scene = line
            elif current_scene:
                current_scene += "\n" + line
        if current_scene:
            scenes.append(current_scene.strip())

        if scenes:
            return scenes

    except Exception as e:
        print(f"Gemini API error: {e}")

    return self.generate_scene_breakdown_fallback(video_info)
170
+
171
+ # ---------------------------------------------------------
172
+ # Fallback 장면 분석
173
+ # ---------------------------------------------------------
174
def generate_scene_breakdown_fallback(self, video_info):
    """Build a template-based scene breakdown without calling any model.

    The video is cut into fixed-length segments (3, 5, 10 or 15 seconds
    depending on total duration), capped at 20 segments, and each segment
    gets a description from ``generate_contextual_description``.

    Args:
        video_info: Metadata mapping; ``duration``, ``title``,
            ``description`` and ``uploader`` are read. Missing or ``None``
            values are tolerated.

    Returns:
        A list of ``**[M:SS-M:SS]**: text`` strings, or a single
        "Duration Unknown" entry when no positive duration is available.
    """
    # Truncate to whole seconds so range() and the :02d format below work
    # even if the duration arrives as a float.
    duration = int(video_info.get("duration") or 0)
    title = (video_info.get("title") or "").lower()
    description = (video_info.get("description") or "").lower()
    uploader = video_info.get("uploader") or "Content creator"

    if duration <= 0:
        return ["**[Duration Unknown]**: Unable to generate timestamped breakdown"]

    if duration <= 60:
        segment_length = 3
    elif duration <= 300:
        segment_length = 5
    elif duration <= 900:
        segment_length = 10
    else:
        segment_length = 15

    num_segments = min(duration // segment_length + 1, 20)
    video_type = self.detect_video_type_detailed(title, description)

    scenes = []
    for i in range(num_segments):
        start_time = i * segment_length
        # Clamp the final segment to the end of the video.
        end_time = min(start_time + segment_length - 1, duration)
        start_fmt = f"{start_time // 60}:{start_time % 60:02d}"
        end_fmt = f"{end_time // 60}:{end_time % 60:02d}"
        desc = self.generate_contextual_description(
            i, num_segments, video_type, uploader, title
        )
        scenes.append(f"**[{start_fmt}-{end_fmt}]**: {desc}")

    return scenes
208
+
209
+ # ---------------------------------------------------------
210
+ # 비디오 유형 감지(상세)
211
+ # ---------------------------------------------------------
212
def detect_video_type_detailed(self, title, description):
    """Classify a video into a coarse content category from its text.

    Categories are checked in a fixed order and the first one with a
    matching keyword wins; ``"general"`` is returned when nothing matches.
    Keywords are matched at word starts, and keywords of three characters
    or fewer must be whole words, so "vs" no longer matches "canvas" and
    "test" no longer matches "latest".

    Args:
        title: Video title (``None`` is treated as empty).
        description: Video description (``None`` is treated as empty).

    Returns:
        One of "tutorial", "review", "vlog", "music", "entertainment",
        "news", "cooking", "fitness" or "general".
    """
    text = f"{title or ''} {description or ''}".lower()

    def mentions(keywords):
        return any(
            re.search(rf"\b{re.escape(kw)}" + (r"\b" if len(kw) <= 3 else ""), text)
            for kw in keywords
        )

    categories = (
        ("tutorial", ("tutorial", "how to", "guide", "learn", "diy")),
        ("review", ("review", "unboxing", "test", "comparison", "vs")),
        ("vlog", ("vlog", "daily", "routine", "day in")),
        ("music", ("music", "song", "cover", "lyrics")),
        ("entertainment", ("comedy", "funny", "prank", "challenge")),
        ("news", ("news", "breaking", "update", "report")),
        ("cooking", ("cooking", "recipe", "food", "kitchen")),
        ("fitness", ("workout", "fitness", "exercise", "yoga")),
    )
    for label, keywords in categories:
        if mentions(keywords):
            return label
    return "general"
231
+
232
+ # ---------------------------------------------------------
233
+ # 장면별 설명 생성
234
+ # ---------------------------------------------------------
235
def generate_contextual_description(
    self, scene_index, total_scenes, video_type, uploader, title
):
    """Return a canned description for one scene of the fallback breakdown.

    The text depends on the scene's position (first, last, or in between)
    and on ``video_type``. The subject of the sentence is guessed from the
    title: "A woman" / "A man" when the title contains the whole word
    woman/girl or man/guy, otherwise "The content creator". Whole-word
    matching prevents titles such as "human performance" from being read
    as mentioning a man.

    Args:
        scene_index: Zero-based index of the scene.
        total_scenes: Number of scenes in the breakdown.
        video_type: Category label such as "tutorial" or "vlog".
        uploader: Accepted for interface compatibility; not used.
        title: Video title (``None`` is treated as empty).

    Returns:
        A single descriptive sentence (two sentences for tutorial intros).
    """
    lowered = (title or "").lower()
    if re.search(r"\b(?:woman|girls?)\b", lowered):
        presenter_desc = "A woman"
    elif re.search(r"\b(?:man|guys?)\b", lowered):
        presenter_desc = "A man"
    else:
        presenter_desc = "The content creator"

    opening = {
        "tutorial": (
            "appears on screen, introducing themselves and the "
            "topic. They are in a well-lit workspace, wearing casual clothes."
        ),
        "vlog": (
            "greets the camera cheerfully, perhaps waving, and "
            "explains what today's vlog is about."
        ),
        "review": (
            "holds up the product to be reviewed, giving a brief "
            "overview of its features."
        ),
    }
    opening_default = (
        "starts the video with an engaging introduction to "
        "capture viewers' attention."
    )

    closing = {
        "tutorial": (
            "shows the final result, thanks viewers, and "
            "encourages them to like and subscribe."
        ),
        "vlog": "wraps up the day, sharing final thoughts and bidding farewell.",
    }
    closing_default = (
        "concludes, summarizing key points and prompting "
        "engagement through likes and comments."
    )

    middle = {
        "tutorial": (
            "demonstrates the next step, providing clear "
            "instructions with close-up shots."
        ),
        "review": (
            "examines a specific feature of the product, showing "
            "it in use and commenting on performance."
        ),
        "vlog": (
            "continues the day's activities, sharing candid "
            "moments and personal reflections."
        ),
        "cooking": (
            "prepares ingredients, chopping and mixing while "
            "explaining each step."
        ),
        "fitness": (
            "performs an exercise set, demonstrating proper form "
            "and offering tips."
        ),
    }
    middle_default = (
        "proceeds with the main content, engaging viewers through "
        "clear explanations."
    )

    # The first-scene check comes first, so a one-scene video gets an intro.
    if scene_index == 0:
        action = opening.get(video_type, opening_default)
    elif scene_index == total_scenes - 1:
        action = closing.get(video_type, closing_default)
    else:
        action = middle.get(video_type, middle_default)

    return f"{presenter_desc} {action}"
310
+
311
+ # ---------------------------------------------------------
312
+ # 비디오 유형 (간략)
313
+ # ---------------------------------------------------------
314
def detect_video_type(self, title, description):
    """Return a display label for the video's content type.

    Categories are checked in a fixed order and the first one with a
    matching keyword wins. Keywords are matched at word starts, and
    keywords of three characters or fewer must be whole words; this keeps
    "ad" from matching inside words like "homemade" or "download".

    Args:
        title: Video title (``None`` is treated as empty).
        description: Video description (``None`` is treated as empty).

    Returns:
        An emoji-prefixed label; "🎬 General Content" when nothing matches.
    """
    text = f"{title or ''} {description or ''}".lower()

    def mentions(keywords):
        return any(
            re.search(rf"\b{re.escape(kw)}" + (r"\b" if len(kw) <= 3 else ""), text)
            for kw in keywords
        )

    labels = (
        ("🎵 Music Video", ("music", "song", "album", "artist", "band", "lyrics")),
        ("📚 Tutorial/Educational", ("tutorial", "how to", "guide", "learn")),
        ("🎭 Entertainment/Comedy", ("funny", "comedy", "entertainment", "vlog")),
        ("📰 News/Information", ("news", "breaking", "report", "update")),
        ("⭐ Review/Unboxing", ("review", "unboxing", "test", "comparison")),
        ("📺 Commercial/Advertisement", ("commercial", "ad", "brand", "product")),
    )
    for label, keywords in labels:
        if mentions(keywords):
            return label
    return "🎬 General Content"
329
+
330
+ # ---------------------------------------------------------
331
+ # 배경 음악 추정
332
+ # ---------------------------------------------------------
333
def detect_background_music(self, video_info):
    """Guess a background-music label from keywords in the video title.

    This is a title heuristic only; no audio is inspected. Keywords are
    matched at word starts, and keywords of three characters or fewer must
    be whole words, so "ad" does not match inside "shadow" or "road".

    Args:
        video_info: Metadata mapping; only ``title`` is read. A missing or
            ``None`` title is treated as empty.

    Returns:
        An emoji-prefixed label; "🎵 Background Music" when nothing matches.
    """
    title = (video_info.get("title") or "").lower()

    def mentions(keywords):
        return any(
            re.search(rf"\b{re.escape(kw)}" + (r"\b" if len(kw) <= 3 else ""), title)
            for kw in keywords
        )

    labels = (
        ("🎵 Original Music/Soundtrack", ("music", "song")),
        ("🎶 Upbeat Commercial Music", ("commercial", "ad")),
        ("🔇 Minimal/No Background Music", ("tutorial", "how to")),
        ("🎼 Ambient Background Music", ("vlog", "daily")),
    )
    for label, keywords in labels:
        if mentions(keywords):
            return label
    return "🎵 Background Music"
344
+
345
+ # ---------------------------------------------------------
346
+ # 인플루언서 규모 추정
347
+ # ---------------------------------------------------------
348
def detect_influencer_status(self, video_info):
    """Return a creator-size label based on follower and view counts.

    The follower count is read from ``channel_follower_count`` (the field
    name used in yt-dlp metadata); the previously used ``channel_followers``
    key is still honoured as a fallback. Missing or ``None`` counts are
    treated as zero.

    Args:
        video_info: Metadata mapping.

    Returns:
        An emoji-prefixed label. Follower tiers are checked first; the
        view count is only consulted when no follower tier applies.
    """
    subs = (
        video_info.get("channel_follower_count")
        or video_info.get("channel_followers")
        or 0
    )
    views = video_info.get("view_count") or 0

    follower_tiers = (
        (10_000_000, "🌟 Mega Influencer (10M+)"),
        (1_000_000, "⭐ Major Influencer (1M+)"),
        (100_000, "🎯 Mid-tier Influencer (100K+)"),
        (10_000, "📈 Micro Influencer (10K+)"),
    )
    for threshold, label in follower_tiers:
        if subs > threshold:
            return label

    if views > 100_000:
        return "🔥 Viral Content Creator"
    return "👤 Regular Content Creator"
362
+
363
+ # ---------------------------------------------------------
364
+ # 숫자 포맷터
365
+ # ---------------------------------------------------------
366
def format_number(self, num):
    """Abbreviate a count with a K / M / B suffix and one decimal place.

    Falsy input (``None``, ``0``) yields ``"0"``. Values below 1,000 are
    returned via ``str``. When rounding would produce ``1000.0`` of a unit
    (for example 999,999 -> "1000.0K"), the next larger unit is used
    instead ("1.0M").
    """
    if not num:
        return "0"

    units = ((1_000_000_000, "B"), (1_000_000, "M"), (1_000, "K"))
    for idx, (threshold, suffix) in enumerate(units):
        if num >= threshold:
            scaled = round(num / threshold, 1)
            if scaled >= 1000 and idx > 0:
                # Rounded up across a unit boundary: promote to the larger unit.
                threshold, suffix = units[idx - 1]
                scaled = round(num / threshold, 1)
            return f"{scaled:.1f}{suffix}"
    return str(num)
376
+
377
+ # ---------------------------------------------------------
378
+ # 최종 리포트 생성
379
+ # ---------------------------------------------------------
380
def format_video_info(self, video_info):
    """Render the full text analysis report for one video.

    Metadata values that are missing or ``None`` (for example a hidden
    like count or an absent description) are replaced with neutral
    defaults instead of raising during formatting.

    Args:
        video_info: Metadata mapping for the video.

    Returns:
        The multi-section report as a single string, or an error line when
        ``video_info`` is empty.
    """
    if not video_info:
        return "❌ No video information available."

    title = video_info.get("title") or "Unknown"
    uploader = video_info.get("uploader") or "Unknown"
    # Whole seconds only, so the :02d format below is always valid.
    duration = int(video_info.get("duration") or 0)
    dur_str = f"{duration // 60}:{duration % 60:02d}" if duration else "Unknown"
    views = video_info.get("view_count") or 0
    likes = video_info.get("like_count") or 0
    comments = video_info.get("comment_count") or 0
    upload_date = video_info.get("upload_date") or "Unknown"
    description = video_info.get("description") or ""

    # An 8-character date is taken to be YYYYMMDD and shown as YYYY-MM-DD.
    if len(upload_date) == 8:
        upload_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:8]}"

    scenes = self.generate_scene_breakdown_gemini(video_info)
    vtype = self.detect_video_type(title, description)
    bgm = self.detect_background_music(video_info)
    creator = self.detect_influencer_status(video_info)
    engagement = (likes / views) * 100 if views else 0

    rule = "─" * 25
    banner = "=" * 50
    report_lines = [
        "🎬 YOUTUBE VIDEO ANALYSIS REPORT",
        banner,
        "",
        "📋 BASIC INFORMATION",
        rule,
        f"📹 **Title:** {title}",
        f"👤 **Uploader:** {uploader}",
        f"📅 **Upload Date:** {upload_date}",
        f"⏱️ **Duration:** {dur_str}",
        f"🆔 **Video ID:** {video_info.get('id', 'Unknown')}",
        "",
        "📊 PERFORMANCE METRICS",
        rule,
        f"👀 **Views:** {self.format_number(views)} ({views:,})",
        f"👍 **Likes:** {self.format_number(likes)} ({likes:,})",
        f"💬 **Comments:** {self.format_number(comments)} ({comments:,})",
        f"📈 **Engagement Rate:** {engagement:.2f}%",
        "",
        "🎯 CONTENT ANALYSIS",
        rule,
        f"📂 **Video Type:** {vtype}",
        f"🎵 **Background Music:** {bgm}",
        f"👑 **Creator Status:** {creator}",
        "",
        "🎬 DETAILED SCENE BREAKDOWN",
        "─" * 30,
        "\n".join(scenes),
        "",
        "📝 DESCRIPTION PREVIEW",
        rule,
        (description or "No description available")[:500],
        "...(truncated)" if len(description) > 500 else "",
        "",
        banner,
        f"📊 **Analysis completed:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        f"🤖 **AI Enhancement:** {'Gemini AI' if self.gemini_model else 'Standard Analysis'}",
    ]
    return "\n".join(report_lines).strip()
441
+
442
+ # ---------------------------------------------------------
443
+ # 메타데이터 추출
444
+ # ---------------------------------------------------------
445
def get_video_info(self, url, progress=gr.Progress(), cookiefile=None):
    """Fetch video metadata with yt-dlp without downloading the media.

    Args:
        url: YouTube URL; surrounding whitespace is ignored.
        progress: Callable used to report progress as ``(fraction, desc=...)``.
        cookiefile: Optional path to a cookie file. If it does not exist,
            ``DEFAULT_COOKIE_FILE`` is used when present; otherwise no
            cookies are passed.

    Returns:
        ``(info, message)``. ``info`` is the metadata returned by
        ``extract_info`` on success and ``None`` on any failure; the
        message describes the outcome.
    """
    if not url or not url.strip():
        return None, "❌ Please enter a YouTube URL"
    # Validate and extract using the trimmed URL so pasted whitespace is harmless.
    url = url.strip()
    if not self.is_valid_youtube_url(url):
        return None, "❌ Invalid YouTube URL format"

    # Cookie precedence: caller-supplied file -> default file -> none.
    if not (cookiefile and os.path.exists(cookiefile)):
        cookiefile = str(DEFAULT_COOKIE_FILE) if DEFAULT_COOKIE_FILE.exists() else None

    try:
        progress(0.1, desc="Initializing YouTube extractor…")
        ydl_opts = {"noplaylist": True, "extract_flat": False}
        if cookiefile:
            ydl_opts["cookiefile"] = cookiefile

        progress(0.5, desc="Extracting video metadata…")
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)

        if not info:
            # Do not report success when the extractor returned nothing.
            return None, "❌ No video information returned for this URL"

        progress(1.0, desc="✅ Analysis complete!")
        return info, "✅ Video information extracted successfully"

    except Exception as e:
        return None, f" Error: {e}"
474
 
475
+ # ---------------------------------------------------------
476
+ # 다운로드
477
+ # ---------------------------------------------------------
478
def download_video(
    self,
    url,
    quality="best",
    audio_only=False,
    progress=gr.Progress(),
    cookiefile=None,
):
    """Download a video (or its audio as MP3) and copy it to the Downloads folder.

    The file is first written into ``self.temp_downloads`` and then copied
    to ``self.downloads_folder``; a failed copy is reported in the status
    message but does not fail the download.

    Args:
        url: YouTube URL; surrounding whitespace is ignored.
        quality: "720p" or "480p" cap the height; any other value uses
            the 1080p cap.
        audio_only: When true, extract audio as 192 kbps MP3 instead.
        progress: Callable used to report progress as ``(fraction, desc=...)``.
        cookiefile: Optional path to a cookie file. If it does not exist,
            ``DEFAULT_COOKIE_FILE`` is used when present.

    Returns:
        ``(temp_file_path, status_message)`` on success, or
        ``(None, error_message)`` on failure.
    """
    if not url or not url.strip():
        return None, "❌ Please enter a YouTube URL"
    url = url.strip()
    if not self.is_valid_youtube_url(url):
        return None, "❌ Invalid YouTube URL format"

    # Cookie precedence: caller-supplied file -> default file -> none.
    if not (cookiefile and os.path.exists(cookiefile)):
        cookiefile = str(DEFAULT_COOKIE_FILE) if DEFAULT_COOKIE_FILE.exists() else None

    try:
        progress(0.1, desc="Preparing download…")
        # The timestamp is embedded in the file name and used below to
        # find the finished file again.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        ydl_opts = {
            "outtmpl": os.path.join(
                self.temp_downloads, f"%(title)s_{timestamp}.%(ext)s"
            ),
            "noplaylist": True,
        }

        if audio_only:
            ydl_opts["format"] = "bestaudio/best"
            ydl_opts["postprocessors"] = [
                {
                    "key": "FFmpegExtractAudio",
                    "preferredcodec": "mp3",
                    "preferredquality": "192",
                }
            ]
        else:
            height_caps = {
                "720p": "best[height<=720]",
                "480p": "best[height<=480]",
            }
            ydl_opts["format"] = height_caps.get(quality, "best[height<=1080]")

        if cookiefile:
            ydl_opts["cookiefile"] = cookiefile

        def hook(d):
            """Translate yt-dlp progress events into UI progress updates."""
            status = d.get("status")
            if status == "downloading":
                # The exact size may be absent or None; fall back to the
                # estimate, and to an indeterminate update if neither exists.
                total = d.get("total_bytes") or d.get("total_bytes_estimate")
                done = d.get("downloaded_bytes")
                if total and done is not None:
                    pct = min(done / total, 1.0) * 100
                    progress(0.1 + pct / 100 * 0.7, desc=f"Downloading… {pct:.1f}%")
                else:
                    progress(0.5, desc="Downloading…")
            elif status == "finished":
                progress(0.8, desc="Processing download…")

        ydl_opts["progress_hooks"] = [hook]

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.extract_info(url, download=True)

        progress(0.9, desc="Copying to Downloads folder…")

        # Locate the finished file, ignoring partial-download leftovers.
        downloaded_temp = next(
            (
                os.path.join(self.temp_downloads, name)
                for name in sorted(os.listdir(self.temp_downloads))
                if timestamp in name and not name.endswith((".part", ".ytdl"))
            ),
            None,
        )
        if not downloaded_temp:
            return None, "❌ Downloaded file not found"

        final_name = os.path.basename(downloaded_temp)
        final_path = os.path.join(self.downloads_folder, final_name)

        try:
            shutil.copy2(downloaded_temp, final_path)
            saved = True
        except Exception as e:
            # The temp file is still returned, so a failed copy is not fatal.
            print(f"Copy warning: {e}")
            saved = False

        progress(1.0, desc="✅ Download complete!")

        msg = (
            "✅ Download successful!\n"
            f"📁 Temp file: {final_name}\n"
            f"📁 Saved to: {final_path if saved else 'Copy failed'}\n"
            f"🎯 Size: {os.path.getsize(downloaded_temp)/(1024*1024):.1f} MB"
        )
        return downloaded_temp, msg

    except Exception as e:
        return None, f"❌ Download failed: {e}"
581
+
582
+
583
# =================================================================
# Helper functions for Gradio
# =================================================================
# One module-level downloader instance; the Gradio callbacks in this
# section all go through it.
downloader = YouTubeDownloader()
587
 
 
 
588
 
589
def configure_api_key(api_key):
    """Validate the API key entered in the UI and pass it to the downloader.

    Args:
        api_key: Raw text from the API-key textbox; may be empty or None.

    Returns:
        A ``(status_message, gr.update)`` pair. The update's ``visible``
        flag is False for a blank key, otherwise it mirrors the success
        flag returned by ``downloader.configure_gemini``.
    """
    cleaned_key = (api_key or "").strip()
    if not cleaned_key:
        # Nothing usable was typed: report it without calling the downloader.
        return "❌ Please enter a valid Google API key", gr.update(visible=False)

    configured, message = downloader.configure_gemini(cleaned_key)
    return message, gr.update(visible=configured)
594
 
595
+
596
def analyze_with_cookies(url, cookies_file, progress=gr.Progress()):
    """Run the video analysis for ``url`` and return a text report.

    Args:
        url: Video URL taken from the UI.
        cookies_file: Path of an uploaded cookies file, or a falsy value
            when nothing was uploaded (forwarded as None in that case).
        progress: Gradio progress tracker used to report stage updates.

    Returns:
        The formatted report on success, or a string starting with "❌"
        describing the failure. Exceptions are caught and reported as text
        rather than propagated.
    """
    try:
        progress(0.05, desc="Starting analysis…")
        video_info, message = downloader.get_video_info(
            url, progress=progress, cookiefile=cookies_file or None
        )
        if not video_info:
            return f"❌ Analysis Failed: {message}"

        progress(0.95, desc="Generating report…")
        return downloader.format_video_info(video_info)
    except Exception as e:
        # UI boundary: surface any error as a message in the report box.
        return f"❌ System Error: {e}"
609
+
610
+
611
def download_with_cookies(url, quality, audio_only, cookies_file, progress=gr.Progress()):
    """Download ``url`` through the shared downloader.

    Args:
        url: Video URL taken from the UI.
        quality: Quality choice forwarded to ``downloader.download_video``.
        audio_only: Audio-only flag forwarded to ``downloader.download_video``.
        cookies_file: Path of an uploaded cookies file, or a falsy value
            when nothing was uploaded (forwarded as None in that case).
        progress: Gradio progress tracker used to report stage updates.

    Returns:
        A ``(file_path, status)`` pair; ``file_path`` is None whenever the
        downloader returned a falsy path or an exception was caught.
    """
    try:
        progress(0.05, desc="Preparing download…")
        saved_path, status_text = downloader.download_video(
            url,
            quality,
            audio_only,
            progress=progress,
            cookiefile=cookies_file or None,
        )
        if not saved_path:
            # Normalise any falsy path to None for the caller.
            return None, status_text
        return saved_path, status_text
    except Exception as e:
        # UI boundary: surface any error as a status message.
        return None, f"❌ System Error: {e}"
621
 
 
 
 
 
622
 
623
+ # =================================================================
624
+ # Gradio UI
625
+ # =================================================================
626
def create_interface():
    """Build the Gradio Blocks UI and wire up its event handlers.

    The layout consists of an API-key section, a shared URL / cookies row,
    and two tabs (video analysis and video download).

    Returns:
        The assembled ``gr.Blocks`` object. It is not launched here.
    """

    def api_status_message(api_key):
        # configure_api_key returns (message, visibility update), but the
        # button below has the status textbox as its only output. Forward
        # just the message so the number of returned values matches the
        # number of outputs.
        return configure_api_key(api_key)[0]

    def wrapped_download(url, q, a, cfile, progress=gr.Progress()):
        # Reveal the file component only when a file actually exists on disk.
        fp, st = download_with_cookies(url, q, a, cfile, progress)
        if fp and os.path.exists(fp):
            return st, gr.update(value=fp, visible=True)
        return st, gr.update(visible=False)

    with gr.Blocks(
        theme=gr.themes.Soft(), title="🎥 YouTube Video Analyzer & Downloader Pro"
    ) as iface:
        gr.HTML("<h1>🎥 YouTube Video Analyzer & Downloader Pro</h1>")

        # API section
        with gr.Group():
            gr.HTML("<h3>🔑 Google Gemini API Configuration</h3>")
            with gr.Row():
                api_key_in = gr.Textbox(
                    label="🔑 Google API Key",
                    placeholder="Paste your Google API key…",
                    type="password",
                )
                api_btn = gr.Button("🔧 Configure API", variant="secondary")
            api_status = gr.Textbox(
                label="API Status",
                value="❌ Gemini API not configured – Using fallback analysis",
                interactive=False,
                lines=1,
            )

        # Main UI: inputs shared by both tabs
        with gr.Row():
            url_in = gr.Textbox(
                label="🔗 YouTube URL",
                placeholder="Paste YouTube video URL…",
            )
            cookies_in = gr.File(
                label="🍪 Upload cookies.txt (optional)",
                file_types=[".txt"],
                type="filepath",
            )

        with gr.Tabs():
            with gr.TabItem("📊 Video Analysis"):
                analyze_btn = gr.Button("🔍 Analyze Video", variant="primary")
                analysis_out = gr.Textbox(
                    label="📊 Analysis Report", lines=25, show_copy_button=True
                )
                analyze_btn.click(
                    fn=analyze_with_cookies,
                    inputs=[url_in, cookies_in],
                    outputs=analysis_out,
                    show_progress=True,
                )

            with gr.TabItem("⬇️ Video Download"):
                with gr.Row():
                    quality_dd = gr.Dropdown(
                        choices=["best", "720p", "480p"],
                        value="best",
                        label="📺 Quality",
                    )
                    audio_cb = gr.Checkbox(label="🎵 Audio only (MP3)")
                download_btn = gr.Button("⬇️ Download Video", variant="primary")
                dl_status = gr.Textbox(
                    label="📥 Download Status", lines=5, show_copy_button=True
                )
                dl_file = gr.File(label="📁 Downloaded File", visible=False)

                download_btn.click(
                    fn=wrapped_download,
                    inputs=[url_in, quality_dd, audio_cb, cookies_in],
                    outputs=[dl_status, dl_file],
                    show_progress=True,
                )

        # API button behaviour
        api_btn.click(
            fn=api_status_message,
            inputs=[api_key_in],
            outputs=[api_status],
        )

        gr.HTML(
            """
<div style="margin-top:20px;padding:15px;background:#f0f8ff;border-left:5px solid #4285f4;border-radius:10px;">
<h3>💡 Tip: 쿠키 파일 자동 사용</h3>
<p><code>www.youtube.com_cookies.txt</code> 파일을 <strong>app.py</strong>와 같은
폴더에 두면 자동으로 사용됩니다. 주기적으로 새 파일로 교체해 주세요.</p>
</div>
"""
        )
    return iface
718
 
719
+
720
# =================================================================
# Entrypoint
# =================================================================
if __name__ == "__main__":
    import atexit

    demo = create_interface()

    # Register the downloader's cleanup method to run at interpreter exit.
    atexit.register(downloader.cleanup)

    demo.launch(debug=True, show_error=True)