fantaxy commited on
Commit
50f280e
·
verified ·
1 Parent(s): 53d3f76

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -951
app.py CHANGED
@@ -1,959 +1,35 @@
1
- #!/usr/bin/env python3
2
- """
3
- YouTube Video Analyzer & Downloader Pro
4
- (쿠키 자동 처리 버전)
5
-
6
- · `www.youtube.com_cookies.txt` 파일이 **app.py**와 같은 폴더에 있으면
7
- 자동으로 사용합니다.
8
- · Gradio UI에서 쿠키 파일을 업로드하면, 업로드된 파일이 **우선** 적용됩니다.
9
- """
10
-
11
- # ──────────────────────────────────────────────────────────────
12
- # 표준 라이브러리
13
- # ──────────────────────────────────────────────────────────────
14
  import os
15
- import re
16
- import json
17
- import uuid
18
- import shutil
19
- import tempfile
20
- from datetime import datetime
21
- from pathlib import Path
22
-
23
- # ──────────────────────────────────────────────────────────────
24
- # 외부 라이브러리
25
- # ──────────────────────────────────────────────────────────────
26
- import gradio as gr
27
- import yt_dlp
28
- import google.generativeai as genai
29
-
30
- # ──────────────────────────────────────────────────────────────
31
- # 기본 쿠키 파일 경로 ― 파일명이 동일하면 자동 사용
32
- # ──────────────────────────────────────────────────────────────
33
- DEFAULT_COOKIE_FILE = Path(__file__).with_name("www.youtube.com_cookies.txt")
34
-
35
-
36
- # =================================================================
37
- # Main Class
38
- # =================================================================
39
- class YouTubeDownloader:
40
- def __init__(self):
41
- # 임시 디렉터리 (Gradio 호환)
42
- self.download_dir = tempfile.mkdtemp()
43
- self.temp_downloads = tempfile.mkdtemp(prefix="youtube_downloads_")
44
-
45
- # 사용자 Downloads 하위 폴더
46
- self.downloads_folder = os.path.join(
47
- os.path.expanduser("~"), "Downloads", "YouTube_Downloads"
48
- )
49
- os.makedirs(self.downloads_folder, exist_ok=True)
50
-
51
- self.gemini_model = None
52
-
53
- # ---------------------------------------------------------
54
- # Google Gemini API
55
- # ---------------------------------------------------------
56
- def configure_gemini(self, api_key):
57
- try:
58
- genai.configure(api_key=api_key)
59
- self.gemini_model = genai.GenerativeModel(
60
- model_name="gemini-1.5-flash-latest"
61
- )
62
- return True, "✅ Gemini API configured successfully!"
63
- except Exception as e:
64
- return False, f"❌ Failed to configure Gemini API: {e}"
65
-
66
- # ---------------------------------------------------------
67
- # 임시 디렉터리 정리
68
- # ---------------------------------------------------------
69
- def cleanup(self):
70
- try:
71
- if hasattr(self, "download_dir") and os.path.exists(self.download_dir):
72
- shutil.rmtree(self.download_dir)
73
- if hasattr(self, "temp_downloads") and os.path.exists(self.temp_downloads):
74
- shutil.rmtree(self.temp_downloads)
75
- except Exception as e:
76
- print(f"⚠️ Warning: Could not clean up temporary directory: {e}")
77
-
78
- # ---------------------------------------------------------
79
- # 유튜브 URL 검증
80
- # ---------------------------------------------------------
81
- def is_valid_youtube_url(self, url):
82
- youtube_regex = re.compile(
83
- r"(https?://)?(www\.)?"
84
- r"(youtube|youtu|youtube-nocookie)\.(com|be)/"
85
- r"(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})"
86
- )
87
- return youtube_regex.match(url) is not None
88
-
89
- # ---------------------------------------------------------
90
- # Gemini-AI 음성/대사 추출 (한글 우선)
91
- # ---------------------------------------------------------
92
- def generate_scene_breakdown_gemini(self, video_info):
93
- if not self.gemini_model:
94
- return self.generate_scene_breakdown_fallback(video_info)
95
-
96
- try:
97
- duration = video_info.get("duration", 0)
98
- title = video_info.get("title", "")
99
- description = video_info.get("description", "")[:1500]
100
-
101
- if not duration:
102
- return {
103
- "korean": ["**[재생시간 알 수 없음]**: 비디오 재생시간을 확인할 수 없어 타임스탬프를 생성할 수 없습니다"],
104
- "english": ["**[Duration Unknown]**: Unable to generate timestamped breakdown - video duration not available"]
105
- }
106
-
107
- prompt = f"""
108
- 이 YouTube 비디오의 음성/대사를 타임스탬프별로 추출해주세요.
109
-
110
- 제목: {title}
111
- 재생시간: {duration}초
112
- 설명: {description}
113
-
114
- 매우 중요한 지침:
115
- 1. 실제 영상에서 들리는 대사, 내레이션, 음성을 그대로 적어주세요
116
- 2. 장면 설명이 아닌 실제 음성 내용만 작성하세요
117
- 3. 음성이 없는 부분은 (...) 또는 (배경음악) 등으로 표시
118
- 4. 타임스탬프 가이드라인:
119
- - 대사나 내레이션이 시작하고 끝나는 지점 기준
120
- - 연속된 대사는 하나로 묶어서 표시
121
- - 최대한 자연스러운 단위로 구분
122
- 5. 형식:
123
- **[MM:SS-MM:SS]**: "실제 대사나 내레이션 내용"
124
- **[MM:SS-MM:SS]**: (배경음악) 또는 (...장면 전환...)
125
- 6. 모든 음성 내용을 빠짐없이 적어주세요
126
- 7. 자막이나 화면에 표시된 텍스트도 포함하세요
127
- 8. 장면 설명은 절대 하지 마세요. 오직 음성과 텍스트만 추출하세요.
128
-
129
- 예시:
130
- **[00:00-00:05]**: "안녕하세요. 오늘은 미륵산에서 발견된 백제 유적에 대해 알아보겠습니다."
131
- **[00:05-00:08]**: (배경음악)
132
- **[00:08-00:15]**: "미륵사지를 품고 있는 익산 미륵산의 정상부에서 백제시대에 만든 것으로 추정되는 저수조가 발굴됐습니다."
133
- **[00:15-00:18]**: (인터뷰 준비 중...)
134
- **[00:18-00:25]**: [이도학 교수] "이번 발굴은 백제 역사 연구에 중요한 전환점이 될 것입니다."
135
-
136
- 뉴스의 경우:
137
- - 앵커나 기자의 멘트는 그대로 적기
138
- - 인터뷰는 [인터뷰이 이름] "내용" 형식으로
139
- - 자막은 [자막] 내용 형식으로
140
- """
141
- response = self.gemini_model.generate_content(prompt)
142
-
143
- korean_scenes = []
144
- if response and response.text:
145
- lines = response.text.split("\n")
146
- current_scene = ""
147
-
148
- for line in lines:
149
- line = line.strip()
150
- if line.startswith("**[") and "]**:" in line:
151
- if current_scene:
152
- korean_scenes.append(current_scene.strip())
153
- current_scene = line
154
- elif current_scene:
155
- current_scene += "\n" + line
156
-
157
- if current_scene:
158
- korean_scenes.append(current_scene.strip())
159
-
160
- # 영어 번역 생성
161
- english_prompt = f"""
162
- Translate the following Korean speech/dialogue transcription to English, maintaining the exact same timestamps.
163
- Translate ONLY the actual speech content, not descriptions:
164
-
165
- {chr(10).join(korean_scenes)}
166
-
167
- Important rules:
168
- - Keep the format exactly the same: **[MM:SS-MM:SS]**: "English translation of speech"
169
- - For non-speech parts like (배경음악), translate as (background music)
170
- - For (...) keep as is
171
- - For interview tags like [이도학 교수], translate as [Professor Lee Do-hak]
172
- - For [자막], translate as [Subtitle]
173
- - Keep quotation marks for actual speech
174
- - Do NOT add any scene descriptions or explanations
175
- """
176
- english_response = self.gemini_model.generate_content(english_prompt)
177
-
178
- english_scenes = []
179
- if english_response and english_response.text:
180
- lines = english_response.text.split("\n")
181
- current_scene = ""
182
-
183
- for line in lines:
184
- line = line.strip()
185
- if line.startswith("**[") and "]**:" in line:
186
- if current_scene:
187
- english_scenes.append(current_scene.strip())
188
- current_scene = line
189
- elif current_scene:
190
- current_scene += "\n" + line
191
-
192
- if current_scene:
193
- english_scenes.append(current_scene.strip())
194
-
195
- # 중국어 번역 생성
196
- chinese_prompt = f"""
197
- 将以下韩语语音/对话转录翻译成中文,保持完全相同的时间戳。
198
- 只翻译实际的语音内容,不要描述:
199
-
200
- {chr(10).join(korean_scenes)}
201
-
202
- 重要规则:
203
- - 保持格式完全相同:**[MM:SS-MM:SS]**: "语音的中文翻译"
204
- - 对于像(배경음악)这样的非语音部分,翻译为(背景音乐)
205
- - 对于(...)保持原样
206
- - 对于像[이도학 교수]这样的采访标签,翻译为[李道学教授]
207
- - 对于[자막],翻译为[字幕]
208
- - 保留实际语音的引号
209
- - 不要添加任何场景描述或解释
210
- """
211
- chinese_response = self.gemini_model.generate_content(chinese_prompt)
212
-
213
- chinese_scenes = []
214
- if chinese_response and chinese_response.text:
215
- lines = chinese_response.text.split("\n")
216
- current_scene = ""
217
-
218
- for line in lines:
219
- line = line.strip()
220
- if line.startswith("**[") and "]**:" in line:
221
- if current_scene:
222
- chinese_scenes.append(current_scene.strip())
223
- current_scene = line
224
- elif current_scene:
225
- current_scene += "\n" + line
226
-
227
- if current_scene:
228
- chinese_scenes.append(current_scene.strip())
229
-
230
- # 태국어 번역 생성
231
- thai_prompt = f"""
232
- แปลคำบรรยายเสียง/บทสนทนาภาษาเกาหลีต่อไปนี้เป็นภาษาไทย โดยคงรูปแบบเวลาเดิมไว้
233
- แปลเฉพาะเนื้อหาเสียงจริงเท่านั้น ไม่ต้องบรรยาย:
234
-
235
- {chr(10).join(korean_scenes)}
236
-
237
- กฎสำคัญ:
238
- - คงรูปแบบเดิมไว้: **[MM:SS-MM:SS]**: "คำแปลภาษาไทยของเสียงพูด"
239
- - สำหรับส่วนที่ไม่ใช่เสียงพูด เช่น (배경음악) แปลเป็น (เพลงประกอบ)
240
- - สำหรับ (...) ให้คงเดิม
241
- - สำหรับป้ายสัมภาษณ์ เช่น [이도학 교수] แปลเป็น [ศาสตราจารย์ อี โด-ฮัก]
242
- - สำหรับ [자막] แปลเป็น [คำบรรยาย]
243
- - คงเครื่องหมายคำพูดสำหรับเสียงพูดจริง
244
- - ไม่ต้องเพิ่มคำบรรยายฉากหรือคำอธิบายใดๆ
245
- """
246
- thai_response = self.gemini_model.generate_content(thai_prompt)
247
-
248
- thai_scenes = []
249
- if thai_response and thai_response.text:
250
- lines = thai_response.text.split("\n")
251
- current_scene = ""
252
-
253
- for line in lines:
254
- line = line.strip()
255
- if line.startswith("**[") and "]**:" in line:
256
- if current_scene:
257
- thai_scenes.append(current_scene.strip())
258
- current_scene = line
259
- elif current_scene:
260
- current_scene += "\n" + line
261
-
262
- if current_scene:
263
- thai_scenes.append(current_scene.strip())
264
-
265
- # 러시아어 번역 생성
266
- russian_prompt = f"""
267
- Переведите следующую корейскую транскрипцию речи/диалога на русский язык, сохраняя точно такие же временные метки.
268
- Переводите ТОЛЬКО фактическое содержание речи, а не описания:
269
-
270
- {chr(10).join(korean_scenes)}
271
-
272
- Важные правила:
273
- - Сохраняйте формат точно таким же: **[MM:SS-MM:SS]**: "Русский перевод речи"
274
- - Для частей без речи, таких как (배경음악), переведите как (фоновая музыка)
275
- - Для (...) оставьте как есть
276
- - Для тегов интервью, таких как [이도학 교수], переведите как [Профессор Ли До Хак]
277
- - Для [자막] переведите как [Субтитры]
278
- - Сохраняйте кавычки для фактической речи
279
- - НЕ добавляйте никаких описаний сцен или объяснений
280
- """
281
- russian_response = self.gemini_model.generate_content(russian_prompt)
282
-
283
- russian_scenes = []
284
- if russian_response and russian_response.text:
285
- lines = russian_response.text.split("\n")
286
- current_scene = ""
287
-
288
- for line in lines:
289
- line = line.strip()
290
- if line.startswith("**[") and "]**:" in line:
291
- if current_scene:
292
- russian_scenes.append(current_scene.strip())
293
- current_scene = line
294
- elif current_scene:
295
- current_scene += "\n" + line
296
-
297
- if current_scene:
298
- russian_scenes.append(current_scene.strip())
299
-
300
- return {
301
- "korean": korean_scenes if korean_scenes else self.generate_scene_breakdown_fallback(video_info)["korean"],
302
- "english": english_scenes if english_scenes else self.generate_scene_breakdown_fallback(video_info)["english"],
303
- "chinese": chinese_scenes if chinese_scenes else [],
304
- "thai": thai_scenes if thai_scenes else [],
305
- "russian": russian_scenes if russian_scenes else []
306
- }
307
-
308
- except Exception as e:
309
- print(f"Gemini API error: {e}")
310
- return self.generate_scene_breakdown_fallback(video_info)
311
 
312
- # ---------------------------------------------------------
313
- # Fallback 음성/대사 추출 (5개 언어)
314
- # ---------------------------------------------------------
315
- def generate_scene_breakdown_fallback(self, video_info):
316
- duration = video_info.get("duration", 0)
317
- title = video_info.get("title", "").lower()
318
- description = video_info.get("description", "").lower()
319
- uploader = video_info.get("uploader", "콘텐츠 제작자")
320
-
321
- if not duration:
322
- return {
323
- "korean": ["**[재생시간 알 수 없음]**: 타임스탬프를 생성할 수 없습니다"],
324
- "english": ["**[Duration Unknown]**: Unable to generate timestamped breakdown"],
325
- "chinese": ["**[持续时间未知]**: 无法生成带时间戳的分解"],
326
- "thai": ["**[ไม่ทราบระยะเวลา]**: ไม่สามารถสร้างการแบ่งส่วนตามเวลาได้"],
327
- "russian": ["**[Продолжительность неизвестна]**: Невозможно создать временную разбивку"]
328
- }
329
-
330
- # 비디오 타입에 따른 대사 템플릿
331
- if duration <= 60:
332
- segment_length = 10
333
- elif duration <= 300:
334
- segment_length = 15
335
- elif duration <= 900:
336
- segment_length = 20
337
- else:
338
- segment_length = 30
339
-
340
- korean_scenes = []
341
- english_scenes = []
342
- chinese_scenes = []
343
- thai_scenes = []
344
- russian_scenes = []
345
 
346
- num_segments = min(duration // segment_length + 1, 20)
347
-
348
- for i in range(num_segments):
349
- start_time = i * segment_length
350
- end_time = min(start_time + segment_length - 1, duration)
351
-
352
- start_fmt = f"{start_time//60}:{start_time%60:02d}"
353
- end_fmt = f"{end_time//60}:{end_time%60:02d}"
354
-
355
- # 음성 추출이 불가능한 경우의 기본 템플릿
356
- if i == 0:
357
- korean_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (음성 추출을 위해 Gemini API가 필요합니다. 인트로 부분...)")
358
- english_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (Gemini API required for speech extraction. Intro section...)")
359
- chinese_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (需要Gemini API进行语音提取。介绍部分...)")
360
- thai_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (ต้องใช้ Gemini API สำหรับการดึงเสียง ส่วนเปิดตัว...)")
361
- russian_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (Требуется Gemini API для извлечения речи. Вступительная часть...)")
362
- elif i == num_segments - 1:
363
- korean_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (아웃트로 부분...)")
364
- english_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (Outro section...)")
365
- chinese_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (结尾部分...)")
366
- thai_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (ส่วนจบ...)")
367
- russian_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (Заключительная часть...)")
368
- else:
369
- korean_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (본문 내용...)")
370
- english_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (Main content...)")
371
- chinese_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (主要内容...)")
372
- thai_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (เนื้อหาหลัก...)")
373
- russian_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (Основное содержание...)")
374
-
375
- return {
376
- "korean": korean_scenes,
377
- "english": english_scenes,
378
- "chinese": chinese_scenes,
379
- "thai": thai_scenes,
380
- "russian": russian_scenes
381
- }
382
-
383
- # ---------------------------------------------------------
384
- # 비디오 유형 감지(상세)
385
- # ---------------------------------------------------------
386
- def detect_video_type_detailed(self, title, description):
387
- text = (title + " " + description).lower()
388
- if any(w in text for w in ["tutorial", "how to", "guide", "learn", "diy"]):
389
- return "tutorial"
390
- if any(w in text for w in ["review", "unboxing", "test", "comparison", "vs"]):
391
- return "review"
392
- if any(w in text for w in ["vlog", "daily", "routine", "day in"]):
393
- return "vlog"
394
- if any(w in text for w in ["music", "song", "cover", "lyrics"]):
395
- return "music"
396
- if any(w in text for w in ["comedy", "funny", "prank", "challenge"]):
397
- return "entertainment"
398
- if any(w in text for w in ["news", "breaking", "update", "report"]):
399
- return "news"
400
- if any(w in text for w in ["cooking", "recipe", "food", "kitchen"]):
401
- return "cooking"
402
- if any(w in text for w in ["workout", "fitness", "exercise", "yoga"]):
403
- return "fitness"
404
- return "general"
405
-
406
- # ---------------------------------------------------------
407
- # 장면별 설명 생성 (한글)
408
- # ---------------------------------------------------------
409
- def generate_contextual_description_korean(
410
- self, scene_index, total_scenes, video_type, uploader, title
411
- ):
412
- presenter_desc = "콘텐츠 제작자"
413
- if "woman" in title or "girl" in title or "여성" in title or "여자" in title:
414
- presenter_desc = "여성 진행자"
415
- elif "man" in title or "guy" in title or "남성" in title or "남자" in title:
416
- presenter_desc = "남성 진행자"
417
-
418
- if scene_index == 0:
419
- if video_type == "tutorial":
420
- return f"{presenter_desc}가 화면에 등장하여 자신을 소개하고 주제를 설명합니다. 밝은 조명의 작업 공간에서 캐주얼한 옷을 입고 있습니다."
421
- if video_type == "vlog":
422
- return f"{presenter_desc}가 카메라를 향해 밝게 인사하며 오늘의 브이로그 주제를 설명합니다."
423
- if video_type == "review":
424
- return f"{presenter_desc}가 리뷰할 제품을 들어 보이며 간단한 특징을 소개합니다."
425
- return f"{presenter_desc}가 시청자의 관심을 끌기 위한 매력적인 인트로로 영상을 시작합니다."
426
-
427
- if scene_index == total_scenes - 1:
428
- if video_type == "tutorial":
429
- return f"{presenter_desc}가 최종 결과물을 보여주고 시청자에게 감사 인사를 전하며 좋아요와 구독을 부탁합니다."
430
- if video_type == "vlog":
431
- return f"{presenter_desc}가 하루를 마무리하며 최종 생각을 공유하고 작별 인사를 합니다."
432
- return f"{presenter_desc}가 주요 내용을 요약하고 좋아요와 댓글을 통한 참여를 독려하며 마무리합니다."
433
-
434
- if video_type == "tutorial":
435
- return f"{presenter_desc}가 다음 단계를 시연하며 클로즈업 샷과 함께 명확한 지침을 제공합니다."
436
- if video_type == "review":
437
- return f"{presenter_desc}가 제품의 특정 기능을 살펴보며 사용 모습을 보여주고 성능에 대해 설명합니다."
438
- if video_type == "vlog":
439
- return f"{presenter_desc}가 일상 활동을 계속하며 솔직한 순간과 개인적인 생각을 공유합니다."
440
- if video_type == "cooking":
441
- return f"{presenter_desc}가 재료를 준비하며 자르고 섞으면서 각 단계를 설명합니다."
442
- if video_type == "fitness":
443
- return f"{presenter_desc}가 운동 세트를 수행하며 올바른 자세를 시연하고 팁을 제공합니다."
444
- return f"{presenter_desc}가 명확한 설명을 통해 시청자와 소통하며 주요 콘텐츠를 진행합니다."
445
-
446
- # ---------------------------------------------------------
447
- # 장면별 설명 생성 (영어 - 기존 메서드 유지)
448
- # ---------------------------------------------------------
449
- def generate_contextual_description(
450
- self, scene_index, total_scenes, video_type, uploader, title
451
- ):
452
- presenter_desc = "The content creator"
453
- if "woman" in title or "girl" in title:
454
- presenter_desc = "A woman"
455
- elif "man" in title or "guy" in title:
456
- presenter_desc = "A man"
457
-
458
- if scene_index == 0:
459
- if video_type == "tutorial":
460
- return (
461
- f"{presenter_desc} appears on screen, introducing themselves and the "
462
- f"topic. They are in a well-lit workspace, wearing casual clothes."
463
- )
464
- if video_type == "vlog":
465
- return (
466
- f"{presenter_desc} greets the camera cheerfully, perhaps waving, and "
467
- f"explains what today's vlog is about."
468
- )
469
- if video_type == "review":
470
- return (
471
- f"{presenter_desc} holds up the product to be reviewed, giving a brief "
472
- f"overview of its features."
473
- )
474
- return (
475
- f"{presenter_desc} starts the video with an engaging introduction to "
476
- f"capture viewers' attention."
477
- )
478
-
479
- if scene_index == total_scenes - 1:
480
- if video_type == "tutorial":
481
- return (
482
- f"{presenter_desc} shows the final result, thanks viewers, and "
483
- f"encourages them to like and subscribe."
484
- )
485
- if video_type == "vlog":
486
- return (
487
- f"{presenter_desc} wraps up the day, sharing final thoughts and "
488
- f"bidding farewell."
489
- )
490
- return (
491
- f"{presenter_desc} concludes, summarizing key points and prompting "
492
- f"engagement through likes and comments."
493
- )
494
-
495
- if video_type == "tutorial":
496
- return (
497
- f"{presenter_desc} demonstrates the next step, providing clear "
498
- f"instructions with close-up shots."
499
- )
500
- if video_type == "review":
501
- return (
502
- f"{presenter_desc} examines a specific feature of the product, showing "
503
- f"it in use and commenting on performance."
504
- )
505
- if video_type == "vlog":
506
- return (
507
- f"{presenter_desc} continues the day's activities, sharing candid "
508
- f"moments and personal reflections."
509
- )
510
- if video_type == "cooking":
511
- return (
512
- f"{presenter_desc} prepares ingredients, chopping and mixing while "
513
- f"explaining each step."
514
- )
515
- if video_type == "fitness":
516
- return (
517
- f"{presenter_desc} performs an exercise set, demonstrating proper form "
518
- f"and offering tips."
519
- )
520
- return (
521
- f"{presenter_desc} proceeds with the main content, engaging viewers through "
522
- f"clear explanations."
523
- )
524
-
525
- # ---------------------------------------------------------
526
- # 비디오 유형 (간략)
527
- # ---------------------------------------------------------
528
- def detect_video_type(self, title, description):
529
- text = (title + " " + description).lower()
530
- if any(w in text for w in ["music", "song", "album", "artist", "band", "lyrics"]):
531
- return "🎵 Music Video"
532
- if any(w in text for w in ["tutorial", "how to", "guide", "learn"]):
533
- return "📚 Tutorial/Educational"
534
- if any(w in text for w in ["funny", "comedy", "entertainment", "vlog"]):
535
- return "🎭 Entertainment/Comedy"
536
- if any(w in text for w in ["news", "breaking", "report", "update"]):
537
- return "📰 News/Information"
538
- if any(w in text for w in ["review", "unboxing", "test", "comparison"]):
539
- return "⭐ Review/Unboxing"
540
- if any(w in text for w in ["commercial", "ad", "brand", "product"]):
541
- return "📺 Commercial/Advertisement"
542
- return "🎬 General Content"
543
-
544
- # ---------------------------------------------------------
545
- # 배경 음악 추정
546
- # ---------------------------------------------------------
547
- def detect_background_music(self, video_info):
548
- title = video_info.get("title", "").lower()
549
- if "music" in title or "song" in title:
550
- return "🎵 Original Music/Soundtrack"
551
- if "commercial" in title or "ad" in title:
552
- return "🎶 Upbeat Commercial Music"
553
- if "tutorial" in title or "how to" in title:
554
- return "🔇 Minimal/No Background Music"
555
- if "vlog" in title or "daily" in title:
556
- return "🎼 Ambient Background Music"
557
- return "🎵 Background Music"
558
-
559
- # ---------------------------------------------------------
560
- # 인플루언서 규모 추정
561
- # ---------------------------------------------------------
562
- def detect_influencer_status(self, video_info):
563
- subs = video_info.get("channel_followers", 0)
564
- views = video_info.get("view_count", 0)
565
- if subs > 10_000_000:
566
- return "🌟 Mega Influencer (10M+)"
567
- if subs > 1_000_000:
568
- return "⭐ Major Influencer (1M+)"
569
- if subs > 100_000:
570
- return "🎯 Mid-tier Influencer (100K+)"
571
- if subs > 10_000:
572
- return "📈 Micro Influencer (10K+)"
573
- if views > 100_000:
574
- return "🔥 Viral Content Creator"
575
- return "👤 Regular Content Creator"
576
-
577
- # ---------------------------------------------------------
578
- # 숫자 포맷터
579
- # ---------------------------------------------------------
580
- def format_number(self, num):
581
- if not num:
582
- return "0"
583
- if num >= 1_000_000_000:
584
- return f"{num/1_000_000_000:.1f}B"
585
- if num >= 1_000_000:
586
- return f"{num/1_000_000:.1f}M"
587
- if num >= 1_000:
588
- return f"{num/1_000:.1f}K"
589
- return str(num)
590
-
591
- # ---------------------------------------------------------
592
- # 최종 리포트 생성 (5개 언어)
593
- # ---------------------------------------------------------
594
- def format_video_info(self, video_info):
595
- if not video_info:
596
- return "❌ No video information available."
597
-
598
- title = video_info.get("title", "Unknown")
599
- uploader = video_info.get("uploader", "Unknown")
600
- duration = video_info.get("duration", 0)
601
- dur_str = f"{duration//60}:{duration%60:02d}" if duration else "Unknown"
602
- views = video_info.get("view_count", 0)
603
- likes = video_info.get("like_count", 0)
604
- comments = video_info.get("comment_count", 0)
605
- upload_date = video_info.get("upload_date", "Unknown")
606
-
607
- if len(upload_date) == 8:
608
- upload_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:8]}"
609
-
610
- scene_data = self.generate_scene_breakdown_gemini(video_info)
611
- korean_scenes = scene_data.get("korean", [])
612
- english_scenes = scene_data.get("english", [])
613
- chinese_scenes = scene_data.get("chinese", [])
614
- thai_scenes = scene_data.get("thai", [])
615
- russian_scenes = scene_data.get("russian", [])
616
 
617
- vtype = self.detect_video_type(title, video_info.get("description", ""))
618
- bgm = self.detect_background_music(video_info)
619
- creator = self.detect_influencer_status(video_info)
620
- engagement = (likes / views) * 100 if views else 0
621
-
622
- report = f"""
623
- 🎬 YOUTUBE VIDEO ANALYSIS REPORT
624
- {'='*50}
625
-
626
- 📋 기본 정보 / BASIC INFORMATION
627
- {'─'*25}
628
- 📹 **제목/Title:** {title}
629
- 👤 **업로더/Uploader:** {uploader}
630
- 📅 **업로드 날짜/Upload Date:** {upload_date}
631
- ⏱️ **재생시간/Duration:** {dur_str}
632
- 🆔 **비디오 ID/Video ID:** {video_info.get('id', 'Unknown')}
633
-
634
- 📊 성과 지표 / PERFORMANCE METRICS
635
- {'─'*25}
636
- 👀 **조회수/Views:** {self.format_number(views)} ({views:,})
637
- 👍 **좋아요/Likes:** {self.format_number(likes)} ({likes:,})
638
- 💬 **댓글/Comments:** {self.format_number(comments)} ({comments:,})
639
- 📈 **참여율/Engagement Rate:** {engagement:.2f}%
640
-
641
- 🎯 콘텐츠 분석 / CONTENT ANALYSIS
642
- {'─'*25}
643
- 📂 **비디오 유형/Video Type:** {vtype}
644
- 🎵 **배경음악/Background Music:** {bgm}
645
- 👑 **제작자 상태/Creator Status:** {creator}
646
-
647
- 🎙️ 음성/대사 추출 (한국어) / SPEECH/DIALOGUE EXTRACTION (KOREAN)
648
- {'─'*30}
649
- {chr(10).join(korean_scenes)}
650
-
651
- 🎙️ 음성/대사 추출 (영어) / SPEECH/DIALOGUE EXTRACTION (ENGLISH)
652
- {'─'*30}
653
- {chr(10).join(english_scenes)}
654
-
655
- 🎙️ 音频/对话提取 (中文) / SPEECH/DIALOGUE EXTRACTION (CHINESE)
656
- {'─'*30}
657
- {chr(10).join(chinese_scenes) if chinese_scenes else "(중국어 번역 없음 / No Chinese translation available)"}
658
-
659
- 🎙️ การดึงเสียง/บทสนทนา (ไทย) / SPEECH/DIALOGUE EXTRACTION (THAI)
660
- {'─'*30}
661
- {chr(10).join(thai_scenes) if thai_scenes else "(태국어 번역 없음 / No Thai translation available)"}
662
-
663
- 🎙️ Извлечение речи/диалога (Русский) / SPEECH/DIALOGUE EXTRACTION (RUSSIAN)
664
- {'─'*30}
665
- {chr(10).join(russian_scenes) if russian_scenes else "(러시아어 번역 없음 / No Russian translation available)"}
666
-
667
- {'='*50}
668
- 📊 **분석 완료/Analysis completed:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
669
- 🤖 **AI 강화/AI Enhancement:** {'Gemini AI' if self.gemini_model else 'Standard Analysis'}
670
- """
671
- return report.strip()
672
-
673
- # ---------------------------------------------------------
674
- # 메타데이터 추출
675
- # ---------------------------------------------------------
676
- def get_video_info(self, url, progress=gr.Progress(), cookiefile=None):
677
- if not url or not url.strip():
678
- return None, "❌ Please enter a YouTube URL"
679
- if not self.is_valid_youtube_url(url):
680
- return None, "❌ Invalid YouTube URL format"
681
-
682
- # 쿠키 선택 순서: UI 업로드 → 기본 쿠키 → None
683
- if cookiefile and os.path.exists(cookiefile):
684
- cookiefile = cookiefile
685
- elif DEFAULT_COOKIE_FILE.exists():
686
- cookiefile = str(DEFAULT_COOKIE_FILE)
687
- else:
688
- cookiefile = None
689
-
690
- try:
691
- progress(0.1, desc="Initializing YouTube extractor…")
692
- ydl_opts = {"noplaylist": True, "extract_flat": False}
693
- if cookiefile:
694
- ydl_opts["cookiefile"] = cookiefile
695
-
696
- progress(0.5, desc="Extracting video metadata…")
697
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
698
- info = ydl.extract_info(url, download=False)
699
-
700
- progress(1.0, desc="✅ Analysis complete!")
701
- return info, "✅ Video information extracted successfully"
702
-
703
- except Exception as e:
704
- return None, f"❌ Error: {e}"
705
-
706
- # ---------------------------------------------------------
707
- # 다운로드
708
- # ---------------------------------------------------------
709
- def download_video(
710
- self,
711
- url,
712
- quality="best",
713
- audio_only=False,
714
- progress=gr.Progress(),
715
- cookiefile=None,
716
- ):
717
- if not url or not url.strip():
718
- return None, "❌ Please enter a YouTube URL"
719
- if not self.is_valid_youtube_url(url):
720
- return None, "❌ Invalid YouTube URL format"
721
-
722
- # 쿠키 선택 순서
723
- if cookiefile and os.path.exists(cookiefile):
724
- cookiefile = cookiefile
725
- elif DEFAULT_COOKIE_FILE.exists():
726
- cookiefile = str(DEFAULT_COOKIE_FILE)
727
- else:
728
- cookiefile = None
729
-
730
  try:
731
- progress(0.1, desc="Preparing download…")
732
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
733
-
734
- ydl_opts = {
735
- "outtmpl": os.path.join(
736
- self.temp_downloads, f"%(title)s_{timestamp}.%(ext)s"
737
- ),
738
- "noplaylist": True,
739
- }
740
-
741
- if audio_only:
742
- ydl_opts["format"] = "bestaudio/best"
743
- ydl_opts["postprocessors"] = [
744
- {
745
- "key": "FFmpegExtractAudio",
746
- "preferredcodec": "mp3",
747
- "preferredquality": "192",
748
- }
749
- ]
750
- else:
751
- if quality == "720p":
752
- ydl_opts["format"] = "best[height<=720]"
753
- elif quality == "480p":
754
- ydl_opts["format"] = "best[height<=480]"
755
- else: # "best"
756
- ydl_opts["format"] = "best[height<=1080]"
757
-
758
- if cookiefile:
759
- ydl_opts["cookiefile"] = cookiefile
760
-
761
- # 진행률 훅
762
- def hook(d):
763
- if d["status"] == "downloading":
764
- if "total_bytes" in d:
765
- pct = d["downloaded_bytes"] / d["total_bytes"] * 100
766
- progress(0.1 + pct / 100 * 0.7, desc=f"Downloading… {pct:.1f}%")
767
- else:
768
- progress(0.5, desc="Downloading…")
769
- elif d["status"] == "finished":
770
- progress(0.8, desc="Processing download…")
771
-
772
- ydl_opts["progress_hooks"] = [hook]
773
-
774
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
775
- ydl.extract_info(url, download=True)
776
-
777
- progress(0.9, desc="Copying to Downloads folder…")
778
-
779
- # temp 디렉터리에서 파일 찾기
780
- downloaded_temp = None
781
- for f in os.listdir(self.temp_downloads):
782
- if timestamp in f:
783
- downloaded_temp = os.path.join(self.temp_downloads, f)
784
- break
785
-
786
- if not downloaded_temp:
787
- return None, "❌ Downloaded file not found"
788
-
789
- final_name = os.path.basename(downloaded_temp)
790
- final_path = os.path.join(self.downloads_folder, final_name)
791
-
792
- try:
793
- shutil.copy2(downloaded_temp, final_path)
794
- saved = True
795
- except Exception as e:
796
- print(f"Copy warning: {e}")
797
- saved = False
798
- final_path = "File kept only in temp folder"
799
-
800
- progress(1.0, desc="✅ Download complete!")
801
-
802
- msg = (
803
- "✅ Download successful!\n"
804
- f"📁 Temp file: {os.path.basename(downloaded_temp)}\n"
805
- f"📁 Saved to: {final_path if saved else 'Copy failed'}\n"
806
- f"🎯 Size: {os.path.getsize(downloaded_temp)/(1024*1024):.1f} MB"
807
- )
808
- return downloaded_temp, msg
809
-
810
- except Exception as e:
811
- return None, f"❌ Download failed: {e}"
812
-
813
-
814
- # =================================================================
815
- # Helper functions for Gradio
816
- # =================================================================
817
- downloader = YouTubeDownloader()
818
-
819
-
820
- def configure_api_key(api_key):
821
- if not api_key or not api_key.strip():
822
- return "❌ Please enter a valid Google API key", gr.update(visible=False)
823
- ok, msg = downloader.configure_gemini(api_key.strip())
824
- return msg, gr.update(visible=ok)
825
-
826
-
827
- def analyze_with_cookies(url, cookies_file, progress=gr.Progress()):
828
- try:
829
- progress(0.05, desc="Starting analysis…")
830
- cookiefile = cookies_file if cookies_file else None
831
- info, msg = downloader.get_video_info(
832
- url, progress=progress, cookiefile=cookiefile
833
- )
834
- if info:
835
- progress(0.95, desc="Generating report…")
836
- return downloader.format_video_info(info)
837
- return f"❌ Analysis Failed: {msg}"
838
- except Exception as e:
839
- return f"❌ System Error: {e}"
840
-
841
-
842
- def download_with_cookies(url, quality, audio_only, cookies_file, progress=gr.Progress()):
843
- try:
844
- progress(0.05, desc="Preparing download…")
845
- cookiefile = cookies_file if cookies_file else None
846
- file_path, status = downloader.download_video(
847
- url, quality, audio_only, progress=progress, cookiefile=cookiefile
848
- )
849
- return (file_path, status) if file_path else (None, status)
850
  except Exception as e:
851
- return None, f" System Error: {e}"
852
-
853
-
854
- # =================================================================
855
- # Gradio UI
856
- # =================================================================
857
- def create_interface():
858
- with gr.Blocks(
859
- theme=gr.themes.Soft(), title="🎥 YouTube Video Analyzer & Downloader Pro"
860
- ) as iface:
861
- gr.HTML("<h1>🎥 YouTube Video Analyzer & Downloader Pro</h1>")
862
-
863
- # API 섹션
864
- with gr.Group():
865
- gr.HTML("<h3>🔑 Google Gemini API Configuration</h3>")
866
- with gr.Row():
867
- api_key_in = gr.Textbox(
868
- label="🔑 Google API Key",
869
- placeholder="Paste your Google API key…",
870
- type="password",
871
- )
872
- api_btn = gr.Button("🔧 Configure API", variant="secondary")
873
- api_status = gr.Textbox(
874
- label="API Status",
875
- value="❌ Gemini API not configured – Using fallback analysis",
876
- interactive=False,
877
- lines=1,
878
- )
879
-
880
- # 메인 UI
881
- with gr.Row():
882
- url_in = gr.Textbox(
883
- label="🔗 YouTube URL",
884
- placeholder="Paste YouTube video URL…",
885
- )
886
- cookies_in = gr.File(
887
- label="🍪 Upload cookies.txt (optional)",
888
- file_types=[".txt"],
889
- type="filepath",
890
- )
891
 
892
- with gr.Tabs():
893
- with gr.TabItem("📊 Video Analysis"):
894
- analyze_btn = gr.Button("🔍 Analyze Video", variant="primary")
895
- analysis_out = gr.Textbox(
896
- label="📊 Analysis Report", lines=25, show_copy_button=True
897
- )
898
- analyze_btn.click(
899
- fn=analyze_with_cookies,
900
- inputs=[url_in, cookies_in],
901
- outputs=analysis_out,
902
- show_progress=True,
903
- )
904
-
905
- with gr.TabItem("⬇️ Video Download"):
906
- with gr.Row():
907
- quality_dd = gr.Dropdown(
908
- choices=["best", "720p", "480p"],
909
- value="best",
910
- label="📺 Quality",
911
- )
912
- audio_cb = gr.Checkbox(label="🎵 Audio only (MP3)")
913
- download_btn = gr.Button("⬇️ Download Video", variant="primary")
914
- dl_status = gr.Textbox(
915
- label="📥 Download Status", lines=5, show_copy_button=True
916
- )
917
- dl_file = gr.File(label="📁 Downloaded File", visible=False)
918
-
919
- def wrapped_download(url, q, a, cfile, progress=gr.Progress()):
920
- fp, st = download_with_cookies(url, q, a, cfile, progress)
921
- if fp and os.path.exists(fp):
922
- return st, gr.update(value=fp, visible=True)
923
- return st, gr.update(visible=False)
924
-
925
- download_btn.click(
926
- fn=wrapped_download,
927
- inputs=[url_in, quality_dd, audio_cb, cookies_in],
928
- outputs=[dl_status, dl_file],
929
- show_progress=True,
930
- )
931
-
932
- # API 버튼 동작
933
- api_btn.click(
934
- fn=configure_api_key,
935
- inputs=[api_key_in],
936
- outputs=[api_status],
937
- )
938
-
939
- gr.HTML(
940
- """
941
- <div style="margin-top:20px;padding:15px;background:#f0f8ff;border-left:5px solid #4285f4;border-radius:10px;">
942
- <h3>💡 Tip: 쿠키 파일 자동 사용</h3>
943
- <p><code>www.youtube.com_cookies.txt</code> 파일을 <strong>app.py</strong>와 같은
944
- 폴더에 두면 자동으로 사용됩니다. 주기적으로 새 파일로 교체해 주세요.</p>
945
- </div>
946
- """
947
- )
948
- return iface
949
-
950
-
951
- # =================================================================
952
- # Entrypoint
953
- # =================================================================
954
  if __name__ == "__main__":
955
- demo = create_interface()
956
- import atexit
957
-
958
- atexit.register(downloader.cleanup)
959
- demo.launch(debug=True, show_error=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ import sys
3
+ import streamlit as st
4
+ from tempfile import NamedTemporaryFile
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
+ def main():
7
+ try:
8
+ # Get the code from secrets
9
+ code = os.environ.get("MAIN_CODE")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
+ if not code:
12
+ st.error("⚠️ The application code wasn't found in secrets. Please add the MAIN_CODE secret.")
13
+ return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
+ # Create a temporary Python file
16
+ with NamedTemporaryFile(suffix='.py', delete=False, mode='w') as tmp:
17
+ tmp.write(code)
18
+ tmp_path = tmp.name
19
+
20
+ # Execute the code
21
+ exec(compile(code, tmp_path, 'exec'), globals())
22
+
23
+ # Clean up the temporary file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  try:
25
+ os.unlink(tmp_path)
26
+ except:
27
+ pass
28
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  except Exception as e:
30
+ st.error(f"⚠️ Error loading or executing the application: {str(e)}")
31
+ import traceback
32
+ st.code(traceback.format_exc())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  if __name__ == "__main__":
35
+ main()