|
|
|
""" |
|
YouTube Video Analyzer & Downloader Pro |
|
(쿠키 자동 처리 버전) |
|
|
|
· `www.youtube.com_cookies.txt` 파일이 **app.py**와 같은 폴더에 있으면 |
|
자동으로 사용합니다. |
|
· Gradio UI에서 쿠키 파일을 업로드하면, 업로드된 파일이 **우선** 적용됩니다. |
|
""" |
|
|
|
|
|
|
|
|
|
import os |
|
import re |
|
import json |
|
import uuid |
|
import shutil |
|
import tempfile |
|
from datetime import datetime |
|
from pathlib import Path |
|
|
|
|
|
|
|
|
|
import gradio as gr |
|
import yt_dlp |
|
import google.generativeai as genai |
|
|
|
|
|
|
|
|
|
# Cookie file that is used automatically when it sits next to this script.
DEFAULT_COOKIE_FILE = Path(__file__).parent / "www.youtube.com_cookies.txt"
|
|
|
|
|
|
|
|
|
|
|
class YouTubeDownloader: |
|
def __init__(self):
    """Set up the working directories and start without a Gemini model.

    Two temporary directories are created (``download_dir`` and
    ``temp_downloads``), plus a ``YouTube_Downloads`` folder under the
    user's ``~/Downloads`` directory, which is created if missing.
    """
    # Scratch space; both directories are removed again by cleanup().
    self.download_dir = tempfile.mkdtemp()
    self.temp_downloads = tempfile.mkdtemp(prefix="youtube_downloads_")

    # Destination that finished downloads are copied into.
    home_dir = os.path.expanduser("~")
    self.downloads_folder = os.path.join(home_dir, "Downloads", "YouTube_Downloads")
    os.makedirs(self.downloads_folder, exist_ok=True)

    # Stays None until configure_gemini() succeeds.
    self.gemini_model = None
|
|
|
|
|
|
|
|
|
def configure_gemini(self, api_key):
    """Configure the Gemini client and store the model on the instance.

    Args:
        api_key: Google API key passed to ``genai.configure``.

    Returns:
        tuple: ``(True, success_message)`` when the model object was
        created, otherwise ``(False, error_message)``.
    """
    try:
        genai.configure(api_key=api_key)
        self.gemini_model = genai.GenerativeModel(
            model_name="gemini-1.5-flash-latest"
        )
    except Exception as err:
        outcome = (False, f"❌ Failed to configure Gemini API: {err}")
    else:
        outcome = (True, "✅ Gemini API configured successfully!")
    return outcome
|
|
|
|
|
|
|
|
|
def cleanup(self):
    """Remove the temporary directories created by ``__init__``.

    Each directory is removed independently, so a failure on one does not
    leave the other behind. Failures are reported with a printed warning;
    this method does not raise.
    """
    for attr in ("download_dir", "temp_downloads"):
        path = getattr(self, attr, None)
        if not path or not os.path.exists(path):
            continue
        try:
            shutil.rmtree(path)
        except Exception as e:
            print(f"⚠️ Warning: Could not clean up temporary directory: {e}")
|
|
|
|
|
|
|
|
|
def is_valid_youtube_url(self, url):
    """Return True when ``url`` looks like a YouTube video URL.

    Surrounding whitespace is ignored and the ``www.``, ``m.`` and
    ``music.`` host prefixes are accepted. Non-string input (including
    ``None``) is reported as invalid instead of raising.

    Args:
        url: Candidate URL.

    Returns:
        bool: Whether the start of the URL matches the YouTube pattern.
    """
    if not isinstance(url, str):
        return False

    youtube_regex = re.compile(
        r"(https?://)?(www\.|m\.|music\.)?"
        r"(youtube|youtu|youtube-nocookie)\.(com|be)/"
        r"(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})"
    )
    # match() anchors at the start only, so trailing query parameters are fine.
    return youtube_regex.match(url.strip()) is not None
|
|
|
|
|
|
|
|
|
def generate_scene_breakdown_gemini(self, video_info):
    """Build a timestamped speech breakdown with Gemini, in five languages.

    The Korean breakdown is requested first, from the video's title,
    duration and the first 1500 characters of its description. It is then
    translated into English, Chinese, Thai and Russian with one request per
    language. A failed translation only empties that language; it no longer
    discards the Korean result.

    Args:
        video_info: Metadata dict; the keys ``duration``, ``title`` and
            ``description`` are read.

    Returns:
        dict: Lists of ``**[MM:SS-MM:SS]**: ...`` strings keyed by language.
        When Gemini is not configured, or the Korean request fails, the
        result of ``generate_scene_breakdown_fallback`` is returned. When
        the duration is unknown only ``korean`` and ``english`` are present.
    """
    if not self.gemini_model:
        return self.generate_scene_breakdown_fallback(video_info)

    def parse_scenes(response):
        """Split a Gemini reply into one string per timestamped entry."""
        scenes = []
        if not (response and response.text):
            return scenes
        current = ""
        for raw_line in response.text.split("\n"):
            line = raw_line.strip()
            if line.startswith("**[") and "]**:" in line:
                if current:
                    scenes.append(current.strip())
                current = line
            elif current:
                # Continuation of the entry opened by the last timestamp line;
                # text before the first timestamp is ignored.
                current += "\n" + line
        if current:
            scenes.append(current.strip())
        return scenes

    try:
        duration = video_info.get("duration", 0)
        title = video_info.get("title") or ""
        # The description can be None; guard before slicing.
        description = (video_info.get("description") or "")[:1500]

        if not duration:
            return {
                "korean": ["**[재생시간 알 수 없음]**: 비디오 재생시간을 확인할 수 없어 타임스탬프를 생성할 수 없습니다"],
                "english": ["**[Duration Unknown]**: Unable to generate timestamped breakdown - video duration not available"]
            }

        prompt = f"""
이 YouTube 비디오의 음성/대사를 타임스탬프별로 추출해주세요.

제목: {title}
재생시간: {duration}초
설명: {description}

매우 중요한 지침:
1. 실제 영상에서 들리는 대사, 내레이션, 음성을 그대로 적어주세요
2. 장면 설명이 아닌 실제 음성 내용만 작성하세요
3. 음성이 없는 부분은 (...) 또는 (배경음악) 등으로 표시
4. 타임스탬프 가이드라인:
- 대사나 내레이션이 시작하고 끝나는 지점 기준
- 연속된 대사는 하나로 묶어서 표시
- 최대한 자연스러운 단위로 구분
5. 형식:
**[MM:SS-MM:SS]**: "실제 대사나 내레이션 내용"
**[MM:SS-MM:SS]**: (배경음악) 또는 (...장면 전환...)
6. 모든 음성 내용을 빠짐없이 적어주세요
7. 자막이나 화면에 표시된 텍스트도 포함하세요
8. 장면 설명은 절대 하지 마세요. 오직 음성과 텍스트만 추출하세요.

예시:
**[00:00-00:05]**: "안녕하세요. 오늘은 미륵산에서 발견된 백제 유적에 대해 알아보겠습니다."
**[00:05-00:08]**: (배경음악)
**[00:08-00:15]**: "미륵사지를 품고 있는 익산 미륵산의 정상부에서 백제시대에 만든 것으로 추정되는 저수조가 발굴됐습니다."
**[00:15-00:18]**: (인터뷰 준비 중...)
**[00:18-00:25]**: [이도학 교수] "이번 발굴은 백제 역사 연구에 중요한 전환점이 될 것입니다."

뉴스의 경우:
- 앵커나 기자의 멘트는 그대로 적기
- 인터뷰는 [인터뷰이 이름] "내용" 형식으로
- 자막은 [자막] 내용 형식으로
"""
        korean_scenes = parse_scenes(self.gemini_model.generate_content(prompt))
    except Exception as e:
        print(f"Gemini API error: {e}")
        return self.generate_scene_breakdown_fallback(video_info)

    if not korean_scenes:
        # Nothing to translate: skip the four extra API calls.
        fallback = self.generate_scene_breakdown_fallback(video_info)
        return {
            "korean": fallback["korean"],
            "english": fallback["english"],
            "chinese": [],
            "thai": [],
            "russian": [],
        }

    transcript = "\n".join(korean_scenes)
    translation_prompts = {
        "english": f"""
Translate the following Korean speech/dialogue transcription to English, maintaining the exact same timestamps.
Translate ONLY the actual speech content, not descriptions:

{transcript}

Important rules:
- Keep the format exactly the same: **[MM:SS-MM:SS]**: "English translation of speech"
- For non-speech parts like (배경음악), translate as (background music)
- For (...) keep as is
- For interview tags like [이도학 교수], translate as [Professor Lee Do-hak]
- For [자막], translate as [Subtitle]
- Keep quotation marks for actual speech
- Do NOT add any scene descriptions or explanations
""",
        "chinese": f"""
将以下韩语语音/对话转录翻译成中文,保持完全相同的时间戳。
只翻译实际的语音内容,不要描述:

{transcript}

重要规则:
- 保持格式完全相同:**[MM:SS-MM:SS]**: "语音的中文翻译"
- 对于像(배경음악)这样的非语音部分,翻译为(背景音乐)
- 对于(...)保持原样
- 对于像[이도학 교수]这样的采访标签,翻译为[李道学教授]
- 对于[자막],翻译为[字幕]
- 保留实际语音的引号
- 不要添加任何场景描述或解释
""",
        "thai": f"""
แปลคำบรรยายเสียง/บทสนทนาภาษาเกาหลีต่อไปนี้เป็นภาษาไทย โดยคงรูปแบบเวลาเดิมไว้
แปลเฉพาะเนื้อหาเสียงจริงเท่านั้น ไม่ต้องบรรยาย:

{transcript}

กฎสำคัญ:
- คงรูปแบบเดิมไว้: **[MM:SS-MM:SS]**: "คำแปลภาษาไทยของเสียงพูด"
- สำหรับส่วนที่ไม่ใช่เสียงพูด เช่น (배경음악) แปลเป็น (เพลงประกอบ)
- สำหรับ (...) ให้คงเดิม
- สำหรับป้ายสัมภาษณ์ เช่น [이도학 교수] แปลเป็น [ศาสตราจารย์ อี โด-ฮัก]
- สำหรับ [자막] แปลเป็น [คำบรรยาย]
- คงเครื่องหมายคำพูดสำหรับเสียงพูดจริง
- ไม่ต้องเพิ่มคำบรรยายฉากหรือคำอธิบายใดๆ
""",
        "russian": f"""
Переведите следующую корейскую транскрипцию речи/диалога на русский язык, сохраняя точно такие же временные метки.
Переводите ТОЛЬКО фактическое содержание речи, а не описания:

{transcript}

Важные правила:
- Сохраняйте формат точно таким же: **[MM:SS-MM:SS]**: "Русский перевод речи"
- Для частей без речи, таких как (배경음악), переведите как (фоновая музыка)
- Для (...) оставьте как есть
- Для тегов интервью, таких как [이도학 교수], переведите как [Профессор Ли До Хак]
- Для [자막] переведите как [Субтитры]
- Сохраняйте кавычки для фактической речи
- НЕ добавляйте никаких описаний сцен или объяснений
""",
    }

    translations = {}
    for language, translation_prompt in translation_prompts.items():
        try:
            translations[language] = parse_scenes(
                self.gemini_model.generate_content(translation_prompt)
            )
        except Exception as e:
            # Keep the Korean result; only this language ends up empty.
            print(f"Gemini API error ({language} translation): {e}")
            translations[language] = []

    english_scenes = translations["english"]
    if not english_scenes:
        english_scenes = self.generate_scene_breakdown_fallback(video_info)["english"]

    return {
        "korean": korean_scenes,
        "english": english_scenes,
        "chinese": translations["chinese"],
        "thai": translations["thai"],
        "russian": translations["russian"],
    }
|
|
|
|
|
|
|
|
|
def generate_scene_breakdown_fallback(self, video_info):
    """Build a placeholder timestamped breakdown without calling Gemini.

    The video is cut into equal segments (10/15/20/30 seconds depending on
    its length). When that would give more than 20 segments, the segments
    are lengthened so that 20 of them span the whole video. The first
    segment is labelled as intro, the last as outro, the rest as main
    content.

    Args:
        video_info: Metadata dict; only ``duration`` (seconds) is read.

    Returns:
        dict: Lists of ``**[M:SS-M:SS]**: ...`` strings under the keys
        ``korean``, ``english``, ``chinese``, ``thai`` and ``russian``.
    """
    try:
        # Truncate to whole seconds so range() and ":02d" work on floats.
        duration = int(video_info.get("duration") or 0)
    except (TypeError, ValueError):
        duration = 0

    if duration <= 0:
        return {
            "korean": ["**[재생시간 알 수 없음]**: 타임스탬프를 생성할 수 없습니다"],
            "english": ["**[Duration Unknown]**: Unable to generate timestamped breakdown"],
            "chinese": ["**[持续时间未知]**: 无法生成带时间戳的分解"],
            "thai": ["**[ไม่ทราบระยะเวลา]**: ไม่สามารถสร้างการแบ่งส่วนตามเวลาได้"],
            "russian": ["**[Продолжительность неизвестна]**: Невозможно создать временную разбивку"]
        }

    if duration <= 60:
        segment_length = 10
    elif duration <= 300:
        segment_length = 15
    elif duration <= 900:
        segment_length = 20
    else:
        segment_length = 30

    max_segments = 20
    # Ceiling division: an exact multiple must not add an empty last segment.
    num_segments = -(-duration // segment_length)
    if num_segments > max_segments:
        # Stretch the segments so the breakdown still reaches the end.
        segment_length = -(-duration // max_segments)
        num_segments = -(-duration // segment_length)

    # Per language: (intro text, main text, outro text).
    labels = {
        "korean": (
            "(음성 추출을 위해 Gemini API가 필요합니다. 인트로 부분...)",
            "(본문 내용...)",
            "(아웃트로 부분...)",
        ),
        "english": (
            "(Gemini API required for speech extraction. Intro section...)",
            "(Main content...)",
            "(Outro section...)",
        ),
        "chinese": (
            "(需要Gemini API进行语音提取。介绍部分...)",
            "(主要内容...)",
            "(结尾部分...)",
        ),
        "thai": (
            "(ต้องใช้ Gemini API สำหรับการดึงเสียง ส่วนเปิดตัว...)",
            "(เนื้อหาหลัก...)",
            "(ส่วนจบ...)",
        ),
        "russian": (
            "(Требуется Gemini API для извлечения речи. Вступительная часть...)",
            "(Основное содержание...)",
            "(Заключительная часть...)",
        ),
    }
    scenes = {language: [] for language in labels}

    for i in range(num_segments):
        is_last = i == num_segments - 1
        start_time = i * segment_length
        # The last segment always runs to the end of the video.
        end_time = duration if is_last else start_time + segment_length - 1

        start_fmt = f"{start_time//60}:{start_time%60:02d}"
        end_fmt = f"{end_time//60}:{end_time%60:02d}"

        # A single-segment video is labelled as intro, as before.
        if i == 0:
            slot = 0
        elif is_last:
            slot = 2
        else:
            slot = 1

        for language, texts in labels.items():
            scenes[language].append(f"**[{start_fmt}-{end_fmt}]**: {texts[slot]}")

    return scenes
|
|
|
|
|
|
|
|
|
def detect_video_type_detailed(self, title, description):
    """Classify a video into a short type label from its title/description.

    Categories are checked in a fixed order and the first one with a
    matching keyword wins. Keywords of three letters or fewer ("vs", "diy")
    must appear as whole words (an optional plural "s" is allowed), because
    as plain substrings they match unrelated words such as "canvas".

    Args:
        title: Video title; ``None`` is treated as empty.
        description: Video description; ``None`` is treated as empty.

    Returns:
        str: One of ``tutorial``, ``review``, ``vlog``, ``music``,
        ``entertainment``, ``news``, ``cooking``, ``fitness``, ``general``.
    """
    text = f"{title or ''} {description or ''}".lower()

    def mentions(keyword):
        if len(keyword) <= 3:
            return re.search(rf"\b{re.escape(keyword)}s?\b", text) is not None
        return keyword in text

    categories = (
        ("tutorial", ("tutorial", "how to", "guide", "learn", "diy")),
        ("review", ("review", "unboxing", "test", "comparison", "vs")),
        ("vlog", ("vlog", "daily", "routine", "day in")),
        ("music", ("music", "song", "cover", "lyrics")),
        ("entertainment", ("comedy", "funny", "prank", "challenge")),
        ("news", ("news", "breaking", "update", "report")),
        ("cooking", ("cooking", "recipe", "food", "kitchen")),
        ("fitness", ("workout", "fitness", "exercise", "yoga")),
    )
    for label, keywords in categories:
        if any(mentions(w) for w in keywords):
            return label
    return "general"
|
|
|
|
|
|
|
|
|
def generate_contextual_description_korean(
    self, scene_index, total_scenes, video_type, uploader, title
):
    """Return a canned Korean sentence describing one scene of a video.

    The sentence depends on where the scene sits (first, last, or in
    between) and on ``video_type``. The presenter wording is chosen from
    gender hints found in ``title``. ``uploader`` is accepted but not used.

    Args:
        scene_index: Zero-based index of the scene.
        total_scenes: Number of scenes in the breakdown.
        video_type: Type label such as ``tutorial`` or ``vlog``.
        uploader: Unused.
        title: Video title searched for presenter hints.

    Returns:
        str: The Korean description.
    """
    if any(hint in title for hint in ("woman", "girl", "여성", "여자")):
        presenter_desc = "여성 진행자"
    elif any(hint in title for hint in ("man", "guy", "남성", "남자")):
        presenter_desc = "남성 진행자"
    else:
        presenter_desc = "콘텐츠 제작자"

    # Each table maps a video type to the sentence that follows the presenter.
    if scene_index == 0:
        options = (
            ("tutorial", "가 화면에 등장하여 자신을 소개하고 주제를 설명합니다. 밝은 조명의 작업 공간에서 캐주얼한 옷을 입고 있습니다."),
            ("vlog", "가 카메라를 향해 밝게 인사하며 오늘의 브이로그 주제를 설명합니다."),
            ("review", "가 리뷰할 제품을 들어 보이며 간단한 특징을 소개합니다."),
        )
        default = "가 시청자의 관심을 끌기 위한 매력적인 인트로로 영상을 시작합니다."
    elif scene_index == total_scenes - 1:
        options = (
            ("tutorial", "가 최종 결과물을 보여주고 시청자에게 감사 인사를 전하며 좋아요와 구독을 부탁합니다."),
            ("vlog", "가 하루를 마무리하며 최종 생각을 공유하고 작별 인사를 합니다."),
        )
        default = "가 주요 내용을 요약하고 좋아요와 댓글을 통한 참여를 독려하며 마무리합니다."
    else:
        options = (
            ("tutorial", "가 다음 단계를 시연하며 클로즈업 샷과 함께 명확한 지침을 제공합니다."),
            ("review", "가 제품의 특정 기능을 살펴보며 사용 모습을 보여주고 성능에 대해 설명합니다."),
            ("vlog", "가 일상 활동을 계속하며 솔직한 순간과 개인적인 생각을 공유합니다."),
            ("cooking", "가 재료를 준비하며 자르고 섞으면서 각 단계를 설명합니다."),
            ("fitness", "가 운동 세트를 수행하며 올바른 자세를 시연하고 팁을 제공합니다."),
        )
        default = "가 명확한 설명을 통해 시청자와 소통하며 주요 콘텐츠를 진행합니다."

    ending = next((text for kind, text in options if video_type == kind), default)
    return f"{presenter_desc}{ending}"
|
|
|
|
|
|
|
|
|
def generate_contextual_description(
    self, scene_index, total_scenes, video_type, uploader, title
):
    """Return a canned English sentence describing one scene of a video.

    The sentence depends on where the scene sits (first, last, or in
    between) and on ``video_type``. The presenter wording is chosen from
    gender hints found in ``title``. ``uploader`` is accepted but not used.

    Args:
        scene_index: Zero-based index of the scene.
        total_scenes: Number of scenes in the breakdown.
        video_type: Type label such as ``tutorial`` or ``vlog``.
        uploader: Unused.
        title: Video title searched for presenter hints.

    Returns:
        str: The English description.
    """
    # "woman" is tested first because it also contains "man".
    if "woman" in title or "girl" in title:
        presenter_desc = "A woman"
    elif "man" in title or "guy" in title:
        presenter_desc = "A man"
    else:
        presenter_desc = "The content creator"

    # Each table maps a video type to the text that follows the presenter.
    if scene_index == 0:
        options = (
            ("tutorial", " appears on screen, introducing themselves and the topic. They are in a well-lit workspace, wearing casual clothes."),
            ("vlog", " greets the camera cheerfully, perhaps waving, and explains what today's vlog is about."),
            ("review", " holds up the product to be reviewed, giving a brief overview of its features."),
        )
        default = " starts the video with an engaging introduction to capture viewers' attention."
    elif scene_index == total_scenes - 1:
        options = (
            ("tutorial", " shows the final result, thanks viewers, and encourages them to like and subscribe."),
            ("vlog", " wraps up the day, sharing final thoughts and bidding farewell."),
        )
        default = " concludes, summarizing key points and prompting engagement through likes and comments."
    else:
        options = (
            ("tutorial", " demonstrates the next step, providing clear instructions with close-up shots."),
            ("review", " examines a specific feature of the product, showing it in use and commenting on performance."),
            ("vlog", " continues the day's activities, sharing candid moments and personal reflections."),
            ("cooking", " prepares ingredients, chopping and mixing while explaining each step."),
            ("fitness", " performs an exercise set, demonstrating proper form and offering tips."),
        )
        default = " proceeds with the main content, engaging viewers through clear explanations."

    ending = next((text for kind, text in options if video_type == kind), default)
    return f"{presenter_desc}{ending}"
|
|
|
|
|
|
|
|
|
def detect_video_type(self, title, description):
    """Return a display label for the kind of video.

    Categories are checked in a fixed order and the first one with a
    matching keyword wins. Keywords of three letters or fewer ("ad") must
    appear as whole words (an optional plural "s" is allowed); as a plain
    substring "ad" matched words such as "road" or "download".

    Args:
        title: Video title; ``None`` is treated as empty.
        description: Video description; ``None`` is treated as empty.

    Returns:
        str: An emoji-prefixed label, ``"🎬 General Content"`` when nothing
        matches.
    """
    text = f"{title or ''} {description or ''}".lower()

    def mentions(keyword):
        if len(keyword) <= 3:
            return re.search(rf"\b{re.escape(keyword)}s?\b", text) is not None
        return keyword in text

    categories = (
        ("🎵 Music Video", ("music", "song", "album", "artist", "band", "lyrics")),
        ("📚 Tutorial/Educational", ("tutorial", "how to", "guide", "learn")),
        ("🎭 Entertainment/Comedy", ("funny", "comedy", "entertainment", "vlog")),
        ("📰 News/Information", ("news", "breaking", "report", "update")),
        ("⭐ Review/Unboxing", ("review", "unboxing", "test", "comparison")),
        ("📺 Commercial/Advertisement", ("commercial", "ad", "brand", "product")),
    )
    for label, keywords in categories:
        if any(mentions(w) for w in keywords):
            return label
    return "🎬 General Content"
|
|
|
|
|
|
|
|
|
def detect_background_music(self, video_info):
    """Guess a background-music label from keywords in the video title.

    "ad" is matched as a whole word (or "ads"), since as a plain substring
    it matched unrelated words such as "reading". A missing or ``None``
    title is treated as empty.

    Args:
        video_info: Metadata dict; only ``title`` is read.

    Returns:
        str: An emoji-prefixed label; ``"🎵 Background Music"`` by default.
    """
    title = (video_info.get("title") or "").lower()

    if "music" in title or "song" in title:
        return "🎵 Original Music/Soundtrack"
    if "commercial" in title or re.search(r"\bads?\b", title):
        return "🎶 Upbeat Commercial Music"
    if "tutorial" in title or "how to" in title:
        return "🔇 Minimal/No Background Music"
    if "vlog" in title or "daily" in title:
        return "🎼 Ambient Background Music"
    return "🎵 Background Music"
|
|
|
|
|
|
|
|
|
def detect_influencer_status(self, video_info):
    """Return a creator tier label based on subscribers, then views.

    The subscriber count is read from ``channel_follower_count`` (the field
    yt-dlp provides), falling back to the legacy ``channel_followers`` key
    the original code looked up. Missing or ``None`` counts count as 0.

    Args:
        video_info: Metadata dict.

    Returns:
        str: An emoji-prefixed tier label.
    """
    subs = (
        video_info.get("channel_follower_count")
        or video_info.get("channel_followers")
        or 0
    )
    views = video_info.get("view_count") or 0

    tiers = (
        (10_000_000, "🌟 Mega Influencer (10M+)"),
        (1_000_000, "⭐ Major Influencer (1M+)"),
        (100_000, "🎯 Mid-tier Influencer (100K+)"),
        (10_000, "📈 Micro Influencer (10K+)"),
    )
    for threshold, label in tiers:
        if subs > threshold:
            return label

    # Small channel, but this particular video performed well.
    if views > 100_000:
        return "🔥 Viral Content Creator"
    return "👤 Regular Content Creator"
|
|
|
|
|
|
|
|
|
def format_number(self, num):
    """Format a count compactly with a K / M / B suffix.

    Values below 1000 are returned unchanged as text; falsy values
    (``None``, 0) give ``"0"``. A value that would round up to 1000 of its
    unit is promoted to the next unit, so 999_999 gives ``"1.0M"`` rather
    than ``"1000.0K"``.

    Args:
        num: The number to format.

    Returns:
        str: The formatted number.
    """
    if not num:
        return "0"

    units = ((1_000_000_000, "B"), (1_000_000, "M"), (1_000, "K"))
    for position, (scale, suffix) in enumerate(units):
        if num >= scale:
            scaled = round(num / scale, 1)
            if scaled >= 1000 and position > 0:
                # Rounding crossed the unit boundary; use the larger unit.
                scale, suffix = units[position - 1]
                scaled = round(num / scale, 1)
            return f"{scaled:.1f}{suffix}"
    return str(num)
|
|
|
|
|
|
|
|
|
def format_video_info(self, video_info):
    """Render the full text analysis report for one video.

    Counts, upload date, description and duration may be missing or
    ``None`` in the metadata (for example when likes are hidden); they are
    normalised first so that formatting cannot raise.

    Args:
        video_info: Metadata dict as returned by ``get_video_info``.

    Returns:
        str: The report, or an error line when ``video_info`` is empty.
    """
    if not video_info:
        return "❌ No video information available."

    title = video_info.get("title") or "Unknown"
    uploader = video_info.get("uploader") or "Unknown"
    # Whole seconds only: ":02d" rejects floats.
    duration = int(video_info.get("duration") or 0)
    dur_str = f"{duration//60}:{duration%60:02d}" if duration else "Unknown"
    views = video_info.get("view_count") or 0
    likes = video_info.get("like_count") or 0
    comments = video_info.get("comment_count") or 0
    upload_date = str(video_info.get("upload_date") or "Unknown")

    # An 8-character date is reformatted from YYYYMMDD to YYYY-MM-DD.
    if len(upload_date) == 8:
        upload_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:8]}"

    scene_data = self.generate_scene_breakdown_gemini(video_info)
    korean_scenes = scene_data.get("korean", [])
    english_scenes = scene_data.get("english", [])
    chinese_scenes = scene_data.get("chinese", [])
    thai_scenes = scene_data.get("thai", [])
    russian_scenes = scene_data.get("russian", [])

    vtype = self.detect_video_type(title, video_info.get("description") or "")
    bgm = self.detect_background_music(video_info)
    creator = self.detect_influencer_status(video_info)
    engagement = (likes / views) * 100 if views else 0

    heavy_rule = "=" * 50
    rule_25 = "─" * 25
    rule_30 = "─" * 30
    video_id = video_info.get("id", "Unknown")
    korean_block = "\n".join(korean_scenes)
    english_block = "\n".join(english_scenes)
    chinese_block = (
        "\n".join(chinese_scenes)
        if chinese_scenes
        else "(중국어 번역 없음 / No Chinese translation available)"
    )
    thai_block = (
        "\n".join(thai_scenes)
        if thai_scenes
        else "(태국어 번역 없음 / No Thai translation available)"
    )
    russian_block = (
        "\n".join(russian_scenes)
        if russian_scenes
        else "(러시아어 번역 없음 / No Russian translation available)"
    )
    analyzed_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    engine = "Gemini AI" if self.gemini_model else "Standard Analysis"

    report = f"""
🎬 YOUTUBE VIDEO ANALYSIS REPORT
{heavy_rule}

📋 기본 정보 / BASIC INFORMATION
{rule_25}
📹 **제목/Title:** {title}
👤 **업로더/Uploader:** {uploader}
📅 **업로드 날짜/Upload Date:** {upload_date}
⏱️ **재생시간/Duration:** {dur_str}
🆔 **비디오 ID/Video ID:** {video_id}

📊 성과 지표 / PERFORMANCE METRICS
{rule_25}
👀 **조회수/Views:** {self.format_number(views)} ({views:,})
👍 **좋아요/Likes:** {self.format_number(likes)} ({likes:,})
💬 **댓글/Comments:** {self.format_number(comments)} ({comments:,})
📈 **참여율/Engagement Rate:** {engagement:.2f}%

🎯 콘텐츠 분석 / CONTENT ANALYSIS
{rule_25}
📂 **비디오 유형/Video Type:** {vtype}
🎵 **배경음악/Background Music:** {bgm}
👑 **제작자 상태/Creator Status:** {creator}

🎙️ 음성/대사 추출 (한국어) / SPEECH/DIALOGUE EXTRACTION (KOREAN)
{rule_30}
{korean_block}

🎙️ 음성/대사 추출 (영어) / SPEECH/DIALOGUE EXTRACTION (ENGLISH)
{rule_30}
{english_block}

🎙️ 音频/对话提取 (中文) / SPEECH/DIALOGUE EXTRACTION (CHINESE)
{rule_30}
{chinese_block}

🎙️ การดึงเสียง/บทสนทนา (ไทย) / SPEECH/DIALOGUE EXTRACTION (THAI)
{rule_30}
{thai_block}

🎙️ Извлечение речи/диалога (Русский) / SPEECH/DIALOGUE EXTRACTION (RUSSIAN)
{rule_30}
{russian_block}

{heavy_rule}
📊 **분석 완료/Analysis completed:** {analyzed_at}
🤖 **AI 강화/AI Enhancement:** {engine}
"""
    return report.strip()
|
|
|
|
|
|
|
|
|
def get_video_info(self, url, progress=gr.Progress(), cookiefile=None):
    """Fetch a video's metadata with yt-dlp without downloading it.

    Args:
        url: YouTube URL; surrounding whitespace is ignored.
        progress: Progress callback, called as ``progress(fraction, desc=...)``.
        cookiefile: Optional path to a cookies file. When it is missing or
            does not exist, ``DEFAULT_COOKIE_FILE`` is used if present.

    Returns:
        tuple: ``(info_dict, message)`` on success, ``(None, error_message)``
        when the URL is empty or invalid or extraction fails.
    """
    if not url or not url.strip():
        return None, "❌ Please enter a YouTube URL"
    # Validate and extract the same stripped URL that passed the empty check.
    url = url.strip()
    if not self.is_valid_youtube_url(url):
        return None, "❌ Invalid YouTube URL format"

    # An existing explicit cookie file wins over the default one.
    if not (cookiefile and os.path.exists(cookiefile)):
        cookiefile = str(DEFAULT_COOKIE_FILE) if DEFAULT_COOKIE_FILE.exists() else None

    try:
        progress(0.1, desc="Initializing YouTube extractor…")
        ydl_opts = {"noplaylist": True, "extract_flat": False}
        if cookiefile:
            ydl_opts["cookiefile"] = cookiefile

        progress(0.5, desc="Extracting video metadata…")
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)

        progress(1.0, desc="✅ Analysis complete!")
        return info, "✅ Video information extracted successfully"

    except Exception as e:
        return None, f"❌ Error: {e}"
|
|
|
|
|
|
|
|
|
def download_video(
    self,
    url,
    quality="best",
    audio_only=False,
    progress=gr.Progress(),
    cookiefile=None,
):
    """Download a video (or its audio as MP3) and copy it to Downloads.

    The file is first written to ``self.temp_downloads`` and then copied to
    ``self.downloads_folder``; a failed copy is reported in the status text
    but does not fail the download.

    Args:
        url: YouTube URL; surrounding whitespace is ignored.
        quality: ``"720p"`` or ``"480p"``; any other value selects up to 1080p.
        audio_only: When true, extract the audio as 192 kbps MP3.
        progress: Progress callback, called as ``progress(fraction, desc=...)``.
        cookiefile: Optional path to a cookies file. When it is missing or
            does not exist, ``DEFAULT_COOKIE_FILE`` is used if present.

    Returns:
        tuple: ``(temp_file_path, status_message)`` on success,
        ``(None, error_message)`` otherwise.
    """
    if not url or not url.strip():
        return None, "❌ Please enter a YouTube URL"
    # Validate and download the same stripped URL that passed the empty check.
    url = url.strip()
    if not self.is_valid_youtube_url(url):
        return None, "❌ Invalid YouTube URL format"

    # An existing explicit cookie file wins over the default one.
    if not (cookiefile and os.path.exists(cookiefile)):
        cookiefile = str(DEFAULT_COOKIE_FILE) if DEFAULT_COOKIE_FILE.exists() else None

    try:
        progress(0.1, desc="Preparing download…")
        # The timestamp is part of the file name and is used to find it again.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        ydl_opts = {
            "outtmpl": os.path.join(
                self.temp_downloads, f"%(title)s_{timestamp}.%(ext)s"
            ),
            "noplaylist": True,
        }

        if audio_only:
            ydl_opts["format"] = "bestaudio/best"
            ydl_opts["postprocessors"] = [
                {
                    "key": "FFmpegExtractAudio",
                    "preferredcodec": "mp3",
                    "preferredquality": "192",
                }
            ]
        elif quality == "720p":
            ydl_opts["format"] = "best[height<=720]"
        elif quality == "480p":
            ydl_opts["format"] = "best[height<=480]"
        else:
            ydl_opts["format"] = "best[height<=1080]"

        if cookiefile:
            ydl_opts["cookiefile"] = cookiefile

        def hook(d):
            """Translate yt-dlp progress events into UI progress updates."""
            status = d.get("status")
            if status == "downloading":
                # total_bytes can be absent or None; fall back to the estimate.
                total = d.get("total_bytes") or d.get("total_bytes_estimate")
                done = d.get("downloaded_bytes")
                if total and done is not None:
                    # An estimate can be too low, so cap at 100%.
                    pct = min(done / total, 1.0) * 100
                    progress(0.1 + pct / 100 * 0.7, desc=f"Downloading… {pct:.1f}%")
                else:
                    progress(0.5, desc="Downloading…")
            elif status == "finished":
                progress(0.8, desc="Processing download…")

        ydl_opts["progress_hooks"] = [hook]

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.extract_info(url, download=True)

        progress(0.9, desc="Copying to Downloads folder…")

        downloaded_temp = None
        for f in os.listdir(self.temp_downloads):
            if timestamp in f:
                downloaded_temp = os.path.join(self.temp_downloads, f)
                break

        if not downloaded_temp:
            return None, "❌ Downloaded file not found"

        final_name = os.path.basename(downloaded_temp)
        final_path = os.path.join(self.downloads_folder, final_name)

        try:
            shutil.copy2(downloaded_temp, final_path)
            saved = True
        except OSError as e:
            # Best effort: the temp file is still returned to the caller.
            print(f"Copy warning: {e}")
            saved = False

        progress(1.0, desc="✅ Download complete!")

        msg = (
            "✅ Download successful!\n"
            f"📁 Temp file: {os.path.basename(downloaded_temp)}\n"
            f"📁 Saved to: {final_path if saved else 'Copy failed'}\n"
            f"🎯 Size: {os.path.getsize(downloaded_temp)/(1024*1024):.1f} MB"
        )
        return downloaded_temp, msg

    except Exception as e:
        return None, f"❌ Download failed: {e}"
|
|
|
|
|
|
|
|
|
|
|
# Module-level downloader instance shared by the callback functions below.
downloader = YouTubeDownloader()
|
|
|
|
|
def configure_api_key(api_key):
    """Configure Gemini on the shared downloader from a UI-supplied key.

    Args:
        api_key: Key text from the UI; surrounding whitespace is ignored.

    Returns:
        tuple: ``(status_message, gr.update(visible=...))`` where the update
        is visible only when configuration succeeded.
    """
    cleaned = api_key.strip() if api_key else ""
    if not cleaned:
        return "❌ Please enter a valid Google API key", gr.update(visible=False)

    ok, msg = downloader.configure_gemini(cleaned)
    return msg, gr.update(visible=ok)
|
|
|
|
|
def analyze_with_cookies(url, cookies_file, progress=gr.Progress()):
    """Fetch metadata for ``url`` and return the formatted analysis report.

    Args:
        url: YouTube URL from the UI.
        cookies_file: Path of an uploaded cookies file, or a falsy value.
        progress: Progress callback, called as ``progress(fraction, desc=...)``.

    Returns:
        str: The report, or an error line; exceptions are turned into text.
    """
    try:
        progress(0.05, desc="Starting analysis…")
        info, msg = downloader.get_video_info(
            url, progress=progress, cookiefile=cookies_file or None
        )
        if not info:
            return f"❌ Analysis Failed: {msg}"

        progress(0.95, desc="Generating report…")
        return downloader.format_video_info(info)
    except Exception as e:
        return f"❌ System Error: {e}"
|
|
|
|
|
def download_with_cookies(url, quality, audio_only, cookies_file, progress=gr.Progress()):
    """Run a download through the shared downloader.

    Args:
        url: YouTube URL from the UI.
        quality: Quality choice passed on to ``download_video``.
        audio_only: Whether to extract audio only.
        cookies_file: Path of an uploaded cookies file, or a falsy value.
        progress: Progress callback, called as ``progress(fraction, desc=...)``.

    Returns:
        tuple: ``(file_path_or_None, status_message)``; exceptions are turned
        into an error message.
    """
    try:
        progress(0.05, desc="Preparing download…")
        file_path, status = downloader.download_video(
            url, quality, audio_only, progress=progress, cookiefile=cookies_file or None
        )
    except Exception as e:
        return None, f"❌ System Error: {e}"

    # A falsy path is normalised to None.
    return (file_path or None), status
|
|
|
|
|
|
|
|
|
|
|
def create_interface():
    """Build the Gradio Blocks UI and wire up its event handlers.

    The UI has an API-key section, a shared URL / cookie-file row, and two
    tabs (analysis and download).

    NOTE(review): the source's indentation was lost, so the exact nesting of
    components inside rows/groups is reconstructed — confirm the layout.

    Returns:
        gr.Blocks: The assembled interface (not yet launched).
    """
    with gr.Blocks(
        theme=gr.themes.Soft(), title="🎥 YouTube Video Analyzer & Downloader Pro"
    ) as iface:
        gr.HTML("<h1>🎥 YouTube Video Analyzer & Downloader Pro</h1>")

        with gr.Group():
            gr.HTML("<h3>🔑 Google Gemini API Configuration</h3>")
            with gr.Row():
                api_key_in = gr.Textbox(
                    label="🔑 Google API Key",
                    placeholder="Paste your Google API key…",
                    type="password",
                )
                api_btn = gr.Button("🔧 Configure API", variant="secondary")
            api_status = gr.Textbox(
                label="API Status",
                value="❌ Gemini API not configured – Using fallback analysis",
                interactive=False,
                lines=1,
            )

        with gr.Row():
            url_in = gr.Textbox(
                label="🔗 YouTube URL",
                placeholder="Paste YouTube video URL…",
            )
            cookies_in = gr.File(
                label="🍪 Upload cookies.txt (optional)",
                file_types=[".txt"],
                type="filepath",
            )

        with gr.Tabs():
            with gr.TabItem("📊 Video Analysis"):
                analyze_btn = gr.Button("🔍 Analyze Video", variant="primary")
                analysis_out = gr.Textbox(
                    label="📊 Analysis Report", lines=25, show_copy_button=True
                )
                analyze_btn.click(
                    fn=analyze_with_cookies,
                    inputs=[url_in, cookies_in],
                    outputs=analysis_out,
                    show_progress=True,
                )

            with gr.TabItem("⬇️ Video Download"):
                with gr.Row():
                    quality_dd = gr.Dropdown(
                        choices=["best", "720p", "480p"],
                        value="best",
                        label="📺 Quality",
                    )
                    audio_cb = gr.Checkbox(label="🎵 Audio only (MP3)")
                download_btn = gr.Button("⬇️ Download Video", variant="primary")
                dl_status = gr.Textbox(
                    label="📥 Download Status", lines=5, show_copy_button=True
                )
                dl_file = gr.File(label="📁 Downloaded File", visible=False)

                def wrapped_download(url, q, a, cfile, progress=gr.Progress()):
                    """Run the download and reveal the file widget on success."""
                    fp, st = download_with_cookies(url, q, a, cfile, progress)
                    if fp and os.path.exists(fp):
                        return st, gr.update(value=fp, visible=True)
                    return st, gr.update(visible=False)

                download_btn.click(
                    fn=wrapped_download,
                    inputs=[url_in, quality_dd, audio_cb, cookies_in],
                    outputs=[dl_status, dl_file],
                    show_progress=True,
                )

        def apply_api_key(api_key):
            """Return only the status text for the single status Textbox."""
            # configure_api_key returns (message, visibility update), but this
            # event has one output component, so only the message is passed on.
            message, _ = configure_api_key(api_key)
            return message

        api_btn.click(
            fn=apply_api_key,
            inputs=[api_key_in],
            outputs=[api_status],
        )

        gr.HTML(
            """
<div style="margin-top:20px;padding:15px;background:#f0f8ff;border-left:5px solid #4285f4;border-radius:10px;">
<h3>💡 Tip: 쿠키 파일 자동 사용</h3>
<p><code>www.youtube.com_cookies.txt</code> 파일을 <strong>app.py</strong>와 같은
폴더에 두면 자동으로 사용됩니다. 주기적으로 새 파일로 교체해 주세요.</p>
</div>
"""
        )
    return iface
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    import atexit

    app = create_interface()

    # Remove the temporary download directories when the interpreter exits.
    atexit.register(downloader.cleanup)
    app.launch(debug=True, show_error=True)