YTB-TEST / app.py
fantaxy's picture
Update app.py
c6c37f5 verified
raw
history blame
43.5 kB
#!/usr/bin/env python3
"""
YouTube Video Analyzer & Downloader Pro
(쿠키 자동 처리 버전)
· `www.youtube.com_cookies.txt` 파일이 **app.py**와 같은 폴더에 있으면
자동으로 사용합니다.
· Gradio UI에서 쿠키 파일을 업로드하면, 업로드된 파일이 **우선** 적용됩니다.
"""
# ──────────────────────────────────────────────────────────────
# 표준 라이브러리
# ──────────────────────────────────────────────────────────────
import os
import re
import json
import uuid
import shutil
import tempfile
from datetime import datetime
from pathlib import Path
# ──────────────────────────────────────────────────────────────
# 외부 라이브러리
# ──────────────────────────────────────────────────────────────
import gradio as gr
import yt_dlp
import google.generativeai as genai
# ──────────────────────────────────────────────────────────────
# 기본 쿠키 파일 경로 ― 파일명이 동일하면 자동 사용
# ──────────────────────────────────────────────────────────────
# Default cookie file: "www.youtube.com_cookies.txt" located in the same folder
# as this script. get_video_info() and download_video() fall back to it when no
# existing cookie file path is passed in.
DEFAULT_COOKIE_FILE = Path(__file__).with_name("www.youtube.com_cookies.txt")
# =================================================================
# Main Class
# =================================================================
class YouTubeDownloader:
def __init__(self):
    """Create the working directories and start without a Gemini model.

    Two fresh temporary directories are created, and a ``YouTube_Downloads``
    folder is ensured under ``~/Downloads`` of the current user.
    """
    # Scratch space lives in temp directories (kept for Gradio compatibility).
    self.download_dir = tempfile.mkdtemp()
    self.temp_downloads = tempfile.mkdtemp(prefix="youtube_downloads_")

    # Destination folder for copies of finished downloads.
    home_dir = os.path.expanduser("~")
    target_dir = os.path.join(home_dir, "Downloads", "YouTube_Downloads")
    self.downloads_folder = target_dir
    os.makedirs(target_dir, exist_ok=True)

    # Stays None until configure_gemini() succeeds.
    self.gemini_model = None
# ---------------------------------------------------------
# Google Gemini API
# ---------------------------------------------------------
def configure_gemini(self, api_key):
    """Configure the Gemini client and keep the model on this instance.

    Args:
        api_key: Google API key handed to ``genai.configure``.

    Returns:
        A ``(success, message)`` tuple; ``success`` is False when any step
        raised, and ``message`` then carries the exception text.
    """
    try:
        genai.configure(api_key=api_key)
        self.gemini_model = genai.GenerativeModel(
            model_name="gemini-1.5-flash-latest"
        )
    except Exception as err:
        outcome = (False, f"❌ Failed to configure Gemini API: {err}")
    else:
        outcome = (True, "✅ Gemini API configured successfully!")
    return outcome
# ---------------------------------------------------------
# 임시 디렉터리 정리
# ---------------------------------------------------------
def cleanup(self):
    """Remove the temporary directories owned by this instance.

    Each directory is handled independently, so a failure to delete one of
    them no longer prevents the other from being removed. Problems are
    reported with a printed warning and never raised, because this runs as
    best-effort housekeeping.
    """
    for attr in ("download_dir", "temp_downloads"):
        path = getattr(self, attr, None)
        if not path or not os.path.exists(path):
            continue
        try:
            shutil.rmtree(path)
        except OSError as e:
            print(f"⚠️ Warning: Could not clean up temporary directory: {e}")
# ---------------------------------------------------------
# 유튜브 URL 검증
# ---------------------------------------------------------
def is_valid_youtube_url(self, url):
    """Return True when *url* looks like a YouTube video URL.

    Surrounding whitespace is ignored so that pasted URLs validate, and
    non-string input yields False instead of raising. Besides the bare and
    ``www.`` hosts, the ``m.`` and ``music.`` sub-domains are accepted.
    The pattern is anchored at the start only, so trailing query
    parameters after the 11-character id are allowed.
    """
    if not isinstance(url, str):
        return False
    youtube_regex = re.compile(
        r"(https?://)?((www|m|music)\.)?"
        r"(youtube|youtu|youtube-nocookie)\.(com|be)/"
        r"(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})"
    )
    return youtube_regex.match(url.strip()) is not None
# ---------------------------------------------------------
# Gemini-AI 음성/대사 추출 (한글 우선)
# ---------------------------------------------------------
def generate_scene_breakdown_gemini(self, video_info):
    """Ask Gemini for a timestamped transcript plus four translations.

    The Korean transcript is requested first (from title, duration and the
    first 1500 characters of the description); English, Chinese, Thai and
    Russian versions are then requested as translations of that transcript.

    Args:
        video_info: Metadata dict; ``duration``, ``title`` and
            ``description`` are read.

    Returns:
        A dict with the keys ``korean``, ``english``, ``chinese``, ``thai``
        and ``russian``, each a list of ``**[..]**: ...`` entries.

    Fallback behaviour:
        * no model configured, an error while extracting the Korean
          transcript, or an empty transcript -> the full fallback breakdown;
        * a failing translation only empties that language (English then
          uses the fallback English entries) instead of discarding the
          transcript that was already obtained.
    """
    if not self.gemini_model:
        return self.generate_scene_breakdown_fallback(video_info)

    def _parse_scenes(text):
        """Group model output into entries starting at ``**[..]**:`` lines."""
        scenes = []
        current = ""
        for raw_line in text.split("\n"):
            line = raw_line.strip()
            if line.startswith("**[") and "]**:" in line:
                if current:
                    scenes.append(current.strip())
                current = line
            elif current:
                # Continuation of the entry that is currently open.
                current += "\n" + line
        if current:
            scenes.append(current.strip())
        return scenes

    def _ask(prompt):
        """Send one prompt to the model and return its parsed entries."""
        response = self.gemini_model.generate_content(prompt)
        if response and response.text:
            return _parse_scenes(response.text)
        return []

    def _translate(language, prompt):
        """Like ``_ask`` but an error only costs this single language."""
        try:
            return _ask(prompt)
        except Exception as e:
            print(f"Gemini API error ({language} translation): {e}")
            return []

    try:
        duration = video_info.get("duration", 0)
        title = video_info.get("title") or ""
        # The description may be present but None; treat that as empty.
        description = (video_info.get("description") or "")[:1500]
        if not duration:
            return {
                "korean": ["**[재생시간 알 수 없음]**: 비디오 재생시간을 확인할 수 없어 타임스탬프를 생성할 수 없습니다"],
                "english": ["**[Duration Unknown]**: Unable to generate timestamped breakdown - video duration not available"],
                "chinese": ["**[持续时间未知]**: 无法生成带时间戳的分解"],
                "thai": ["**[ไม่ทราบระยะเวลา]**: ไม่สามารถสร้างการแบ่งส่วนตามเวลาได้"],
                "russian": ["**[Продолжительность неизвестна]**: Невозможно создать временную разбивку"],
            }
        prompt = f"""
이 YouTube 비디오의 음성/대사를 타임스탬프별로 추출해주세요.
제목: {title}
재생시간: {duration}
설명: {description}
매우 중요한 지침:
1. 실제 영상에서 들리는 대사, 내레이션, 음성을 그대로 적어주세요
2. 장면 설명이 아닌 실제 음성 내용만 작성하세요
3. 음성이 없는 부분은 (...) 또는 (배경음악) 등으로 표시
4. 타임스탬프 가이드라인:
- 대사나 내레이션이 시작하고 끝나는 지점 기준
- 연속된 대사는 하나로 묶어서 표시
- 최대한 자연스러운 단위로 구분
5. 형식:
**[MM:SS-MM:SS]**: "실제 대사나 내레이션 내용"
**[MM:SS-MM:SS]**: (배경음악) 또는 (...장면 전환...)
6. 모든 음성 내용을 빠짐없이 적어주세요
7. 자막이나 화면에 표시된 텍스트도 포함하세요
8. 장면 설명은 절대 하지 마세요. 오직 음성과 텍스트만 추출하세요.
예시:
**[00:00-00:05]**: "안녕하세요. 오늘은 미륵산에서 발견된 백제 유적에 대해 알아보겠습니다."
**[00:05-00:08]**: (배경음악)
**[00:08-00:15]**: "미륵사지를 품고 있는 익산 미륵산의 정상부에서 백제시대에 만든 것으로 추정되는 저수조가 발굴됐습니다."
**[00:15-00:18]**: (인터뷰 준비 중...)
**[00:18-00:25]**: [이도학 교수] "이번 발굴은 백제 역사 연구에 중요한 전환점이 될 것입니다."
뉴스의 경우:
- 앵커나 기자의 멘트는 그대로 적기
- 인터뷰는 [인터뷰이 이름] "내용" 형식으로
- 자막은 [자막] 내용 형식으로
"""
        korean_scenes = _ask(prompt)
        if not korean_scenes:
            # Nothing to translate: skip four pointless API calls.
            return self.generate_scene_breakdown_fallback(video_info)

        transcript = "\n".join(korean_scenes)

        # English translation
        english_prompt = f"""
Translate the following Korean speech/dialogue transcription to English, maintaining the exact same timestamps.
Translate ONLY the actual speech content, not descriptions:
{transcript}
Important rules:
- Keep the format exactly the same: **[MM:SS-MM:SS]**: "English translation of speech"
- For non-speech parts like (배경음악), translate as (background music)
- For (...) keep as is
- For interview tags like [이도학 교수], translate as [Professor Lee Do-hak]
- For [자막], translate as [Subtitle]
- Keep quotation marks for actual speech
- Do NOT add any scene descriptions or explanations
"""
        # Chinese translation
        chinese_prompt = f"""
将以下韩语语音/对话转录翻译成中文,保持完全相同的时间戳。
只翻译实际的语音内容,不要描述:
{transcript}
重要规则:
- 保持格式完全相同:**[MM:SS-MM:SS]**: "语音的中文翻译"
- 对于像(배경음악)这样的非语音部分,翻译为(背景音乐)
- 对于(...)保持原样
- 对于像[이도학 교수]这样的采访标签,翻译为[李道学教授]
- 对于[자막],翻译为[字幕]
- 保留实际语音的引号
- 不要添加任何场景描述或解释
"""
        # Thai translation
        thai_prompt = f"""
แปลคำบรรยายเสียง/บทสนทนาภาษาเกาหลีต่อไปนี้เป็นภาษาไทย โดยคงรูปแบบเวลาเดิมไว้
แปลเฉพาะเนื้อหาเสียงจริงเท่านั้น ไม่ต้องบรรยาย:
{transcript}
กฎสำคัญ:
- คงรูปแบบเดิมไว้: **[MM:SS-MM:SS]**: "คำแปลภาษาไทยของเสียงพูด"
- สำหรับส่วนที่ไม่ใช่เสียงพูด เช่น (배경음악) แปลเป็น (เพลงประกอบ)
- สำหรับ (...) ให้คงเดิม
- สำหรับป้ายสัมภาษณ์ เช่น [이도학 교수] แปลเป็น [ศาสตราจารย์ อี โด-ฮัก]
- สำหรับ [자막] แปลเป็น [คำบรรยาย]
- คงเครื่องหมายคำพูดสำหรับเสียงพูดจริง
- ไม่ต้องเพิ่มคำบรรยายฉากหรือคำอธิบายใดๆ
"""
        # Russian translation
        russian_prompt = f"""
Переведите следующую корейскую транскрипцию речи/диалога на русский язык, сохраняя точно такие же временные метки.
Переводите ТОЛЬКО фактическое содержание речи, а не описания:
{transcript}
Важные правила:
- Сохраняйте формат точно таким же: **[MM:SS-MM:SS]**: "Русский перевод речи"
- Для частей без речи, таких как (배경음악), переведите как (фоновая музыка)
- Для (...) оставьте как есть
- Для тегов интервью, таких как [이도학 교수], переведите как [Профессор Ли До Хак]
- Для [자막] переведите как [Субтитры]
- Сохраняйте кавычки для фактической речи
- НЕ добавляйте никаких описаний сцен или объяснений
"""
        english_scenes = _translate("English", english_prompt)
        chinese_scenes = _translate("Chinese", chinese_prompt)
        thai_scenes = _translate("Thai", thai_prompt)
        russian_scenes = _translate("Russian", russian_prompt)

        if not english_scenes:
            english_scenes = self.generate_scene_breakdown_fallback(video_info)["english"]

        return {
            "korean": korean_scenes,
            "english": english_scenes,
            "chinese": chinese_scenes,
            "thai": thai_scenes,
            "russian": russian_scenes,
        }
    except Exception as e:
        print(f"Gemini API error: {e}")
        return self.generate_scene_breakdown_fallback(video_info)
# ---------------------------------------------------------
# Fallback 음성/대사 추출 (5개 언어)
# ---------------------------------------------------------
def generate_scene_breakdown_fallback(self, video_info):
    """Build placeholder timestamped entries in five languages.

    Used when Gemini is unavailable: the video is divided into evenly sized
    segments labelled intro / main content / outro.

    Args:
        video_info: Metadata dict; only ``duration`` (seconds) is read.

    Returns:
        A dict with the keys ``korean``, ``english``, ``chinese``, ``thai``
        and ``russian``; each value is a list of ``**[M:SS-M:SS]**: ...``
        strings. A missing, zero or unparsable duration yields a single
        "duration unknown" entry per language.
    """
    # Durations may arrive as float or None; work with whole seconds.
    try:
        duration = int(video_info.get("duration") or 0)
    except (TypeError, ValueError):
        duration = 0
    if duration <= 0:
        return {
            "korean": ["**[재생시간 알 수 없음]**: 타임스탬프를 생성할 수 없습니다"],
            "english": ["**[Duration Unknown]**: Unable to generate timestamped breakdown"],
            "chinese": ["**[持续时间未知]**: 无法生成带时间戳的分解"],
            "thai": ["**[ไม่ทราบระยะเวลา]**: ไม่สามารถสร้างการแบ่งส่วนตามเวลาได้"],
            "russian": ["**[Продолжительность неизвестна]**: Невозможно создать временную разбивку"],
        }

    # Base segment length grows with the video length.
    if duration <= 60:
        segment_length = 10
    elif duration <= 300:
        segment_length = 15
    elif duration <= 900:
        segment_length = 20
    else:
        segment_length = 30

    # Keep at most 20 segments, but stretch them so they still span the
    # whole video; otherwise the "outro" entry would land mid-video.
    max_segments = 20
    segment_length = max(segment_length, -(-duration // max_segments))
    # Ceiling division avoids a zero-length trailing segment when the
    # duration is an exact multiple of the segment length.
    num_segments = -(-duration // segment_length)

    # (intro, outro, main content) text per language.
    labels = {
        "korean": (
            "(음성 추출을 위해 Gemini API가 필요합니다. 인트로 부분...)",
            "(아웃트로 부분...)",
            "(본문 내용...)",
        ),
        "english": (
            "(Gemini API required for speech extraction. Intro section...)",
            "(Outro section...)",
            "(Main content...)",
        ),
        "chinese": (
            "(需要Gemini API进行语音提取。介绍部分...)",
            "(结尾部分...)",
            "(主要内容...)",
        ),
        "thai": (
            "(ต้องใช้ Gemini API สำหรับการดึงเสียง ส่วนเปิดตัว...)",
            "(ส่วนจบ...)",
            "(เนื้อหาหลัก...)",
        ),
        "russian": (
            "(Требуется Gemini API для извлечения речи. Вступительная часть...)",
            "(Заключительная часть...)",
            "(Основное содержание...)",
        ),
    }
    scenes = {language: [] for language in labels}

    for i in range(num_segments):
        is_last = i == num_segments - 1
        start_time = i * segment_length
        # The final segment always runs to the end of the video.
        end_time = duration if is_last else start_time + segment_length - 1
        stamp = (
            f"**[{start_time // 60}:{start_time % 60:02d}"
            f"-{end_time // 60}:{end_time % 60:02d}]**"
        )
        if i == 0:
            which = 0
        elif is_last:
            which = 1
        else:
            which = 2
        for language, texts in labels.items():
            scenes[language].append(f"{stamp}: {texts[which]}")

    return scenes
# ---------------------------------------------------------
# 비디오 유형 감지(상세)
# ---------------------------------------------------------
def detect_video_type_detailed(self, title, description):
    """Classify a video into a coarse category by keyword lookup.

    Title and description are joined and lower-cased; the categories are
    tried in a fixed order and the first one with a keyword occurring
    anywhere in that text (plain substring match) is returned. Without any
    hit the result is ``"general"``.
    """
    haystack = (title + " " + description).lower()
    # Order matters: earlier categories take precedence.
    categories = (
        ("tutorial", ("tutorial", "how to", "guide", "learn", "diy")),
        ("review", ("review", "unboxing", "test", "comparison", "vs")),
        ("vlog", ("vlog", "daily", "routine", "day in")),
        ("music", ("music", "song", "cover", "lyrics")),
        ("entertainment", ("comedy", "funny", "prank", "challenge")),
        ("news", ("news", "breaking", "update", "report")),
        ("cooking", ("cooking", "recipe", "food", "kitchen")),
        ("fitness", ("workout", "fitness", "exercise", "yoga")),
    )
    for label, keywords in categories:
        for keyword in keywords:
            if keyword in haystack:
                return label
    return "general"
# ---------------------------------------------------------
# 장면별 설명 생성 (한글)
# ---------------------------------------------------------
def generate_contextual_description_korean(
    self, scene_index, total_scenes, video_type, uploader, title
):
    """Return a canned Korean description for one scene of a video.

    The presenter wording is chosen from gender keywords found in *title*;
    the sentence depends on whether the scene is the first, the last or a
    middle one, and on *video_type*. *uploader* is accepted but not used.
    """
    if any(word in title for word in ("woman", "girl", "여성", "여자")):
        who = "여성 진행자"
    elif any(word in title for word in ("man", "guy", "남성", "남자")):
        who = "남성 진행자"
    else:
        who = "콘텐츠 제작자"

    if scene_index == 0:
        # Opening scene.
        endings = {
            "tutorial": "가 화면에 등장하여 자신을 소개하고 주제를 설명합니다. 밝은 조명의 작업 공간에서 캐주얼한 옷을 입고 있습니다.",
            "vlog": "가 카메라를 향해 밝게 인사하며 오늘의 브이로그 주제를 설명합니다.",
            "review": "가 리뷰할 제품을 들어 보이며 간단한 특징을 소개합니다.",
        }
        generic = "가 시청자의 관심을 끌기 위한 매력적인 인트로로 영상을 시작합니다."
    elif scene_index == total_scenes - 1:
        # Closing scene.
        endings = {
            "tutorial": "가 최종 결과물을 보여주고 시청자에게 감사 인사를 전하며 좋아요와 구독을 부탁합니다.",
            "vlog": "가 하루를 마무리하며 최종 생각을 공유하고 작별 인사를 합니다.",
        }
        generic = "가 주요 내용을 요약하고 좋아요와 댓글을 통한 참여를 독려하며 마무리합니다."
    else:
        # Any scene in between.
        endings = {
            "tutorial": "가 다음 단계를 시연하며 클로즈업 샷과 함께 명확한 지침을 제공합니다.",
            "review": "가 제품의 특정 기능을 살펴보며 사용 모습을 보여주고 성능에 대해 설명합니다.",
            "vlog": "가 일상 활동을 계속하며 솔직한 순간과 개인적인 생각을 공유합니다.",
            "cooking": "가 재료를 준비하며 자르고 섞으면서 각 단계를 설명합니다.",
            "fitness": "가 운동 세트를 수행하며 올바른 자세를 시연하고 팁을 제공합니다.",
        }
        generic = "가 명확한 설명을 통해 시청자와 소통하며 주요 콘텐츠를 진행합니다."

    return who + endings.get(video_type, generic)
# ---------------------------------------------------------
# 장면별 설명 생성 (영어 - 기존 메서드 유지)
# ---------------------------------------------------------
def generate_contextual_description(
    self, scene_index, total_scenes, video_type, uploader, title
):
    """Return a canned English description for one scene of a video.

    The subject of the sentence is picked from gender keywords in *title*;
    the rest depends on the scene position (first, last, middle) and on
    *video_type*. *uploader* is accepted but not used.
    """
    if "woman" in title or "girl" in title:
        who = "A woman"
    elif "man" in title or "guy" in title:
        who = "A man"
    else:
        who = "The content creator"

    if scene_index == 0:
        # Opening scene.
        endings = {
            "tutorial": (
                "appears on screen, introducing themselves and the topic. "
                "They are in a well-lit workspace, wearing casual clothes."
            ),
            "vlog": (
                "greets the camera cheerfully, perhaps waving, and explains "
                "what today's vlog is about."
            ),
            "review": (
                "holds up the product to be reviewed, giving a brief overview "
                "of its features."
            ),
        }
        generic = (
            "starts the video with an engaging introduction to capture "
            "viewers' attention."
        )
    elif scene_index == total_scenes - 1:
        # Closing scene.
        endings = {
            "tutorial": (
                "shows the final result, thanks viewers, and encourages them "
                "to like and subscribe."
            ),
            "vlog": "wraps up the day, sharing final thoughts and bidding farewell.",
        }
        generic = (
            "concludes, summarizing key points and prompting engagement "
            "through likes and comments."
        )
    else:
        # Any scene in between.
        endings = {
            "tutorial": (
                "demonstrates the next step, providing clear instructions "
                "with close-up shots."
            ),
            "review": (
                "examines a specific feature of the product, showing it in "
                "use and commenting on performance."
            ),
            "vlog": (
                "continues the day's activities, sharing candid moments and "
                "personal reflections."
            ),
            "cooking": (
                "prepares ingredients, chopping and mixing while explaining "
                "each step."
            ),
            "fitness": (
                "performs an exercise set, demonstrating proper form and "
                "offering tips."
            ),
        }
        generic = (
            "proceeds with the main content, engaging viewers through clear "
            "explanations."
        )

    return f"{who} {endings.get(video_type, generic)}"
# ---------------------------------------------------------
# 비디오 유형 (간략)
# ---------------------------------------------------------
def detect_video_type(self, title, description):
    """Return a display label for the kind of video.

    Title and description are joined, lower-cased and scanned for keywords;
    categories are tried in a fixed order and the first hit wins. A missing
    (``None``) title or description is treated as empty text.

    Keywords are plain substring matches, except for "ad"/"ads": those must
    appear as standalone words, because the bare substring "ad" also occurs
    in unrelated words such as "download" or "roadtrip" and used to label
    them as advertisements. "advert" keeps matching "advertisement" etc.
    """
    text = f"{title or ''} {description or ''}".lower()
    categories = (
        ("🎵 Music Video", ("music", "song", "album", "artist", "band", "lyrics")),
        ("📚 Tutorial/Educational", ("tutorial", "how to", "guide", "learn")),
        ("🎭 Entertainment/Comedy", ("funny", "comedy", "entertainment", "vlog")),
        ("📰 News/Information", ("news", "breaking", "report", "update")),
        ("⭐ Review/Unboxing", ("review", "unboxing", "test", "comparison")),
        ("📺 Commercial/Advertisement", ("commercial", "advert", "brand", "product")),
    )
    for label, keywords in categories:
        if any(w in text for w in keywords):
            return label
    # Standalone "ad"/"ads" only; checked last, like the original order.
    if re.search(r"\bads?\b", text):
        return "📺 Commercial/Advertisement"
    return "🎬 General Content"
# ---------------------------------------------------------
# 배경 음악 추정
# ---------------------------------------------------------
def detect_background_music(self, video_info):
    """Guess a background-music label from keywords in the video title.

    Args:
        video_info: Metadata dict; only ``title`` is read. A missing or
            ``None`` title is treated as empty.

    Returns:
        One of the fixed label strings; ``"🎵 Background Music"`` when no
        keyword matches.
    """
    title = (video_info.get("title") or "").lower()
    if "music" in title or "song" in title:
        return "🎵 Original Music/Soundtrack"
    # "ad"/"ads" must be standalone words: the bare substring "ad" also
    # occurs in words such as "reading" or "download".
    if (
        "commercial" in title
        or "advert" in title
        or re.search(r"\bads?\b", title)
    ):
        return "🎶 Upbeat Commercial Music"
    if "tutorial" in title or "how to" in title:
        return "🔇 Minimal/No Background Music"
    if "vlog" in title or "daily" in title:
        return "🎼 Ambient Background Music"
    return "🎵 Background Music"
# ---------------------------------------------------------
# 인플루언서 규모 추정
# ---------------------------------------------------------
def detect_influencer_status(self, video_info):
    """Return a creator-size label based on follower and view counts.

    Args:
        video_info: Metadata dict. The follower count is read from
            ``channel_follower_count`` (the field name yt-dlp documents),
            with the previously used ``channel_followers`` key kept as a
            fallback; ``view_count`` is used when the follower count does
            not reach any tier. Missing or ``None`` values count as 0.

    Returns:
        One of the fixed label strings.
    """
    subs = (
        video_info.get("channel_follower_count")
        or video_info.get("channel_followers")
        or 0
    )
    views = video_info.get("view_count") or 0

    # Follower tiers, largest first; thresholds are exclusive.
    tiers = (
        (10_000_000, "🌟 Mega Influencer (10M+)"),
        (1_000_000, "⭐ Major Influencer (1M+)"),
        (100_000, "🎯 Mid-tier Influencer (100K+)"),
        (10_000, "📈 Micro Influencer (10K+)"),
    )
    for threshold, label in tiers:
        if subs > threshold:
            return label
    if views > 100_000:
        return "🔥 Viral Content Creator"
    return "👤 Regular Content Creator"
# ---------------------------------------------------------
# 숫자 포맷터
# ---------------------------------------------------------
def format_number(self, num):
    """Abbreviate a count with a K / M / B suffix and one decimal place.

    Falsy input (``None``, ``0``) gives ``"0"``; values below 1000 are
    returned via ``str`` unchanged.
    """
    if not num:
        return "0"
    scales = ((1_000_000_000, "B"), (1_000_000, "M"), (1_000, "K"))
    for divisor, suffix in scales:
        if num >= divisor:
            return f"{num / divisor:.1f}{suffix}"
    return str(num)
# ---------------------------------------------------------
# 최종 리포트 생성 (5개 언어)
# ---------------------------------------------------------
def format_video_info(self, video_info):
    """Render the multi-language analysis report for one video.

    Args:
        video_info: Metadata dict of the video. Fields that are missing or
            ``None`` (for example hidden like or comment counts) are
            replaced by neutral defaults so the report can still be
            produced; a fractional duration is truncated to whole seconds.

    Returns:
        The report text, or an error line when *video_info* is empty.
    """
    if not video_info:
        return "❌ No video information available."

    title = video_info.get("title") or "Unknown"
    uploader = video_info.get("uploader") or "Unknown"
    # ":02d" needs an int, so normalise float/None durations first.
    duration = int(video_info.get("duration") or 0)
    dur_str = f"{duration//60}:{duration%60:02d}" if duration else "Unknown"
    # "{x:,}" cannot format None, so default every counter to 0.
    views = video_info.get("view_count") or 0
    likes = video_info.get("like_count") or 0
    comments = video_info.get("comment_count") or 0
    upload_date = str(video_info.get("upload_date") or "Unknown")
    if len(upload_date) == 8:
        # YYYYMMDD -> YYYY-MM-DD
        upload_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:8]}"

    scene_data = self.generate_scene_breakdown_gemini(video_info)
    korean_text = "\n".join(scene_data.get("korean", []))
    english_text = "\n".join(scene_data.get("english", []))
    chinese_scenes = scene_data.get("chinese", [])
    thai_scenes = scene_data.get("thai", [])
    russian_scenes = scene_data.get("russian", [])
    chinese_text = (
        "\n".join(chinese_scenes)
        if chinese_scenes
        else "(중국어 번역 없음 / No Chinese translation available)"
    )
    thai_text = (
        "\n".join(thai_scenes)
        if thai_scenes
        else "(태국어 번역 없음 / No Thai translation available)"
    )
    russian_text = (
        "\n".join(russian_scenes)
        if russian_scenes
        else "(러시아어 번역 없음 / No Russian translation available)"
    )

    vtype = self.detect_video_type(title, video_info.get("description") or "")
    bgm = self.detect_background_music(video_info)
    creator = self.detect_influencer_status(video_info)
    engagement = (likes / views) * 100 if views else 0

    report = f"""
🎬 YOUTUBE VIDEO ANALYSIS REPORT
{'='*50}
📋 기본 정보 / BASIC INFORMATION
{'─'*25}
📹 **제목/Title:** {title}
👤 **업로더/Uploader:** {uploader}
📅 **업로드 날짜/Upload Date:** {upload_date}
⏱️ **재생시간/Duration:** {dur_str}
🆔 **비디오 ID/Video ID:** {video_info.get('id', 'Unknown')}
📊 성과 지표 / PERFORMANCE METRICS
{'─'*25}
👀 **조회수/Views:** {self.format_number(views)} ({views:,})
👍 **좋아요/Likes:** {self.format_number(likes)} ({likes:,})
💬 **댓글/Comments:** {self.format_number(comments)} ({comments:,})
📈 **참여율/Engagement Rate:** {engagement:.2f}%
🎯 콘텐츠 분석 / CONTENT ANALYSIS
{'─'*25}
📂 **비디오 유형/Video Type:** {vtype}
🎵 **배경음악/Background Music:** {bgm}
👑 **제작자 상태/Creator Status:** {creator}
🎙️ 음성/대사 추출 (한국어) / SPEECH/DIALOGUE EXTRACTION (KOREAN)
{'─'*30}
{korean_text}
🎙️ 음성/대사 추출 (영어) / SPEECH/DIALOGUE EXTRACTION (ENGLISH)
{'─'*30}
{english_text}
🎙️ 音频/对话提取 (中文) / SPEECH/DIALOGUE EXTRACTION (CHINESE)
{'─'*30}
{chinese_text}
🎙️ การดึงเสียง/บทสนทนา (ไทย) / SPEECH/DIALOGUE EXTRACTION (THAI)
{'─'*30}
{thai_text}
🎙️ Извлечение речи/диалога (Русский) / SPEECH/DIALOGUE EXTRACTION (RUSSIAN)
{'─'*30}
{russian_text}
{'='*50}
📊 **분석 완료/Analysis completed:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
🤖 **AI 강화/AI Enhancement:** {'Gemini AI' if self.gemini_model else 'Standard Analysis'}
"""
    return report.strip()
# ---------------------------------------------------------
# 메타데이터 추출
# ---------------------------------------------------------
def get_video_info(self, url, progress=gr.Progress(), cookiefile=None):
    """Fetch video metadata with yt-dlp without downloading the media.

    Args:
        url: YouTube URL; surrounding whitespace is removed before it is
            validated and passed on.
        progress: Gradio progress callback.
        cookiefile: Optional path to a cookie file. If it is not given or
            does not exist, ``DEFAULT_COOKIE_FILE`` is used when present.

    Returns:
        ``(info, message)``; ``info`` is None on any failure and
        ``message`` then describes the problem.
    """
    if not url or not url.strip():
        return None, "❌ Please enter a YouTube URL"
    # Validate and use the same, trimmed URL.
    url = url.strip()
    if not self.is_valid_youtube_url(url):
        return None, "❌ Invalid YouTube URL format"

    # Cookie precedence: given file -> default file next to the script -> none.
    if not (cookiefile and os.path.exists(cookiefile)):
        cookiefile = (
            str(DEFAULT_COOKIE_FILE) if DEFAULT_COOKIE_FILE.exists() else None
        )

    try:
        progress(0.1, desc="Initializing YouTube extractor…")
        ydl_opts = {"noplaylist": True, "extract_flat": False}
        if cookiefile:
            ydl_opts["cookiefile"] = cookiefile
        progress(0.5, desc="Extracting video metadata…")
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)
        progress(1.0, desc="✅ Analysis complete!")
        return info, "✅ Video information extracted successfully"
    except Exception as e:
        return None, f"❌ Error: {e}"
# ---------------------------------------------------------
# 다운로드
# ---------------------------------------------------------
def download_video(
    self,
    url,
    quality="best",
    audio_only=False,
    progress=gr.Progress(),
    cookiefile=None,
):
    """Download a video (or its audio as MP3) and copy it to the Downloads folder.

    Args:
        url: YouTube URL; surrounding whitespace is removed before it is
            validated and passed on.
        quality: ``"720p"``, ``"480p"`` or anything else for the default
            (height limited to 1080).
        audio_only: When True, extract audio to MP3 instead.
        progress: Gradio progress callback.
        cookiefile: Optional path to a cookie file. If it is not given or
            does not exist, ``DEFAULT_COOKIE_FILE`` is used when present.

    Returns:
        ``(path, message)`` where ``path`` is the file inside the temporary
        download directory, or ``(None, message)`` on failure. A failed
        copy to the Downloads folder is reported in the message but does
        not fail the download.
    """
    if not url or not url.strip():
        return None, "❌ Please enter a YouTube URL"
    # Validate and use the same, trimmed URL.
    url = url.strip()
    if not self.is_valid_youtube_url(url):
        return None, "❌ Invalid YouTube URL format"

    # Cookie precedence: given file -> default file next to the script -> none.
    if not (cookiefile and os.path.exists(cookiefile)):
        cookiefile = (
            str(DEFAULT_COOKIE_FILE) if DEFAULT_COOKIE_FILE.exists() else None
        )

    try:
        progress(0.1, desc="Preparing download…")
        # The timestamp in the file name is used below to find the result.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        ydl_opts = {
            "outtmpl": os.path.join(
                self.temp_downloads, f"%(title)s_{timestamp}.%(ext)s"
            ),
            "noplaylist": True,
        }
        if audio_only:
            ydl_opts["format"] = "bestaudio/best"
            ydl_opts["postprocessors"] = [
                {
                    "key": "FFmpegExtractAudio",
                    "preferredcodec": "mp3",
                    "preferredquality": "192",
                }
            ]
        elif quality == "720p":
            ydl_opts["format"] = "best[height<=720]"
        elif quality == "480p":
            ydl_opts["format"] = "best[height<=480]"
        else:  # "best"
            ydl_opts["format"] = "best[height<=1080]"
        if cookiefile:
            ydl_opts["cookiefile"] = cookiefile

        # Progress hook: maps the download onto the 0.1-0.8 progress range.
        def hook(d):
            status = d.get("status")
            if status == "downloading":
                # total_bytes can be absent or None; fall back to the estimate.
                total = d.get("total_bytes") or d.get("total_bytes_estimate")
                if total:
                    pct = min(d.get("downloaded_bytes", 0) / total * 100, 100)
                    progress(0.1 + pct / 100 * 0.7, desc=f"Downloading… {pct:.1f}%")
                else:
                    progress(0.5, desc="Downloading…")
            elif status == "finished":
                progress(0.8, desc="Processing download…")

        ydl_opts["progress_hooks"] = [hook]
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.extract_info(url, download=True)
        progress(0.9, desc="Copying to Downloads folder…")

        # Locate the finished file in the temp directory, ignoring leftovers
        # of partial downloads.
        downloaded_temp = None
        for f in os.listdir(self.temp_downloads):
            if timestamp in f and not f.endswith((".part", ".ytdl")):
                downloaded_temp = os.path.join(self.temp_downloads, f)
                break
        if not downloaded_temp:
            return None, "❌ Downloaded file not found"

        final_name = os.path.basename(downloaded_temp)
        final_path = os.path.join(self.downloads_folder, final_name)
        try:
            shutil.copy2(downloaded_temp, final_path)
            saved_to = final_path
        except Exception as e:
            # The temp copy is still returned to the caller.
            print(f"Copy warning: {e}")
            saved_to = "Copy failed"

        progress(1.0, desc="✅ Download complete!")
        msg = (
            "✅ Download successful!\n"
            f"📁 Temp file: {os.path.basename(downloaded_temp)}\n"
            f"📁 Saved to: {saved_to}\n"
            f"🎯 Size: {os.path.getsize(downloaded_temp)/(1024*1024):.1f} MB"
        )
        return downloaded_temp, msg
    except Exception as e:
        return None, f"❌ Download failed: {e}"
# =================================================================
# Helper functions for Gradio
# =================================================================
# Module-level YouTubeDownloader instance used by the Gradio helper functions
# in this file (configure_api_key, analyze_with_cookies, download_with_cookies).
downloader = YouTubeDownloader()
def configure_api_key(api_key):
    """Configure Gemini on the shared downloader from a user-supplied key.

    Returns:
        ``(message, update)`` where ``update`` is a ``gr.update`` whose
        ``visible`` flag mirrors whether configuration succeeded.
    """
    cleaned_key = api_key.strip() if api_key else ""
    if not cleaned_key:
        return "❌ Please enter a valid Google API key", gr.update(visible=False)
    success, message = downloader.configure_gemini(cleaned_key)
    return message, gr.update(visible=success)
def analyze_with_cookies(url, cookies_file, progress=gr.Progress()):
    """Gradio callback: fetch metadata for *url* and return the report text.

    Any exception is turned into a "System Error" string so the UI always
    receives text.
    """
    try:
        progress(0.05, desc="Starting analysis…")
        info, msg = downloader.get_video_info(
            url, progress=progress, cookiefile=cookies_file or None
        )
        if not info:
            return f"❌ Analysis Failed: {msg}"
        progress(0.95, desc="Generating report…")
        return downloader.format_video_info(info)
    except Exception as e:
        return f"❌ System Error: {e}"
def download_with_cookies(url, quality, audio_only, cookies_file, progress=gr.Progress()):
    """Gradio callback: download *url* and return ``(file_path, status)``.

    ``file_path`` is None whenever the download did not produce a file; any
    exception is reported as a "System Error" status.
    """
    try:
        progress(0.05, desc="Preparing download…")
        file_path, status = downloader.download_video(
            url,
            quality,
            audio_only,
            progress=progress,
            cookiefile=cookies_file or None,
        )
        if file_path:
            return file_path, status
        return None, status
    except Exception as e:
        return None, f"❌ System Error: {e}"
# =================================================================
# Gradio UI
# =================================================================
def create_interface():
    """Build and return the Gradio Blocks UI.

    The layout has an API-key section, shared URL / cookie inputs, an
    analysis tab and a download tab, followed by a tip about the default
    cookie file.
    """
    with gr.Blocks(
        theme=gr.themes.Soft(), title="🎥 YouTube Video Analyzer & Downloader Pro"
    ) as iface:
        gr.HTML("<h1>🎥 YouTube Video Analyzer & Downloader Pro</h1>")

        # API section
        with gr.Group():
            gr.HTML("<h3>🔑 Google Gemini API Configuration</h3>")
            with gr.Row():
                api_key_in = gr.Textbox(
                    label="🔑 Google API Key",
                    placeholder="Paste your Google API key…",
                    type="password",
                )
                api_btn = gr.Button("🔧 Configure API", variant="secondary")
            api_status = gr.Textbox(
                label="API Status",
                value="❌ Gemini API not configured – Using fallback analysis",
                interactive=False,
                lines=1,
            )

        # Main inputs shared by both tabs
        with gr.Row():
            url_in = gr.Textbox(
                label="🔗 YouTube URL",
                placeholder="Paste YouTube video URL…",
            )
            cookies_in = gr.File(
                label="🍪 Upload cookies.txt (optional)",
                file_types=[".txt"],
                type="filepath",
            )

        with gr.Tabs():
            with gr.TabItem("📊 Video Analysis"):
                analyze_btn = gr.Button("🔍 Analyze Video", variant="primary")
                analysis_out = gr.Textbox(
                    label="📊 Analysis Report", lines=25, show_copy_button=True
                )
                analyze_btn.click(
                    fn=analyze_with_cookies,
                    inputs=[url_in, cookies_in],
                    outputs=analysis_out,
                    show_progress=True,
                )
            with gr.TabItem("⬇️ Video Download"):
                with gr.Row():
                    quality_dd = gr.Dropdown(
                        choices=["best", "720p", "480p"],
                        value="best",
                        label="📺 Quality",
                    )
                    audio_cb = gr.Checkbox(label="🎵 Audio only (MP3)")
                download_btn = gr.Button("⬇️ Download Video", variant="primary")
                dl_status = gr.Textbox(
                    label="📥 Download Status", lines=5, show_copy_button=True
                )
                dl_file = gr.File(label="📁 Downloaded File", visible=False)

                def wrapped_download(url, q, a, cfile, progress=gr.Progress()):
                    # Reveal the file component only when a file really exists.
                    fp, st = download_with_cookies(url, q, a, cfile, progress)
                    if fp and os.path.exists(fp):
                        return st, gr.update(value=fp, visible=True)
                    return st, gr.update(visible=False)

                download_btn.click(
                    fn=wrapped_download,
                    inputs=[url_in, quality_dd, audio_cb, cookies_in],
                    outputs=[dl_status, dl_file],
                    show_progress=True,
                )

        # API button behaviour. configure_api_key() returns (message, update),
        # but only the status textbox is wired as an output, so hand Gradio
        # the message alone instead of the whole tuple.
        def configure_and_report(api_key):
            message, _visibility_update = configure_api_key(api_key)
            return message

        api_btn.click(
            fn=configure_and_report,
            inputs=[api_key_in],
            outputs=[api_status],
        )

        gr.HTML(
            """
<div style="margin-top:20px;padding:15px;background:#f0f8ff;border-left:5px solid #4285f4;border-radius:10px;">
<h3>💡 Tip: 쿠키 파일 자동 사용</h3>
<p><code>www.youtube.com_cookies.txt</code> 파일을 <strong>app.py</strong>와 같은
폴더에 두면 자동으로 사용됩니다. 주기적으로 새 파일로 교체해 주세요.</p>
</div>
"""
        )
    return iface
# =================================================================
# Entrypoint
# =================================================================
if __name__ == "__main__":
    import atexit

    app = create_interface()
    # Remove the downloader's temporary directories when the process exits.
    atexit.register(downloader.cleanup)
    app.launch(debug=True, show_error=True)