YTB-TEST

Running

App Files Files Community

YTB-TEST / app.py

fantaxy

Update app.py

c6c37f5 verified about 2 months ago

raw

history blame

43.5 kB

	#!/usr/bin/env python3
	"""
	YouTube Video Analyzer & Downloader Pro
	(쿠키 자동 처리 버전)

	· `www.youtube.com_cookies.txt` 파일이 app.py와 같은 폴더에 있으면
	자동으로 사용합니다.
	· Gradio UI에서 쿠키 파일을 업로드하면, 업로드된 파일이 우선 적용됩니다.
	"""

	# ──────────────────────────────────────────────────────────────
	# 표준 라이브러리
	# ──────────────────────────────────────────────────────────────
	import os
	import re
	import json
	import uuid
	import shutil
	import tempfile
	from datetime import datetime
	from pathlib import Path

	# ──────────────────────────────────────────────────────────────
	# 외부 라이브러리
	# ──────────────────────────────────────────────────────────────
	import gradio as gr
	import yt_dlp
	import google.generativeai as genai

	# ──────────────────────────────────────────────────────────────
	# 기본 쿠키 파일 경로 ― 파일명이 동일하면 자동 사용
	# ──────────────────────────────────────────────────────────────
	# Cookie file looked up next to this module; used automatically when present.
	DEFAULT_COOKIE_FILE = Path(__file__).parent / "www.youtube.com_cookies.txt"


	# =================================================================
	# Main Class
	# =================================================================
	class YouTubeDownloader:
	    """Inspect and download YouTube videos via yt-dlp, with optional Gemini output.

	    The instance owns two temporary directories (removed by ``cleanup``) and
	    tries to maintain a ``~/Downloads/YouTube_Downloads`` folder that receives
	    a copy of every finished download.

	    NOTE(review): this class was rebuilt from a copy of the file whose
	    indentation had been flattened; statement nesting follows the only reading
	    that parses, but should be compared with the original before merging.
	    """

	    # Upper bound on the number of placeholder segments in the fallback timeline.
	    _MAX_FALLBACK_SEGMENTS = 20

	    # Compiled once. Alternations use a bare "|"; an escaped "\|" would only
	    # match a literal pipe character and reject every real URL.
	    _YOUTUBE_URL_RE = re.compile(
	        r"(https?://)?(www\.)?"
	        r"(youtube|youtu|youtube-nocookie)\.(com|be)/"
	        r"(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})"
	    )

	    # yt-dlp format selectors per UI quality choice; anything else is "best".
	    _FORMAT_BY_QUALITY = {
	        "720p": "best[height<=720]",
	        "480p": "best[height<=480]",
	    }
	    _DEFAULT_FORMAT = "best[height<=1080]"

	    # Placeholder texts per language: (intro, main body, outro).
	    _FALLBACK_LABELS = {
	        "korean": (
	            "(음성 추출을 위해 Gemini API가 필요합니다. 인트로 부분...)",
	            "(본문 내용...)",
	            "(아웃트로 부분...)",
	        ),
	        "english": (
	            "(Gemini API required for speech extraction. Intro section...)",
	            "(Main content...)",
	            "(Outro section...)",
	        ),
	        "chinese": (
	            "(需要Gemini API进行语音提取。介绍部分...)",
	            "(主要内容...)",
	            "(结尾部分...)",
	        ),
	        "thai": (
	            "(ต้องใช้ Gemini API สำหรับการดึงเสียง ส่วนเปิดตัว...)",
	            "(เนื้อหาหลัก...)",
	            "(ส่วนจบ...)",
	        ),
	        "russian": (
	            "(Требуется Gemini API для извлечения речи. Вступительная часть...)",
	            "(Основное содержание...)",
	            "(Заключительная часть...)",
	        ),
	    }

	    # Translation prompts; "{transcript}" is replaced by the Korean lines.
	    _TRANSLATION_PROMPTS = {
	        "english": """
	Translate the following Korean speech/dialogue transcription to English, maintaining the exact same timestamps.
	Translate ONLY the actual speech content, not descriptions:

	{transcript}

	Important rules:
	- Keep the format exactly the same: [MM:SS-MM:SS]: "English translation of speech"
	- For non-speech parts like (배경음악), translate as (background music)
	- For (...) keep as is
	- For interview tags like [이도학 교수], translate as [Professor Lee Do-hak]
	- For [자막], translate as [Subtitle]
	- Keep quotation marks for actual speech
	- Do NOT add any scene descriptions or explanations
	""",
	        "chinese": """
	将以下韩语语音/对话转录翻译成中文，保持完全相同的时间戳。
	只翻译实际的语音内容，不要描述：

	{transcript}

	重要规则：
	- 保持格式完全相同：[MM:SS-MM:SS]: "语音的中文翻译"
	- 对于像(배경음악)这样的非语音部分，翻译为(背景音乐)
	- 对于(...)保持原样
	- 对于像[이도학 교수]这样的采访标签，翻译为[李道学教授]
	- 对于[자막]，翻译为[字幕]
	- 保留实际语音的引号
	- 不要添加任何场景描述或解释
	""",
	        "thai": """
	แปลคำบรรยายเสียง/บทสนทนาภาษาเกาหลีต่อไปนี้เป็นภาษาไทย โดยคงรูปแบบเวลาเดิมไว้
	แปลเฉพาะเนื้อหาเสียงจริงเท่านั้น ไม่ต้องบรรยาย:

	{transcript}

	กฎสำคัญ:
	- คงรูปแบบเดิมไว้: [MM:SS-MM:SS]: "คำแปลภาษาไทยของเสียงพูด"
	- สำหรับส่วนที่ไม่ใช่เสียงพูด เช่น (배경음악) แปลเป็น (เพลงประกอบ)
	- สำหรับ (...) ให้คงเดิม
	- สำหรับป้ายสัมภาษณ์ เช่น [이도학 교수] แปลเป็น [ศาสตราจารย์ อี โด-ฮัก]
	- สำหรับ [자막] แปลเป็น [คำบรรยาย]
	- คงเครื่องหมายคำพูดสำหรับเสียงพูดจริง
	- ไม่ต้องเพิ่มคำบรรยายฉากหรือคำอธิบายใดๆ
	""",
	        "russian": """
	Переведите следующую корейскую транскрипцию речи/диалога на русский язык, сохраняя точно такие же временные метки.
	Переводите ТОЛЬКО фактическое содержание речи, а не описания:

	{transcript}

	Важные правила:
	- Сохраняйте формат точно таким же: [MM:SS-MM:SS]: "Русский перевод речи"
	- Для частей без речи, таких как (배경음악), переведите как (фоновая музыка)
	- Для (...) оставьте как есть
	- Для тегов интервью, таких как [이도학 교수], переведите как [Профессор Ли До Хак]
	- Для [자막] переведите как [Субтитры]
	- Сохраняйте кавычки для фактической речи
	- НЕ добавляйте никаких описаний сцен или объяснений
	""",
	    }

	    # (keywords, label) pairs, checked in order; first hit wins.
	    _DETAILED_TYPE_KEYWORDS = (
	        (("tutorial", "how to", "guide", "learn", "diy"), "tutorial"),
	        (("review", "unboxing", "test", "comparison", "vs"), "review"),
	        (("vlog", "daily", "routine", "day in"), "vlog"),
	        (("music", "song", "cover", "lyrics"), "music"),
	        (("comedy", "funny", "prank", "challenge"), "entertainment"),
	        (("news", "breaking", "update", "report"), "news"),
	        (("cooking", "recipe", "food", "kitchen"), "cooking"),
	        (("workout", "fitness", "exercise", "yoga"), "fitness"),
	    )
	    _SIMPLE_TYPE_KEYWORDS = (
	        (("music", "song", "album", "artist", "band", "lyrics"), "🎵 Music Video"),
	        (("tutorial", "how to", "guide", "learn"), "📚 Tutorial/Educational"),
	        (("funny", "comedy", "entertainment", "vlog"), "🎭 Entertainment/Comedy"),
	        (("news", "breaking", "report", "update"), "📰 News/Information"),
	        (("review", "unboxing", "test", "comparison"), "⭐ Review/Unboxing"),
	        (("commercial", "ad", "brand", "product"), "📺 Commercial/Advertisement"),
	    )

	    def __init__(self):
	        """Create the working directories and start without a Gemini model."""
	        # Temporary directories (Gradio-compatible).
	        self.download_dir = tempfile.mkdtemp()
	        self.temp_downloads = tempfile.mkdtemp(prefix="youtube_downloads_")

	        # Sub-folder of the user's Downloads directory.
	        self.downloads_folder = os.path.join(
	            os.path.expanduser("~"), "Downloads", "YouTube_Downloads"
	        )
	        try:
	            os.makedirs(self.downloads_folder, exist_ok=True)
	        except OSError as e:
	            # Copying into this folder is already best-effort in download_video,
	            # so an unwritable home directory must not prevent start-up.
	            print(f"⚠️ Warning: Could not create downloads folder: {e}")

	        self.gemini_model = None

	    # ---------------------------------------------------------
	    # Google Gemini API
	    # ---------------------------------------------------------
	    def configure_gemini(self, api_key):
	        """Configure the Gemini client and remember the model.

	        Returns:
	            ``(ok, message)`` where ``ok`` is True when configuration succeeded.
	        """
	        try:
	            genai.configure(api_key=api_key)
	            self.gemini_model = genai.GenerativeModel(
	                model_name="gemini-1.5-flash-latest"
	            )
	            return True, "✅ Gemini API configured successfully!"
	        except Exception as e:
	            return False, f"❌ Failed to configure Gemini API: {e}"

	    # ---------------------------------------------------------
	    # Temporary directory clean-up
	    # ---------------------------------------------------------
	    def cleanup(self):
	        """Remove both temporary directories; a failure on one does not skip the other."""
	        for attr in ("download_dir", "temp_downloads"):
	            path = getattr(self, attr, None)
	            if not path or not os.path.exists(path):
	                continue
	            try:
	                shutil.rmtree(path)
	            except Exception as e:
	                print(f"⚠️ Warning: Could not clean up temporary directory: {e}")

	    # ---------------------------------------------------------
	    # YouTube URL validation
	    # ---------------------------------------------------------
	    def is_valid_youtube_url(self, url):
	        """Return True when ``url`` looks like a YouTube video URL.

	        Non-string input returns False; surrounding whitespace is ignored.
	        """
	        if not isinstance(url, str):
	            return False
	        return self._YOUTUBE_URL_RE.match(url.strip()) is not None

	    # ---------------------------------------------------------
	    # Shared helpers
	    # ---------------------------------------------------------
	    @staticmethod
	    def _parse_timestamped_lines(text):
	        """Group model output into entries that start with ``[...]:``.

	        Lines before the first timestamped line are dropped; later lines that
	        do not start a new entry are appended to the current one.
	        """
	        scenes = []
	        current = ""
	        for line in (text or "").split("\n"):
	            line = line.strip()
	            if line.startswith("[") and "]:" in line:
	                if current:
	                    scenes.append(current.strip())
	                current = line
	            elif current:
	                current += "\n" + line
	        if current:
	            scenes.append(current.strip())
	        return scenes

	    @staticmethod
	    def _resolve_cookiefile(cookiefile):
	        """Pick the cookie file: uploaded file, then the bundled default, else None."""
	        if cookiefile and os.path.exists(cookiefile):
	            return cookiefile
	        if DEFAULT_COOKIE_FILE.exists():
	            return str(DEFAULT_COOKIE_FILE)
	        return None

	    @staticmethod
	    def _classify(text, keyword_table, default):
	        """Return the label of the first keyword group with a substring hit."""
	        for keywords, label in keyword_table:
	            if any(word in text for word in keywords):
	                return label
	        return default

	    @staticmethod
	    def _pick_scene_line(scene_index, total_scenes, video_type, intro, outro, middle):
	        """Choose the intro/outro/middle table by position, then the entry by type."""
	        if scene_index == 0:
	            table = intro
	        elif scene_index == total_scenes - 1:
	            table = outro
	        else:
	            table = middle
	        return table.get(video_type, table["default"])

	    # ---------------------------------------------------------
	    # Gemini speech/dialogue extraction (Korean first)
	    # ---------------------------------------------------------
	    def generate_scene_breakdown_gemini(self, video_info):
	        """Ask Gemini for a timestamped Korean transcript and four translations.

	        Falls back to ``generate_scene_breakdown_fallback`` when no model is
	        configured, when the Korean request fails, or when no timestamped line
	        can be parsed from it. A failed translation only empties that language
	        (English then uses the fallback text) instead of discarding everything.

	        NOTE: the prompt contains only title, duration and description, so the
	        model never receives the audio itself.

	        Returns:
	            dict with ``korean``, ``english``, ``chinese``, ``thai`` and
	            ``russian`` lists of strings.
	        """
	        if not self.gemini_model:
	            return self.generate_scene_breakdown_fallback(video_info)

	        # yt-dlp may store None under these keys, so normalise before use.
	        duration = video_info.get("duration") or 0
	        title = video_info.get("title") or ""
	        description = (video_info.get("description") or "")[:1500]

	        if not duration:
	            return {
	                "korean": ["[재생시간 알 수 없음]: 비디오 재생시간을 확인할 수 없어 타임스탬프를 생성할 수 없습니다"],
	                "english": ["[Duration Unknown]: Unable to generate timestamped breakdown - video duration not available"],
	                "chinese": [],
	                "thai": [],
	                "russian": [],
	            }

	        prompt = f"""
	이 YouTube 비디오의 음성/대사를 타임스탬프별로 추출해주세요.

	제목: {title}
	재생시간: {duration}초
	설명: {description}

	매우 중요한 지침:
	1. 실제 영상에서 들리는 대사, 내레이션, 음성을 그대로 적어주세요
	2. 장면 설명이 아닌 실제 음성 내용만 작성하세요
	3. 음성이 없는 부분은 (...) 또는 (배경음악) 등으로 표시
	4. 타임스탬프 가이드라인:
	- 대사나 내레이션이 시작하고 끝나는 지점 기준
	- 연속된 대사는 하나로 묶어서 표시
	- 최대한 자연스러운 단위로 구분
	5. 형식:
	[MM:SS-MM:SS]: "실제 대사나 내레이션 내용"
	[MM:SS-MM:SS]: (배경음악) 또는 (...장면 전환...)
	6. 모든 음성 내용을 빠짐없이 적어주세요
	7. 자막이나 화면에 표시된 텍스트도 포함하세요
	8. 장면 설명은 절대 하지 마세요. 오직 음성과 텍스트만 추출하세요.

	예시:
	[00:00-00:05]: "안녕하세요. 오늘은 미륵산에서 발견된 백제 유적에 대해 알아보겠습니다."
	[00:05-00:08]: (배경음악)
	[00:08-00:15]: "미륵사지를 품고 있는 익산 미륵산의 정상부에서 백제시대에 만든 것으로 추정되는 저수조가 발굴됐습니다."
	[00:15-00:18]: (인터뷰 준비 중...)
	[00:18-00:25]: [이도학 교수] "이번 발굴은 백제 역사 연구에 중요한 전환점이 될 것입니다."

	뉴스의 경우:
	- 앵커나 기자의 멘트는 그대로 적기
	- 인터뷰는 [인터뷰이 이름] "내용" 형식으로
	- 자막은 [자막] 내용 형식으로
	"""
	        try:
	            response = self.gemini_model.generate_content(prompt)
	            korean_scenes = self._parse_timestamped_lines(
	                response.text if response else ""
	            )
	        except Exception as e:
	            print(f"Gemini API error: {e}")
	            return self.generate_scene_breakdown_fallback(video_info)

	        # Nothing usable came back: translating an empty transcript would only
	        # spend four more requests, so use the placeholder timeline instead.
	        if not korean_scenes:
	            return self.generate_scene_breakdown_fallback(video_info)

	        transcript = "\n".join(korean_scenes)
	        translations = {}
	        for language, template in self._TRANSLATION_PROMPTS.items():
	            try:
	                reply = self.gemini_model.generate_content(
	                    template.format(transcript=transcript)
	                )
	                translations[language] = self._parse_timestamped_lines(
	                    reply.text if reply else ""
	                )
	            except Exception as e:
	                # Keep the transcript and the other languages.
	                print(f"Gemini API error ({language} translation): {e}")
	                translations[language] = []

	        if not translations["english"]:
	            translations["english"] = self.generate_scene_breakdown_fallback(
	                video_info
	            )["english"]

	        return {
	            "korean": korean_scenes,
	            "english": translations["english"],
	            "chinese": translations["chinese"],
	            "thai": translations["thai"],
	            "russian": translations["russian"],
	        }

	    # ---------------------------------------------------------
	    # Fallback speech/dialogue extraction (5 languages)
	    # ---------------------------------------------------------
	    def generate_scene_breakdown_fallback(self, video_info):
	        """Build a placeholder timeline when Gemini output is unavailable.

	        The video is cut into 10/15/20/30-second segments depending on length.
	        At most ``_MAX_FALLBACK_SEGMENTS`` entries are produced; for longer
	        videos the segments are widened so the last entry still ends at the
	        end of the video.

	        Returns:
	            dict with ``korean``, ``english``, ``chinese``, ``thai`` and
	            ``russian`` lists of ``[M:SS-M:SS]: (...)`` strings.
	        """
	        # Durations may arrive as floats or None; the formatting below needs ints.
	        duration = int(video_info.get("duration") or 0)

	        if duration <= 0:
	            return {
	                "korean": ["[재생시간 알 수 없음]: 타임스탬프를 생성할 수 없습니다"],
	                "english": ["[Duration Unknown]: Unable to generate timestamped breakdown"],
	                "chinese": ["[持续时间未知]: 无法生成带时间戳的分解"],
	                "thai": ["[ไม่ทราบระยะเวลา]: ไม่สามารถสร้างการแบ่งส่วนตามเวลาได้"],
	                "russian": ["[Продолжительность неизвестна]: Невозможно создать временную разбивку"],
	            }

	        if duration <= 60:
	            segment_length = 10
	        elif duration <= 300:
	            segment_length = 15
	        elif duration <= 900:
	            segment_length = 20
	        else:
	            segment_length = 30

	        # Ceiling division: an exact multiple must not add an empty last segment.
	        num_segments = -(-duration // segment_length)
	        if num_segments > self._MAX_FALLBACK_SEGMENTS:
	            segment_length = -(-duration // self._MAX_FALLBACK_SEGMENTS)
	            num_segments = -(-duration // segment_length)

	        scenes = {language: [] for language in self._FALLBACK_LABELS}
	        for i in range(num_segments):
	            is_last = i == num_segments - 1
	            start_time = i * segment_length
	            end_time = duration if is_last else start_time + segment_length - 1

	            start_fmt = f"{start_time//60}:{start_time%60:02d}"
	            end_fmt = f"{end_time//60}:{end_time%60:02d}"

	            # A single-segment video is labelled as intro, as before.
	            if i == 0:
	                label_index = 0
	            elif is_last:
	                label_index = 2
	            else:
	                label_index = 1

	            for language, labels in self._FALLBACK_LABELS.items():
	                scenes[language].append(
	                    f"[{start_fmt}-{end_fmt}]: {labels[label_index]}"
	                )

	        return scenes

	    # ---------------------------------------------------------
	    # Video type detection (detailed)
	    # ---------------------------------------------------------
	    def detect_video_type_detailed(self, title, description):
	        """Return a lowercase category key from keywords in title/description."""
	        text = ((title or "") + " " + (description or "")).lower()
	        return self._classify(text, self._DETAILED_TYPE_KEYWORDS, "general")

	    # ---------------------------------------------------------
	    # Per-scene description (Korean)
	    # ---------------------------------------------------------
	    def generate_contextual_description_korean(
	        self, scene_index, total_scenes, video_type, uploader, title
	    ):
	        """Return a canned Korean sentence for the given scene position and type.

	        ``uploader`` is accepted for signature compatibility and is not used.
	        ``title`` is matched as given (no lower-casing is applied here).
	        """
	        title = title or ""
	        presenter_desc = "콘텐츠 제작자"
	        if any(w in title for w in ("woman", "girl", "여성", "여자")):
	            presenter_desc = "여성 진행자"
	        elif any(w in title for w in ("man", "guy", "남성", "남자")):
	            presenter_desc = "남성 진행자"

	        intro = {
	            "tutorial": f"{presenter_desc}가 화면에 등장하여 자신을 소개하고 주제를 설명합니다. 밝은 조명의 작업 공간에서 캐주얼한 옷을 입고 있습니다.",
	            "vlog": f"{presenter_desc}가 카메라를 향해 밝게 인사하며 오늘의 브이로그 주제를 설명합니다.",
	            "review": f"{presenter_desc}가 리뷰할 제품을 들어 보이며 간단한 특징을 소개합니다.",
	            "default": f"{presenter_desc}가 시청자의 관심을 끌기 위한 매력적인 인트로로 영상을 시작합니다.",
	        }
	        outro = {
	            "tutorial": f"{presenter_desc}가 최종 결과물을 보여주고 시청자에게 감사 인사를 전하며 좋아요와 구독을 부탁합니다.",
	            "vlog": f"{presenter_desc}가 하루를 마무리하며 최종 생각을 공유하고 작별 인사를 합니다.",
	            "default": f"{presenter_desc}가 주요 내용을 요약하고 좋아요와 댓글을 통한 참여를 독려하며 마무리합니다.",
	        }
	        middle = {
	            "tutorial": f"{presenter_desc}가 다음 단계를 시연하며 클로즈업 샷과 함께 명확한 지침을 제공합니다.",
	            "review": f"{presenter_desc}가 제품의 특정 기능을 살펴보며 사용 모습을 보여주고 성능에 대해 설명합니다.",
	            "vlog": f"{presenter_desc}가 일상 활동을 계속하며 솔직한 순간과 개인적인 생각을 공유합니다.",
	            "cooking": f"{presenter_desc}가 재료를 준비하며 자르고 섞으면서 각 단계를 설명합니다.",
	            "fitness": f"{presenter_desc}가 운동 세트를 수행하며 올바른 자세를 시연하고 팁을 제공합니다.",
	            "default": f"{presenter_desc}가 명확한 설명을 통해 시청자와 소통하며 주요 콘텐츠를 진행합니다.",
	        }
	        return self._pick_scene_line(
	            scene_index, total_scenes, video_type, intro, outro, middle
	        )

	    # ---------------------------------------------------------
	    # Per-scene description (English - original method kept)
	    # ---------------------------------------------------------
	    def generate_contextual_description(
	        self, scene_index, total_scenes, video_type, uploader, title
	    ):
	        """Return a canned English sentence for the given scene position and type.

	        ``uploader`` is accepted for signature compatibility and is not used.
	        ``title`` is matched as given (no lower-casing is applied here).
	        """
	        title = title or ""
	        presenter_desc = "The content creator"
	        if "woman" in title or "girl" in title:
	            presenter_desc = "A woman"
	        elif "man" in title or "guy" in title:
	            presenter_desc = "A man"

	        intro = {
	            "tutorial": (
	                f"{presenter_desc} appears on screen, introducing themselves and the "
	                f"topic. They are in a well-lit workspace, wearing casual clothes."
	            ),
	            "vlog": (
	                f"{presenter_desc} greets the camera cheerfully, perhaps waving, and "
	                f"explains what today's vlog is about."
	            ),
	            "review": (
	                f"{presenter_desc} holds up the product to be reviewed, giving a brief "
	                f"overview of its features."
	            ),
	            "default": (
	                f"{presenter_desc} starts the video with an engaging introduction to "
	                f"capture viewers' attention."
	            ),
	        }
	        outro = {
	            "tutorial": (
	                f"{presenter_desc} shows the final result, thanks viewers, and "
	                f"encourages them to like and subscribe."
	            ),
	            "vlog": (
	                f"{presenter_desc} wraps up the day, sharing final thoughts and "
	                f"bidding farewell."
	            ),
	            "default": (
	                f"{presenter_desc} concludes, summarizing key points and prompting "
	                f"engagement through likes and comments."
	            ),
	        }
	        middle = {
	            "tutorial": (
	                f"{presenter_desc} demonstrates the next step, providing clear "
	                f"instructions with close-up shots."
	            ),
	            "review": (
	                f"{presenter_desc} examines a specific feature of the product, showing "
	                f"it in use and commenting on performance."
	            ),
	            "vlog": (
	                f"{presenter_desc} continues the day's activities, sharing candid "
	                f"moments and personal reflections."
	            ),
	            "cooking": (
	                f"{presenter_desc} prepares ingredients, chopping and mixing while "
	                f"explaining each step."
	            ),
	            "fitness": (
	                f"{presenter_desc} performs an exercise set, demonstrating proper form "
	                f"and offering tips."
	            ),
	            "default": (
	                f"{presenter_desc} proceeds with the main content, engaging viewers through "
	                f"clear explanations."
	            ),
	        }
	        return self._pick_scene_line(
	            scene_index, total_scenes, video_type, intro, outro, middle
	        )

	    # ---------------------------------------------------------
	    # Video type (short)
	    # ---------------------------------------------------------
	    def detect_video_type(self, title, description):
	        """Return a display label for the video type from title/description keywords."""
	        text = ((title or "") + " " + (description or "")).lower()
	        return self._classify(text, self._SIMPLE_TYPE_KEYWORDS, "🎬 General Content")

	    # ---------------------------------------------------------
	    # Background music guess
	    # ---------------------------------------------------------
	    def detect_background_music(self, video_info):
	        """Guess a background-music label from keywords in the title."""
	        title = (video_info.get("title") or "").lower()
	        if "music" in title or "song" in title:
	            return "🎵 Original Music/Soundtrack"
	        if "commercial" in title or "ad" in title:
	            return "🎶 Upbeat Commercial Music"
	        if "tutorial" in title or "how to" in title:
	            return "🔇 Minimal/No Background Music"
	        if "vlog" in title or "daily" in title:
	            return "🎼 Ambient Background Music"
	        return "🎵 Background Music"

	    # ---------------------------------------------------------
	    # Influencer size estimate
	    # ---------------------------------------------------------
	    def detect_influencer_status(self, video_info):
	        """Classify the channel by subscriber count, then by view count.

	        Reads yt-dlp's ``channel_follower_count`` and still honours the older
	        ``channel_followers`` key. Missing or None values count as 0.
	        """
	        subs = (
	            video_info.get("channel_follower_count")
	            or video_info.get("channel_followers")
	            or 0
	        )
	        views = video_info.get("view_count") or 0
	        if subs > 10_000_000:
	            return "🌟 Mega Influencer (10M+)"
	        if subs > 1_000_000:
	            return "⭐ Major Influencer (1M+)"
	        if subs > 100_000:
	            return "🎯 Mid-tier Influencer (100K+)"
	        if subs > 10_000:
	            return "📈 Micro Influencer (10K+)"
	        if views > 100_000:
	            return "🔥 Viral Content Creator"
	        return "👤 Regular Content Creator"

	    # ---------------------------------------------------------
	    # Number formatter
	    # ---------------------------------------------------------
	    def format_number(self, num):
	        """Abbreviate a count as ``1.5K`` / ``2.3M`` / ``1.0B``; falsy input gives "0"."""
	        if not num:
	            return "0"
	        if num >= 1_000_000_000:
	            return f"{num/1_000_000_000:.1f}B"
	        if num >= 1_000_000:
	            return f"{num/1_000_000:.1f}M"
	        if num >= 1_000:
	            return f"{num/1_000:.1f}K"
	        return str(num)

	    # ---------------------------------------------------------
	    # Final report (5 languages)
	    # ---------------------------------------------------------
	    def format_video_info(self, video_info):
	        """Render the full text report for a yt-dlp info dict.

	        Metadata fields that are missing or None are shown as ``Unknown`` or 0
	        rather than raising while formatting.
	        """
	        if not video_info:
	            return "❌ No video information available."

	        title = video_info.get("title") or "Unknown"
	        uploader = video_info.get("uploader") or "Unknown"
	        duration = int(video_info.get("duration") or 0)
	        dur_str = f"{duration//60}:{duration%60:02d}" if duration else "Unknown"
	        views = video_info.get("view_count") or 0
	        likes = video_info.get("like_count") or 0
	        comments = video_info.get("comment_count") or 0
	        upload_date = video_info.get("upload_date") or "Unknown"

	        # yt-dlp reports dates as YYYYMMDD.
	        if len(upload_date) == 8:
	            upload_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:8]}"

	        scene_data = self.generate_scene_breakdown_gemini(video_info)
	        korean_block = "\n".join(scene_data.get("korean", []))
	        english_block = "\n".join(scene_data.get("english", []))
	        chinese_block = "\n".join(scene_data.get("chinese", [])) or (
	            "(중국어 번역 없음 / No Chinese translation available)"
	        )
	        thai_block = "\n".join(scene_data.get("thai", [])) or (
	            "(태국어 번역 없음 / No Thai translation available)"
	        )
	        russian_block = "\n".join(scene_data.get("russian", [])) or (
	            "(러시아어 번역 없음 / No Russian translation available)"
	        )

	        vtype = self.detect_video_type(title, video_info.get("description") or "")
	        bgm = self.detect_background_music(video_info)
	        creator = self.detect_influencer_status(video_info)
	        engagement = (likes / views) * 100 if views else 0

	        report = f"""
	🎬 YOUTUBE VIDEO ANALYSIS REPORT
	{'='*50}

	📋 기본 정보 / BASIC INFORMATION
	{'─'*25}
	📹 제목/Title: {title}
	👤 업로더/Uploader: {uploader}
	📅 업로드 날짜/Upload Date: {upload_date}
	⏱️ 재생시간/Duration: {dur_str}
	🆔 비디오 ID/Video ID: {video_info.get('id', 'Unknown')}

	📊 성과 지표 / PERFORMANCE METRICS
	{'─'*25}
	👀 조회수/Views: {self.format_number(views)} ({views:,})
	👍 좋아요/Likes: {self.format_number(likes)} ({likes:,})
	💬 댓글/Comments: {self.format_number(comments)} ({comments:,})
	📈 참여율/Engagement Rate: {engagement:.2f}%

	🎯 콘텐츠 분석 / CONTENT ANALYSIS
	{'─'*25}
	📂 비디오 유형/Video Type: {vtype}
	🎵 배경음악/Background Music: {bgm}
	👑 제작자 상태/Creator Status: {creator}

	🎙️ 음성/대사 추출 (한국어) / SPEECH/DIALOGUE EXTRACTION (KOREAN)
	{'─'*30}
	{korean_block}

	🎙️ 음성/대사 추출 (영어) / SPEECH/DIALOGUE EXTRACTION (ENGLISH)
	{'─'*30}
	{english_block}

	🎙️ 音频/对话提取 (中文) / SPEECH/DIALOGUE EXTRACTION (CHINESE)
	{'─'*30}
	{chinese_block}

	🎙️ การดึงเสียง/บทสนทนา (ไทย) / SPEECH/DIALOGUE EXTRACTION (THAI)
	{'─'*30}
	{thai_block}

	🎙️ Извлечение речи/диалога (Русский) / SPEECH/DIALOGUE EXTRACTION (RUSSIAN)
	{'─'*30}
	{russian_block}

	{'='*50}
	📊 분석 완료/Analysis completed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
	🤖 AI 강화/AI Enhancement: {'Gemini AI' if self.gemini_model else 'Standard Analysis'}
	"""
	        return report.strip()

	    # ---------------------------------------------------------
	    # Metadata extraction
	    # ---------------------------------------------------------
	    def get_video_info(self, url, progress=None, cookiefile=None):
	        """Fetch metadata for ``url`` without downloading the media.

	        Args:
	            url: YouTube video URL; surrounding whitespace is ignored.
	            progress: progress callback; a ``gr.Progress()`` is created when
	                omitted.
	            cookiefile: optional cookies.txt path; the bundled default file is
	                used when this one is missing.

	        Returns:
	            ``(info, message)``; ``info`` is None on any failure.
	        """
	        if not url or not url.strip():
	            return None, "❌ Please enter a YouTube URL"
	        url = url.strip()
	        if not self.is_valid_youtube_url(url):
	            return None, "❌ Invalid YouTube URL format"

	        if progress is None:
	            progress = gr.Progress()
	        # Cookie order: UI upload -> default cookie file -> None
	        cookiefile = self._resolve_cookiefile(cookiefile)

	        try:
	            progress(0.1, desc="Initializing YouTube extractor…")
	            ydl_opts = {"noplaylist": True, "extract_flat": False}
	            if cookiefile:
	                ydl_opts["cookiefile"] = cookiefile

	            progress(0.5, desc="Extracting video metadata…")
	            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
	                info = ydl.extract_info(url, download=False)

	            progress(1.0, desc="✅ Analysis complete!")
	            return info, "✅ Video information extracted successfully"

	        except Exception as e:
	            return None, f"❌ Error: {e}"

	    # ---------------------------------------------------------
	    # Download
	    # ---------------------------------------------------------
	    def download_video(
	        self,
	        url,
	        quality="best",
	        audio_only=False,
	        progress=None,
	        cookiefile=None,
	    ):
	        """Download ``url`` into the temp folder and copy it to the Downloads folder.

	        Args:
	            url: YouTube video URL; surrounding whitespace is ignored.
	            quality: ``"best"``, ``"720p"`` or ``"480p"``.
	            audio_only: extract an MP3 instead of keeping the video.
	            progress: progress callback; a ``gr.Progress()`` is created when
	                omitted.
	            cookiefile: optional cookies.txt path; the bundled default file is
	                used when this one is missing.

	        Returns:
	            ``(path_in_temp_folder, message)``; the path is None on failure.
	        """
	        if not url or not url.strip():
	            return None, "❌ Please enter a YouTube URL"
	        url = url.strip()
	        if not self.is_valid_youtube_url(url):
	            return None, "❌ Invalid YouTube URL format"

	        if progress is None:
	            progress = gr.Progress()
	        cookiefile = self._resolve_cookiefile(cookiefile)

	        try:
	            progress(0.1, desc="Preparing download…")
	            # The random suffix keeps two downloads started within the same
	            # second from being mistaken for each other in the lookup below.
	            stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
	            tag = f"{stamp}_{uuid.uuid4().hex[:8]}"

	            ydl_opts = {
	                "outtmpl": os.path.join(
	                    self.temp_downloads, f"%(title)s_{tag}.%(ext)s"
	                ),
	                "noplaylist": True,
	            }

	            if audio_only:
	                ydl_opts["format"] = "bestaudio/best"
	                ydl_opts["postprocessors"] = [
	                    {
	                        "key": "FFmpegExtractAudio",
	                        "preferredcodec": "mp3",
	                        "preferredquality": "192",
	                    }
	                ]
	            else:
	                ydl_opts["format"] = self._FORMAT_BY_QUALITY.get(
	                    quality, self._DEFAULT_FORMAT
	                )

	            if cookiefile:
	                ydl_opts["cookiefile"] = cookiefile

	            # Progress hook
	            def hook(d):
	                status = d.get("status")
	                if status == "downloading":
	                    # total_bytes can be absent or None; use the estimate then.
	                    total = d.get("total_bytes") or d.get("total_bytes_estimate")
	                    done = d.get("downloaded_bytes") or 0
	                    if total:
	                        pct = min(done / total * 100, 100.0)
	                        progress(0.1 + pct / 100 * 0.7, desc=f"Downloading… {pct:.1f}%")
	                    else:
	                        progress(0.5, desc="Downloading…")
	                elif status == "finished":
	                    progress(0.8, desc="Processing download…")

	            ydl_opts["progress_hooks"] = [hook]

	            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
	                ydl.extract_info(url, download=True)

	            progress(0.9, desc="Copying to Downloads folder…")

	            # Locate the finished file in the temp directory by its unique tag.
	            downloaded_temp = next(
	                (
	                    os.path.join(self.temp_downloads, name)
	                    for name in os.listdir(self.temp_downloads)
	                    if tag in name
	                ),
	                None,
	            )
	            if not downloaded_temp:
	                return None, "❌ Downloaded file not found"

	            final_path = os.path.join(
	                self.downloads_folder, os.path.basename(downloaded_temp)
	            )

	            # The copy is best-effort: the temp file is what gets returned.
	            try:
	                os.makedirs(self.downloads_folder, exist_ok=True)
	                shutil.copy2(downloaded_temp, final_path)
	                saved = True
	            except Exception as e:
	                print(f"Copy warning: {e}")
	                saved = False

	            progress(1.0, desc="✅ Download complete!")

	            msg = (
	                "✅ Download successful!\n"
	                f"📁 Temp file: {os.path.basename(downloaded_temp)}\n"
	                f"📁 Saved to: {final_path if saved else 'Copy failed'}\n"
	                f"🎯 Size: {os.path.getsize(downloaded_temp)/(1024*1024):.1f} MB"
	            )
	            return downloaded_temp, msg

	        except Exception as e:
	            return None, f"❌ Download failed: {e}"


	# =================================================================
	# Helper functions for Gradio
	# =================================================================
	# Module-level instance used by the Gradio callbacks below; constructing it
	# runs YouTubeDownloader.__init__ (directory creation) at import time.
	downloader = YouTubeDownloader()


	def configure_api_key(api_key):
	    """Configure Gemini from the UI and report the outcome.

	    Returns:
	        ``(status_message, gr.update)`` where the update's ``visible`` flag is
	        True only when the key was accepted.
	    """
	    key = (api_key or "").strip()
	    if not key:
	        return "❌ Please enter a valid Google API key", gr.update(visible=False)
	    success, message = downloader.configure_gemini(key)
	    return message, gr.update(visible=success)


	def analyze_with_cookies(url, cookies_file, progress=gr.Progress()):
	    """Gradio callback: fetch metadata for ``url`` and return the text report.

	    Any failure is returned as a message string instead of being raised.
	    """
	    try:
	        progress(0.05, desc="Starting analysis…")
	        info, msg = downloader.get_video_info(
	            url, progress=progress, cookiefile=cookies_file or None
	        )
	        if not info:
	            return f"❌ Analysis Failed: {msg}"
	        progress(0.95, desc="Generating report…")
	        return downloader.format_video_info(info)
	    except Exception as e:
	        return f"❌ System Error: {e}"


	def download_with_cookies(url, quality, audio_only, cookies_file, progress=gr.Progress()):
	    """Gradio callback: download ``url`` and return ``(file_path, status)``.

	    ``file_path`` is None whenever the download did not produce a file; any
	    exception is reported through the status string.
	    """
	    try:
	        progress(0.05, desc="Preparing download…")
	        file_path, status = downloader.download_video(
	            url,
	            quality,
	            audio_only,
	            progress=progress,
	            cookiefile=cookies_file or None,
	        )
	    except Exception as e:
	        return None, f"❌ System Error: {e}"
	    return (file_path or None), status


	# =================================================================
	# Gradio UI
	# =================================================================
	def create_interface():
	    """Build and return the Gradio Blocks UI (API key, analysis tab, download tab).

	    NOTE(review): the component nesting below was rebuilt from a copy of the
	    file whose indentation had been flattened — confirm the layout (which
	    widgets sit inside which Row/Group) against the original.
	    """
	    with gr.Blocks(
	        theme=gr.themes.Soft(), title="🎥 YouTube Video Analyzer & Downloader Pro"
	    ) as iface:
	        gr.HTML("<h1>🎥 YouTube Video Analyzer & Downloader Pro</h1>")

	        # API section
	        with gr.Group():
	            gr.HTML("<h3>🔑 Google Gemini API Configuration</h3>")
	            with gr.Row():
	                api_key_in = gr.Textbox(
	                    label="🔑 Google API Key",
	                    placeholder="Paste your Google API key…",
	                    type="password",
	                )
	                api_btn = gr.Button("🔧 Configure API", variant="secondary")
	            api_status = gr.Textbox(
	                label="API Status",
	                value="❌ Gemini API not configured – Using fallback analysis",
	                interactive=False,
	                lines=1,
	            )

	        # Main UI
	        with gr.Row():
	            url_in = gr.Textbox(
	                label="🔗 YouTube URL",
	                placeholder="Paste YouTube video URL…",
	            )
	            cookies_in = gr.File(
	                label="🍪 Upload cookies.txt (optional)",
	                file_types=[".txt"],
	                type="filepath",
	            )

	        with gr.Tabs():
	            with gr.TabItem("📊 Video Analysis"):
	                analyze_btn = gr.Button("🔍 Analyze Video", variant="primary")
	                analysis_out = gr.Textbox(
	                    label="📊 Analysis Report", lines=25, show_copy_button=True
	                )
	                analyze_btn.click(
	                    fn=analyze_with_cookies,
	                    inputs=[url_in, cookies_in],
	                    outputs=analysis_out,
	                    show_progress=True,
	                )

	            with gr.TabItem("⬇️ Video Download"):
	                with gr.Row():
	                    quality_dd = gr.Dropdown(
	                        choices=["best", "720p", "480p"],
	                        value="best",
	                        label="📺 Quality",
	                    )
	                    audio_cb = gr.Checkbox(label="🎵 Audio only (MP3)")
	                download_btn = gr.Button("⬇️ Download Video", variant="primary")
	                dl_status = gr.Textbox(
	                    label="📥 Download Status", lines=5, show_copy_button=True
	                )
	                dl_file = gr.File(label="📁 Downloaded File", visible=False)

	                def wrapped_download(url, q, a, cfile, progress=gr.Progress()):
	                    # Reveal the file widget only when a file really exists.
	                    fp, st = download_with_cookies(url, q, a, cfile, progress)
	                    if fp and os.path.exists(fp):
	                        return st, gr.update(value=fp, visible=True)
	                    return st, gr.update(visible=False)

	                download_btn.click(
	                    fn=wrapped_download,
	                    inputs=[url_in, quality_dd, audio_cb, cookies_in],
	                    outputs=[dl_status, dl_file],
	                    show_progress=True,
	                )

	        def apply_api_key(api_key):
	            # configure_api_key returns (message, visibility update), but only
	            # the status textbox is wired as an output here, so pass on just
	            # the message to keep return values and outputs in step.
	            message, _ = configure_api_key(api_key)
	            return message

	        # API button behaviour
	        api_btn.click(
	            fn=apply_api_key,
	            inputs=[api_key_in],
	            outputs=[api_status],
	        )

	        gr.HTML(
	            """
	<div style="margin-top:20px;padding:15px;background:#f0f8ff;border-left:5px solid #4285f4;border-radius:10px;">
	<h3>💡 Tip: 쿠키 파일 자동 사용</h3>
	<p><code>www.youtube.com_cookies.txt</code> 파일을 <strong>app.py</strong>와 같은
	폴더에 두면 자동으로 사용됩니다. 주기적으로 새 파일로 교체해 주세요.</p>
	</div>
	"""
	        )
	    return iface


	# =================================================================
	# Entrypoint
	# =================================================================
	if __name__ == "__main__":
	    import atexit

	    # Build the UI, arrange for the temp directories to be removed when the
	    # interpreter exits, then start the server.
	    demo = create_interface()
	    atexit.register(downloader.cleanup)
	    demo.launch(debug=True, show_error=True)