YTB-TEST

Running

App Files Files Community

YTB-TEST / app.py

fantaxy

Update app.py

763c94d verified 2 months ago

raw

history blame

29.3 kB

	#!/usr/bin/env python3
	"""
	YouTube Video Analyzer & Downloader Pro
	(쿠키 자동 처리 버전)

	· `www.youtube.com_cookies.txt` 파일이 app.py와 같은 폴더에 있으면
	자동으로 사용합니다.
	· Gradio UI에서 쿠키 파일을 업로드하면, 업로드된 파일이 우선 적용됩니다.
	"""

	# ──────────────────────────────────────────────────────────────
	# 표준 라이브러리
	# ──────────────────────────────────────────────────────────────
	import os
	import re
	import json
	import uuid
	import shutil
	import tempfile
	from datetime import datetime
	from pathlib import Path

	# ──────────────────────────────────────────────────────────────
	# 외부 라이브러리
	# ──────────────────────────────────────────────────────────────
	import gradio as gr
	import yt_dlp
	import google.generativeai as genai

	# ──────────────────────────────────────────────────────────────
	# Default cookie file path: a file with this exact name located in the
	# same directory as this module is picked up automatically.
	# ──────────────────────────────────────────────────────────────
	DEFAULT_COOKIE_FILE = Path(__file__).with_name("www.youtube.com_cookies.txt")


	# =================================================================
	# Main Class
	# =================================================================
	class YouTubeDownloader:
	    """Fetch YouTube metadata, build a text analysis report and download media.

	    Metadata extraction and downloads go through ``yt_dlp``. The scene
	    breakdown is produced by a Google Gemini model once ``configure_gemini``
	    has succeeded, and by a keyword-based heuristic otherwise.
	    """

	    # Hard cap on the number of scenes produced by the heuristic breakdown.
	    MAX_FALLBACK_SCENES = 20

	    # Matches watch/embed/short links; the final group is the 11-char video id.
	    # NOTE: the alternations must use a bare ``|`` -- an escaped ``\|`` would
	    # only match a literal pipe character and reject every real URL.
	    _YOUTUBE_URL_RE = re.compile(
	        r"(https?://)?(www\.|m\.|music\.)?"
	        r"(youtube|youtu|youtube-nocookie)\.(com|be)/"
	        r"(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})"
	    )

	    # yt-dlp format selector for each quality choice; anything else is "best".
	    _QUALITY_FORMATS = {
	        "720p": "best[height<=720]",
	        "480p": "best[height<=480]",
	    }
	    _DEFAULT_FORMAT = "best[height<=1080]"

	    # Ordered (label, keywords) tables; the first table row that matches wins.
	    _DETAILED_TYPE_KEYWORDS = (
	        ("tutorial", ("tutorial", "how to", "guide", "learn", "diy")),
	        ("review", ("review", "unboxing", "test", "comparison", "vs")),
	        ("vlog", ("vlog", "daily", "routine", "day in")),
	        ("music", ("music", "song", "cover", "lyrics")),
	        ("entertainment", ("comedy", "funny", "prank", "challenge")),
	        ("news", ("news", "breaking", "update", "report")),
	        ("cooking", ("cooking", "recipe", "food", "kitchen")),
	        ("fitness", ("workout", "fitness", "exercise", "yoga")),
	    )
	    _REPORT_TYPE_KEYWORDS = (
	        ("🎵 Music Video", ("music", "song", "album", "artist", "band", "lyrics")),
	        ("📚 Tutorial/Educational", ("tutorial", "how to", "guide", "learn")),
	        ("🎭 Entertainment/Comedy", ("funny", "comedy", "entertainment", "vlog")),
	        ("📰 News/Information", ("news", "breaking", "report", "update")),
	        ("⭐ Review/Unboxing", ("review", "unboxing", "test", "comparison")),
	        ("📺 Commercial/Advertisement", ("commercial", "ad", "brand", "product")),
	    )
	    _BACKGROUND_MUSIC_KEYWORDS = (
	        ("🎵 Original Music/Soundtrack", ("music", "song")),
	        ("🎶 Upbeat Commercial Music", ("commercial", "ad")),
	        ("🔇 Minimal/No Background Music", ("tutorial", "how to")),
	        ("🎼 Ambient Background Music", ("vlog", "daily")),
	    )

	    # (minimum subscribers, label), checked from the largest tier down.
	    _INFLUENCER_TIERS = (
	        (10_000_000, "🌟 Mega Influencer (10M+)"),
	        (1_000_000, "⭐ Major Influencer (1M+)"),
	        (100_000, "🎯 Mid-tier Influencer (100K+)"),
	        (10_000, "📈 Micro Influencer (10K+)"),
	    )

	    # Scene description templates keyed by video type; "{who}" is the presenter.
	    _OPENING_SCENES = {
	        "tutorial": (
	            "{who} appears on screen, introducing themselves and the "
	            "topic. They are in a well-lit workspace, wearing casual clothes."
	        ),
	        "vlog": (
	            "{who} greets the camera cheerfully, perhaps waving, and "
	            "explains what today's vlog is about."
	        ),
	        "review": (
	            "{who} holds up the product to be reviewed, giving a brief "
	            "overview of its features."
	        ),
	    }
	    _OPENING_DEFAULT = (
	        "{who} starts the video with an engaging introduction to "
	        "capture viewers' attention."
	    )
	    _CLOSING_SCENES = {
	        "tutorial": (
	            "{who} shows the final result, thanks viewers, and "
	            "encourages them to like and subscribe."
	        ),
	        "vlog": (
	            "{who} wraps up the day, sharing final thoughts and "
	            "bidding farewell."
	        ),
	    }
	    _CLOSING_DEFAULT = (
	        "{who} concludes, summarizing key points and prompting "
	        "engagement through likes and comments."
	    )
	    _MIDDLE_SCENES = {
	        "tutorial": (
	            "{who} demonstrates the next step, providing clear "
	            "instructions with close-up shots."
	        ),
	        "review": (
	            "{who} examines a specific feature of the product, showing "
	            "it in use and commenting on performance."
	        ),
	        "vlog": (
	            "{who} continues the day's activities, sharing candid "
	            "moments and personal reflections."
	        ),
	        "cooking": (
	            "{who} prepares ingredients, chopping and mixing while "
	            "explaining each step."
	        ),
	        "fitness": (
	            "{who} performs an exercise set, demonstrating proper form "
	            "and offering tips."
	        ),
	    }
	    _MIDDLE_DEFAULT = (
	        "{who} proceeds with the main content, engaging viewers through "
	        "clear explanations."
	    )

	    def __init__(self):
	        """Create the working directories and start without a Gemini model."""
	        # Temporary directories (removed again by ``cleanup``).
	        self.download_dir = tempfile.mkdtemp()
	        self.temp_downloads = tempfile.mkdtemp(prefix="youtube_downloads_")

	        # Sub-folder of the user's Downloads directory that receives copies.
	        self.downloads_folder = os.path.join(
	            os.path.expanduser("~"), "Downloads", "YouTube_Downloads"
	        )
	        os.makedirs(self.downloads_folder, exist_ok=True)

	        self.gemini_model = None

	    # ---------------------------------------------------------
	    # Internal helpers
	    # ---------------------------------------------------------
	    @staticmethod
	    def _mentions_any(text, keywords):
	        """Return True if *text* contains one of *keywords* as a whole word.

	        Matching is case-insensitive and tolerates the simple suffixes
	        ``s``/``es``/``ed``/``ing``. Plain substring checks are not used
	        because short keywords such as "ad" or "test" would otherwise fire
	        on unrelated words ("made", "download", "latest").
	        """
	        if not text:
	            return False
	        alternatives = "|".join(re.escape(word) for word in keywords)
	        pattern = rf"\b(?:{alternatives})(?:s|es|ed|ing)?\b"
	        return re.search(pattern, text, re.IGNORECASE) is not None

	    @classmethod
	    def _classify(cls, text, table, default):
	        """Return the label of the first *table* row matching *text*."""
	        for label, keywords in table:
	            if cls._mentions_any(text, keywords):
	                return label
	        return default

	    @staticmethod
	    def _format_timestamp(seconds):
	        """Format a whole number of seconds as ``M:SS``."""
	        seconds = int(seconds)
	        return f"{seconds // 60}:{seconds % 60:02d}"

	    @staticmethod
	    def _resolve_cookiefile(cookiefile):
	        """Pick the cookie file: explicit upload, then the default, else None."""
	        if cookiefile and os.path.exists(cookiefile):
	            return cookiefile
	        if DEFAULT_COOKIE_FILE.exists():
	            return str(DEFAULT_COOKIE_FILE)
	        return None

	    @staticmethod
	    def _build_scene_prompt(title, duration, description):
	        """Assemble the Gemini prompt requesting a timestamped breakdown."""
	        lines = [
	            "Analyze this YouTube video and create a highly detailed, scene-by-scene breakdown",
	            "with precise timestamps and specific descriptions:",
	            "",
	            f"Title: {title}",
	            f"Duration: {duration} seconds",
	            f"Description: {description}",
	            "",
	            "IMPORTANT INSTRUCTIONS:",
	            "1. Create detailed scene descriptions that include:",
	            "- Physical appearance of people (age, gender, clothing, hair, etc.)",
	            "- Exact actions being performed",
	            "- Dialogue or speech (include actual lines if audible, or infer probable spoken",
	            'lines based on actions and setting; format them as "Character: line…")',
	            "- Setting and environment details",
	            "- Props, objects, or products being shown",
	            "- Visual effects, text overlays, or graphics",
	            "- Mood, tone, and atmosphere",
	            "- Camera movements or angles (if apparent)",
	            "2. Dialogue Emphasis:",
	            "- Include short dialogue lines in every scene wherever plausible.",
	            '- Write lines like: Character: "Actual or inferred line…"',
	            "- If dialogue is not available, intelligently infer probable phrases",
	            '(e.g., "Welcome!", "Try this now!", "It feels amazing!").',
	            "3. Timestamp Guidelines:",
	            "- For videos under 1 minute: 2-3 second segments",
	            "- For videos 1-5 minutes: 3-5 second segments",
	            "- For videos 5-15 minutes: 5-10 second segments",
	            "- For videos over 15 minutes: 10-15 second segments",
	            "- Maximum 20 scenes total for longer videos",
	            "4. Format each scene EXACTLY like this:",
	            "[MM:SS-MM:SS]: Detailed description…",
	            "5. Write descriptions as if you're watching the video in real-time,",
	            "noting everything visible and audible.",
	        ]
	        return "\n".join(lines)

	    @staticmethod
	    def _parse_scene_lines(text):
	        """Split a model response into scenes.

	        A line starting with ``[`` and containing ``]:`` opens a new scene;
	        following lines are appended to it. Text before the first scene
	        header is discarded.
	        """
	        scenes = []
	        current = ""
	        for raw_line in text.split("\n"):
	            line = raw_line.strip()
	            if line.startswith("[") and "]:" in line:
	                if current:
	                    scenes.append(current.strip())
	                current = line
	            elif current:
	                current += "\n" + line
	        if current:
	            scenes.append(current.strip())
	        return scenes

	    def _find_download(self, timestamp):
	        """Return the finished file in the temp folder tagged with *timestamp*."""
	        for name in sorted(os.listdir(self.temp_downloads)):
	            # Skip yt-dlp's partial/bookkeeping files.
	            if timestamp in name and not name.endswith((".part", ".ytdl")):
	                return os.path.join(self.temp_downloads, name)
	        return None

	    # ---------------------------------------------------------
	    # Google Gemini API
	    # ---------------------------------------------------------
	    def configure_gemini(self, api_key):
	        """Configure the Gemini client with *api_key*.

	        Returns:
	            ``(ok, message)`` where *ok* tells whether configuration
	            succeeded and *message* is a user-facing status string.
	        """
	        try:
	            genai.configure(api_key=api_key)
	            self.gemini_model = genai.GenerativeModel(
	                model_name="gemini-1.5-flash-latest"
	            )
	            return True, "✅ Gemini API configured successfully!"
	        except Exception as e:
	            return False, f"❌ Failed to configure Gemini API: {e}"

	    # ---------------------------------------------------------
	    # Temporary directory cleanup
	    # ---------------------------------------------------------
	    def cleanup(self):
	        """Remove the temporary directories; failures are only reported."""
	        for attr in ("download_dir", "temp_downloads"):
	            path = getattr(self, attr, None)
	            if not path or not os.path.exists(path):
	                continue
	            # Each directory is handled separately so that a failure on one
	            # does not prevent the other from being removed.
	            try:
	                shutil.rmtree(path)
	            except OSError as e:
	                print(f"⚠️ Warning: Could not clean up temporary directory: {e}")

	    # ---------------------------------------------------------
	    # YouTube URL validation
	    # ---------------------------------------------------------
	    def is_valid_youtube_url(self, url):
	        """Return True if *url* looks like a YouTube video link."""
	        if not isinstance(url, str):
	            return False
	        return self._YOUTUBE_URL_RE.match(url.strip()) is not None

	    # ---------------------------------------------------------
	    # Gemini scene analysis
	    # ---------------------------------------------------------
	    def generate_scene_breakdown_gemini(self, video_info):
	        """Return a list of ``[MM:SS-MM:SS]: …`` scene strings from Gemini.

	        Falls back to ``generate_scene_breakdown_fallback`` when no model is
	        configured, when the request fails, or when the response contains no
	        recognisable scene lines.
	        """
	        if not self.gemini_model:
	            return self.generate_scene_breakdown_fallback(video_info)

	        # Metadata values may be present but None, hence ``or`` defaults.
	        duration = int(video_info.get("duration") or 0)
	        if not duration:
	            return [
	                "[Duration Unknown]: Unable to generate timestamped breakdown - "
	                "video duration not available"
	            ]

	        title = video_info.get("title") or ""
	        description = (video_info.get("description") or "")[:1500]

	        scenes = []
	        try:
	            prompt = self._build_scene_prompt(title, duration, description)
	            response = self.gemini_model.generate_content(prompt)
	            if response and response.text:
	                scenes = self._parse_scene_lines(response.text)
	        except Exception as e:
	            print(f"Gemini API error: {e}")

	        return scenes or self.generate_scene_breakdown_fallback(video_info)

	    # ---------------------------------------------------------
	    # Fallback scene analysis
	    # ---------------------------------------------------------
	    def generate_scene_breakdown_fallback(self, video_info):
	        """Return heuristic scene strings derived from title and description.

	        The video is cut into equal segments whose base length depends on
	        the duration. When that would exceed ``MAX_FALLBACK_SCENES`` the
	        segments are lengthened so the scenes still span the whole video
	        and the last scene ends at the video's end.
	        """
	        duration = int(video_info.get("duration") or 0)
	        title = (video_info.get("title") or "").lower()
	        description = (video_info.get("description") or "").lower()
	        uploader = video_info.get("uploader") or "Content creator"

	        if duration <= 0:
	            return ["[Duration Unknown]: Unable to generate timestamped breakdown"]

	        if duration <= 60:
	            segment_length = 3
	        elif duration <= 300:
	            segment_length = 5
	        elif duration <= 900:
	            segment_length = 10
	        else:
	            segment_length = 15

	        max_scenes = self.MAX_FALLBACK_SCENES
	        if duration > segment_length * max_scenes:
	            # Stretch the segments (ceiling division) instead of truncating.
	            segment_length = -(-duration // max_scenes)
	        num_segments = -(-duration // segment_length)

	        video_type = self.detect_video_type_detailed(title, description)

	        scenes = []
	        for index in range(num_segments):
	            start_time = index * segment_length
	            if index == num_segments - 1:
	                end_time = duration
	            else:
	                end_time = start_time + segment_length - 1
	            desc = self.generate_contextual_description(
	                index, num_segments, video_type, uploader, title
	            )
	            scenes.append(
	                f"[{self._format_timestamp(start_time)}-"
	                f"{self._format_timestamp(end_time)}]: {desc}"
	            )
	        return scenes

	    # ---------------------------------------------------------
	    # Video type detection (detailed)
	    # ---------------------------------------------------------
	    def detect_video_type_detailed(self, title, description):
	        """Return a short type key ("tutorial", "review", …, "general")."""
	        text = f"{title or ''} {description or ''}"
	        return self._classify(text, self._DETAILED_TYPE_KEYWORDS, "general")

	    # ---------------------------------------------------------
	    # Per-scene description
	    # ---------------------------------------------------------
	    def generate_contextual_description(
	        self, scene_index, total_scenes, video_type, uploader, title
	    ):
	        """Return a generic description for one scene of the fallback breakdown.

	        The first scene gets an opening text, the last one a closing text and
	        all others a middle text, each specialised by *video_type* where a
	        template exists. *uploader* is accepted for interface compatibility
	        and is not used.
	        """
	        title = title or ""
	        # "woman"/"girl" is tested first; whole-word matching also keeps
	        # "man" from firing on words such as "human" or "manual".
	        if self._mentions_any(title, ("woman", "girl")):
	            presenter_desc = "A woman"
	        elif self._mentions_any(title, ("man", "guy")):
	            presenter_desc = "A man"
	        else:
	            presenter_desc = "The content creator"

	        if scene_index == 0:
	            templates, default = self._OPENING_SCENES, self._OPENING_DEFAULT
	        elif scene_index == total_scenes - 1:
	            templates, default = self._CLOSING_SCENES, self._CLOSING_DEFAULT
	        else:
	            templates, default = self._MIDDLE_SCENES, self._MIDDLE_DEFAULT

	        return templates.get(video_type, default).format(who=presenter_desc)

	    # ---------------------------------------------------------
	    # Video type (short, for the report)
	    # ---------------------------------------------------------
	    def detect_video_type(self, title, description):
	        """Return the display label of the video category for the report."""
	        text = f"{title or ''} {description or ''}"
	        return self._classify(text, self._REPORT_TYPE_KEYWORDS, "🎬 General Content")

	    # ---------------------------------------------------------
	    # Background music guess
	    # ---------------------------------------------------------
	    def detect_background_music(self, video_info):
	        """Guess a background-music label from keywords in the title."""
	        title = video_info.get("title") or ""
	        return self._classify(
	            title, self._BACKGROUND_MUSIC_KEYWORDS, "🎵 Background Music"
	        )

	    # ---------------------------------------------------------
	    # Influencer tier guess
	    # ---------------------------------------------------------
	    def detect_influencer_status(self, video_info):
	        """Return a creator tier label based on subscribers, then views."""
	        # yt-dlp reports subscribers as "channel_follower_count"; the legacy
	        # "channel_followers" key is still honoured. Either may be None.
	        subs = (
	            video_info.get("channel_follower_count")
	            or video_info.get("channel_followers")
	            or 0
	        )
	        views = video_info.get("view_count") or 0

	        for threshold, label in self._INFLUENCER_TIERS:
	            if subs > threshold:
	                return label
	        if views > 100_000:
	            return "🔥 Viral Content Creator"
	        return "👤 Regular Content Creator"

	    # ---------------------------------------------------------
	    # Number formatter
	    # ---------------------------------------------------------
	    def format_number(self, num):
	        """Abbreviate *num* with a K/M/B suffix; falsy values give "0"."""
	        if not num:
	            return "0"
	        if num >= 1_000_000_000:
	            return f"{num/1_000_000_000:.1f}B"
	        if num >= 1_000_000:
	            return f"{num/1_000_000:.1f}M"
	        if num >= 1_000:
	            return f"{num/1_000:.1f}K"
	        return str(num)

	    # ---------------------------------------------------------
	    # Final report
	    # ---------------------------------------------------------
	    def format_video_info(self, video_info):
	        """Render the plain-text analysis report for *video_info*.

	        Missing or None metadata fields are replaced by neutral defaults so
	        the report can always be rendered.
	        """
	        if not video_info:
	            return "❌ No video information available."

	        title = video_info.get("title") or "Unknown"
	        uploader = video_info.get("uploader") or "Unknown"
	        duration = int(video_info.get("duration") or 0)
	        dur_str = self._format_timestamp(duration) if duration else "Unknown"
	        views = video_info.get("view_count") or 0
	        likes = video_info.get("like_count") or 0
	        comments = video_info.get("comment_count") or 0
	        description = video_info.get("description") or ""

	        upload_date = str(video_info.get("upload_date") or "Unknown")
	        if len(upload_date) == 8 and upload_date.isdigit():
	            # YYYYMMDD -> YYYY-MM-DD
	            upload_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:8]}"

	        scenes = self.generate_scene_breakdown_gemini(video_info)
	        vtype = self.detect_video_type(title, description)
	        bgm = self.detect_background_music(video_info)
	        creator = self.detect_influencer_status(video_info)
	        engagement = (likes / views) * 100 if views else 0

	        preview = description[:500] if description else "No description available"
	        truncated = "...(truncated)" if len(description) > 500 else ""
	        completed = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
	        engine = "Gemini AI" if self.gemini_model else "Standard Analysis"
	        banner = "=" * 50
	        rule = "─" * 25

	        lines = [
	            "🎬 YOUTUBE VIDEO ANALYSIS REPORT",
	            banner,
	            "",
	            "📋 BASIC INFORMATION",
	            rule,
	            f"📹 Title: {title}",
	            f"👤 Uploader: {uploader}",
	            f"📅 Upload Date: {upload_date}",
	            f"⏱️ Duration: {dur_str}",
	            f"🆔 Video ID: {video_info.get('id', 'Unknown')}",
	            "",
	            "📊 PERFORMANCE METRICS",
	            rule,
	            f"👀 Views: {self.format_number(views)} ({views:,})",
	            f"👍 Likes: {self.format_number(likes)} ({likes:,})",
	            f"💬 Comments: {self.format_number(comments)} ({comments:,})",
	            f"📈 Engagement Rate: {engagement:.2f}%",
	            "",
	            "🎯 CONTENT ANALYSIS",
	            rule,
	            f"📂 Video Type: {vtype}",
	            f"🎵 Background Music: {bgm}",
	            f"👑 Creator Status: {creator}",
	            "",
	            "🎬 DETAILED SCENE BREAKDOWN",
	            "─" * 30,
	            "\n".join(scenes),
	            "",
	            "📝 DESCRIPTION PREVIEW",
	            rule,
	            preview,
	            truncated,
	            "",
	            banner,
	            f"📊 Analysis completed: {completed}",
	            f"🤖 AI Enhancement: {engine}",
	        ]
	        return "\n".join(lines).strip()

	    # ---------------------------------------------------------
	    # Metadata extraction
	    # ---------------------------------------------------------
	    def get_video_info(self, url, progress=gr.Progress(), cookiefile=None):
	        """Extract metadata for *url* without downloading the media.

	        Args:
	            url: YouTube video URL.
	            progress: callable used to report progress to the UI.
	            cookiefile: optional cookie file path; when absent or missing,
	                the default cookie file is used if it exists.

	        Returns:
	            ``(info, message)``; *info* is the yt-dlp info dict, or None on
	            failure, and *message* is a user-facing status string.
	        """
	        if not url or not url.strip():
	            return None, "❌ Please enter a YouTube URL"
	        url = url.strip()
	        if not self.is_valid_youtube_url(url):
	            return None, "❌ Invalid YouTube URL format"

	        # Cookie priority: UI upload -> default cookie file -> none.
	        cookiefile = self._resolve_cookiefile(cookiefile)

	        try:
	            progress(0.1, desc="Initializing YouTube extractor…")
	            ydl_opts = {"noplaylist": True, "extract_flat": False}
	            if cookiefile:
	                ydl_opts["cookiefile"] = cookiefile

	            progress(0.5, desc="Extracting video metadata…")
	            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
	                info = ydl.extract_info(url, download=False)

	            progress(1.0, desc="✅ Analysis complete!")
	            return info, "✅ Video information extracted successfully"

	        except Exception as e:
	            return None, f"❌ Error: {e}"

	    # ---------------------------------------------------------
	    # Download
	    # ---------------------------------------------------------
	    def download_video(
	        self,
	        url,
	        quality="best",
	        audio_only=False,
	        progress=gr.Progress(),
	        cookiefile=None,
	    ):
	        """Download *url* into the temp folder and copy it to Downloads.

	        Args:
	            url: YouTube video URL.
	            quality: "best", "720p" or "480p" (ignored when *audio_only*).
	            audio_only: extract the audio track as MP3 instead of video.
	            progress: callable used to report progress to the UI.
	            cookiefile: optional cookie file path (see ``get_video_info``).

	        Returns:
	            ``(path, message)``; *path* is the file in the temp folder, or
	            None on failure, and *message* is a user-facing status string.
	        """
	        if not url or not url.strip():
	            return None, "❌ Please enter a YouTube URL"
	        url = url.strip()
	        if not self.is_valid_youtube_url(url):
	            return None, "❌ Invalid YouTube URL format"

	        cookiefile = self._resolve_cookiefile(cookiefile)

	        def hook(status):
	            """Translate yt-dlp progress events into UI progress updates."""
	            state = status.get("status")
	            if state == "downloading":
	                # The exact size may be missing or None; use the estimate then.
	                total = status.get("total_bytes") or status.get(
	                    "total_bytes_estimate"
	                )
	                if total:
	                    done = status.get("downloaded_bytes") or 0
	                    # An estimate can undershoot, so clamp at 100 %.
	                    pct = min(done / total * 100, 100.0)
	                    progress(0.1 + pct / 100 * 0.7, desc=f"Downloading… {pct:.1f}%")
	                else:
	                    progress(0.5, desc="Downloading…")
	            elif state == "finished":
	                progress(0.8, desc="Processing download…")

	        try:
	            progress(0.1, desc="Preparing download…")
	            # The timestamp is embedded in the file name to find it afterwards.
	            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

	            ydl_opts = {
	                "outtmpl": os.path.join(
	                    self.temp_downloads, f"%(title)s_{timestamp}.%(ext)s"
	                ),
	                "noplaylist": True,
	                "progress_hooks": [hook],
	            }

	            if audio_only:
	                ydl_opts["format"] = "bestaudio/best"
	                ydl_opts["postprocessors"] = [
	                    {
	                        "key": "FFmpegExtractAudio",
	                        "preferredcodec": "mp3",
	                        "preferredquality": "192",
	                    }
	                ]
	            else:
	                ydl_opts["format"] = self._QUALITY_FORMATS.get(
	                    quality, self._DEFAULT_FORMAT
	                )

	            if cookiefile:
	                ydl_opts["cookiefile"] = cookiefile

	            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
	                ydl.extract_info(url, download=True)

	            progress(0.9, desc="Copying to Downloads folder…")

	            downloaded_temp = self._find_download(timestamp)
	            if not downloaded_temp:
	                return None, "❌ Downloaded file not found"

	            final_path = os.path.join(
	                self.downloads_folder, os.path.basename(downloaded_temp)
	            )
	            # The copy is best-effort: the temp file is still returned.
	            try:
	                shutil.copy2(downloaded_temp, final_path)
	            except OSError as e:
	                print(f"Copy warning: {e}")
	                saved_to = "Copy failed"
	            else:
	                saved_to = final_path

	            progress(1.0, desc="✅ Download complete!")

	            size_mb = os.path.getsize(downloaded_temp) / (1024 * 1024)
	            msg = (
	                "✅ Download successful!\n"
	                f"📁 Temp file: {os.path.basename(downloaded_temp)}\n"
	                f"📁 Saved to: {saved_to}\n"
	                f"🎯 Size: {size_mb:.1f} MB"
	            )
	            return downloaded_temp, msg

	        except Exception as e:
	            return None, f"❌ Download failed: {e}"


	# =================================================================
	# Helper functions for Gradio
	# =================================================================
	# Module-level downloader instance used by the Gradio callback helpers
	# below and cleaned up by the entrypoint at exit.
	downloader = YouTubeDownloader()


	def configure_api_key(api_key):
	    """Configure Gemini on the shared downloader from a user-supplied key.

	    Returns a ``(status message, gr.update)`` pair; the update carries
	    ``visible=True`` only when the configuration succeeded.
	    """
	    key = api_key.strip() if api_key else ""
	    if not key:
	        return "❌ Please enter a valid Google API key", gr.update(visible=False)

	    success, message = downloader.configure_gemini(key)
	    return message, gr.update(visible=success)


	def analyze_with_cookies(url, cookies_file, progress=gr.Progress()):
	    """Fetch metadata for *url* and return the report, or an error string.

	    *cookies_file* is the optional uploaded cookie file path; an empty
	    value is passed on as None.
	    """
	    try:
	        progress(0.05, desc="Starting analysis…")
	        info, msg = downloader.get_video_info(
	            url, progress=progress, cookiefile=cookies_file or None
	        )
	        if not info:
	            return f"❌ Analysis Failed: {msg}"

	        progress(0.95, desc="Generating report…")
	        return downloader.format_video_info(info)
	    except Exception as e:
	        return f"❌ System Error: {e}"


	def download_with_cookies(url, quality, audio_only, cookies_file, progress=gr.Progress()):
	    """Download *url* through the shared downloader.

	    Returns ``(file path or None, status message)``; unexpected errors are
	    reported as a status message instead of being raised.
	    """
	    try:
	        progress(0.05, desc="Preparing download…")
	        file_path, status = downloader.download_video(
	            url,
	            quality,
	            audio_only,
	            progress=progress,
	            cookiefile=cookies_file or None,
	        )
	    except Exception as e:
	        return None, f"❌ System Error: {e}"

	    # Any falsy path is normalised to None.
	    return (file_path or None), status


	# =================================================================
	# Gradio UI
	# =================================================================
	def create_interface():
	    """Build and return the Gradio Blocks UI.

	    The layout has an API-key section, a shared URL / cookie-file row and
	    two tabs (video analysis and video download).
	    """
	    with gr.Blocks(
	        theme=gr.themes.Soft(), title="🎥 YouTube Video Analyzer & Downloader Pro"
	    ) as iface:
	        gr.HTML("<h1>🎥 YouTube Video Analyzer & Downloader Pro</h1>")

	        # API section
	        with gr.Group():
	            gr.HTML("<h3>🔑 Google Gemini API Configuration</h3>")
	            with gr.Row():
	                api_key_in = gr.Textbox(
	                    label="🔑 Google API Key",
	                    placeholder="Paste your Google API key…",
	                    type="password",
	                )
	                api_btn = gr.Button("🔧 Configure API", variant="secondary")
	            api_status = gr.Textbox(
	                label="API Status",
	                value="❌ Gemini API not configured – Using fallback analysis",
	                interactive=False,
	                lines=1,
	            )

	        # Main inputs shared by both tabs
	        with gr.Row():
	            url_in = gr.Textbox(
	                label="🔗 YouTube URL",
	                placeholder="Paste YouTube video URL…",
	            )
	            cookies_in = gr.File(
	                label="🍪 Upload cookies.txt (optional)",
	                file_types=[".txt"],
	                type="filepath",
	            )

	        with gr.Tabs():
	            with gr.TabItem("📊 Video Analysis"):
	                analyze_btn = gr.Button("🔍 Analyze Video", variant="primary")
	                analysis_out = gr.Textbox(
	                    label="📊 Analysis Report", lines=25, show_copy_button=True
	                )
	                analyze_btn.click(
	                    fn=analyze_with_cookies,
	                    inputs=[url_in, cookies_in],
	                    outputs=analysis_out,
	                    show_progress=True,
	                )

	            with gr.TabItem("⬇️ Video Download"):
	                with gr.Row():
	                    quality_dd = gr.Dropdown(
	                        choices=["best", "720p", "480p"],
	                        value="best",
	                        label="📺 Quality",
	                    )
	                    audio_cb = gr.Checkbox(label="🎵 Audio only (MP3)")
	                download_btn = gr.Button("⬇️ Download Video", variant="primary")
	                dl_status = gr.Textbox(
	                    label="📥 Download Status", lines=5, show_copy_button=True
	                )
	                dl_file = gr.File(label="📁 Downloaded File", visible=False)

	                def wrapped_download(url, q, a, cfile, progress=gr.Progress()):
	                    """Run the download and reveal the file widget on success."""
	                    fp, st = download_with_cookies(url, q, a, cfile, progress)
	                    if fp and os.path.exists(fp):
	                        return st, gr.update(value=fp, visible=True)
	                    return st, gr.update(visible=False)

	                download_btn.click(
	                    fn=wrapped_download,
	                    inputs=[url_in, quality_dd, audio_cb, cookies_in],
	                    outputs=[dl_status, dl_file],
	                    show_progress=True,
	                )

	        def apply_api_key(api_key):
	            """Return only the status text of ``configure_api_key``.

	            ``configure_api_key`` returns a (message, visibility update)
	            pair, but this button has a single output component, so the
	            number of returned values must be reduced to one.
	            """
	            message, _visibility = configure_api_key(api_key)
	            return message

	        # API button behaviour
	        api_btn.click(
	            fn=apply_api_key,
	            inputs=[api_key_in],
	            outputs=[api_status],
	        )

	        gr.HTML(
	            """
	<div style="margin-top:20px;padding:15px;background:#f0f8ff;border-left:5px solid #4285f4;border-radius:10px;">
	<h3>💡 Tip: 쿠키 파일 자동 사용</h3>
	<p><code>www.youtube.com_cookies.txt</code> 파일을 <strong>app.py</strong>와 같은
	폴더에 두면 자동으로 사용됩니다. 주기적으로 새 파일로 교체해 주세요.</p>
	</div>
	"""
	        )
	    return iface


	# =================================================================
	# Entrypoint
	# =================================================================
	if __name__ == "__main__":
	    import atexit

	    # Build the UI, then register removal of the temporary directories so
	    # it runs when the interpreter exits.
	    app = create_interface()
	    atexit.register(downloader.cleanup)
	    app.launch(debug=True, show_error=True)