YTB-TEST / app.py
fantaxy's picture
Update app.py
763c94d verified
raw
history blame
29.3 kB
#!/usr/bin/env python3
"""
YouTube Video Analyzer & Downloader Pro
(automatic cookie handling version)
· If a `www.youtube.com_cookies.txt` file sits in the same folder as **app.py**,
it is used automatically.
· If a cookie file is uploaded through the Gradio UI, the uploaded file takes **precedence**.
"""
# ──────────────────────────────────────────────────────────────
# Standard library
# ──────────────────────────────────────────────────────────────
import os
import re
import json
import uuid
import shutil
import tempfile
from datetime import datetime
from pathlib import Path
# ──────────────────────────────────────────────────────────────
# Third-party libraries
# ──────────────────────────────────────────────────────────────
import gradio as gr
import yt_dlp
import google.generativeai as genai
# ──────────────────────────────────────────────────────────────
# Default cookie file path ― used automatically when a file with this
# exact name sits next to this script
# ──────────────────────────────────────────────────────────────
DEFAULT_COOKIE_FILE = Path(__file__).with_name("www.youtube.com_cookies.txt")
# =================================================================
# Main Class
# =================================================================
class YouTubeDownloader:
    def __init__(self):
        """Create the working directories and start without a Gemini model.

        Two fresh temporary directories are created, and a
        ``YouTube_Downloads`` folder is ensured under the user's
        ``Downloads`` directory.
        """
        # Temporary directories (Gradio-compatible)
        self.download_dir = tempfile.mkdtemp()
        self.temp_downloads = tempfile.mkdtemp(prefix="youtube_downloads_")

        # Sub-folder of the user's Downloads directory
        home_dir = os.path.expanduser("~")
        target_dir = os.path.join(home_dir, "Downloads", "YouTube_Downloads")
        self.downloads_folder = target_dir
        os.makedirs(target_dir, exist_ok=True)

        # Stays None until a Gemini model object is assigned.
        self.gemini_model = None
# ---------------------------------------------------------
# Google Gemini API
# ---------------------------------------------------------
def configure_gemini(self, api_key):
    """Register *api_key* with the Gemini SDK and create the model handle.

    Returns a ``(success, message)`` tuple. Any exception raised while
    configuring is reported through the message instead of propagating.
    """
    try:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(model_name="gemini-1.5-flash-latest")
        self.gemini_model = model
    except Exception as exc:
        return False, f"❌ Failed to configure Gemini API: {exc}"
    return True, "βœ… Gemini API configured successfully!"
# ---------------------------------------------------------
# Temporary directory cleanup
# ---------------------------------------------------------
def cleanup(self):
    """Remove the temporary working directories owned by this instance.

    Each directory is handled independently, so a failure while removing
    one does not prevent the other from being cleaned up. Problems are
    printed as warnings and never raised (best-effort cleanup).
    """
    for attr_name in ("download_dir", "temp_downloads"):
        path = getattr(self, attr_name, None)
        if not path:
            # Attribute missing or empty: nothing to remove.
            continue
        try:
            if os.path.exists(path):
                shutil.rmtree(path)
        except Exception as e:
            print(f"⚠️ Warning: Could not clean up temporary directory: {e}")
# ---------------------------------------------------------
# YouTube URL validation
# ---------------------------------------------------------
def is_valid_youtube_url(self, url):
    """Return True when *url* looks like a link to a YouTube video.

    The check is a prefix match: optional scheme, an optional ``www.``,
    ``m.`` or ``music.`` host prefix, a YouTube domain, an optional
    ``watch?v=`` / ``embed/`` / ``v/`` style path, then 11 characters that
    are not ``&``, ``=``, ``%`` or ``?``. Surrounding whitespace is
    ignored and non-string input is rejected instead of raising.
    """
    if not isinstance(url, str):
        return False
    youtube_regex = re.compile(
        r"(https?://)?(?:(?:www|m|music)\.)?"
        r"(youtube|youtu|youtube-nocookie)\.(com|be)/"
        r"(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})"
    )
    # A pasted URL often carries a leading space or trailing newline.
    return youtube_regex.match(url.strip()) is not None
# ---------------------------------------------------------
# Gemini-AI scene analysis
# ---------------------------------------------------------
def generate_scene_breakdown_gemini(self, video_info):
    """Ask the configured Gemini model for a timestamped scene breakdown.

    Args:
        video_info: metadata mapping; ``duration``, ``title`` and
            ``description`` are read from it.

    Returns:
        A list of scene strings. Each entry begins with a
        ``**[...]**:`` header line taken from the model response,
        followed by any continuation lines. The result of
        ``generate_scene_breakdown_fallback`` is returned instead when
        no model is configured, the response contains no scene headers,
        or any exception occurs.
    """
    if not self.gemini_model:
        return self.generate_scene_breakdown_fallback(video_info)
    try:
        duration = video_info.get("duration", 0)
        # A key present with value None bypasses the .get() default, so
        # normalise with `or ""`; otherwise slicing None would raise and
        # silently push every such video onto the fallback path.
        title = video_info.get("title") or ""
        description = (video_info.get("description") or "")[:1500]
        if not duration:
            return [
                "**[Duration Unknown]**: Unable to generate timestamped breakdown - "
                "video duration not available"
            ]
        prompt = f"""
Analyze this YouTube video and create a highly detailed, scene-by-scene breakdown
with precise timestamps and specific descriptions:
Title: {title}
Duration: {duration} seconds
Description: {description}
IMPORTANT INSTRUCTIONS:
1. Create detailed scene descriptions that include:
- Physical appearance of people (age, gender, clothing, hair, etc.)
- Exact actions being performed
- Dialogue or speech (include actual lines if audible, or infer probable spoken
lines based on actions and setting; format them as "Character: line…")
- Setting and environment details
- Props, objects, or products being shown
- Visual effects, text overlays, or graphics
- Mood, tone, and atmosphere
- Camera movements or angles (if apparent)
2. Dialogue Emphasis:
- Include short dialogue lines in **every scene** wherever plausible.
- Write lines like: Character: "Actual or inferred line…"
- If dialogue is not available, intelligently infer probable phrases
(e.g., "Welcome!", "Try this now!", "It feels amazing!").
3. Timestamp Guidelines:
- For videos under 1 minute: 2-3 second segments
- For videos 1-5 minutes: 3-5 second segments
- For videos 5-15 minutes: 5-10 second segments
- For videos over 15 minutes: 10-15 second segments
- Maximum 20 scenes total for longer videos
4. Format each scene EXACTLY like this:
**[MM:SS-MM:SS]**: Detailed description…
5. Write descriptions as if you're watching the video in real-time,
noting everything visible and audible.
"""
        response = self.gemini_model.generate_content(prompt)
        if response and response.text:
            scenes = []
            current_scene = ""
            for line in response.text.split("\n"):
                line = line.strip()
                if line.startswith("**[") and "]**:" in line:
                    # A new header closes the scene collected so far.
                    if current_scene:
                        scenes.append(current_scene.strip())
                    current_scene = line
                elif current_scene:
                    # Continuation of the current scene; text before the
                    # first header is ignored.
                    current_scene += "\n" + line
            if current_scene:
                scenes.append(current_scene.strip())
            if scenes:
                return scenes
        return self.generate_scene_breakdown_fallback(video_info)
    except Exception as e:
        print(f"Gemini API error: {e}")
        return self.generate_scene_breakdown_fallback(video_info)
# ---------------------------------------------------------
# Fallback scene analysis
# ---------------------------------------------------------
def generate_scene_breakdown_fallback(self, video_info):
    """Build a heuristic, metadata-only scene breakdown.

    The segment length is chosen from the duration (3, 5, 10 or 15
    seconds) and at most 20 segments are produced. Each segment is
    described by ``generate_contextual_description`` using the video
    type reported by ``detect_video_type_detailed``.

    Args:
        video_info: metadata mapping; ``duration``, ``title``,
            ``description`` and ``uploader`` are read from it. Values
            that are missing or None are treated as empty.

    Returns:
        A list of ``**[M:SS-M:SS]**: description`` strings, or a single
        "Duration Unknown" entry when no duration is available.
    """
    duration = video_info.get("duration") or 0
    # `or ""` also covers keys that are present but set to None.
    title = (video_info.get("title") or "").lower()
    description = (video_info.get("description") or "").lower()
    uploader = video_info.get("uploader") or "Content creator"
    if not duration:
        return ["**[Duration Unknown]**: Unable to generate timestamped breakdown"]

    # range() and the :02d format below need an integer number of seconds.
    duration = int(duration)

    if duration <= 60:
        segment_length = 3
    elif duration <= 300:
        segment_length = 5
    elif duration <= 900:
        segment_length = 10
    else:
        segment_length = 15

    scenes = []
    num_segments = min(duration // segment_length + 1, 20)
    video_type = self.detect_video_type_detailed(title, description)
    for i in range(num_segments):
        start_time = i * segment_length
        # Clamp the final segment to the end of the video.
        end_time = min(start_time + segment_length - 1, duration)
        start_fmt = f"{start_time // 60}:{start_time % 60:02d}"
        end_fmt = f"{end_time // 60}:{end_time % 60:02d}"
        desc = self.generate_contextual_description(
            i, num_segments, video_type, uploader, title
        )
        scenes.append(f"**[{start_fmt}-{end_fmt}]**: {desc}")
    return scenes
# ---------------------------------------------------------
# Video type detection (detailed)
# ---------------------------------------------------------
def detect_video_type_detailed(self, title, description):
    """Classify a video into a coarse content category.

    The title and description are joined, lower-cased and scanned for
    keywords by plain substring search (so ``"test"`` also matches inside
    ``"latest"``). Categories are tried in the order listed and the first
    hit wins; ``"general"`` is returned when nothing matches.
    """
    keyword_table = (
        ("tutorial", ("tutorial", "how to", "guide", "learn", "diy")),
        ("review", ("review", "unboxing", "test", "comparison", "vs")),
        ("vlog", ("vlog", "daily", "routine", "day in")),
        ("music", ("music", "song", "cover", "lyrics")),
        ("entertainment", ("comedy", "funny", "prank", "challenge")),
        ("news", ("news", "breaking", "update", "report")),
        ("cooking", ("cooking", "recipe", "food", "kitchen")),
        ("fitness", ("workout", "fitness", "exercise", "yoga")),
    )
    haystack = (title + " " + description).lower()
    for category, keywords in keyword_table:
        for keyword in keywords:
            if keyword in haystack:
                return category
    return "general"
# ---------------------------------------------------------
# Per-scene description generation
# ---------------------------------------------------------
def generate_contextual_description(
    self, scene_index, total_scenes, video_type, uploader, title
):
    """Return a canned one-sentence description for one scene.

    The sentence depends on whether the scene is the first one, the last
    one or somewhere in between, and on *video_type*. The subject of the
    sentence is picked from keywords found in *title* by substring
    search. *uploader* is accepted but not used.
    """
    if "woman" in title or "girl" in title:
        subject = "A woman"
    elif "man" in title or "guy" in title:
        subject = "A man"
    else:
        subject = "The content creator"

    opening = {
        "tutorial": (
            " appears on screen, introducing themselves and the "
            "topic. They are in a well-lit workspace, wearing casual clothes."
        ),
        "vlog": (
            " greets the camera cheerfully, perhaps waving, and "
            "explains what today's vlog is about."
        ),
        "review": (
            " holds up the product to be reviewed, giving a brief "
            "overview of its features."
        ),
    }
    closing = {
        "tutorial": (
            " shows the final result, thanks viewers, and "
            "encourages them to like and subscribe."
        ),
        "vlog": (
            " wraps up the day, sharing final thoughts and "
            "bidding farewell."
        ),
    }
    middle = {
        "tutorial": (
            " demonstrates the next step, providing clear "
            "instructions with close-up shots."
        ),
        "review": (
            " examines a specific feature of the product, showing "
            "it in use and commenting on performance."
        ),
        "vlog": (
            " continues the day's activities, sharing candid "
            "moments and personal reflections."
        ),
        "cooking": (
            " prepares ingredients, chopping and mixing while "
            "explaining each step."
        ),
        "fitness": (
            " performs an exercise set, demonstrating proper form "
            "and offering tips."
        ),
    }

    # The first-scene check comes first, so a single-scene video gets an intro.
    if scene_index == 0:
        tail = opening.get(
            video_type,
            " starts the video with an engaging introduction to "
            "capture viewers' attention.",
        )
    elif scene_index == total_scenes - 1:
        tail = closing.get(
            video_type,
            " concludes, summarizing key points and prompting "
            "engagement through likes and comments.",
        )
    else:
        tail = middle.get(
            video_type,
            " proceeds with the main content, engaging viewers through "
            "clear explanations.",
        )
    return subject + tail
# ---------------------------------------------------------
# Video type (brief)
# ---------------------------------------------------------
def detect_video_type(self, title, description):
    """Return a display label for the video's content type.

    The title and description are joined and lower-cased, then checked
    against keyword groups in priority order; the first matching group
    decides the label. Keywords are matched as substrings, except the
    two-letter keyword "ad", which must appear as a whole word ("ad" or
    "ads") so that words such as "made" or "download" do not classify a
    video as a commercial. None values are treated as empty text.
    """
    text = f"{title or ''} {description or ''}".lower()

    categories = (
        ("🎡 Music Video", ("music", "song", "album", "artist", "band", "lyrics")),
        ("πŸ“š Tutorial/Educational", ("tutorial", "how to", "guide", "learn")),
        ("🎭 Entertainment/Comedy", ("funny", "comedy", "entertainment", "vlog")),
        ("πŸ“° News/Information", ("news", "breaking", "report", "update")),
        ("⭐ Review/Unboxing", ("review", "unboxing", "test", "comparison")),
    )
    for label, keywords in categories:
        if any(w in text for w in keywords):
            return label

    # Lowest-priority category; "ad" is checked with word boundaries.
    if any(w in text for w in ("commercial", "brand", "product")) or re.search(
        r"\bads?\b", text
    ):
        return "πŸ“Ί Commercial/Advertisement"
    return "🎬 General Content"
# ---------------------------------------------------------
# Background music estimation
# ---------------------------------------------------------
def detect_background_music(self, video_info):
    """Guess a background-music label from keywords in the video title.

    Only ``video_info["title"]`` is inspected (lower-cased; a missing or
    None title counts as empty). Checks run in priority order and the
    first hit wins. "ad" must appear as a whole word ("ad" or "ads") so
    that titles containing words such as "bad" or "road" are not labelled
    as commercials.
    """
    title = (video_info.get("title") or "").lower()
    if "music" in title or "song" in title:
        return "🎡 Original Music/Soundtrack"
    if "commercial" in title or re.search(r"\bads?\b", title):
        return "🎢 Upbeat Commercial Music"
    if "tutorial" in title or "how to" in title:
        return "πŸ”‡ Minimal/No Background Music"
    if "vlog" in title or "daily" in title:
        return "🎼 Ambient Background Music"
    return "🎡 Background Music"
# ---------------------------------------------------------
# μΈν”Œλ£¨μ–Έμ„œ 규λͺ¨ μΆ”μ •
# ---------------------------------------------------------
def detect_influencer_status(self, video_info):
    """Return a creator-tier label based on follower and view counts.

    The follower count is read from ``channel_follower_count`` (the
    field name yt-dlp documents), falling back to the legacy
    ``channel_followers`` key this method used to read. Missing or None
    counts are treated as 0. Follower tiers are checked from largest to
    smallest; if none applies, more than 100,000 views yields the
    "viral" label.
    """
    subs = (
        video_info.get("channel_follower_count")
        or video_info.get("channel_followers")
        or 0
    )
    views = video_info.get("view_count") or 0
    if subs > 10_000_000:
        return "🌟 Mega Influencer (10M+)"
    if subs > 1_000_000:
        return "⭐ Major Influencer (1M+)"
    if subs > 100_000:
        return "🎯 Mid-tier Influencer (100K+)"
    if subs > 10_000:
        return "πŸ“ˆ Micro Influencer (10K+)"
    if views > 100_000:
        return "πŸ”₯ Viral Content Creator"
    return "πŸ‘€ Regular Content Creator"
# ---------------------------------------------------------
# Number formatter
# ---------------------------------------------------------
def format_number(self, num):
    """Abbreviate a count using K / M / B suffixes with one decimal place.

    Falsy input (None, 0) yields ``"0"``; values below 1,000 are returned
    via ``str()`` unchanged.
    """
    if not num:
        return "0"
    scales = ((1_000_000_000, "B"), (1_000_000, "M"), (1_000, "K"))
    for threshold, suffix in scales:
        if num >= threshold:
            return f"{num / threshold:.1f}{suffix}"
    return str(num)
# ---------------------------------------------------------
# Final report generation
# ---------------------------------------------------------
def format_video_info(self, video_info):
    """Render the full text analysis report for one video.

    Args:
        video_info: metadata mapping. Counts, duration, upload date and
            description may be missing or None; they are normalised
            before formatting so the report never fails on absent
            metadata.

    Returns:
        The report as a single stripped string, or an error line when
        *video_info* is empty.
    """
    if not video_info:
        return "❌ No video information available."

    title = video_info.get("title") or "Unknown"
    uploader = video_info.get("uploader") or "Unknown"

    duration = video_info.get("duration", 0)
    # The :02d format needs an int; durations may arrive as floats.
    seconds = int(duration) if duration else 0
    dur_str = f"{seconds // 60}:{seconds % 60:02d}" if duration else "Unknown"

    # `or 0` covers keys that are present but None (e.g. hidden like counts).
    views = video_info.get("view_count") or 0
    likes = video_info.get("like_count") or 0
    comments = video_info.get("comment_count") or 0

    upload_date = str(video_info.get("upload_date") or "Unknown")
    if len(upload_date) == 8:
        # YYYYMMDD -> YYYY-MM-DD
        upload_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:8]}"

    description = video_info.get("description")
    # An explicit empty description stays empty; only a missing/None one
    # gets the placeholder text.
    preview_source = "No description available" if description is None else description
    description_preview = preview_source[:500]
    truncation_note = "...(truncated)" if len(description or "") > 500 else ""

    scenes = self.generate_scene_breakdown_gemini(video_info)
    vtype = self.detect_video_type(title, description or "")
    bgm = self.detect_background_music(video_info)
    creator = self.detect_influencer_status(video_info)
    engagement = (likes / views) * 100 if views else 0
    report = f"""
🎬 YOUTUBE VIDEO ANALYSIS REPORT
{'='*50}
πŸ“‹ BASIC INFORMATION
{'─'*25}
πŸ“Ή **Title:** {title}
πŸ‘€ **Uploader:** {uploader}
πŸ“… **Upload Date:** {upload_date}
⏱️ **Duration:** {dur_str}
πŸ†” **Video ID:** {video_info.get('id', 'Unknown')}
πŸ“Š PERFORMANCE METRICS
{'─'*25}
πŸ‘€ **Views:** {self.format_number(views)} ({views:,})
πŸ‘ **Likes:** {self.format_number(likes)} ({likes:,})
πŸ’¬ **Comments:** {self.format_number(comments)} ({comments:,})
πŸ“ˆ **Engagement Rate:** {engagement:.2f}%
🎯 CONTENT ANALYSIS
{'─'*25}
πŸ“‚ **Video Type:** {vtype}
🎡 **Background Music:** {bgm}
πŸ‘‘ **Creator Status:** {creator}
🎬 DETAILED SCENE BREAKDOWN
{'─'*30}
{chr(10).join(scenes)}
πŸ“ DESCRIPTION PREVIEW
{'─'*25}
{description_preview}
{truncation_note}
{'='*50}
πŸ“Š **Analysis completed:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
πŸ€– **AI Enhancement:** {'Gemini AI' if self.gemini_model else 'Standard Analysis'}
"""
    return report.strip()
# ---------------------------------------------------------
# Metadata extraction
# ---------------------------------------------------------
def get_video_info(self, url, progress=gr.Progress(), cookiefile=None):
    """Fetch metadata for a single YouTube video without downloading it.

    Args:
        url: the video URL; surrounding whitespace is ignored.
        progress: Gradio progress callback.
        cookiefile: optional path to a cookies file. If it does not
            exist, ``DEFAULT_COOKIE_FILE`` is used when present,
            otherwise no cookies are passed.

    Returns:
        ``(info, message)`` where *info* is the dict returned by
        ``extract_info`` or None on failure, and *message* is a status
        string for display.
    """
    if not url or not url.strip():
        return None, "❌ Please enter a YouTube URL"
    # Validate and extract with the stripped URL so a pasted leading
    # space or trailing newline is not reported as an invalid link.
    url = url.strip()
    if not self.is_valid_youtube_url(url):
        return None, "❌ Invalid YouTube URL format"

    # Cookie selection order: UI upload -> default cookie file -> None
    if not (cookiefile and os.path.exists(cookiefile)):
        cookiefile = str(DEFAULT_COOKIE_FILE) if DEFAULT_COOKIE_FILE.exists() else None

    try:
        progress(0.1, desc="Initializing YouTube extractor…")
        ydl_opts = {"noplaylist": True, "extract_flat": False}
        if cookiefile:
            ydl_opts["cookiefile"] = cookiefile
        progress(0.5, desc="Extracting video metadata…")
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)
        progress(1.0, desc="βœ… Analysis complete!")
        return info, "βœ… Video information extracted successfully"
    except Exception as e:
        return None, f"❌ Error: {e}"
# ---------------------------------------------------------
# Download
# ---------------------------------------------------------
def download_video(
    self,
    url,
    quality="best",
    audio_only=False,
    progress=gr.Progress(),
    cookiefile=None,
):
    """Download a video (or its audio as MP3) into the temp download folder.

    After the download, the file is also copied into
    ``self.downloads_folder``; a failed copy is reported in the status
    message but does not fail the download.

    Args:
        url: the video URL; surrounding whitespace is ignored.
        quality: ``"720p"``, ``"480p"`` or anything else for the default
            (capped at 1080p).
        audio_only: when true, extract audio as 192 kbps MP3 instead.
        progress: Gradio progress callback.
        cookiefile: optional path to a cookies file. If it does not
            exist, ``DEFAULT_COOKIE_FILE`` is used when present.

    Returns:
        ``(path, message)`` with the path of the file in the temp folder,
        or ``(None, message)`` on any failure.
    """
    if not url or not url.strip():
        return None, "❌ Please enter a YouTube URL"
    # Validate and download with the stripped URL.
    url = url.strip()
    if not self.is_valid_youtube_url(url):
        return None, "❌ Invalid YouTube URL format"

    # Cookie selection order: UI upload -> default cookie file -> None
    if not (cookiefile and os.path.exists(cookiefile)):
        cookiefile = str(DEFAULT_COOKIE_FILE) if DEFAULT_COOKIE_FILE.exists() else None

    try:
        progress(0.1, desc="Preparing download…")
        # The timestamp is embedded in the file name so the finished file
        # can be located in the temp folder afterwards.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        ydl_opts = {
            "outtmpl": os.path.join(
                self.temp_downloads, f"%(title)s_{timestamp}.%(ext)s"
            ),
            "noplaylist": True,
        }
        if audio_only:
            ydl_opts["format"] = "bestaudio/best"
            ydl_opts["postprocessors"] = [
                {
                    "key": "FFmpegExtractAudio",
                    "preferredcodec": "mp3",
                    "preferredquality": "192",
                }
            ]
        elif quality == "720p":
            ydl_opts["format"] = "best[height<=720]"
        elif quality == "480p":
            ydl_opts["format"] = "best[height<=480]"
        else:  # "best"
            ydl_opts["format"] = "best[height<=1080]"
        if cookiefile:
            ydl_opts["cookiefile"] = cookiefile

        # Progress hook
        def hook(d):
            status = d.get("status")
            if status == "downloading":
                # total_bytes can be absent or None; fall back to the
                # estimate so the hook never divides by None.
                total = d.get("total_bytes") or d.get("total_bytes_estimate")
                done = d.get("downloaded_bytes") or 0
                if total:
                    pct = min(done / total * 100, 100)
                    progress(0.1 + pct / 100 * 0.7, desc=f"Downloading… {pct:.1f}%")
                else:
                    progress(0.5, desc="Downloading…")
            elif status == "finished":
                progress(0.8, desc="Processing download…")

        ydl_opts["progress_hooks"] = [hook]
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.extract_info(url, download=True)
        progress(0.9, desc="Copying to Downloads folder…")

        # Find the finished file in the temp directory, ignoring any
        # leftover partial-download artefacts.
        downloaded_temp = None
        for f in os.listdir(self.temp_downloads):
            if timestamp in f and not f.endswith((".part", ".ytdl")):
                downloaded_temp = os.path.join(self.temp_downloads, f)
                break
        if not downloaded_temp:
            return None, "❌ Downloaded file not found"

        final_name = os.path.basename(downloaded_temp)
        final_path = os.path.join(self.downloads_folder, final_name)
        try:
            shutil.copy2(downloaded_temp, final_path)
            saved = True
        except Exception as e:
            # Best-effort copy: the temp file is still returned.
            print(f"Copy warning: {e}")
            saved = False

        progress(1.0, desc="βœ… Download complete!")
        msg = (
            "βœ… Download successful!\n"
            f"πŸ“ Temp file: {os.path.basename(downloaded_temp)}\n"
            f"πŸ“ Saved to: {final_path if saved else 'Copy failed'}\n"
            f"🎯 Size: {os.path.getsize(downloaded_temp)/(1024*1024):.1f} MB"
        )
        return downloaded_temp, msg
    except Exception as e:
        return None, f"❌ Download failed: {e}"
# =================================================================
# Helper functions for Gradio
# =================================================================
# Module-level instance used by the Gradio helper functions in this file.
# Constructing it creates the temporary directories and the Downloads
# sub-folder at import time.
downloader = YouTubeDownloader()
def configure_api_key(api_key):
    """Validate the key typed into the UI and configure Gemini with it.

    Returns a ``(status message, gr.update)`` pair; the update's
    ``visible`` flag is True only when configuration succeeded.
    """
    key = api_key.strip() if api_key else ""
    if not key:
        return "❌ Please enter a valid Google API key", gr.update(visible=False)
    succeeded, message = downloader.configure_gemini(key)
    return message, gr.update(visible=succeeded)
def analyze_with_cookies(url, cookies_file, progress=gr.Progress()):
    """Gradio callback: fetch metadata for *url* and return the text report.

    Any failure is converted into an error string so the UI always
    receives text.
    """
    try:
        progress(0.05, desc="Starting analysis…")
        info, msg = downloader.get_video_info(
            url, progress=progress, cookiefile=cookies_file or None
        )
        if not info:
            return f"❌ Analysis Failed: {msg}"
        progress(0.95, desc="Generating report…")
        return downloader.format_video_info(info)
    except Exception as exc:
        return f"❌ System Error: {exc}"
def download_with_cookies(url, quality, audio_only, cookies_file, progress=gr.Progress()):
    """Gradio callback: download *url* and return ``(file path or None, status)``.

    Any exception is converted into a ``(None, error message)`` pair.
    """
    try:
        progress(0.05, desc="Preparing download…")
        selected_cookies = cookies_file or None
        path, status = downloader.download_video(
            url, quality, audio_only, progress=progress, cookiefile=selected_cookies
        )
        # A falsy path is normalised to None.
        return (path or None), status
    except Exception as exc:
        return None, f"❌ System Error: {exc}"
# =================================================================
# Gradio UI
# =================================================================
def create_interface():
    """Build and return the Gradio Blocks UI.

    The layout has an API-key section, a shared URL / cookies row, and
    two tabs (analysis and download) whose buttons are wired to the
    module-level callback helpers.
    """
    # NOTE(review): the nesting of the `with` blocks follows the evident
    # Group / Row / Tabs structure -- confirm against the rendered UI.
    with gr.Blocks(
        theme=gr.themes.Soft(), title="πŸŽ₯ YouTube Video Analyzer & Downloader Pro"
    ) as iface:
        gr.HTML("<h1>πŸŽ₯ YouTube Video Analyzer & Downloader Pro</h1>")
        # API section
        with gr.Group():
            gr.HTML("<h3>πŸ”‘ Google Gemini API Configuration</h3>")
            with gr.Row():
                api_key_in = gr.Textbox(
                    label="πŸ”‘ Google API Key",
                    placeholder="Paste your Google API key…",
                    type="password",
                )
                api_btn = gr.Button("πŸ”§ Configure API", variant="secondary")
            api_status = gr.Textbox(
                label="API Status",
                value="❌ Gemini API not configured – Using fallback analysis",
                interactive=False,
                lines=1,
            )
        # Main UI
        with gr.Row():
            url_in = gr.Textbox(
                label="πŸ”— YouTube URL",
                placeholder="Paste YouTube video URL…",
            )
            cookies_in = gr.File(
                label="πŸͺ Upload cookies.txt (optional)",
                file_types=[".txt"],
                type="filepath",
            )
        with gr.Tabs():
            with gr.TabItem("πŸ“Š Video Analysis"):
                analyze_btn = gr.Button("πŸ” Analyze Video", variant="primary")
                analysis_out = gr.Textbox(
                    label="πŸ“Š Analysis Report", lines=25, show_copy_button=True
                )
                analyze_btn.click(
                    fn=analyze_with_cookies,
                    inputs=[url_in, cookies_in],
                    outputs=analysis_out,
                    show_progress=True,
                )
            with gr.TabItem("⬇️ Video Download"):
                with gr.Row():
                    quality_dd = gr.Dropdown(
                        choices=["best", "720p", "480p"],
                        value="best",
                        label="πŸ“Ί Quality",
                    )
                    audio_cb = gr.Checkbox(label="🎡 Audio only (MP3)")
                download_btn = gr.Button("⬇️ Download Video", variant="primary")
                dl_status = gr.Textbox(
                    label="πŸ“₯ Download Status", lines=5, show_copy_button=True
                )
                dl_file = gr.File(label="πŸ“ Downloaded File", visible=False)

                def wrapped_download(url, q, a, cfile, progress=gr.Progress()):
                    # Reveal the file component only when a file exists.
                    fp, st = download_with_cookies(url, q, a, cfile, progress)
                    if fp and os.path.exists(fp):
                        return st, gr.update(value=fp, visible=True)
                    return st, gr.update(visible=False)

                download_btn.click(
                    fn=wrapped_download,
                    inputs=[url_in, quality_dd, audio_cb, cookies_in],
                    outputs=[dl_status, dl_file],
                    show_progress=True,
                )

        def api_status_message(api_key):
            # configure_api_key returns (message, visibility update), but
            # only the status textbox is wired as an output here, so pass
            # on just the message to keep return values and outputs in sync.
            return configure_api_key(api_key)[0]

        # API button behaviour
        api_btn.click(
            fn=api_status_message,
            inputs=[api_key_in],
            outputs=[api_status],
        )
        gr.HTML(
            """
<div style="margin-top:20px;padding:15px;background:#f0f8ff;border-left:5px solid #4285f4;border-radius:10px;">
<h3>πŸ’‘ Tip: μΏ ν‚€ 파일 μžλ™ μ‚¬μš©</h3>
<p><code>www.youtube.com_cookies.txt</code> νŒŒμΌμ„ <strong>app.py</strong>와 같은
폴더에 두면 μžλ™μœΌλ‘œ μ‚¬μš©λ©λ‹ˆλ‹€. 주기적으둜 μƒˆ 파일둜 ꡐ체해 μ£Όμ„Έμš”.</p>
</div>
"""
        )
    return iface
# =================================================================
# Entrypoint
# =================================================================
if __name__ == "__main__":
    import atexit

    app = create_interface()
    # Remove the temporary download directories when the interpreter exits.
    atexit.register(downloader.cleanup)
    app.launch(debug=True, show_error=True)