YTB-TEST / app.py
fantaxy's picture
Update app.py
b075ad9 verified
raw
history blame
35.6 kB
#!/usr/bin/env python3
"""
YouTube Video Analyzer & Downloader Pro
(automatic cookie handling version)
· If a `www.youtube.com_cookies.txt` file is in the same folder as **app.py**,
  it is used automatically.
· If a cookie file is uploaded through the Gradio UI, the uploaded file takes **priority**.
"""
# ──────────────────────────────────────────────────────────────
# Standard library
# ──────────────────────────────────────────────────────────────
import os
import re
import json
import uuid
import shutil
import tempfile
from datetime import datetime
from pathlib import Path
# ──────────────────────────────────────────────────────────────
# Third-party libraries
# ──────────────────────────────────────────────────────────────
import gradio as gr
import yt_dlp
import google.generativeai as genai
# ──────────────────────────────────────────────────────────────
# Default cookie file path ― used automatically when a file with this name exists
# ──────────────────────────────────────────────────────────────
# Cookie file looked up in the same directory as this script.
DEFAULT_COOKIE_FILE = Path(__file__).parent / "www.youtube.com_cookies.txt"
# =================================================================
# Main Class
# =================================================================
class YouTubeDownloader:
def __init__(self):
    """Create the working directories and start without a Gemini model."""
    # Temporary directories (Gradio compatible)
    self.download_dir = tempfile.mkdtemp()
    self.temp_downloads = tempfile.mkdtemp(prefix="youtube_downloads_")
    # Sub-folder of the user's Downloads directory
    home_dir = os.path.expanduser("~")
    self.downloads_folder = os.path.join(home_dir, "Downloads", "YouTube_Downloads")
    os.makedirs(self.downloads_folder, exist_ok=True)
    # Stays None until a Gemini model has been configured.
    self.gemini_model = None
# ---------------------------------------------------------
# Google Gemini API
# ---------------------------------------------------------
def configure_gemini(self, api_key):
    """Configure the Gemini client with *api_key* and create the model.

    Returns:
        A ``(success, message)`` tuple. Any exception raised while
        configuring is reported through the message instead of propagating.
    """
    try:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(model_name="gemini-1.5-flash-latest")
    except Exception as err:
        return False, f"❌ Failed to configure Gemini API: {err}"
    self.gemini_model = model
    return True, "βœ… Gemini API configured successfully!"
# ---------------------------------------------------------
# Temporary directory cleanup
# ---------------------------------------------------------
def cleanup(self):
    """Remove the temporary directories owned by this instance.

    Each directory is removed independently, so a failure on one does not
    prevent the other from being cleaned up. Failures are printed as a
    warning and never raised.
    """
    for attr_name in ("download_dir", "temp_downloads"):
        path = getattr(self, attr_name, None)
        if not path or not os.path.exists(path):
            continue
        try:
            shutil.rmtree(path)
        except Exception as e:
            print(f"⚠️ Warning: Could not clean up temporary directory: {e}")
# ---------------------------------------------------------
# YouTube URL validation
# ---------------------------------------------------------
def is_valid_youtube_url(self, url):
    """Return True when *url* looks like a YouTube video URL.

    The scheme is optional. The ``www.``, ``m.`` and ``music.`` host
    prefixes are accepted. The pattern is anchored at the start only, so
    trailing query parameters are allowed. Non-string input yields False
    instead of raising.
    """
    if not isinstance(url, str):
        return False
    youtube_regex = re.compile(
        r"(?:https?://)?(?:(?:www|m|music)\.)?"
        r"(?:youtube|youtu|youtube-nocookie)\.(?:com|be)/"
        r"(?:watch\?v=|embed/|v/|.+\?v=)?(?:[^&=%\?]{11})"
    )
    return youtube_regex.match(url) is not None
# ---------------------------------------------------------
# Gemini-AI speech/dialogue extraction (Korean first)
# ---------------------------------------------------------
def generate_scene_breakdown_gemini(self, video_info):
    """Ask Gemini for a timestamped Korean transcript and its English translation.

    The model is prompted with the video's title, duration and description,
    and its reply is split into entries that each begin with a
    ``**[...]**:`` marker.

    Args:
        video_info: Metadata dictionary; ``duration``, ``title`` and
            ``description`` are read.

    Returns:
        A dict with ``"korean"`` and ``"english"`` lists of entries. The
        fallback breakdown is used when no model is configured, when a list
        comes back empty, or when any exception occurs.
    """

    def parse_scenes(text):
        """Group reply lines into entries that start with a ``**[...]**:`` marker."""
        scenes = []
        current = ""
        for raw_line in text.split("\n"):
            line = raw_line.strip()
            if line.startswith("**[") and "]**:" in line:
                if current:
                    scenes.append(current.strip())
                current = line
            elif current:
                # Continuation of the current entry; text before the first
                # marker is dropped.
                current += "\n" + line
        if current:
            scenes.append(current.strip())
        return scenes

    if not self.gemini_model:
        return self.generate_scene_breakdown_fallback(video_info)
    try:
        duration = video_info.get("duration", 0)
        title = video_info.get("title", "")
        # The key may be present with a None value; treat that as empty.
        description = (video_info.get("description") or "")[:1500]
        if not duration:
            return {
                "korean": ["**[μž¬μƒμ‹œκ°„ μ•Œ 수 μ—†μŒ]**: λΉ„λ””μ˜€ μž¬μƒμ‹œκ°„μ„ 확인할 수 μ—†μ–΄ νƒ€μž„μŠ€νƒ¬ν”„λ₯Ό 생성할 수 μ—†μŠ΅λ‹ˆλ‹€"],
                "english": ["**[Duration Unknown]**: Unable to generate timestamped breakdown - video duration not available"]
            }
        prompt = f"""
이 YouTube λΉ„λ””μ˜€μ˜ μŒμ„±/λŒ€μ‚¬λ₯Ό νƒ€μž„μŠ€νƒ¬ν”„λ³„λ‘œ μΆ”μΆœν•΄μ£Όμ„Έμš”.
제λͺ©: {title}
μž¬μƒμ‹œκ°„: {duration}초
μ„€λͺ…: {description}
맀우 μ€‘μš”ν•œ μ§€μΉ¨:
1. μ‹€μ œ μ˜μƒμ—μ„œ λ“€λ¦¬λŠ” λŒ€μ‚¬, λ‚΄λ ˆμ΄μ…˜, μŒμ„±μ„ κ·ΈλŒ€λ‘œ μ μ–΄μ£Όμ„Έμš”
2. μž₯λ©΄ μ„€λͺ…이 μ•„λ‹Œ μ‹€μ œ μŒμ„± λ‚΄μš©λ§Œ μž‘μ„±ν•˜μ„Έμš”
3. μŒμ„±μ΄ μ—†λŠ” 뢀뢄은 (...) λ˜λŠ” (λ°°κ²½μŒμ•…) λ“±μœΌλ‘œ ν‘œμ‹œ
4. νƒ€μž„μŠ€νƒ¬ν”„ κ°€μ΄λ“œλΌμΈ:
- λŒ€μ‚¬λ‚˜ λ‚΄λ ˆμ΄μ…˜μ΄ μ‹œμž‘ν•˜κ³  λλ‚˜λŠ” 지점 κΈ°μ€€
- μ—°μ†λœ λŒ€μ‚¬λŠ” ν•˜λ‚˜λ‘œ λ¬Άμ–΄μ„œ ν‘œμ‹œ
- μ΅œλŒ€ν•œ μžμ—°μŠ€λŸ¬μš΄ λ‹¨μœ„λ‘œ ꡬ뢄
5. ν˜•μ‹:
**[MM:SS-MM:SS]**: "μ‹€μ œ λŒ€μ‚¬λ‚˜ λ‚΄λ ˆμ΄μ…˜ λ‚΄μš©"
**[MM:SS-MM:SS]**: (λ°°κ²½μŒμ•…) λ˜λŠ” (...μž₯λ©΄ μ „ν™˜...)
6. λͺ¨λ“  μŒμ„± λ‚΄μš©μ„ 빠짐없이 μ μ–΄μ£Όμ„Έμš”
7. μžλ§‰μ΄λ‚˜ 화면에 ν‘œμ‹œλœ ν…μŠ€νŠΈλ„ ν¬ν•¨ν•˜μ„Έμš”
8. μž₯λ©΄ μ„€λͺ…은 μ ˆλŒ€ ν•˜μ§€ λ§ˆμ„Έμš”. 였직 μŒμ„±κ³Ό ν…μŠ€νŠΈλ§Œ μΆ”μΆœν•˜μ„Έμš”.
μ˜ˆμ‹œ:
**[00:00-00:05]**: "μ•ˆλ…•ν•˜μ„Έμš”. μ˜€λŠ˜μ€ λ―Έλ₯΅μ‚°μ—μ„œ 발견된 백제 μœ μ μ— λŒ€ν•΄ μ•Œμ•„λ³΄κ² μŠ΅λ‹ˆλ‹€."
**[00:05-00:08]**: (λ°°κ²½μŒμ•…)
**[00:08-00:15]**: "λ―Έλ₯΅μ‚¬μ§€λ₯Ό ν’ˆκ³  μžˆλŠ” 읡산 λ―Έλ₯΅μ‚°μ˜ μ •μƒλΆ€μ—μ„œ λ°±μ œμ‹œλŒ€μ— λ§Œλ“  κ²ƒμœΌλ‘œ μΆ”μ •λ˜λŠ” μ €μˆ˜μ‘°κ°€ λ°œκ΅΄λμŠ΅λ‹ˆλ‹€."
**[00:15-00:18]**: (인터뷰 μ€€λΉ„ 쀑...)
**[00:18-00:25]**: [이도학 ꡐ수] "이번 λ°œκ΅΄μ€ 백제 역사 연ꡬ에 μ€‘μš”ν•œ μ „ν™˜μ μ΄ 될 κ²ƒμž…λ‹ˆλ‹€."
λ‰΄μŠ€μ˜ 경우:
- μ•΅μ»€λ‚˜ 기자의 λ©˜νŠΈλŠ” κ·ΈλŒ€λ‘œ 적기
- μΈν„°λ·°λŠ” [인터뷰이 이름] "λ‚΄μš©" ν˜•μ‹μœΌλ‘œ
- μžλ§‰μ€ [μžλ§‰] λ‚΄μš© ν˜•μ‹μœΌλ‘œ
"""
        response = self.gemini_model.generate_content(prompt)
        korean_scenes = []
        if response and response.text:
            korean_scenes = parse_scenes(response.text)
        english_scenes = []
        # Only request a translation when there is something to translate.
        if korean_scenes:
            english_prompt = f"""
Translate the following Korean speech/dialogue transcription to English, maintaining the exact same timestamps.
Translate ONLY the actual speech content, not descriptions:
{chr(10).join(korean_scenes)}
Important rules:
- Keep the format exactly the same: **[MM:SS-MM:SS]**: "English translation of speech"
- For non-speech parts like (λ°°κ²½μŒμ•…), translate as (background music)
- For (...) keep as is
- For interview tags like [이도학 ꡐ수], translate as [Professor Lee Do-hak]
- For [μžλ§‰], translate as [Subtitle]
- Keep quotation marks for actual speech
- Do NOT add any scene descriptions or explanations
"""
            english_response = self.gemini_model.generate_content(english_prompt)
            if english_response and english_response.text:
                english_scenes = parse_scenes(english_response.text)
        if korean_scenes and english_scenes:
            return {"korean": korean_scenes, "english": english_scenes}
        # At least one language is missing: fill the gap from the fallback.
        fallback = self.generate_scene_breakdown_fallback(video_info)
        return {
            "korean": korean_scenes if korean_scenes else fallback["korean"],
            "english": english_scenes if english_scenes else fallback["english"],
        }
    except Exception as e:
        print(f"Gemini API error: {e}")
        return self.generate_scene_breakdown_fallback(video_info)
# ---------------------------------------------------------
# Fallback speech/dialogue extraction (Korean/English)
# ---------------------------------------------------------
def generate_scene_breakdown_fallback(self, video_info):
    """Build a placeholder timestamped breakdown without calling Gemini.

    The video is cut into fixed-length segments (10/15/20/30 seconds
    depending on its duration, at most 20 segments). The first segment is
    labelled as intro, the last as outro, the rest as main content.

    Args:
        video_info: Metadata dictionary; only ``duration`` is read.

    Returns:
        A dict with parallel ``"korean"`` and ``"english"`` lists of
        ``**[M:SS-M:SS]**: ...`` entries, or a single "duration unknown"
        entry per language when no duration is available.
    """
    # Normalise to whole seconds: range() and the ``:02d`` format below need
    # an int, and the value may be missing or None.
    duration = int(video_info.get("duration") or 0)
    if duration <= 0:
        return {
            "korean": ["**[μž¬μƒμ‹œκ°„ μ•Œ 수 μ—†μŒ]**: νƒ€μž„μŠ€νƒ¬ν”„λ₯Ό 생성할 수 μ—†μŠ΅λ‹ˆλ‹€"],
            "english": ["**[Duration Unknown]**: Unable to generate timestamped breakdown"]
        }
    # Segment length grows with the video length
    if duration <= 60:
        segment_length = 10
    elif duration <= 300:
        segment_length = 15
    elif duration <= 900:
        segment_length = 20
    else:
        segment_length = 30
    # Ceiling division: a duration that is an exact multiple of the segment
    # length must not produce an extra zero-length trailing segment.
    num_segments = min(-(-duration // segment_length), 20)
    korean_scenes = []
    english_scenes = []
    for i in range(num_segments):
        start_time = i * segment_length
        end_time = min(start_time + segment_length - 1, duration)
        start_fmt = f"{start_time//60}:{start_time%60:02d}"
        end_fmt = f"{end_time//60}:{end_time%60:02d}"
        # Default templates used when speech cannot be extracted
        if i == 0:
            korean_desc = "(μŒμ„± μΆ”μΆœμ„ μœ„ν•΄ Gemini APIκ°€ ν•„μš”ν•©λ‹ˆλ‹€. 인트둜 λΆ€λΆ„...)"
            english_desc = "(Gemini API required for speech extraction. Intro section...)"
        elif i == num_segments - 1:
            korean_desc = "(μ•„μ›ƒνŠΈλ‘œ λΆ€λΆ„...)"
            english_desc = "(Outro section...)"
        else:
            korean_desc = "(λ³Έλ¬Έ λ‚΄μš©...)"
            english_desc = "(Main content...)"
        korean_scenes.append(f"**[{start_fmt}-{end_fmt}]**: {korean_desc}")
        english_scenes.append(f"**[{start_fmt}-{end_fmt}]**: {english_desc}")
    return {"korean": korean_scenes, "english": english_scenes}
# ---------------------------------------------------------
# Video type detection (detailed)
# ---------------------------------------------------------
def detect_video_type_detailed(self, title, description):
    """Classify a video into a coarse category by keyword search.

    The title and description are joined and lower-cased; categories are
    tried in a fixed order and the first one with a keyword occurring as a
    substring wins. Returns ``"general"`` when nothing matches.
    """
    haystack = (title + " " + description).lower()
    keyword_table = (
        ("tutorial", ("tutorial", "how to", "guide", "learn", "diy")),
        ("review", ("review", "unboxing", "test", "comparison", "vs")),
        ("vlog", ("vlog", "daily", "routine", "day in")),
        ("music", ("music", "song", "cover", "lyrics")),
        ("entertainment", ("comedy", "funny", "prank", "challenge")),
        ("news", ("news", "breaking", "update", "report")),
        ("cooking", ("cooking", "recipe", "food", "kitchen")),
        ("fitness", ("workout", "fitness", "exercise", "yoga")),
    )
    for category, keywords in keyword_table:
        for keyword in keywords:
            if keyword in haystack:
                return category
    return "general"
# ---------------------------------------------------------
# Per-scene description generation (Korean)
# ---------------------------------------------------------
def generate_contextual_description_korean(
    self, scene_index, total_scenes, video_type, uploader, title
):
    """Return a templated Korean description for one scene.

    The presenter wording is chosen from keywords found in *title*. The
    sentence depends on whether the scene is the first one, the last one or
    in between, and on *video_type*. *uploader* is accepted but not used.
    """
    if any(word in title for word in ("woman", "girl", "μ—¬μ„±", "μ—¬μž")):
        presenter = "μ—¬μ„± μ§„ν–‰μž"
    elif any(word in title for word in ("man", "guy", "남성", "λ‚¨μž")):
        presenter = "남성 μ§„ν–‰μž"
    else:
        presenter = "μ½˜ν…μΈ  μ œμž‘μž"

    if scene_index == 0:
        # Opening scene
        by_type = {
            "tutorial": "κ°€ 화면에 λ“±μž₯ν•˜μ—¬ μžμ‹ μ„ μ†Œκ°œν•˜κ³  주제λ₯Ό μ„€λͺ…ν•©λ‹ˆλ‹€. 밝은 μ‘°λͺ…μ˜ μž‘μ—… κ³΅κ°„μ—μ„œ μΊμ£Όμ–Όν•œ μ˜·μ„ μž…κ³  μžˆμŠ΅λ‹ˆλ‹€.",
            "vlog": "κ°€ 카메라λ₯Ό ν–₯ν•΄ 밝게 μΈμ‚¬ν•˜λ©° 였늘의 브이둜그 주제λ₯Ό μ„€λͺ…ν•©λ‹ˆλ‹€.",
            "review": "κ°€ 리뷰할 μ œν’ˆμ„ λ“€μ–΄ 보이며 κ°„λ‹¨ν•œ νŠΉμ§•μ„ μ†Œκ°œν•©λ‹ˆλ‹€.",
        }
        default = "κ°€ μ‹œμ²­μžμ˜ 관심을 끌기 μœ„ν•œ λ§€λ ₯적인 인트둜둜 μ˜μƒμ„ μ‹œμž‘ν•©λ‹ˆλ‹€."
    elif scene_index == total_scenes - 1:
        # Closing scene
        by_type = {
            "tutorial": "κ°€ μ΅œμ’… 결과물을 보여주고 μ‹œμ²­μžμ—κ²Œ 감사 인사λ₯Ό μ „ν•˜λ©° μ’‹μ•„μš”μ™€ ꡬ독을 λΆ€νƒν•©λ‹ˆλ‹€.",
            "vlog": "κ°€ ν•˜λ£¨λ₯Ό λ§ˆλ¬΄λ¦¬ν•˜λ©° μ΅œμ’… 생각을 κ³΅μœ ν•˜κ³  μž‘λ³„ 인사λ₯Ό ν•©λ‹ˆλ‹€.",
        }
        default = "κ°€ μ£Όμš” λ‚΄μš©μ„ μš”μ•½ν•˜κ³  μ’‹μ•„μš”μ™€ λŒ“κΈ€μ„ ν†΅ν•œ μ°Έμ—¬λ₯Ό λ…λ €ν•˜λ©° λ§ˆλ¬΄λ¦¬ν•©λ‹ˆλ‹€."
    else:
        # Any scene in between
        by_type = {
            "tutorial": "κ°€ λ‹€μŒ 단계λ₯Ό μ‹œμ—°ν•˜λ©° ν΄λ‘œμ¦ˆμ—… μƒ·κ³Ό ν•¨κ»˜ λͺ…ν™•ν•œ 지침을 μ œκ³΅ν•©λ‹ˆλ‹€.",
            "review": "κ°€ μ œν’ˆμ˜ νŠΉμ • κΈ°λŠ₯을 μ‚΄νŽ΄λ³΄λ©° μ‚¬μš© λͺ¨μŠ΅μ„ 보여주고 μ„±λŠ₯에 λŒ€ν•΄ μ„€λͺ…ν•©λ‹ˆλ‹€.",
            "vlog": "κ°€ 일상 ν™œλ™μ„ κ³„μ†ν•˜λ©° μ†”μ§ν•œ μˆœκ°„κ³Ό 개인적인 생각을 κ³΅μœ ν•©λ‹ˆλ‹€.",
            "cooking": "κ°€ 재료λ₯Ό μ€€λΉ„ν•˜λ©° 자λ₯΄κ³  μ„žμœΌλ©΄μ„œ 각 단계λ₯Ό μ„€λͺ…ν•©λ‹ˆλ‹€.",
            "fitness": "κ°€ μš΄λ™ μ„ΈνŠΈλ₯Ό μˆ˜ν–‰ν•˜λ©° μ˜¬λ°”λ₯Έ μžμ„Έλ₯Ό μ‹œμ—°ν•˜κ³  νŒμ„ μ œκ³΅ν•©λ‹ˆλ‹€.",
        }
        default = "κ°€ λͺ…ν™•ν•œ μ„€λͺ…을 톡해 μ‹œμ²­μžμ™€ μ†Œν†΅ν•˜λ©° μ£Όμš” μ½˜ν…μΈ λ₯Ό μ§„ν–‰ν•©λ‹ˆλ‹€."
    return presenter + by_type.get(video_type, default)
# ---------------------------------------------------------
# Per-scene description generation (English - existing method kept)
# ---------------------------------------------------------
def generate_contextual_description(
    self, scene_index, total_scenes, video_type, uploader, title
):
    """Return a templated English description for one scene.

    The presenter wording is chosen from keywords found in *title*. The
    sentence depends on whether the scene is the first one, the last one or
    in between, and on *video_type*. *uploader* is accepted but not used.
    """
    if "woman" in title or "girl" in title:
        presenter = "A woman"
    elif "man" in title or "guy" in title:
        presenter = "A man"
    else:
        presenter = "The content creator"

    if scene_index == 0:
        # Opening scene
        by_type = {
            "tutorial": (
                " appears on screen, introducing themselves and the "
                "topic. They are in a well-lit workspace, wearing casual clothes."
            ),
            "vlog": (
                " greets the camera cheerfully, perhaps waving, and "
                "explains what today's vlog is about."
            ),
            "review": (
                " holds up the product to be reviewed, giving a brief "
                "overview of its features."
            ),
        }
        default = (
            " starts the video with an engaging introduction to "
            "capture viewers' attention."
        )
    elif scene_index == total_scenes - 1:
        # Closing scene
        by_type = {
            "tutorial": (
                " shows the final result, thanks viewers, and "
                "encourages them to like and subscribe."
            ),
            "vlog": (
                " wraps up the day, sharing final thoughts and "
                "bidding farewell."
            ),
        }
        default = (
            " concludes, summarizing key points and prompting "
            "engagement through likes and comments."
        )
    else:
        # Any scene in between
        by_type = {
            "tutorial": (
                " demonstrates the next step, providing clear "
                "instructions with close-up shots."
            ),
            "review": (
                " examines a specific feature of the product, showing "
                "it in use and commenting on performance."
            ),
            "vlog": (
                " continues the day's activities, sharing candid "
                "moments and personal reflections."
            ),
            "cooking": (
                " prepares ingredients, chopping and mixing while "
                "explaining each step."
            ),
            "fitness": (
                " performs an exercise set, demonstrating proper form "
                "and offering tips."
            ),
        }
        default = (
            " proceeds with the main content, engaging viewers through "
            "clear explanations."
        )
    return presenter + by_type.get(video_type, default)
# ---------------------------------------------------------
# Video type (brief)
# ---------------------------------------------------------
def detect_video_type(self, title, description):
    """Return a display label for the video's content type.

    The title and description are joined and lower-cased; categories are
    tried in a fixed order and the first keyword hit wins. Keywords are
    matched as substrings, except the short token "ad"/"ads", which is
    matched as a whole word so that words such as "made" or "download" do
    not classify a video as a commercial. ``None`` for either argument is
    treated as an empty string.
    """
    text = f"{title or ''} {description or ''}".lower()
    if any(w in text for w in ["music", "song", "album", "artist", "band", "lyrics"]):
        return "🎡 Music Video"
    if any(w in text for w in ["tutorial", "how to", "guide", "learn"]):
        return "πŸ“š Tutorial/Educational"
    if any(w in text for w in ["funny", "comedy", "entertainment", "vlog"]):
        return "🎭 Entertainment/Comedy"
    if any(w in text for w in ["news", "breaking", "report", "update"]):
        return "πŸ“° News/Information"
    if any(w in text for w in ["review", "unboxing", "test", "comparison"]):
        return "⭐ Review/Unboxing"
    # "advert" keeps advert/advertisement/advertising matching as before.
    if any(w in text for w in ["commercial", "advert", "brand", "product"]) or re.search(
        r"\bads?\b", text
    ):
        return "πŸ“Ί Commercial/Advertisement"
    return "🎬 General Content"
# ---------------------------------------------------------
# Background music estimation
# ---------------------------------------------------------
def detect_background_music(self, video_info):
    """Guess a background-music label from keywords in the video title.

    Only ``video_info["title"]`` is read; a missing or ``None`` title is
    treated as empty. "ad"/"ads" is matched as a whole word so that titles
    merely containing those letters (e.g. "bread") are not labelled as
    commercials.
    """
    title = (video_info.get("title") or "").lower()
    if "music" in title or "song" in title:
        return "🎡 Original Music/Soundtrack"
    # "advert" keeps advert/advertisement/advertising matching as before.
    if "commercial" in title or "advert" in title or re.search(r"\bads?\b", title):
        return "🎢 Upbeat Commercial Music"
    if "tutorial" in title or "how to" in title:
        return "πŸ”‡ Minimal/No Background Music"
    if "vlog" in title or "daily" in title:
        return "🎼 Ambient Background Music"
    return "🎡 Background Music"
# ---------------------------------------------------------
# Influencer size estimation
# ---------------------------------------------------------
def detect_influencer_status(self, video_info):
    """Return a creator-tier label from follower and view counts.

    The follower count is read from ``channel_follower_count`` (the field
    name used by yt-dlp), falling back to the legacy ``channel_followers``
    key. Missing or ``None`` counts are treated as 0. When the follower
    count is at most 10,000, a view count above 100,000 yields the "viral"
    label.
    """
    subs = (
        video_info.get("channel_follower_count")
        or video_info.get("channel_followers")
        or 0
    )
    views = video_info.get("view_count") or 0
    if subs > 10_000_000:
        return "🌟 Mega Influencer (10M+)"
    if subs > 1_000_000:
        return "⭐ Major Influencer (1M+)"
    if subs > 100_000:
        return "🎯 Mid-tier Influencer (100K+)"
    if subs > 10_000:
        return "πŸ“ˆ Micro Influencer (10K+)"
    if views > 100_000:
        return "πŸ”₯ Viral Content Creator"
    return "πŸ‘€ Regular Content Creator"
# ---------------------------------------------------------
# Number formatter
# ---------------------------------------------------------
def format_number(self, num):
    """Abbreviate *num* with a K/M/B suffix and one decimal place.

    Falsy input (``0``, ``None``) gives ``"0"``; values below 1,000 are
    returned via ``str()`` unchanged.
    """
    if not num:
        return "0"
    scales = ((1_000_000_000, "B"), (1_000_000, "M"), (1_000, "K"))
    for threshold, suffix in scales:
        if num >= threshold:
            return f"{num / threshold:.1f}{suffix}"
    return str(num)
# ---------------------------------------------------------
# Final report generation (Korean/English separated)
# ---------------------------------------------------------
def format_video_info(self, video_info):
    """Render the full bilingual analysis report for one video.

    Combines basic metadata, performance metrics, the content heuristics and
    the speech/dialogue breakdown into a single text report.

    Metadata fields that are missing or ``None`` (counts, duration, upload
    date, description) fall back to neutral defaults instead of raising.

    Args:
        video_info: Metadata dictionary for the video.

    Returns:
        The report text, or an error line when *video_info* is empty.
    """
    if not video_info:
        return "❌ No video information available."
    title = video_info.get("title") or "Unknown"
    uploader = video_info.get("uploader") or "Unknown"
    # Whole seconds are needed for the ``:02d`` format below.
    duration = int(video_info.get("duration") or 0)
    dur_str = f"{duration//60}:{duration%60:02d}" if duration else "Unknown"
    # Counts may be present with a None value; treat that as 0.
    views = video_info.get("view_count") or 0
    likes = video_info.get("like_count") or 0
    comments = video_info.get("comment_count") or 0
    upload_date = video_info.get("upload_date") or "Unknown"
    if len(upload_date) == 8:
        # YYYYMMDD -> YYYY-MM-DD
        upload_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:8]}"
    description = video_info.get("description")
    if description is None:
        description_preview = "No description available"
        description_text = ""
    else:
        description_preview = description[:500]
        description_text = description
    scene_data = self.generate_scene_breakdown_gemini(video_info)
    korean_scenes = scene_data.get("korean", [])
    english_scenes = scene_data.get("english", [])
    vtype = self.detect_video_type(title, description_text)
    bgm = self.detect_background_music(video_info)
    creator = self.detect_influencer_status(video_info)
    engagement = (likes / views) * 100 if views else 0
    report = f"""
🎬 YOUTUBE VIDEO ANALYSIS REPORT
{'='*50}
πŸ“‹ κΈ°λ³Έ 정보 / BASIC INFORMATION
{'─'*25}
πŸ“Ή **제λͺ©/Title:** {title}
πŸ‘€ **μ—…λ‘œλ”/Uploader:** {uploader}
πŸ“… **μ—…λ‘œλ“œ λ‚ μ§œ/Upload Date:** {upload_date}
⏱️ **μž¬μƒμ‹œκ°„/Duration:** {dur_str}
πŸ†” **λΉ„λ””μ˜€ ID/Video ID:** {video_info.get('id', 'Unknown')}
πŸ“Š μ„±κ³Ό μ§€ν‘œ / PERFORMANCE METRICS
{'─'*25}
πŸ‘€ **쑰회수/Views:** {self.format_number(views)} ({views:,})
πŸ‘ **μ’‹μ•„μš”/Likes:** {self.format_number(likes)} ({likes:,})
πŸ’¬ **λŒ“κΈ€/Comments:** {self.format_number(comments)} ({comments:,})
πŸ“ˆ **μ°Έμ—¬μœ¨/Engagement Rate:** {engagement:.2f}%
🎯 μ½˜ν…μΈ  뢄석 / CONTENT ANALYSIS
{'─'*25}
πŸ“‚ **λΉ„λ””μ˜€ μœ ν˜•/Video Type:** {vtype}
🎡 **λ°°κ²½μŒμ•…/Background Music:** {bgm}
πŸ‘‘ **μ œμž‘μž μƒνƒœ/Creator Status:** {creator}
πŸŽ™οΈ μŒμ„±/λŒ€μ‚¬ μΆ”μΆœ (ν•œκ΅­μ–΄) / SPEECH/DIALOGUE EXTRACTION (KOREAN)
{'─'*30}
{chr(10).join(korean_scenes)}
πŸŽ™οΈ μŒμ„±/λŒ€μ‚¬ μΆ”μΆœ (μ˜μ–΄) / SPEECH/DIALOGUE EXTRACTION (ENGLISH)
{'─'*30}
{chr(10).join(english_scenes)}
πŸ“ μ„€λͺ… 미리보기 / DESCRIPTION PREVIEW
{'─'*25}
{description_preview}
{'...(μƒλž΅/truncated)' if len(description_text) > 500 else ''}
{'='*50}
πŸ“Š **뢄석 μ™„λ£Œ/Analysis completed:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
πŸ€– **AI κ°•ν™”/AI Enhancement:** {'Gemini AI' if self.gemini_model else 'Standard Analysis'}
"""
    return report.strip()
# ---------------------------------------------------------
# Metadata extraction
# ---------------------------------------------------------
def get_video_info(self, url, progress=gr.Progress(), cookiefile=None):
    """Fetch video metadata with yt-dlp without downloading the media.

    Args:
        url: YouTube URL; surrounding whitespace is ignored.
        progress: Callable used to report progress to the UI.
        cookiefile: Optional path to a cookies file. When it is missing or
            does not exist, ``DEFAULT_COOKIE_FILE`` is used if present.

    Returns:
        ``(info, message)`` where *info* is the value returned by
        ``extract_info`` on success and ``None`` on any failure.
    """
    if not url or not url.strip():
        return None, "❌ Please enter a YouTube URL"
    # Validate and use the trimmed URL so that pasted whitespace does not
    # make an otherwise valid URL fail validation.
    url = url.strip()
    if not self.is_valid_youtube_url(url):
        return None, "❌ Invalid YouTube URL format"
    # Cookie precedence: UI upload -> default cookie file -> None
    if not (cookiefile and os.path.exists(cookiefile)):
        cookiefile = str(DEFAULT_COOKIE_FILE) if DEFAULT_COOKIE_FILE.exists() else None
    try:
        progress(0.1, desc="Initializing YouTube extractor…")
        ydl_opts = {"noplaylist": True, "extract_flat": False}
        if cookiefile:
            ydl_opts["cookiefile"] = cookiefile
        progress(0.5, desc="Extracting video metadata…")
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)
        progress(1.0, desc="βœ… Analysis complete!")
        return info, "βœ… Video information extracted successfully"
    except Exception as e:
        return None, f"❌ Error: {e}"
# ---------------------------------------------------------
# Download
# ---------------------------------------------------------
def download_video(
    self,
    url,
    quality="best",
    audio_only=False,
    progress=gr.Progress(),
    cookiefile=None,
):
    """Download a video (or its audio as MP3) and copy it to the Downloads folder.

    The file is first written to ``self.temp_downloads`` and then copied to
    ``self.downloads_folder``; a failed copy is reported in the status
    message but does not fail the download.

    Args:
        url: YouTube URL; surrounding whitespace is ignored.
        quality: ``"720p"``, ``"480p"`` or anything else for up to 1080p.
        audio_only: When true, extract the audio track as 192 kbps MP3.
        progress: Callable used to report progress to the UI.
        cookiefile: Optional path to a cookies file. When it is missing or
            does not exist, ``DEFAULT_COOKIE_FILE`` is used if present.

    Returns:
        ``(temp_file_path, message)`` on success, ``(None, message)`` on
        any failure.
    """
    if not url or not url.strip():
        return None, "❌ Please enter a YouTube URL"
    # Validate and use the trimmed URL so that pasted whitespace does not
    # make an otherwise valid URL fail validation.
    url = url.strip()
    if not self.is_valid_youtube_url(url):
        return None, "❌ Invalid YouTube URL format"
    # Cookie precedence: UI upload -> default cookie file -> None
    if not (cookiefile and os.path.exists(cookiefile)):
        cookiefile = str(DEFAULT_COOKIE_FILE) if DEFAULT_COOKIE_FILE.exists() else None
    try:
        progress(0.1, desc="Preparing download…")
        # The timestamp is embedded in the file name and used below to find
        # the file that this call produced.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        ydl_opts = {
            "outtmpl": os.path.join(
                self.temp_downloads, f"%(title)s_{timestamp}.%(ext)s"
            ),
            "noplaylist": True,
        }
        if audio_only:
            ydl_opts["format"] = "bestaudio/best"
            ydl_opts["postprocessors"] = [
                {
                    "key": "FFmpegExtractAudio",
                    "preferredcodec": "mp3",
                    "preferredquality": "192",
                }
            ]
        else:
            if quality == "720p":
                ydl_opts["format"] = "best[height<=720]"
            elif quality == "480p":
                ydl_opts["format"] = "best[height<=480]"
            else:  # "best"
                ydl_opts["format"] = "best[height<=1080]"
        if cookiefile:
            ydl_opts["cookiefile"] = cookiefile

        # Progress hook
        def hook(d):
            status = d.get("status")
            if status == "downloading":
                # The total size may be absent or None; fall back to the
                # estimate, and to a fixed midpoint when neither is known.
                total = d.get("total_bytes") or d.get("total_bytes_estimate")
                if total:
                    pct = min(d.get("downloaded_bytes", 0) / total * 100, 100)
                    progress(0.1 + pct / 100 * 0.7, desc=f"Downloading… {pct:.1f}%")
                else:
                    progress(0.5, desc="Downloading…")
            elif status == "finished":
                progress(0.8, desc="Processing download…")

        ydl_opts["progress_hooks"] = [hook]
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.extract_info(url, download=True)
        progress(0.9, desc="Copying to Downloads folder…")
        # Locate the downloaded file in the temp directory
        downloaded_temp = None
        for f in os.listdir(self.temp_downloads):
            if timestamp in f:
                downloaded_temp = os.path.join(self.temp_downloads, f)
                break
        if not downloaded_temp:
            return None, "❌ Downloaded file not found"
        final_name = os.path.basename(downloaded_temp)
        final_path = os.path.join(self.downloads_folder, final_name)
        try:
            shutil.copy2(downloaded_temp, final_path)
            saved_to = final_path
        except Exception as e:
            # Best effort: the temp file is still returned to the caller.
            print(f"Copy warning: {e}")
            saved_to = "Copy failed"
        progress(1.0, desc="βœ… Download complete!")
        msg = (
            "βœ… Download successful!\n"
            f"πŸ“ Temp file: {os.path.basename(downloaded_temp)}\n"
            f"πŸ“ Saved to: {saved_to}\n"
            f"🎯 Size: {os.path.getsize(downloaded_temp)/(1024*1024):.1f} MB"
        )
        return downloaded_temp, msg
    except Exception as e:
        return None, f"❌ Download failed: {e}"
# =================================================================
# Helper functions for Gradio
# =================================================================
# Module-level instance used by the Gradio helper functions in this file.
downloader = YouTubeDownloader()
def configure_api_key(api_key):
    """Configure Gemini on the shared downloader from a user-supplied key.

    Returns the status message together with a visibility update that is
    visible only when configuration succeeded.
    """
    if api_key and api_key.strip():
        success, message = downloader.configure_gemini(api_key.strip())
        return message, gr.update(visible=success)
    return "❌ Please enter a valid Google API key", gr.update(visible=False)
def analyze_with_cookies(url, cookies_file, progress=gr.Progress()):
    """Fetch metadata for *url* and return the formatted analysis report.

    Any failure is returned as an error string rather than raised.
    """
    try:
        progress(0.05, desc="Starting analysis…")
        info, msg = downloader.get_video_info(
            url, progress=progress, cookiefile=cookies_file or None
        )
        if not info:
            return f"❌ Analysis Failed: {msg}"
        progress(0.95, desc="Generating report…")
        return downloader.format_video_info(info)
    except Exception as e:
        return f"❌ System Error: {e}"
def download_with_cookies(url, quality, audio_only, cookies_file, progress=gr.Progress()):
    """Download *url* through the shared downloader.

    Returns ``(file_path, status)``; *file_path* is ``None`` whenever the
    download did not produce a file or an exception occurred.
    """
    try:
        progress(0.05, desc="Preparing download…")
        file_path, status = downloader.download_video(
            url,
            quality,
            audio_only,
            progress=progress,
            cookiefile=cookies_file or None,
        )
        if not file_path:
            return None, status
        return file_path, status
    except Exception as e:
        return None, f"❌ System Error: {e}"
# =================================================================
# Gradio UI
# =================================================================
def create_interface():
    """Build and return the Gradio Blocks interface.

    The UI has an API-key section, a URL/cookie input row, an analysis tab
    and a download tab, followed by a tip about the default cookie file.
    """
    with gr.Blocks(
        theme=gr.themes.Soft(), title="πŸŽ₯ YouTube Video Analyzer & Downloader Pro"
    ) as iface:
        gr.HTML("<h1>πŸŽ₯ YouTube Video Analyzer & Downloader Pro</h1>")
        # API section
        with gr.Group():
            gr.HTML("<h3>πŸ”‘ Google Gemini API Configuration</h3>")
            with gr.Row():
                api_key_in = gr.Textbox(
                    label="πŸ”‘ Google API Key",
                    placeholder="Paste your Google API key…",
                    type="password",
                )
                api_btn = gr.Button("πŸ”§ Configure API", variant="secondary")
            api_status = gr.Textbox(
                label="API Status",
                value="❌ Gemini API not configured – Using fallback analysis",
                interactive=False,
                lines=1,
            )
        # Main UI
        with gr.Row():
            url_in = gr.Textbox(
                label="πŸ”— YouTube URL",
                placeholder="Paste YouTube video URL…",
            )
            cookies_in = gr.File(
                label="πŸͺ Upload cookies.txt (optional)",
                file_types=[".txt"],
                type="filepath",
            )
        with gr.Tabs():
            with gr.TabItem("πŸ“Š Video Analysis"):
                analyze_btn = gr.Button("πŸ” Analyze Video", variant="primary")
                analysis_out = gr.Textbox(
                    label="πŸ“Š Analysis Report", lines=25, show_copy_button=True
                )
                analyze_btn.click(
                    fn=analyze_with_cookies,
                    inputs=[url_in, cookies_in],
                    outputs=analysis_out,
                    show_progress=True,
                )
            with gr.TabItem("⬇️ Video Download"):
                with gr.Row():
                    quality_dd = gr.Dropdown(
                        choices=["best", "720p", "480p"],
                        value="best",
                        label="πŸ“Ί Quality",
                    )
                    audio_cb = gr.Checkbox(label="🎡 Audio only (MP3)")
                download_btn = gr.Button("⬇️ Download Video", variant="primary")
                dl_status = gr.Textbox(
                    label="πŸ“₯ Download Status", lines=5, show_copy_button=True
                )
                dl_file = gr.File(label="πŸ“ Downloaded File", visible=False)

                def wrapped_download(url, q, a, cfile, progress=gr.Progress()):
                    """Run the download and reveal the file component on success."""
                    fp, st = download_with_cookies(url, q, a, cfile, progress)
                    if fp and os.path.exists(fp):
                        return st, gr.update(value=fp, visible=True)
                    return st, gr.update(visible=False)

                download_btn.click(
                    fn=wrapped_download,
                    inputs=[url_in, quality_dd, audio_cb, cookies_in],
                    outputs=[dl_status, dl_file],
                    show_progress=True,
                )

        # API button behaviour
        def configure_and_report(api_key):
            """Return only the status text, matching the single bound output."""
            # configure_api_key also returns a visibility update, but only
            # the status textbox is wired as an output of this event.
            status_text, _visibility = configure_api_key(api_key)
            return status_text

        api_btn.click(
            fn=configure_and_report,
            inputs=[api_key_in],
            outputs=[api_status],
        )
        gr.HTML(
            """
<div style="margin-top:20px;padding:15px;background:#f0f8ff;border-left:5px solid #4285f4;border-radius:10px;">
<h3>πŸ’‘ Tip: μΏ ν‚€ 파일 μžλ™ μ‚¬μš©</h3>
<p><code>www.youtube.com_cookies.txt</code> νŒŒμΌμ„ <strong>app.py</strong>와 같은
폴더에 두면 μžλ™μœΌλ‘œ μ‚¬μš©λ©λ‹ˆλ‹€. 주기적으둜 μƒˆ 파일둜 ꡐ체해 μ£Όμ„Έμš”.</p>
</div>
"""
        )
    return iface
# =================================================================
# Entrypoint
# =================================================================
if __name__ == "__main__":
    import atexit

    app = create_interface()
    # Remove the downloader's temporary directories when the process exits.
    atexit.register(downloader.cleanup)
    app.launch(debug=True, show_error=True)