"""
YouTube Video Analyzer & Downloader Pro
(automatic cookie handling version)

- If a `www.youtube.com_cookies.txt` file sits in the same folder as
  **app.py**, it is used automatically.
- If a cookie file is uploaded through the Gradio UI, the uploaded file
  takes **priority**.
"""
|
|
|
|
|
|
|
|
|
import os |
|
import re |
|
import json |
|
import uuid |
|
import shutil |
|
import tempfile |
|
from datetime import datetime |
|
from pathlib import Path |
|
|
|
|
|
|
|
|
|
import gradio as gr |
|
import yt_dlp |
|
import google.generativeai as genai |
|
|
|
|
|
|
|
|
|
# Cookie file that is picked up automatically when it sits next to this script
# and no cookie file has been uploaded through the UI.
DEFAULT_COOKIE_FILE = Path(__file__).with_name("www.youtube.com_cookies.txt")
|
|
|
|
|
|
|
|
|
|
|
class YouTubeDownloader:
    """Analyze YouTube video metadata and download videos through yt-dlp.

    When a Gemini model has been configured (see ``configure_gemini``) the
    analysis report contains a model-generated Korean/English speech
    breakdown; otherwise a generic placeholder breakdown is produced.
    """

    # Upper bound on the number of segments in the placeholder breakdown.
    _MAX_FALLBACK_SEGMENTS = 20

    # youtube.com / youtu.be / youtube-nocookie.com URLs (optionally on the
    # www., m. or music. host) followed by something that looks like an
    # 11-character video id.
    _YOUTUBE_URL_RE = re.compile(
        r"(https?://)?(www\.|m\.|music\.)?"
        r"(youtube|youtu|youtube-nocookie)\.(com|be)/"
        r"(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})"
    )

    # (label, keywords) pairs, checked in order; the first hit wins.
    _DETAILED_TYPE_KEYWORDS = (
        ("tutorial", ("tutorial", "how to", "guide", "learn", "diy")),
        ("review", ("review", "unboxing", "test", "comparison", "vs")),
        ("vlog", ("vlog", "daily", "routine", "day in")),
        ("music", ("music", "song", "cover", "lyrics")),
        ("entertainment", ("comedy", "funny", "prank", "challenge")),
        ("news", ("news", "breaking", "update", "report")),
        ("cooking", ("cooking", "recipe", "food", "kitchen")),
        ("fitness", ("workout", "fitness", "exercise", "yoga")),
    )
    _DISPLAY_TYPE_KEYWORDS = (
        ("🎵 Music Video", ("music", "song", "album", "artist", "band", "lyrics")),
        ("📚 Tutorial/Educational", ("tutorial", "how to", "guide", "learn")),
        ("🎭 Entertainment/Comedy", ("funny", "comedy", "entertainment", "vlog")),
        ("📰 News/Information", ("news", "breaking", "report", "update")),
        ("⭐ Review/Unboxing", ("review", "unboxing", "test", "comparison")),
        ("📺 Commercial/Advertisement", ("commercial", "ad", "brand", "product")),
    )
    _BACKGROUND_MUSIC_KEYWORDS = (
        ("🎵 Original Music/Soundtrack", ("music", "song")),
        ("🎶 Upbeat Commercial Music", ("commercial", "ad")),
        ("🔇 Minimal/No Background Music", ("tutorial", "how to")),
        ("🎼 Ambient Background Music", ("vlog", "daily")),
    )

    # Prompt sent to Gemini for the Korean breakdown. Filled with str.format;
    # the text itself must therefore not contain literal curly braces.
    _KOREAN_PROMPT_TEMPLATE = """\
이 YouTube 비디오의 음성/대사를 타임스탬프별로 추출해주세요.

제목: {title}
재생시간: {duration}초
설명: {description}

매우 중요한 지침:
1. 실제 영상에서 들리는 대사, 내레이션, 음성을 그대로 적어주세요
2. 장면 설명이 아닌 실제 음성 내용만 작성하세요
3. 음성이 없는 부분은 (...) 또는 (배경음악) 등으로 표시
4. 타임스탬프 가이드라인:
   - 대사나 내레이션이 시작하고 끝나는 지점 기준
   - 연속된 대사는 하나로 묶어서 표시
   - 최대한 자연스러운 단위로 구분
5. 형식:
   **[MM:SS-MM:SS]**: "실제 대사나 내레이션 내용"
   **[MM:SS-MM:SS]**: (배경음악) 또는 (...장면 전환...)
6. 모든 음성 내용을 빠짐없이 적어주세요
7. 자막이나 화면에 표시된 텍스트도 포함하세요
8. 장면 설명은 절대 하지 마세요. 오직 음성과 텍스트만 추출하세요.

예시:
**[00:00-00:05]**: "안녕하세요. 오늘은 미륵산에서 발견된 백제 유적에 대해 알아보겠습니다."
**[00:05-00:08]**: (배경음악)
**[00:08-00:15]**: "미륵사지를 품고 있는 익산 미륵산의 정상부에서 백제시대에 만든 것으로 추정되는 저수조가 발굴됐습니다."
**[00:15-00:18]**: (인터뷰 준비 중...)
**[00:18-00:25]**: [이도학 교수] "이번 발굴은 백제 역사 연구에 중요한 전환점이 될 것입니다."

뉴스의 경우:
- 앵커나 기자의 멘트는 그대로 적기
- 인터뷰는 [인터뷰이 이름] "내용" 형식으로
- 자막은 [자막] 내용 형식으로
"""

    # Prompt asking Gemini to translate the Korean breakdown into English.
    _ENGLISH_PROMPT_TEMPLATE = """\
Translate the following Korean speech/dialogue transcription to English, maintaining the exact same timestamps.
Translate ONLY the actual speech content, not descriptions:

{scenes}

Important rules:
- Keep the format exactly the same: **[MM:SS-MM:SS]**: "English translation of speech"
- For non-speech parts like (배경음악), translate as (background music)
- For (...) keep as is
- For interview tags like [이도학 교수], translate as [Professor Lee Do-hak]
- For [자막], translate as [Subtitle]
- Keep quotation marks for actual speech
- Do NOT add any scene descriptions or explanations
"""

    # Scene description templates: position in the video -> video type -> text.
    # The ``None`` key holds the text used for every other video type.
    _DESCRIPTIONS_KO = {
        "intro": {
            "tutorial": (
                "{presenter}가 화면에 등장하여 자신을 소개하고 주제를 설명합니다. "
                "밝은 조명의 작업 공간에서 캐주얼한 옷을 입고 있습니다."
            ),
            "vlog": "{presenter}가 카메라를 향해 밝게 인사하며 오늘의 브이로그 주제를 설명합니다.",
            "review": "{presenter}가 리뷰할 제품을 들어 보이며 간단한 특징을 소개합니다.",
            None: "{presenter}가 시청자의 관심을 끌기 위한 매력적인 인트로로 영상을 시작합니다.",
        },
        "outro": {
            "tutorial": (
                "{presenter}가 최종 결과물을 보여주고 시청자에게 감사 인사를 전하며 "
                "좋아요와 구독을 부탁합니다."
            ),
            "vlog": "{presenter}가 하루를 마무리하며 최종 생각을 공유하고 작별 인사를 합니다.",
            None: (
                "{presenter}가 주요 내용을 요약하고 좋아요와 댓글을 통한 참여를 "
                "독려하며 마무리합니다."
            ),
        },
        "body": {
            "tutorial": "{presenter}가 다음 단계를 시연하며 클로즈업 샷과 함께 명확한 지침을 제공합니다.",
            "review": (
                "{presenter}가 제품의 특정 기능을 살펴보며 사용 모습을 보여주고 "
                "성능에 대해 설명합니다."
            ),
            "vlog": "{presenter}가 일상 활동을 계속하며 솔직한 순간과 개인적인 생각을 공유합니다.",
            "cooking": "{presenter}가 재료를 준비하며 자르고 섞으면서 각 단계를 설명합니다.",
            "fitness": "{presenter}가 운동 세트를 수행하며 올바른 자세를 시연하고 팁을 제공합니다.",
            None: "{presenter}가 명확한 설명을 통해 시청자와 소통하며 주요 콘텐츠를 진행합니다.",
        },
    }
    _DESCRIPTIONS_EN = {
        "intro": {
            "tutorial": (
                "{presenter} appears on screen, introducing themselves and the "
                "topic. They are in a well-lit workspace, wearing casual clothes."
            ),
            "vlog": (
                "{presenter} greets the camera cheerfully, perhaps waving, and "
                "explains what today's vlog is about."
            ),
            "review": (
                "{presenter} holds up the product to be reviewed, giving a brief "
                "overview of its features."
            ),
            None: (
                "{presenter} starts the video with an engaging introduction to "
                "capture viewers' attention."
            ),
        },
        "outro": {
            "tutorial": (
                "{presenter} shows the final result, thanks viewers, and "
                "encourages them to like and subscribe."
            ),
            "vlog": (
                "{presenter} wraps up the day, sharing final thoughts and "
                "bidding farewell."
            ),
            None: (
                "{presenter} concludes, summarizing key points and prompting "
                "engagement through likes and comments."
            ),
        },
        "body": {
            "tutorial": (
                "{presenter} demonstrates the next step, providing clear "
                "instructions with close-up shots."
            ),
            "review": (
                "{presenter} examines a specific feature of the product, showing "
                "it in use and commenting on performance."
            ),
            "vlog": (
                "{presenter} continues the day's activities, sharing candid "
                "moments and personal reflections."
            ),
            "cooking": (
                "{presenter} prepares ingredients, chopping and mixing while "
                "explaining each step."
            ),
            "fitness": (
                "{presenter} performs an exercise set, demonstrating proper form "
                "and offering tips."
            ),
            None: (
                "{presenter} proceeds with the main content, engaging viewers through "
                "clear explanations."
            ),
        },
    }

    def __init__(self):
        """Create the temporary working directories and the Downloads target."""
        self.download_dir = tempfile.mkdtemp()
        self.temp_downloads = tempfile.mkdtemp(prefix="youtube_downloads_")

        # Finished downloads are additionally copied to
        # ~/Downloads/YouTube_Downloads so they survive ``cleanup``.
        self.downloads_folder = os.path.join(
            os.path.expanduser("~"), "Downloads", "YouTube_Downloads"
        )
        os.makedirs(self.downloads_folder, exist_ok=True)

        # Stays None until ``configure_gemini`` succeeds.
        self.gemini_model = None

    # ------------------------------------------------------------------
    # Setup / teardown
    # ------------------------------------------------------------------
    def configure_gemini(self, api_key):
        """Configure the Gemini client with *api_key*.

        Returns:
            ``(ok, message)`` where *ok* tells whether configuration succeeded
            and *message* is a user-facing status line.
        """
        try:
            genai.configure(api_key=api_key)
            self.gemini_model = genai.GenerativeModel(
                model_name="gemini-1.5-flash-latest"
            )
            return True, "✅ Gemini API configured successfully!"
        except Exception as e:  # UI boundary: report the failure, never raise
            return False, f"❌ Failed to configure Gemini API: {e}"

    def cleanup(self):
        """Remove the temporary directories created in ``__init__``.

        Best effort: a failure is printed as a warning and does not prevent
        the other directory from being removed.
        """
        for attr in ("download_dir", "temp_downloads"):
            path = getattr(self, attr, None)
            if not path or not os.path.exists(path):
                continue
            try:
                shutil.rmtree(path)
            except OSError as e:
                print(f"⚠️ Warning: Could not clean up temporary directory: {e}")

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _duration_seconds(video_info):
        """Return the video duration as a non-negative int (0 when unknown)."""
        try:
            return max(int(video_info.get("duration") or 0), 0)
        except (TypeError, ValueError):
            return 0

    @staticmethod
    def _format_timestamp(seconds):
        """Format a number of seconds as ``MM:SS``."""
        return f"{seconds // 60:02d}:{seconds % 60:02d}"

    @staticmethod
    def _mentions(text, keywords):
        """Return True if *text* contains any of *keywords* at a word start.

        Keywords of three letters or fewer ("ad", "vs", "diy") must match as
        whole words so they do not fire inside unrelated words ("read",
        "tvs"); longer keywords may carry a suffix ("songs", "testing").
        """
        for keyword in keywords:
            pattern = r"\b" + re.escape(keyword)
            if len(keyword) <= 3:
                pattern += r"\b"
            if re.search(pattern, text):
                return True
        return False

    @classmethod
    def _first_matching_label(cls, text, table, default):
        """Return the label of the first *table* entry whose keywords match."""
        for label, keywords in table:
            if cls._mentions(text, keywords):
                return label
        return default

    @staticmethod
    def _parse_timestamped_scenes(text):
        """Split model output into ``**[..]**:`` entries.

        A new entry starts at every line beginning with ``**[`` that also
        contains ``]**:``; following lines are appended to the current entry.
        Lines before the first entry are dropped.
        """
        scenes = []
        current = ""
        for raw_line in (text or "").split("\n"):
            line = raw_line.strip()
            if line.startswith("**[") and "]**:" in line:
                if current:
                    scenes.append(current.strip())
                current = line
            elif current:
                current += "\n" + line
        if current:
            scenes.append(current.strip())
        return scenes

    @staticmethod
    def _resolve_cookiefile(cookiefile):
        """Pick the cookie file to use: explicit file, bundled default, or None."""
        if cookiefile and os.path.exists(cookiefile):
            return cookiefile
        if DEFAULT_COOKIE_FILE.exists():
            return str(DEFAULT_COOKIE_FILE)
        return None

    @staticmethod
    def _scene_position(scene_index, total_scenes):
        """Classify a scene index as ``intro``, ``outro`` or ``body``."""
        if scene_index == 0:
            return "intro"
        if scene_index == total_scenes - 1:
            return "outro"
        return "body"

    # ------------------------------------------------------------------
    # Validation
    # ------------------------------------------------------------------
    def is_valid_youtube_url(self, url):
        """Return True if *url* looks like a YouTube video URL.

        Surrounding whitespace is ignored; non-string input is rejected.
        """
        if not isinstance(url, str):
            return False
        return self._YOUTUBE_URL_RE.match(url.strip()) is not None

    # ------------------------------------------------------------------
    # Scene / speech breakdown
    # ------------------------------------------------------------------
    def generate_scene_breakdown_gemini(self, video_info):
        """Ask Gemini for a timestamped Korean breakdown and its translation.

        Falls back to ``generate_scene_breakdown_fallback`` when no model is
        configured, when the model returns nothing usable, or on any API
        error.

        NOTE(review): only the title, duration and description are sent to
        the model - no audio or video - so the returned "transcript" is
        whatever the model produces from that metadata.

        Returns:
            ``{"korean": [...], "english": [...]}`` lists of scene strings.
        """
        if not self.gemini_model:
            return self.generate_scene_breakdown_fallback(video_info)

        duration = self._duration_seconds(video_info)
        if not duration:
            return {
                "korean": [
                    "**[재생시간 알 수 없음]**: 비디오 재생시간을 확인할 수 없어 "
                    "타임스탬프를 생성할 수 없습니다"
                ],
                "english": [
                    "**[Duration Unknown]**: Unable to generate timestamped "
                    "breakdown - video duration not available"
                ],
            }

        try:
            prompt = self._KOREAN_PROMPT_TEMPLATE.format(
                title=video_info.get("title") or "",
                duration=duration,
                # yt-dlp may report a missing description as None.
                description=(video_info.get("description") or "")[:1500],
            )
            response = self.gemini_model.generate_content(prompt)
            korean_scenes = self._parse_timestamped_scenes(
                response.text if response else ""
            )
            if not korean_scenes:
                # Nothing to translate; skip the second API call.
                return self.generate_scene_breakdown_fallback(video_info)

            english_prompt = self._ENGLISH_PROMPT_TEMPLATE.format(
                scenes="\n".join(korean_scenes)
            )
            english_response = self.gemini_model.generate_content(english_prompt)
            english_scenes = self._parse_timestamped_scenes(
                english_response.text if english_response else ""
            )
            if not english_scenes:
                english_scenes = self.generate_scene_breakdown_fallback(
                    video_info
                )["english"]

            return {"korean": korean_scenes, "english": english_scenes}
        except Exception as e:  # any API/response failure degrades to fallback
            print(f"Gemini API error: {e}")
            return self.generate_scene_breakdown_fallback(video_info)

    def generate_scene_breakdown_fallback(self, video_info):
        """Build a placeholder breakdown that only depends on the duration.

        The video is cut into equal segments (10-30 s depending on length,
        widened for long videos so that at most ``_MAX_FALLBACK_SEGMENTS``
        segments cover the whole duration). The first segment is labelled as
        intro, the last as outro, everything else as main content.

        Returns:
            ``{"korean": [...], "english": [...]}`` lists of scene strings.
        """
        duration = self._duration_seconds(video_info)
        if not duration:
            return {
                "korean": ["**[재생시간 알 수 없음]**: 타임스탬프를 생성할 수 없습니다"],
                "english": [
                    "**[Duration Unknown]**: Unable to generate timestamped breakdown"
                ],
            }

        if duration <= 60:
            segment_length = 10
        elif duration <= 300:
            segment_length = 15
        elif duration <= 900:
            segment_length = 20
        else:
            segment_length = 30

        # Ceiling division: a 60 s video with 10 s segments has 6, not 7.
        num_segments = -(-duration // segment_length)
        if num_segments > self._MAX_FALLBACK_SEGMENTS:
            # Widen the segments instead of silently dropping the tail.
            segment_length = -(-duration // self._MAX_FALLBACK_SEGMENTS)
            num_segments = -(-duration // segment_length)

        korean_scenes = []
        english_scenes = []
        for i in range(num_segments):
            start_time = i * segment_length
            is_last = i == num_segments - 1
            end_time = duration if is_last else start_time + segment_length - 1
            span = (
                f"{self._format_timestamp(start_time)}-"
                f"{self._format_timestamp(end_time)}"
            )

            if i == 0:
                korean_desc = "(음성 추출을 위해 Gemini API가 필요합니다. 인트로 부분...)"
                english_desc = (
                    "(Gemini API required for speech extraction. Intro section...)"
                )
            elif is_last:
                korean_desc = "(아웃트로 부분...)"
                english_desc = "(Outro section...)"
            else:
                korean_desc = "(본문 내용...)"
                english_desc = "(Main content...)"

            korean_scenes.append(f"**[{span}]**: {korean_desc}")
            english_scenes.append(f"**[{span}]**: {english_desc}")

        return {"korean": korean_scenes, "english": english_scenes}

    # ------------------------------------------------------------------
    # Heuristic classification
    # ------------------------------------------------------------------
    def detect_video_type_detailed(self, title, description):
        """Return a coarse type key ("tutorial", "review", ..., "general")."""
        text = f"{title or ''} {description or ''}".lower()
        return self._first_matching_label(
            text, self._DETAILED_TYPE_KEYWORDS, "general"
        )

    def generate_contextual_description_korean(
        self, scene_index, total_scenes, video_type, uploader, title
    ):
        """Return a generic Korean scene description.

        The text depends on where the scene sits in the video (first, last,
        in between) and on *video_type*. *uploader* is accepted for interface
        compatibility and not used.
        """
        lowered = (title or "").lower()
        presenter_desc = "콘텐츠 제작자"
        # Korean particles attach directly to nouns, so Korean keywords are
        # matched as plain substrings rather than at word boundaries.
        if self._mentions(lowered, ("woman", "girl")) or any(
            word in lowered for word in ("여성", "여자")
        ):
            presenter_desc = "여성 진행자"
        elif self._mentions(lowered, ("man", "guy")) or any(
            word in lowered for word in ("남성", "남자")
        ):
            presenter_desc = "남성 진행자"

        templates = self._DESCRIPTIONS_KO[
            self._scene_position(scene_index, total_scenes)
        ]
        template = templates.get(video_type, templates[None])
        return template.format(presenter=presenter_desc)

    def generate_contextual_description(
        self, scene_index, total_scenes, video_type, uploader, title
    ):
        """Return a generic English scene description.

        The text depends on where the scene sits in the video (first, last,
        in between) and on *video_type*. *uploader* is accepted for interface
        compatibility and not used.
        """
        lowered = (title or "").lower()
        presenter_desc = "The content creator"
        if self._mentions(lowered, ("woman", "girl")):
            presenter_desc = "A woman"
        elif self._mentions(lowered, ("man", "guy")):
            presenter_desc = "A man"

        templates = self._DESCRIPTIONS_EN[
            self._scene_position(scene_index, total_scenes)
        ]
        template = templates.get(video_type, templates[None])
        return template.format(presenter=presenter_desc)

    def detect_video_type(self, title, description):
        """Return a display label for the video type based on keywords."""
        text = f"{title or ''} {description or ''}".lower()
        return self._first_matching_label(
            text, self._DISPLAY_TYPE_KEYWORDS, "🎬 General Content"
        )

    def detect_background_music(self, video_info):
        """Guess a background-music label from keywords in the title."""
        title = (video_info.get("title") or "").lower()
        return self._first_matching_label(
            title, self._BACKGROUND_MUSIC_KEYWORDS, "🎵 Background Music"
        )

    def detect_influencer_status(self, video_info):
        """Return a creator tier label from subscriber and view counts."""
        # yt-dlp reports subscribers as ``channel_follower_count``; the
        # ``channel_followers`` key is still honoured for older callers.
        subs = (
            video_info.get("channel_follower_count")
            or video_info.get("channel_followers")
            or 0
        )
        views = video_info.get("view_count") or 0
        if subs > 10_000_000:
            return "👑 Mega Influencer (10M+)"
        if subs > 1_000_000:
            return "⭐ Major Influencer (1M+)"
        if subs > 100_000:
            return "🎯 Mid-tier Influencer (100K+)"
        if subs > 10_000:
            return "📈 Micro Influencer (10K+)"
        if views > 100_000:
            return "🔥 Viral Content Creator"
        return "👤 Regular Content Creator"

    # ------------------------------------------------------------------
    # Report formatting
    # ------------------------------------------------------------------
    def format_number(self, num):
        """Abbreviate *num* with a K/M/B suffix; falsy input yields "0"."""
        if not num:
            return "0"
        if num >= 1_000_000_000:
            return f"{num/1_000_000_000:.1f}B"
        if num >= 1_000_000:
            return f"{num/1_000_000:.1f}M"
        if num >= 1_000:
            return f"{num/1_000:.1f}K"
        return str(num)

    def format_video_info(self, video_info):
        """Render the bilingual analysis report for a yt-dlp info dict.

        Missing or ``None`` metadata fields are shown as "Unknown" / 0 rather
        than raising. Calls ``generate_scene_breakdown_gemini`` for the
        speech section.
        """
        if not video_info:
            return "❌ No video information available."

        title = video_info.get("title") or "Unknown"
        uploader = video_info.get("uploader") or "Unknown"
        duration = self._duration_seconds(video_info)
        dur_str = f"{duration // 60}:{duration % 60:02d}" if duration else "Unknown"
        # yt-dlp returns None for counters that the site does not expose.
        views = video_info.get("view_count") or 0
        likes = video_info.get("like_count") or 0
        comments = video_info.get("comment_count") or 0
        description = video_info.get("description") or ""

        upload_date = str(video_info.get("upload_date") or "Unknown")
        if len(upload_date) == 8 and upload_date.isdigit():  # YYYYMMDD
            upload_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:8]}"

        scene_data = self.generate_scene_breakdown_gemini(video_info)
        korean_scenes = scene_data.get("korean", [])
        english_scenes = scene_data.get("english", [])

        vtype = self.detect_video_type(title, description)
        bgm = self.detect_background_music(video_info)
        creator = self.detect_influencer_status(video_info)
        engagement = (likes / views) * 100 if views else 0

        heavy_rule = "=" * 50
        rule = "─" * 25
        wide_rule = "─" * 30

        lines = [
            "🎬 YOUTUBE VIDEO ANALYSIS REPORT",
            heavy_rule,
            "",
            "📋 기본 정보 / BASIC INFORMATION",
            rule,
            f"📹 **제목/Title:** {title}",
            f"👤 **업로더/Uploader:** {uploader}",
            f"📅 **업로드 날짜/Upload Date:** {upload_date}",
            f"⏱️ **재생시간/Duration:** {dur_str}",
            f"🆔 **비디오 ID/Video ID:** {video_info.get('id', 'Unknown')}",
            "",
            "📊 성과 지표 / PERFORMANCE METRICS",
            rule,
            f"👀 **조회수/Views:** {self.format_number(views)} ({views:,})",
            f"👍 **좋아요/Likes:** {self.format_number(likes)} ({likes:,})",
            f"💬 **댓글/Comments:** {self.format_number(comments)} ({comments:,})",
            f"📈 **참여율/Engagement Rate:** {engagement:.2f}%",
            "",
            "🎯 콘텐츠 분석 / CONTENT ANALYSIS",
            rule,
            f"📂 **비디오 유형/Video Type:** {vtype}",
            f"🎵 **배경음악/Background Music:** {bgm}",
            f"👑 **제작자 상태/Creator Status:** {creator}",
            "",
            "🎙️ 음성/대사 추출 (한국어) / SPEECH/DIALOGUE EXTRACTION (KOREAN)",
            wide_rule,
            "\n".join(korean_scenes),
            "",
            "🎙️ 음성/대사 추출 (영어) / SPEECH/DIALOGUE EXTRACTION (ENGLISH)",
            wide_rule,
            "\n".join(english_scenes),
            "",
            "📝 설명 미리보기 / DESCRIPTION PREVIEW",
            rule,
            description[:500] or "No description available",
        ]
        if len(description) > 500:
            lines.append("...(생략/truncated)")
        lines += [
            "",
            heavy_rule,
            "📅 **분석 완료/Analysis completed:** "
            f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
            "🤖 **AI 강화/AI Enhancement:** "
            f"{'Gemini AI' if self.gemini_model else 'Standard Analysis'}",
        ]
        return "\n".join(lines).strip()

    # ------------------------------------------------------------------
    # yt-dlp operations
    # ------------------------------------------------------------------
    def get_video_info(self, url, progress=gr.Progress(), cookiefile=None):
        """Fetch the yt-dlp metadata for *url* without downloading.

        Args:
            url: YouTube video URL.
            progress: Gradio progress callback.
            cookiefile: Optional cookies.txt path; when missing, the default
                cookie file next to the script is used if it exists.

        Returns:
            ``(info, message)``; *info* is ``None`` on failure and *message*
            is a user-facing status line.
        """
        if not url or not url.strip():
            return None, "❌ Please enter a YouTube URL"
        url = url.strip()
        if not self.is_valid_youtube_url(url):
            return None, "❌ Invalid YouTube URL format"

        cookiefile = self._resolve_cookiefile(cookiefile)

        try:
            progress(0.1, desc="Initializing YouTube extractor…")
            ydl_opts = {"noplaylist": True, "extract_flat": False}
            if cookiefile:
                ydl_opts["cookiefile"] = cookiefile

            progress(0.5, desc="Extracting video metadata…")
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=False)

            progress(1.0, desc="✅ Analysis complete!")
            return info, "✅ Video information extracted successfully"
        except Exception as e:  # UI boundary: report the failure, never raise
            return None, f"❌ Error: {e}"

    def download_video(
        self,
        url,
        quality="best",
        audio_only=False,
        progress=gr.Progress(),
        cookiefile=None,
    ):
        """Download *url* into the temp folder and copy it to Downloads.

        Args:
            url: YouTube video URL.
            quality: "720p", "480p" or anything else for up to 1080p.
            audio_only: Extract an MP3 instead of keeping the video
                (uses yt-dlp's FFmpegExtractAudio post-processor).
            progress: Gradio progress callback.
            cookiefile: Optional cookies.txt path; see ``get_video_info``.

        Returns:
            ``(path, message)``; *path* is the file in the temp folder, or
            ``None`` on failure. *message* is a user-facing status text.
        """
        if not url or not url.strip():
            return None, "❌ Please enter a YouTube URL"
        url = url.strip()
        if not self.is_valid_youtube_url(url):
            return None, "❌ Invalid YouTube URL format"

        cookiefile = self._resolve_cookiefile(cookiefile)

        try:
            progress(0.1, desc="Preparing download…")
            # The timestamp is part of the file name and is used below to
            # find the finished file again.
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

            ydl_opts = {
                "outtmpl": os.path.join(
                    self.temp_downloads, f"%(title)s_{timestamp}.%(ext)s"
                ),
                "noplaylist": True,
            }

            if audio_only:
                ydl_opts["format"] = "bestaudio/best"
                ydl_opts["postprocessors"] = [
                    {
                        "key": "FFmpegExtractAudio",
                        "preferredcodec": "mp3",
                        "preferredquality": "192",
                    }
                ]
            else:
                height_limited = {
                    "720p": "best[height<=720]",
                    "480p": "best[height<=480]",
                }
                ydl_opts["format"] = height_limited.get(
                    quality, "best[height<=1080]"
                )

            if cookiefile:
                ydl_opts["cookiefile"] = cookiefile

            def hook(d):
                """Forward yt-dlp progress events to the Gradio progress bar."""
                status = d.get("status")
                if status == "downloading":
                    # ``total_bytes`` can be absent or None; fall back to the
                    # estimate before giving up on a percentage.
                    total = d.get("total_bytes") or d.get("total_bytes_estimate")
                    if total:
                        done = d.get("downloaded_bytes") or 0
                        pct = min(done / total * 100, 100.0)
                        progress(
                            0.1 + pct / 100 * 0.7,
                            desc=f"Downloading… {pct:.1f}%",
                        )
                    else:
                        progress(0.5, desc="Downloading…")
                elif status == "finished":
                    progress(0.8, desc="Processing download…")

            ydl_opts["progress_hooks"] = [hook]

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.extract_info(url, download=True)

            progress(0.9, desc="Copying to Downloads folder…")

            # Ignore yt-dlp's partial/bookkeeping files when locating the
            # result; sorting makes the choice deterministic.
            candidates = sorted(
                name
                for name in os.listdir(self.temp_downloads)
                if timestamp in name and not name.endswith((".part", ".ytdl"))
            )
            if not candidates:
                return None, "❌ Downloaded file not found"

            downloaded_temp = os.path.join(self.temp_downloads, candidates[0])
            final_path = os.path.join(self.downloads_folder, candidates[0])

            try:
                shutil.copy2(downloaded_temp, final_path)
                saved_to = final_path
            except OSError as e:
                # The temp copy is still returned, so this is only a warning.
                print(f"Copy warning: {e}")
                saved_to = "Copy failed"

            progress(1.0, desc="✅ Download complete!")

            size_mb = os.path.getsize(downloaded_temp) / (1024 * 1024)
            msg = (
                "✅ Download successful!\n"
                f"📁 Temp file: {os.path.basename(downloaded_temp)}\n"
                f"📂 Saved to: {saved_to}\n"
                f"🎯 Size: {size_mb:.1f} MB"
            )
            return downloaded_temp, msg
        except Exception as e:  # UI boundary: report the failure, never raise
            return None, f"❌ Download failed: {e}"
|
|
|
|
|
|
|
|
|
|
|
# Single shared instance used by all Gradio callbacks below.
downloader = YouTubeDownloader()
|
|
|
|
|
def configure_api_key(api_key):
    """Configure Gemini on the shared downloader from a UI-supplied key.

    Returns:
        ``(message, update)`` - the status message and a ``gr.update`` whose
        ``visible`` flag reflects whether configuration succeeded.
    """
    key = (api_key or "").strip()
    if not key:
        return "❌ Please enter a valid Google API key", gr.update(visible=False)
    ok, msg = downloader.configure_gemini(key)
    return msg, gr.update(visible=ok)
|
|
|
|
|
def analyze_with_cookies(url, cookies_file, progress=gr.Progress()):
    """Gradio callback: fetch metadata for *url* and return the report text.

    Args:
        url: YouTube video URL from the UI.
        cookies_file: Path of an uploaded cookies.txt, or a falsy value.
        progress: Gradio progress callback.

    Returns:
        The formatted report, or an error message when the analysis fails.
    """
    try:
        progress(0.05, desc="Starting analysis…")
        info, msg = downloader.get_video_info(
            url, progress=progress, cookiefile=cookies_file or None
        )
        if info:
            progress(0.95, desc="Generating report…")
            return downloader.format_video_info(info)
        return f"❌ Analysis Failed: {msg}"
    except Exception as e:  # UI boundary: show the error instead of raising
        return f"❌ System Error: {e}"
|
|
|
|
|
def download_with_cookies(url, quality, audio_only, cookies_file, progress=gr.Progress()):
    """Gradio callback: download *url* through the shared downloader.

    Args:
        url: YouTube video URL from the UI.
        quality: Quality choice passed on to ``download_video``.
        audio_only: Whether to extract audio only.
        cookies_file: Path of an uploaded cookies.txt, or a falsy value.
        progress: Gradio progress callback.

    Returns:
        ``(file_path, status)``; *file_path* is ``None`` when the download
        failed.
    """
    try:
        progress(0.05, desc="Preparing download…")
        file_path, status = downloader.download_video(
            url,
            quality,
            audio_only,
            progress=progress,
            cookiefile=cookies_file or None,
        )
        # Normalise any falsy path to None for the caller.
        return (file_path or None), status
    except Exception as e:  # UI boundary: show the error instead of raising
        return None, f"❌ System Error: {e}"
|
|
|
|
|
|
|
|
|
|
|
def create_interface():
    """Build and return the Gradio Blocks UI.

    Layout: Gemini API key section, URL + optional cookie upload, then one
    tab for the analysis report and one for downloads.
    """
    with gr.Blocks(
        theme=gr.themes.Soft(), title="🎥 YouTube Video Analyzer & Downloader Pro"
    ) as iface:
        gr.HTML("<h1>🎥 YouTube Video Analyzer & Downloader Pro</h1>")

        # --- Gemini API key -------------------------------------------
        with gr.Group():
            gr.HTML("<h3>🔑 Google Gemini API Configuration</h3>")
            with gr.Row():
                api_key_in = gr.Textbox(
                    label="🔑 Google API Key",
                    placeholder="Paste your Google API key…",
                    type="password",
                )
                api_btn = gr.Button("🔧 Configure API", variant="secondary")
            api_status = gr.Textbox(
                label="API Status",
                value="❌ Gemini API not configured — Using fallback analysis",
                interactive=False,
                lines=1,
            )

        # --- Inputs shared by both tabs --------------------------------
        with gr.Row():
            url_in = gr.Textbox(
                label="🔗 YouTube URL",
                placeholder="Paste YouTube video URL…",
            )
            cookies_in = gr.File(
                label="🍪 Upload cookies.txt (optional)",
                file_types=[".txt"],
                type="filepath",
            )

        with gr.Tabs():
            with gr.TabItem("🔍 Video Analysis"):
                analyze_btn = gr.Button("🔍 Analyze Video", variant="primary")
                analysis_out = gr.Textbox(
                    label="📊 Analysis Report", lines=25, show_copy_button=True
                )
                analyze_btn.click(
                    fn=analyze_with_cookies,
                    inputs=[url_in, cookies_in],
                    outputs=analysis_out,
                    show_progress=True,
                )

            with gr.TabItem("⬇️ Video Download"):
                with gr.Row():
                    quality_dd = gr.Dropdown(
                        choices=["best", "720p", "480p"],
                        value="best",
                        label="📺 Quality",
                    )
                    audio_cb = gr.Checkbox(label="🎵 Audio only (MP3)")
                download_btn = gr.Button("⬇️ Download Video", variant="primary")
                dl_status = gr.Textbox(
                    label="📥 Download Status", lines=5, show_copy_button=True
                )
                dl_file = gr.File(label="📁 Downloaded File", visible=False)

                def wrapped_download(url, q, a, cfile, progress=gr.Progress()):
                    """Run the download and reveal the file widget on success."""
                    fp, st = download_with_cookies(url, q, a, cfile, progress)
                    if fp and os.path.exists(fp):
                        return st, gr.update(value=fp, visible=True)
                    return st, gr.update(visible=False)

                download_btn.click(
                    fn=wrapped_download,
                    inputs=[url_in, quality_dd, audio_cb, cookies_in],
                    outputs=[dl_status, dl_file],
                    show_progress=True,
                )

        def show_api_status(api_key):
            """Return only the status message of ``configure_api_key``.

            ``configure_api_key`` returns ``(message, update)``, but this
            event has a single output component; passing the tuple through
            would make the status box display the whole tuple.
            """
            message, _ = configure_api_key(api_key)
            return message

        api_btn.click(
            fn=show_api_status,
            inputs=[api_key_in],
            outputs=[api_status],
        )

        gr.HTML(
            """
            <div style="margin-top:20px;padding:15px;background:#f0f8ff;border-left:5px solid #4285f4;border-radius:10px;">
              <h3>💡 Tip: 쿠키 파일 자동 사용</h3>
              <p><code>www.youtube.com_cookies.txt</code> 파일을 <strong>app.py</strong>와 같은
                 폴더에 두면 자동으로 사용됩니다. 주기적으로 새 파일로 교체해 주세요.</p>
            </div>
            """
        )
    return iface
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    import atexit

    demo = create_interface()
    # Remove the temporary download directories when the process exits.
    atexit.register(downloader.cleanup)
    demo.launch(debug=True, show_error=True)