Spaces:
Build error
Create app.py
app.py
ADDED
@@ -0,0 +1,666 @@
gradio==4.29.0
google-generativeai==0.7.0
parler-tts==0.1.1
transformers==4.40.1
torch==2.2.2 # Or CPU version: torch==2.2.2+cpu (check PyTorch website for your OS)
torchaudio==2.2.2 # Or CPU version
torchvision==0.17.2 # Or CPU version
soundfile==0.12.1
requests==2.31.0
beautifulsoup4==4.12.3
fake_useragent==1.5.1
moviepy==1.0.3
google-api-python-client==2.126.0
google-auth-oauthlib==1.2.0
google-auth-httplib2==0.2.0
yt-dlp==2024.04.09
python-dotenv==1.0.1
imageio-ffmpeg==0.4.9 # Often a helpful specific version for moviepy
```
**Note on PyTorch:** Visit [https://pytorch.org/get-started/locally/](https://pytorch.org/get-started/locally/) to get the correct `torch`, `torchaudio`, and `torchvision` command for your specific OS and CUDA version (if you have an NVIDIA GPU). The versions above are examples. If you don't have a CUDA-enabled GPU, use CPU versions.
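For a CPU-only environment, the install typically looks something like `pip install torch==2.2.2 torchaudio==2.2.2 torchvision==0.17.2 --index-url https://download.pytorch.org/whl/cpu` (an illustrative example; confirm the exact versions and index URL against the PyTorch selector).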

**3. `app.py`**

```python
import gradio as gr
import google.generativeai as genai
import torch
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer
import soundfile as sf
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from moviepy.editor import (ImageClip, AudioFileClip, concatenate_audioclips,
                            concatenate_videoclips, CompositeVideoClip,
                            CompositeAudioClip, TextClip,
                            VideoFileClip, vfx)  # Added VideoFileClip and vfx
from googleapiclient.discovery import build
import yt_dlp
import os
import re
import time
import shutil
import random
from dotenv import load_dotenv
from urllib.parse import quote_plus

# --- CONFIGURATION ---
load_dotenv()  # Load environment variables from .env file

GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")

if not GEMINI_API_KEY:
    print("WARNING: GEMINI_API_KEY not found in .env file or environment.")
if not YOUTUBE_API_KEY:
    print("WARNING: YOUTUBE_API_KEY not found in .env file or environment.")

TEMP_DIR = "temp_files_youtube_creator"  # Unique temp dir name
SPEAKER_DESCRIPTION_FOR_TTS = "A clear, engaging, and expressive male voice with a standard American accent, speaking at a moderate pace. The recording is of high quality with minimal background noise."
IMAGES_PER_SEGMENT = 1
VIDEO_WIDTH = 1280  # Adjusted for faster processing, 1920x1080 is also good
VIDEO_HEIGHT = 720
VIDEO_FPS = 24
MAX_SCRIPT_SEGMENTS_FOR_DEMO = 5  # To keep processing time reasonable for Gradio

# --- END CONFIGURATION ---
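
# GEMINI_API_KEY and YOUTUBE_API_KEY above are read from a local .env file by
# load_dotenv(). A minimal .env sketch (placeholder values, not real credentials):
#   GEMINI_API_KEY=your_gemini_api_key
#   YOUTUBE_API_KEY=your_youtube_data_api_v3_key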

# --- Initialize Models (Global for efficiency if Gradio doesn't reload everything) ---
gemini_model = None
parler_model = None
parler_tokenizer = None
parler_description_tokenizer = None
youtube_service = None
ua = UserAgent()

def initialize_models():
    global gemini_model, parler_model, parler_tokenizer, parler_description_tokenizer, youtube_service

    if GEMINI_API_KEY and gemini_model is None:
        try:
            genai.configure(api_key=GEMINI_API_KEY)
            gemini_model = genai.GenerativeModel("gemini-1.5-flash-latest")  # Using latest flash
            print("Gemini model initialized.")
        except Exception as e:
            print(f"Error initializing Gemini model: {e}")
            gemini_model = None  # Ensure it's None if init fails

    if parler_model is None:
        try:
            print("Loading Parler-TTS models...")
            device = "cuda:0" if torch.cuda.is_available() else "cpu"
            parler_model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-mini-v1.1").to(device)
            parler_tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler-tts-mini-v1.1")
            parler_description_tokenizer = AutoTokenizer.from_pretrained(parler_model.config.text_encoder._name_or_path)
            print("Parler-TTS models loaded.")
        except Exception as e:
            print(f"Error initializing Parler-TTS models: {e}")
            parler_model = None

    if YOUTUBE_API_KEY and youtube_service is None:
        try:
            youtube_service = build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)
            print("YouTube service initialized.")
        except Exception as e:
            print(f"Error initializing YouTube service: {e}")
            youtube_service = None

# Call initialization at the start
initialize_models()

# --- Prompts ---
def get_idea_generation_prompt_template(niche):
    return f"""
Generate 5 diverse and highly engaging YouTube video ideas for the niche: '{niche}'.
For each idea, provide:
1. **Title:** A very catchy, short, and SEO-friendly Title (max 10 words).
2. **Description:** A compelling 1-2 sentence hook.
3. **Keywords:** 3-5 specific keywords for YouTube search.

Format each idea clearly, separated by '---'.
Example:
Title: Zen Masters' Morning Secrets
Description: Unlock ancient Zen rituals for a peaceful and productive morning. Transform your day before it even begins!
Keywords: zen, morning routine, mindfulness, productivity, meditation
---
"""

def get_viral_selection_prompt_template(ideas_text):
    return f"""
Analyze the following YouTube video ideas. Select the ONE idea with the highest potential for virality and broad appeal within its niche.
Consider factors like curiosity gap, emotional impact, and shareability.
Provide ONLY the Title of the selected idea. No extra text.

Video Ideas:
{ideas_text}

Most Viral Title:
"""

def get_script_generation_prompt_template(title, description, target_duration_seconds=60):  # Shorter for demo
    return f"""
Create a captivating YouTube video script for:
Title: "{title}"
Description: "{description}"

The script should be for a video of approximately {target_duration_seconds} seconds.
Break it into distinct scenes/segments. For each scene:
1. **VOICEOVER:** (The text to be spoken)
2. **IMAGE_KEYWORDS:** [keyword1, keyword2, visual detail] (Suggest 2-3 descriptive keywords for Unsplash image search for this scene)

The voiceover should be conversational, engaging, and clear.
Include an intro, main points, and a concluding call to action (e.g., subscribe).
Each voiceover part should be a few sentences long, suitable for a single visual scene.

Example Scene:
VOICEOVER: Imagine a world where time slows down, and every moment is an opportunity for peace.
IMAGE_KEYWORDS: [serene landscape, misty mountains, calm lake]
---
Script:
"""

# --- Gemini Handler ---
def query_gemini(prompt_text):
    if not gemini_model:
        return "Error: Gemini model not initialized. Check API Key."
    try:
        response = gemini_model.generate_content(prompt_text)
        return response.text
    except Exception as e:
        return f"Error calling Gemini API: {e}"

def parse_generated_ideas(text):
    ideas = []
    # Improved regex to handle variations and ensure all parts are captured
    idea_blocks = re.split(r'\n\s*---\s*\n', text.strip())
    for block in idea_blocks:
        if not block.strip():
            continue
        title_match = re.search(r"Title:\s*(.*)", block, re.IGNORECASE)
        desc_match = re.search(r"Description:\s*(.*)", block, re.IGNORECASE)
        keywords_match = re.search(r"Keywords:\s*(.*)", block, re.IGNORECASE)

        if title_match and desc_match:
            title = title_match.group(1).strip()
            description = desc_match.group(1).strip()
            keywords_raw = keywords_match.group(1).strip() if keywords_match else ""
            keywords = [k.strip() for k in keywords_raw.split(',') if k.strip()]
            ideas.append({"title": title, "description": description, "keywords": keywords})
    return ideas

def parse_generated_script(text):
    segments = []
    # Regex to capture VOICEOVER and IMAGE_KEYWORDS blocks
    pattern = re.compile(r"VOICEOVER:\s*(.*?)\s*IMAGE_KEYWORDS:\s*\[(.*?)\]", re.DOTALL | re.IGNORECASE)
    matches = pattern.findall(text)

    for vo, kw_str in matches:
        keywords = [k.strip() for k in kw_str.split(',') if k.strip()]
        segments.append({
            "voiceover": vo.strip(),
            "image_keywords": keywords if keywords else ["general background"]  # Default
        })
    if not segments and "VOICEOVER:" in text:  # Fallback if structure is slightly off
        parts = text.split("---")
        for part in parts:
            vo_match = re.search(r"VOICEOVER:\s*(.*)", part, re.DOTALL | re.IGNORECASE)
            kw_match = re.search(r"IMAGE_KEYWORDS:\s*\[(.*?)\]", part, re.DOTALL | re.IGNORECASE)
            if vo_match:
                vo = vo_match.group(1).strip()
                kws = []
                if kw_match:
                    kws = [k.strip() for k in kw_match.group(1).split(',') if k.strip()]
                segments.append({"voiceover": vo, "image_keywords": kws if kws else ["general background"]})

    return segments[:MAX_SCRIPT_SEGMENTS_FOR_DEMO]  # Limit for demo

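# For reference, each parsed segment is a dict of the form suggested by the
# script prompt above, e.g.:
#   {"voiceover": "Imagine a world where time slows down...",
#    "image_keywords": ["serene landscape", "misty mountains", "calm lake"]}
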
# --- TTS Handler ---
def text_to_speech(text_prompt, speaker_desc, output_filename="segment_audio.wav"):
    if not parler_model:
        return "Error: Parler-TTS model not initialized."

    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    try:
        input_ids = parler_description_tokenizer(speaker_desc, return_tensors="pt").input_ids.to(device)
        prompt_input_ids = parler_tokenizer(text_prompt, return_tensors="pt").input_ids.to(device)

        generation = parler_model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids, do_sample=True, temperature=0.7, repetition_penalty=1.1)  # Added some generation params
        audio_arr = generation.cpu().numpy().squeeze()

        full_output_path = os.path.join(TEMP_DIR, "audio_segments", output_filename)
        sf.write(full_output_path, audio_arr, parler_model.config.sampling_rate)
        return full_output_path
    except Exception as e:
        print(f"Parler-TTS Error for '{text_prompt[:30]}...': {e}")
        return None

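# Illustrative standalone use of the handler above (hypothetical filename):
#   wav_path = text_to_speech("Welcome to the channel!", SPEAKER_DESCRIPTION_FOR_TTS, "intro.wav")
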
# --- Image Scraper (Improved Unsplash Scraper) ---
def fetch_unsplash_images(keywords, num_images=1):
    if not keywords:
        keywords = ["video background"]  # More generic default
    query = "+".join(quote_plus(k) for k in keywords)  # URL encode keywords
    # Try more specific search, e.g., landscape or portrait based on video aspect ratio
    # For now, general search
    search_url = f"https://unsplash.com/s/photos/{query}"
    image_urls = []
    downloaded_image_paths = []

    headers = {'User-Agent': ua.random, 'Accept-Language': 'en-US,en;q=0.5'}

    try:
        print(f"Searching Unsplash: {search_url}")
        response = requests.get(search_url, headers=headers, timeout=15)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Unsplash structure is dynamic. This is a common pattern.
        # Look for figure tags, then img tags within them with srcset
        # Or links that contain '/photos/'

        # Attempt 1: Figure tags with img having srcset (often high quality)
        figures = soup.find_all('figure', itemprop="image")
        for fig in figures:
            img_tag = fig.find('img', srcset=True)
            if img_tag:
                # Get the highest resolution from srcset (often the last one)
                # Example srcset: url1 300w, url2 600w, url3 1000w
                srcset_parts = img_tag['srcset'].split(',')
                best_url = srcset_parts[-1].strip().split(' ')[0]
                if best_url not in image_urls:
                    image_urls.append(best_url)
                if len(image_urls) >= num_images * 2:  # Fetch a bit more to choose from
                    break

        # Attempt 2: Links to photo pages (if first attempt fails or yields few)
        if len(image_urls) < num_images:
            links = soup.find_all('a', href=True)
            for link in links:
                href = link['href']
                if href.startswith('/photos/') and 'plus.unsplash.com' not in href:  # Avoid premium
                    photo_id = href.split('/')[-1].split('?')[0]
                    # Construct a potential direct image URL (might not always work)
                    # Unsplash often uses source.unsplash.com for direct links by ID
                    direct_img_url = f"https://source.unsplash.com/{photo_id}/{VIDEO_WIDTH}x{VIDEO_HEIGHT}"
                    if direct_img_url not in image_urls:
                        image_urls.append(direct_img_url)
                    if len(image_urls) >= num_images * 2:
                        break

        # Attempt 3: Generic placeholder if all else fails
        if not image_urls:
            print("Using placeholder image as Unsplash scraping yielded no results.")
            for i in range(num_images):
                downloaded_image_paths.append(get_placeholder_images(keywords, 1)[0])  # Use the placeholder fn
            return downloaded_image_paths

        print(f"Found {len(image_urls)} potential image URLs for '{query}'. Downloading {num_images}...")

        os.makedirs(os.path.join(TEMP_DIR, "images"), exist_ok=True)

        selected_urls = random.sample(image_urls, min(num_images, len(image_urls)))

        for i, img_url in enumerate(selected_urls):
            try:
                time.sleep(random.uniform(0.5, 1.5))  # Respectful delay
                img_response = requests.get(img_url, headers=headers, timeout=10, stream=True)
                img_response.raise_for_status()

                # Sanitize filename from keywords
                safe_keywords = "".join(c if c.isalnum() else "_" for c in "_".join(keywords))
                filename = f"unsplash_{safe_keywords}_{i}.jpg"
                filepath = os.path.join(TEMP_DIR, "images", filename)

                with open(filepath, 'wb') as f:
                    for chunk in img_response.iter_content(chunk_size=8192):
                        f.write(chunk)
                downloaded_image_paths.append(filepath)
                print(f"Downloaded: {filepath}")
            except Exception as e_img:
                print(f"Failed to download image {img_url}: {e_img}")

    except requests.exceptions.RequestException as e_req:
        print(f"Request error scraping Unsplash for '{query}': {e_req}")
    except Exception as e_gen:
        print(f"General error scraping Unsplash: {e_gen}")

    # If not enough images downloaded, fill with placeholders
    while len(downloaded_image_paths) < num_images:
        print("Not enough images from Unsplash, adding placeholder.")
        placeholder = get_placeholder_images(["generic"], 1)
        if placeholder:
            downloaded_image_paths.append(placeholder[0])
        else:  # Absolute fallback
            break

    return downloaded_image_paths

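# The helpers get_placeholder_images() and cleanup_temp_files() are called in this
# file but are not defined in it. The definitions below are a minimal sketch of
# plausible implementations (assumptions: Pillow is available, a solid-color frame
# is an acceptable placeholder, and "cleanup" simply recreates the temp folders).
from PIL import Image  # assumption: Pillow is installed (it is also used later in this file)

def get_placeholder_images(keywords, num_images=1):
    # Create simple solid-color frames, sized to the output video, as stand-in images.
    os.makedirs(os.path.join(TEMP_DIR, "images"), exist_ok=True)
    safe = "".join(c if c.isalnum() else "_" for c in "_".join(keywords))[:40] or "generic"
    paths = []
    for i in range(num_images):
        path = os.path.join(TEMP_DIR, "images", f"placeholder_{safe}_{i}.jpg")
        Image.new("RGB", (VIDEO_WIDTH, VIDEO_HEIGHT), color="dimgray").save(path)
        paths.append(path)
    return paths

def cleanup_temp_files():
    # Wipe and recreate the working directories used for downloaded images and audio segments.
    shutil.rmtree(TEMP_DIR, ignore_errors=True)
    os.makedirs(os.path.join(TEMP_DIR, "images"), exist_ok=True)
    os.makedirs(os.path.join(TEMP_DIR, "audio_segments"), exist_ok=True)
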
# --- Music Handler ---
def find_and_download_music(keywords, output_dir=TEMP_DIR):
    if not youtube_service:
        return "Error: YouTube service not initialized. Check API Key.", None

    search_query = " ".join(keywords) + " copyright free instrumental background music"
    try:
        search_response = youtube_service.search().list(
            q=search_query,
            part='id,snippet',
            maxResults=5,  # Get a few options
            type='video',
            videoLicense='creativeCommon'
        ).execute()

        if not search_response.get('items'):
            return "No Creative Commons music found on YouTube.", None

        # Simple selection: pick the first one. Could add logic to pick based on duration, views etc.
        video = search_response['items'][0]
        video_id = video['id']['videoId']
        video_title = video['snippet']['title']

        status_msg = f"Found music: '{video_title}'. Downloading..."
        print(status_msg)

        audio_path = os.path.join(output_dir, "background_music.mp3")
        ydl_opts = {
            'format': 'bestaudio/best',
            'outtmpl': audio_path,
            'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3', 'preferredquality': '192'}],
            'quiet': True, 'no_warnings': True
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([f"https://www.youtube.com/watch?v={video_id}"])

        return f"{status_msg} Downloaded to {audio_path}", audio_path
    except Exception as e:
        return f"Error fetching/downloading music: {e}", None

# --- Video Processor ---
def create_video(image_paths, voiceover_audio_paths, script_segments, background_music_path=None):
    video_clips_list = []
    min_segment_duration = 2.0  # Minimum duration for a scene

    if not image_paths or not voiceover_audio_paths or len(image_paths) != len(voiceover_audio_paths) or len(voiceover_audio_paths) != len(script_segments):
        print(f"Warning: Mismatch in number of images ({len(image_paths)}), voiceovers ({len(voiceover_audio_paths)}), or script segments ({len(script_segments)}). Adjusting.")
        # This needs careful handling. For demo, we'll try to proceed with minimum available.
        num_segments = min(len(image_paths), len(voiceover_audio_paths), len(script_segments))
        if num_segments == 0:
            return "Error: Not enough assets to create video segments.", None
        image_paths = image_paths[:num_segments]
        voiceover_audio_paths = voiceover_audio_paths[:num_segments]
        script_segments = script_segments[:num_segments]

    for i in range(len(voiceover_audio_paths)):
        img_path = image_paths[i]
        vo_path = voiceover_audio_paths[i]
        script_text = script_segments[i]['voiceover']

        try:
            audio_clip = AudioFileClip(vo_path)
            # Ensure segment duration is at least min_segment_duration
            segment_dur = max(audio_clip.duration, min_segment_duration)

            # Image with Ken Burns effect (simple zoom and pan)
            img = (ImageClip(img_path)
                   .set_duration(segment_dur)
                   .resize(height=VIDEO_HEIGHT)  # Resize to fit height
                   .set_fps(VIDEO_FPS))

            # Make image slightly larger for Ken Burns
            img_zoomed = img.resize(1.2)  # Zoom by 20%

            # Pan from left to right (or other variations)
            # img_animated = img_zoomed.set_position(lambda t: (-(img_zoomed.w - VIDEO_WIDTH) * (t / segment_dur), 'center'))
            # Simpler: Crop to create a slight zoom/pan effect
            img_animated = img_zoomed.fx(vfx.crop, width=VIDEO_WIDTH, height=VIDEO_HEIGHT, x_center=img_zoomed.w/2, y_center=img_zoomed.h/2)

            # Subtitle styling (more polished)
            txt = (TextClip(script_text, fontsize=30, color='yellow', font='Arial-Unicode-MS',  # Try a font known for good char support
                            bg_color='rgba(0,0,0,0.5)', size=(VIDEO_WIDTH*0.9, None),
                            method='caption', align='South')
                   .set_duration(audio_clip.duration)  # Sync with actual voiceover length
                   .set_start(0)  # Start text when audio starts
                   .set_position(('center', 'bottom')))

            video_segment = CompositeVideoClip([img_animated, txt], size=(VIDEO_WIDTH, VIDEO_HEIGHT)).set_audio(audio_clip)
            video_clips_list.append(video_segment)

        except Exception as e:
            print(f"Error processing segment {i+1} with image {img_path} and audio {vo_path}: {e}")
            continue  # Skip problematic segment

    if not video_clips_list:
        return "Error: No video segments could be created.", None

    # Crossfade transition: fade each subsequent clip in and overlap the clips by the same amount
    faded_clips = [video_clips_list[0]] + [clip.crossfadein(0.5) for clip in video_clips_list[1:]]
    final_vid = concatenate_videoclips(faded_clips, method="compose", padding=-0.5)

    if background_music_path and os.path.exists(background_music_path):
        music = AudioFileClip(background_music_path).volumex(0.15)  # Lower volume
        if music.duration > final_vid.duration:
            music = music.subclip(0, final_vid.duration)

        # Ensure final_vid has an audio track before composing
        if final_vid.audio is None and video_clips_list and video_clips_list[0].audio:
            # If concatenate_videoclips dropped audio, re-add from first segment (or combine all)
            # This can happen if first clip has no audio. Better to combine all VOs first.
            combined_vo = concatenate_audioclips([vc.audio for vc in video_clips_list if vc.audio])
            final_vid = final_vid.set_audio(combined_vo)

        if final_vid.audio:  # Check again
            final_audio = CompositeAudioClip([final_vid.audio, music])
            final_vid = final_vid.set_audio(final_audio)
        else:
            print("Warning: Final video has no primary audio track to mix music with.")
            final_vid = final_vid.set_audio(music)  # Use only music if no VOs

    output_filepath = os.path.join(TEMP_DIR, "final_output_video.mp4")
    try:
        final_vid.write_videofile(output_filepath, codec="libx264", audio_codec="aac", fps=VIDEO_FPS, threads=4, preset='medium')  # Added threads and preset
        return f"Video created: {output_filepath}", output_filepath
    except Exception as e:
        return f"Error writing final video: {e}", None
    finally:
        # Close all clips
        for clip in video_clips_list:
            if clip.audio: clip.audio.close()
            clip.close()
        if 'music' in locals() and music.reader: music.close()
        if final_vid.audio: final_vid.audio.close()
        if getattr(final_vid, 'reader', None): final_vid.close()

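# NOTE (environment assumption, not stated in the original file): moviepy's TextClip
# with method='caption' renders text through ImageMagick, so the ImageMagick binary
# and the font named above ('Arial-Unicode-MS') need to be available on the host;
# otherwise subtitle creation inside create_video() will raise an error.
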
# --- Main Gradio Function ---
def generate_youtube_video(niche_input, progress=gr.Progress(track_tqdm=True)):
    if not GEMINI_API_KEY or not YOUTUBE_API_KEY or not parler_model or not youtube_service or not gemini_model:
        missing = []
        if not GEMINI_API_KEY: missing.append("Gemini API Key")
        if not YOUTUBE_API_KEY: missing.append("YouTube API Key")
        if not parler_model: missing.append("Parler-TTS models")
        if not youtube_service: missing.append("YouTube service")
        if not gemini_model: missing.append("Gemini service")
        return None, f"ERROR: Required services/API keys not initialized: {', '.join(missing)}. Please check your .env file and console logs."

    cleanup_temp_files()
    log_messages = ["Process Started...\n"]

    progress(0.05, desc="Generating video ideas...")
    log_messages.append("1. Generating Video Ideas...")
    ideas_prompt = get_idea_generation_prompt_template(niche_input)
    raw_ideas_text = query_gemini(ideas_prompt)
    if "Error:" in raw_ideas_text:
        log_messages.append(raw_ideas_text)
        return None, "\n".join(log_messages)

    parsed_ideas = parse_generated_ideas(raw_ideas_text)
    if not parsed_ideas:
        log_messages.append("Error: No ideas parsed from Gemini response.")
        return None, "\n".join(log_messages)
    log_messages.append(f"Generated {len(parsed_ideas)} ideas.")
    # For UI, let's display the ideas (optional)
    # log_messages.append("Ideas:\n" + "\n".join([f"- {i['title']}" for i in parsed_ideas]))

    progress(0.15, desc="Selecting viral idea...")
    log_messages.append("\n2. Selecting Most Viral Idea...")
    ideas_for_selection_prompt = "\n---\n".join([f"Title: {i['title']}\nDescription: {i['description']}" for i in parsed_ideas])
    selection_prompt = get_viral_selection_prompt_template(ideas_for_selection_prompt)
    selected_title_raw = query_gemini(selection_prompt)
    if "Error:" in selected_title_raw:
        log_messages.append(f"Error selecting idea: {selected_title_raw}. Using first idea.")
        chosen_idea = parsed_ideas[0]
    else:
        selected_title = selected_title_raw.replace("Most Viral Title:", "").strip()
        chosen_idea = next((idea for idea in parsed_ideas if idea["title"].strip().lower() == selected_title.lower()), parsed_ideas[0])
    log_messages.append(f"Chosen Idea: '{chosen_idea['title']}'")

    progress(0.25, desc="Generating script...")
    log_messages.append(f"\n3. Generating Script for '{chosen_idea['title']}'...")
    script_prompt = get_script_generation_prompt_template(chosen_idea['title'], chosen_idea['description'])
    raw_script_text = query_gemini(script_prompt)
    if "Error:" in raw_script_text:
        log_messages.append(raw_script_text)
        return None, "\n".join(log_messages)

    script_segments = parse_generated_script(raw_script_text)
    if not script_segments:
        log_messages.append("Error: No script segments parsed.")
        return None, "\n".join(log_messages)
    log_messages.append(f"Script generated with {len(script_segments)} segments (limited to {MAX_SCRIPT_SEGMENTS_FOR_DEMO} for demo).")

    progress(0.40, desc="Generating voiceovers...")
    log_messages.append("\n4. Generating Voiceovers...")
    voiceover_paths = []
    for i, segment in enumerate(progress.tqdm(script_segments, desc="TTS Progress")):
        vo_text = segment['voiceover']
        if not vo_text: continue  # Skip if no voiceover text
        audio_filename = f"segment_{i+1}_audio.wav"
        path = text_to_speech(vo_text, SPEAKER_DESCRIPTION_FOR_TTS, audio_filename)
        if path:
            voiceover_paths.append(path)
            log_messages.append(f" - Voiceover for segment {i+1} created.")
        else:
            log_messages.append(f" - Failed voiceover for segment {i+1}.")
    if not voiceover_paths or len(voiceover_paths) < len(script_segments):
        log_messages.append("Warning: Not all voiceovers could be generated.")
        if not voiceover_paths:
            return None, "\n".join(log_messages)  # Critical failure if NO voiceovers

    progress(0.60, desc="Fetching images...")
    log_messages.append("\n5. Fetching Images...")
    all_image_paths_for_video = []
    for i, segment in enumerate(progress.tqdm(script_segments, desc="Image Fetching")):
        keywords = segment['image_keywords']
        if not keywords: keywords = [chosen_idea['title']]  # Fallback to title

        # Fetch one image per segment
        img_path_list = fetch_unsplash_images(keywords, num_images=IMAGES_PER_SEGMENT)
        if img_path_list:
            all_image_paths_for_video.append(img_path_list[0])  # Take the first image found
            log_messages.append(f" - Image for segment {i+1} using keywords '{', '.join(keywords)}' fetched: {os.path.basename(img_path_list[0])}")
        else:
            log_messages.append(f" - No image found for segment {i+1} with keywords '{', '.join(keywords)}'. Using placeholder.")
            placeholder_img = get_placeholder_images(keywords, 1)  # Use the function that creates/downloads a placeholder
            if placeholder_img:
                all_image_paths_for_video.append(placeholder_img[0])
            else:  # Absolute fallback
                log_messages.append(" - CRITICAL: Could not get even a placeholder image. Video might fail.")
                # For robustness, ensure a default image exists if this happens
                default_img_path = os.path.join(TEMP_DIR, "images", "default_img.jpg")
                if not os.path.exists(default_img_path):  # Create a dummy if it doesn't exist
                    try:
                        from PIL import Image
                        Image.new('RGB', (VIDEO_WIDTH, VIDEO_HEIGHT), color='black').save(default_img_path)
                        all_image_paths_for_video.append(default_img_path)
                    except ImportError:
                        log_messages.append("PIL/Pillow not installed, cannot create dummy image.")
                        return None, "\n".join(log_messages)  # Can't proceed without images
                else:
                    all_image_paths_for_video.append(default_img_path)

    if len(all_image_paths_for_video) < len(voiceover_paths):
        log_messages.append("Warning: Not enough images fetched for all voiceover segments. Video might be shorter or reuse images.")
        # Pad with last image if necessary, or a default
        while len(all_image_paths_for_video) < len(voiceover_paths) and all_image_paths_for_video:
            all_image_paths_for_video.append(all_image_paths_for_video[-1])
        if not all_image_paths_for_video:  # Still no images
            log_messages.append("Fatal Error: No images available for video creation.")
            return None, "\n".join(log_messages)

    progress(0.75, desc="Finding background music...")
    log_messages.append("\n6. Finding Background Music...")
    music_search_keywords = chosen_idea.get("keywords", []) + [niche_input, "cinematic", "calm"]
    music_status, music_file_path = find_and_download_music(music_search_keywords)
    log_messages.append(f" - {music_status}")

    progress(0.85, desc="Assembling video...")
    log_messages.append("\n7. Assembling Video...")
    # Make sure number of images matches number of VOs for the video processor
    # The video processor already has some logic, but let's be explicit here
    final_images = all_image_paths_for_video[:len(voiceover_paths)]

    video_status, final_video_path = create_video(final_images, voiceover_paths, script_segments, music_file_path)
    log_messages.append(f" - {video_status}")

    if not final_video_path:
        return None, "\n".join(log_messages)

    progress(1.0, desc="Process Complete!")
    log_messages.append("\nProcess Complete! Video ready.")
    return final_video_path, "\n".join(log_messages)

# --- Gradio UI ---
css = """
.gradio-container { font-family: 'Roboto', sans-serif; }
.gr-button { background-color: #FF7F50; color: white; border-radius: 8px; }
.gr-button:hover { background-color: #FF6347; }
footer {display: none !important;}
"""  # Hide default Gradio footer

with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", secondary_hue="red"), css=css) as demo:
    gr.Markdown(
        """
        <div style="text-align: center;">
            <img src="https://i.imgur.com/J20hQ9h.png" alt="RoboNuggets Logo" style="width:100px; height:auto; margin-bottom: 5px;">
            <h1>AI YouTube Video Creator (R28 LongForm Style)</h1>
            <p>Automate your YouTube content creation! Enter a niche, and let AI handle the rest.</p>
        </div>
        """
    )

    with gr.Row():
        niche_textbox = gr.Textbox(
            label="Enter Video Niche or Specific Topic",
            placeholder="e.g., 'The Philosophy of Stoicism for Modern Life', 'Beginner's Guide to Urban Gardening'",
            value="The Stoic Lion: Finding Calm in Chaos"  # Default value from video
        )

    create_button = gr.Button("✨ Create Video ✨", variant="primary")

    with gr.Accordion("📊 Process Log & Output", open=True):
        log_output = gr.Textbox(label="Log", lines=15, interactive=False, placeholder="Process updates will appear here...")
        video_output = gr.Video(label="Generated Video")

    create_button.click(
        fn=generate_youtube_video,
        inputs=[niche_textbox],
        outputs=[video_output, log_output]
    )

    gr.Markdown(
        """
        ---
        *Powered by RoboNuggets AI*
        *(Note: This is a demo. Image scraping from Unsplash can be unreliable. Ensure API keys are set in .env)*
        """
    )

if __name__ == "__main__":
    # Ensure temp directory exists
    os.makedirs(os.path.join(TEMP_DIR, "images"), exist_ok=True)
    os.makedirs(os.path.join(TEMP_DIR, "audio_segments"), exist_ok=True)

    print("Starting Gradio App...")
    demo.launch(debug=True, share=False)  # share=True for public link (use with caution and ngrok)