garyuzair commited on
Commit
338ce5e
·
verified ·
1 Parent(s): 1185a67

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +666 -0
app.py ADDED
@@ -0,0 +1,666 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio==4.29.0
2
+ google-generativeai==0.7.0
3
+ parler-tts==0.1.1
4
+ transformers==4.40.1
5
+ torch==2.2.2 # Or CPU version: torch==2.2.2+cpu (check PyTorch website for your OS)
6
+ torchaudio==2.2.2 # Or CPU version
7
+ torchvision==0.17.2 # Or CPU version
8
+ soundfile==0.12.1
9
+ requests==2.31.0
10
+ beautifulsoup4==4.12.3
11
+ fake_useragent==1.5.1
12
+ moviepy==1.0.3
13
+ google-api-python-client==2.126.0
14
+ google-auth-oauthlib==1.2.0
15
+ google-auth-httplib2==0.2.0
16
+ yt-dlp==2024.04.09
17
+ python-dotenv==1.0.1
18
+ imageio-ffmpeg==0.4.9 # Often a helpful specific version for moviepy
19
+ ```
+
+ **Note on PyTorch:** Visit [https://pytorch.org/get-started/locally/](https://pytorch.org/get-started/locally/) to get the correct `torch`, `torchaudio`, and `torchvision` install command for your specific OS and CUDA version (if you have an NVIDIA GPU). The versions above are examples. If you don't have a CUDA-enabled GPU, use the CPU builds.
20
+
21
+ **3. `app.py`**
22
+
23
+ ```python
24
import os
import random
import re
import shutil
import time
from urllib.parse import quote_plus

import google.generativeai as genai
import gradio as gr
import requests
import soundfile as sf
import torch
import yt_dlp
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from fake_useragent import UserAgent
from googleapiclient.discovery import build
from moviepy.editor import (
    AudioFileClip,
    CompositeAudioClip,
    CompositeVideoClip,
    ImageClip,
    TextClip,
    VideoFileClip,
    concatenate_audioclips,
    concatenate_videoclips,
    vfx,
)
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer
44
+
45
# --- CONFIGURATION ---
# Merge variables from a local .env file (if any) into the process environment.
load_dotenv()

# API credentials; os.getenv returns None when a variable is unset.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")

# A missing key is only reported here; the app still starts.
if not GEMINI_API_KEY:
    print("WARNING: GEMINI_API_KEY not found in .env file or environment.")
if not YOUTUBE_API_KEY:
    print("WARNING: YOUTUBE_API_KEY not found in .env file or environment.")

# Working directory for downloaded images, generated audio and the final video.
TEMP_DIR = "temp_files_youtube_creator"

# Voice description handed to the TTS model alongside each voiceover text.
SPEAKER_DESCRIPTION_FOR_TTS = (
    "A clear, engaging, and expressive male voice with a standard American accent, "
    "speaking at a moderate pace. The recording is of high quality with minimal background noise."
)

IMAGES_PER_SEGMENT = 1
MAX_SCRIPT_SEGMENTS_FOR_DEMO = 5  # Caps the number of scenes so a run stays short

# Output video geometry and frame rate.
VIDEO_WIDTH = 1280
VIDEO_HEIGHT = 720
VIDEO_FPS = 24

# --- END CONFIGURATION ---

# --- Model / service handles, filled in by initialize_models() ---
gemini_model = None
parler_model = None
parler_tokenizer = None
parler_description_tokenizer = None
youtube_service = None

# Source of randomised User-Agent headers for the image scraper.
ua = UserAgent()
73
+
74
def initialize_models():
    """Create the Gemini, Parler-TTS and YouTube handles that are still unset.

    Each handle is stored in a module-level global and is only built when it
    is currently ``None``, so calling this function again retries whatever
    failed earlier. Failures are printed, never raised; the affected handle
    stays ``None`` and callers are expected to check for that.

    The three Parler-TTS globals are published together: either the model and
    both tokenizers are set, or all three are ``None``.
    """
    global gemini_model, parler_model, parler_tokenizer, parler_description_tokenizer, youtube_service

    if GEMINI_API_KEY and gemini_model is None:
        try:
            genai.configure(api_key=GEMINI_API_KEY)
            gemini_model = genai.GenerativeModel("gemini-1.5-flash-latest")
            print("Gemini model initialized.")
        except Exception as e:
            print(f"Error initializing Gemini model: {e}")
            gemini_model = None  # Ensure it's None if init fails

    if parler_model is None:
        try:
            print("Loading Parler-TTS models...")
            # Requires the module-level `import torch`.
            device = "cuda:0" if torch.cuda.is_available() else "cpu"
            model = ParlerTTSForConditionalGeneration.from_pretrained(
                "parler-tts/parler-tts-mini-v1.1"
            ).to(device)
            prompt_tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler-tts-mini-v1.1")
            # The description tokenizer is loaded from the text encoder's own checkpoint name.
            description_tokenizer = AutoTokenizer.from_pretrained(
                model.config.text_encoder._name_or_path
            )
        except Exception as e:
            print(f"Error initializing Parler-TTS models: {e}")
            # Never leave a model without its tokenizers (or the reverse).
            parler_model = None
            parler_tokenizer = None
            parler_description_tokenizer = None
        else:
            parler_model = model
            parler_tokenizer = prompt_tokenizer
            parler_description_tokenizer = description_tokenizer
            print("Parler-TTS models loaded.")

    if YOUTUBE_API_KEY and youtube_service is None:
        try:
            youtube_service = build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)
            print("YouTube service initialized.")
        except Exception as e:
            print(f"Error initializing YouTube service: {e}")
            youtube_service = None


# Call initialization at the start
initialize_models()
108
+
109
+ # --- Prompts ---
110
def get_idea_generation_prompt_template(niche):
    """Return the prompt asking Gemini for five video ideas in *niche*.

    The prompt requests a title, a description and keywords per idea, with
    ideas separated by '---', and ends with one worked example.
    """
    prompt_lines = [
        "",
        f"Generate 5 diverse and highly engaging YouTube video ideas for the niche: '{niche}'.",
        "For each idea, provide:",
        "1. **Title:** A very catchy, short, and SEO-friendly Title (max 10 words).",
        "2. **Description:** A compelling 1-2 sentence hook.",
        "3. **Keywords:** 3-5 specific keywords for YouTube search.",
        "",
        "Format each idea clearly, separated by '---'.",
        "Example:",
        "Title: Zen Masters' Morning Secrets",
        "Description: Unlock ancient Zen rituals for a peaceful and productive morning. Transform your day before it even begins!",
        "Keywords: zen, morning routine, mindfulness, productivity, meditation",
        "---",
        "",
    ]
    return "\n".join(prompt_lines)
125
+
126
def get_viral_selection_prompt_template(ideas_text):
    """Return the prompt asking Gemini to pick the single most viral idea.

    *ideas_text* is embedded verbatim under a "Video Ideas:" heading, and the
    model is told to answer with the chosen title only.
    """
    prompt_lines = [
        "",
        "Analyze the following YouTube video ideas. Select the ONE idea with the highest potential for virality and broad appeal within its niche.",
        "Consider factors like curiosity gap, emotional impact, and shareability.",
        "Provide ONLY the Title of the selected idea. No extra text.",
        "",
        "Video Ideas:",
        f"{ideas_text}",
        "",
        "Most Viral Title:",
        "",
    ]
    return "\n".join(prompt_lines)
137
+
138
def get_script_generation_prompt_template(title, description, target_duration_seconds=60):
    """Return the prompt asking Gemini for a scene-by-scene video script.

    Args:
        title: Video title, embedded in double quotes.
        description: Short hook for the video, embedded in double quotes.
        target_duration_seconds: Approximate length the script should target.

    The example scene uses the same two labelled lines that the instructions
    ask for (``VOICEOVER:`` then ``IMAGE_KEYWORDS: [...]``). It previously put
    the keywords in a bare bracket after the voiceover, which contradicted the
    instructions and invited output the script parser's primary pattern could
    not match.
    """
    return f"""
Create a captivating YouTube video script for:
Title: "{title}"
Description: "{description}"

The script should be for a video of approximately {target_duration_seconds} seconds.
Break it into distinct scenes/segments. For each scene:
1. **VOICEOVER:** (The text to be spoken)
2. **IMAGE_KEYWORDS:** [keyword1, keyword2, visual detail] (Suggest 2-3 descriptive keywords for Unsplash image search for this scene)

The voiceover should be conversational, engaging, and clear.
Include an intro, main points, and a concluding call to action (e.g., subscribe).
Each voiceover part should be a few sentences long, suitable for a single visual scene.
Write each scene exactly like the example below: a 'VOICEOVER:' line followed by an 'IMAGE_KEYWORDS:' line, with scenes separated by '---'.

Example Scene:
VOICEOVER: Imagine a world where time slows down, and every moment is an opportunity for peace.
IMAGE_KEYWORDS: [serene landscape, misty mountains, calm lake]
---
Script:
"""
158
+
159
+ # --- Gemini Handler ---
160
def query_gemini(prompt_text):
    """Send *prompt_text* to the Gemini model and return the reply text.

    Returns:
        The model's text on success. On any failure, a message that begins
        with ``"Error:"``; nothing is raised.

    Both failure messages now carry the same ``"Error:"`` marker. Callers in
    this file detect failures by looking for that marker, and the previous
    API-failure wording ("Error calling Gemini API: ...") did not contain it,
    so a failed call was treated as model output.
    """
    if not gemini_model:
        return "Error: Gemini model not initialized. Check API Key."
    try:
        # Reading `.text` stays inside the try so a failure there is reported too.
        return gemini_model.generate_content(prompt_text).text
    except Exception as e:
        return f"Error: Gemini API call failed: {e}"
168
+
169
def parse_generated_ideas(text):
    """Parse Gemini's idea list into dictionaries.

    Ideas are separated by lines made only of three or more dashes. Within an
    idea, the ``Title``, ``Description`` and ``Keywords`` labels are matched
    case-insensitively and may be wrapped in markdown bold and/or preceded by
    list numbering (e.g. ``1. **Title:** Foo``), which is the layout the
    idea-generation prompt asks for. Markdown asterisks around a value are
    removed so titles can later be compared against the model's selection.

    Args:
        text: Raw model output; ``None`` or an empty string yields ``[]``.

    Returns:
        A list of ``{"title": str, "description": str, "keywords": list[str]}``.
        An idea is kept only when both a title and a description were found.
    """
    if not text:
        return []

    def _field(label, block):
        # Tolerates "Label:", "**Label:**" and "**Label**:"; captures the rest of the line.
        match = re.search(rf"\b{label}\b\**\s*:\**[ \t]*(.*)", block, re.IGNORECASE)
        if match is None:
            return None
        return match.group(1).strip().strip("*").strip()

    ideas = []
    for block in re.split(r"^[ \t]*-{3,}[ \t]*$", text, flags=re.MULTILINE):
        if not block.strip():
            continue
        title = _field("Title", block)
        description = _field("Description", block)
        if not title or not description:
            continue
        keywords_raw = _field("Keywords", block) or ""
        keywords = [k.strip() for k in keywords_raw.split(",") if k.strip()]
        ideas.append({"title": title, "description": description, "keywords": keywords})
    return ideas
187
+
188
def _clean_voiceover(spoken):
    """Strip whitespace, markdown asterisks and a dangling list marker from voiceover text."""
    spoken = spoken.strip().strip("*").strip()
    # A numbered "2. **IMAGE_KEYWORDS:**" label leaves "\n2. **" behind once the label is cut off.
    spoken = re.sub(r"\n[ \t]*(?:\d{1,2}[.)]|[-*])?[ \t]*\**[ \t]*$", "", spoken)
    return spoken.strip().strip("*").strip()


def parse_generated_script(text):
    """Split a generated script into voiceover/keyword scenes.

    The text is cut at every ``VOICEOVER:`` label (case-insensitive, markdown
    bold tolerated), so each scene is parsed on its own and a scene without
    keywords can no longer absorb the following scene. Within a scene the
    keywords come from an ``IMAGE_KEYWORDS: [...]`` label; failing that, from
    a bracketed list ending the voiceover (``... peace. [lake, mist]``).
    The voiceover is the text before the keywords, never including them.

    Args:
        text: Raw model output; ``None`` or an empty string yields ``[]``.

    Returns:
        Up to ``MAX_SCRIPT_SEGMENTS_FOR_DEMO`` dictionaries of the form
        ``{"voiceover": str, "image_keywords": list[str]}``. Scenes with no
        spoken text are dropped; scenes with no keywords get
        ``["general background"]``.
    """
    if not text:
        return []

    segments = []
    # Element 0 is whatever precedes the first label and is not a scene.
    scene_chunks = re.split(r"\bVOICEOVER\b\**\s*:\**", text, flags=re.IGNORECASE)[1:]
    for chunk in scene_chunks:
        kw_match = re.search(
            r"\bIMAGE_KEYWORDS\b\**\s*:\**\s*\[(.*?)\]", chunk, re.DOTALL | re.IGNORECASE
        )
        if kw_match:
            spoken, kw_str = chunk[:kw_match.start()], kw_match.group(1)
        else:
            # No label: the scene ends at the first '---' separator line.
            spoken = re.split(r"^[ \t]*-{3,}[ \t]*$", chunk, maxsplit=1, flags=re.MULTILINE)[0]
            trailing = re.search(r"\[([^\[\]]*)\]\s*$", spoken)
            if trailing:
                spoken, kw_str = spoken[:trailing.start()], trailing.group(1)
            else:
                kw_str = ""

        voiceover = _clean_voiceover(spoken)
        if not voiceover:
            continue
        keywords = [k.strip() for k in kw_str.split(',') if k.strip()]
        segments.append({
            "voiceover": voiceover,
            "image_keywords": keywords if keywords else ["general background"],  # Default
        })

    return segments[:MAX_SCRIPT_SEGMENTS_FOR_DEMO]  # Limit for demo
213
+
214
+ # --- TTS Handler ---
215
def text_to_speech(text_prompt, speaker_desc, output_filename="segment_audio.wav"):
    """Synthesize *text_prompt* with Parler-TTS and save it as a WAV file.

    Args:
        text_prompt: The sentence(s) to speak.
        speaker_desc: Description of the desired voice, passed to the
            description tokenizer.
        output_filename: File name inside ``TEMP_DIR/audio_segments``.

    Returns:
        The path of the written file, or ``None`` on any failure. Every
        failure mode, including an uninitialized model, returns ``None`` so
        callers can test the result for truthiness. An error string was
        previously returned for the uninitialized case and would have been
        mistaken for a path.
    """
    if not parler_model or parler_tokenizer is None or parler_description_tokenizer is None:
        print("Error: Parler-TTS model not initialized.")
        return None
    if not text_prompt or not text_prompt.strip():
        print("Parler-TTS skipped: empty voiceover text.")
        return None

    try:
        # Use the device the model was actually placed on rather than re-deriving it.
        device = parler_model.device
        input_ids = parler_description_tokenizer(speaker_desc, return_tensors="pt").input_ids.to(device)
        prompt_input_ids = parler_tokenizer(text_prompt, return_tensors="pt").input_ids.to(device)

        generation = parler_model.generate(
            input_ids=input_ids,
            prompt_input_ids=prompt_input_ids,
            do_sample=True,
            temperature=0.7,
            repetition_penalty=1.1,
        )
        audio_arr = generation.cpu().numpy().squeeze()

        # The folder may not exist yet (or may have been cleaned between runs).
        output_dir = os.path.join(TEMP_DIR, "audio_segments")
        os.makedirs(output_dir, exist_ok=True)
        full_output_path = os.path.join(output_dir, output_filename)
        sf.write(full_output_path, audio_arr, parler_model.config.sampling_rate)
        return full_output_path
    except Exception as e:
        print(f"Parler-TTS Error for '{text_prompt[:30]}...': {e}")
        return None
233
+
234
+ # --- Image Scraper (Improved Unsplash Scraper) ---
235
def _create_placeholder_image(index=0):
    """Write a plain dark frame into TEMP_DIR/images and return its path, or None if Pillow is missing."""
    try:
        # Local import: Pillow is not imported at module level in this file.
        from PIL import Image
    except ImportError:
        print("PIL/Pillow not installed, cannot create placeholder image.")
        return None
    images_dir = os.path.join(TEMP_DIR, "images")
    os.makedirs(images_dir, exist_ok=True)
    path = os.path.join(images_dir, f"placeholder_{index}.jpg")
    if not os.path.exists(path):
        Image.new('RGB', (VIDEO_WIDTH, VIDEO_HEIGHT), color=(20, 20, 20)).save(path)
    return path


def _collect_unsplash_image_urls(soup, num_images):
    """Extract candidate image URLs from a parsed Unsplash search page (up to twice *num_images*)."""
    wanted = num_images * 2  # Gather a few extra so the caller can choose randomly
    image_urls = []

    # Attempt 1: <figure itemprop="image"> elements whose <img> carries a srcset.
    for fig in soup.find_all('figure', itemprop="image"):
        img_tag = fig.find('img', srcset=True)
        if not img_tag:
            continue
        # Take the last srcset entry, e.g. "url1 300w, url2 600w, url3 1000w" -> url3.
        best_url = img_tag['srcset'].split(',')[-1].strip().split(' ')[0]
        if best_url and best_url not in image_urls:
            image_urls.append(best_url)
        if len(image_urls) >= wanted:
            break

    # Attempt 2: links to photo pages, turned into source.unsplash.com URLs by photo id.
    # NOTE(review): whether source.unsplash.com still serves these URLs is not verifiable here.
    if len(image_urls) < num_images:
        for link in soup.find_all('a', href=True):
            href = link['href']
            if href.startswith('/photos/') and 'plus.unsplash.com' not in href:  # Avoid premium
                photo_id = href.split('/')[-1].split('?')[0]
                direct_img_url = f"https://source.unsplash.com/{photo_id}/{VIDEO_WIDTH}x{VIDEO_HEIGHT}"
                if direct_img_url not in image_urls:
                    image_urls.append(direct_img_url)
                if len(image_urls) >= wanted:
                    break

    return image_urls


def fetch_unsplash_images(keywords, num_images=1):
    """Download up to *num_images* pictures for *keywords* by scraping Unsplash search.

    Args:
        keywords: List of search terms; an empty list falls back to
            ``["video background"]``.
        num_images: Number of image files wanted.

    Returns:
        A list of local file paths. Slots that could not be filled from
        Unsplash are filled with a generated placeholder frame, so the list
        has *num_images* entries unless Pillow is unavailable.

    Network and parsing errors are printed and never raised. The placeholder
    is generated inside this block because ``get_placeholder_images``, which
    the earlier version called, is not defined in this file.
    """
    if not keywords:
        keywords = ["video background"]  # More generic default
    query = "+".join(quote_plus(k) for k in keywords)  # URL encode keywords
    search_url = f"https://unsplash.com/s/photos/{query}"
    headers = {'User-Agent': ua.random, 'Accept-Language': 'en-US,en;q=0.5'}
    images_dir = os.path.join(TEMP_DIR, "images")
    downloaded_image_paths = []

    try:
        os.makedirs(images_dir, exist_ok=True)
        print(f"Searching Unsplash: {search_url}")
        response = requests.get(search_url, headers=headers, timeout=15)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        image_urls = _collect_unsplash_image_urls(soup, num_images)
        if not image_urls:
            print("Using placeholder image as Unsplash scraping yielded no results.")
        else:
            print(f"Found {len(image_urls)} potential image URLs for '{query}'. Downloading {num_images}...")
            selected_urls = random.sample(image_urls, min(num_images, len(image_urls)))
            # Sanitize keywords for use in a file name; capped to keep names reasonably short.
            safe_keywords = "".join(c if c.isalnum() else "_" for c in "_".join(keywords))[:80]

            for i, img_url in enumerate(selected_urls):
                try:
                    time.sleep(random.uniform(0.5, 1.5))  # Respectful delay
                    filepath = os.path.join(images_dir, f"unsplash_{safe_keywords}_{i}.jpg")
                    # The context manager releases the streamed connection even on errors.
                    with requests.get(img_url, headers=headers, timeout=10, stream=True) as img_response:
                        img_response.raise_for_status()
                        with open(filepath, 'wb') as f:
                            for chunk in img_response.iter_content(chunk_size=8192):
                                f.write(chunk)
                    downloaded_image_paths.append(filepath)
                    print(f"Downloaded: {filepath}")
                except Exception as e_img:
                    print(f"Failed to download image {img_url}: {e_img}")

    except requests.exceptions.RequestException as e_req:
        print(f"Request error scraping Unsplash for '{query}': {e_req}")
    except Exception as e_gen:
        print(f"General error scraping Unsplash: {e_gen}")

    # If not enough images were downloaded, fill the remaining slots with placeholders.
    while len(downloaded_image_paths) < num_images:
        print("Not enough images from Unsplash, adding placeholder.")
        placeholder = _create_placeholder_image(len(downloaded_image_paths))
        if placeholder is None:  # Absolute fallback: nothing more can be done
            break
        downloaded_image_paths.append(placeholder)

    return downloaded_image_paths
334
+
335
+
336
+ # --- Music Handler ---
337
def find_and_download_music(keywords, output_dir=TEMP_DIR):
    """Search YouTube for Creative Commons background music and download the first hit as MP3.

    Args:
        keywords: Search terms, joined with spaces and suffixed with a fixed
            "copyright free instrumental background music" phrase.
        output_dir: Directory in which "background_music.mp3" is written.

    Returns:
        ``(status_message, audio_path)``; ``audio_path`` is ``None`` when the
        YouTube service is unavailable, nothing was found, or any step failed.
    """
    if not youtube_service:
        return "Error: YouTube service not initialized. Check API Key.", None

    search_query = f"{' '.join(keywords)} copyright free instrumental background music"
    try:
        search_request = youtube_service.search().list(
            q=search_query,
            part='id,snippet',
            maxResults=5,  # Get a few options
            type='video',
            videoLicense='creativeCommon',
        )
        search_response = search_request.execute()

        hits = search_response.get('items')
        if not hits:
            return "No Creative Commons music found on YouTube.", None

        # Simple selection: the first result wins.
        top_hit = hits[0]
        video_id = top_hit['id']['videoId']
        video_title = top_hit['snippet']['title']

        status_msg = f"Found music: '{video_title}'. Downloading..."
        print(status_msg)

        audio_path = os.path.join(output_dir, "background_music.mp3")
        mp3_extractor = {'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3', 'preferredquality': '192'}
        ydl_opts = {
            'format': 'bestaudio/best',
            'outtmpl': audio_path,
            'postprocessors': [mp3_extractor],
            'quiet': True,
            'no_warnings': True,
        }
        watch_url = f"https://www.youtube.com/watch?v={video_id}"
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([watch_url])
    except Exception as e:
        return f"Error fetching/downloading music: {e}", None

    return f"{status_msg} Downloaded to {audio_path}", audio_path
375
+
376
+ # --- Video Processor ---
377
def _close_quietly(clips):
    """Close every clip in *clips*, printing (not raising) failures so cleanup always completes."""
    for clip in clips:
        if clip is None:
            continue
        try:
            clip.close()
        except Exception as e:
            print(f"Warning: failed to close a clip cleanly: {e}")


def _build_video_segment(img_path, vo_path, script_text, min_duration, tail_padding, opened):
    """Build one scene: a still image, optional subtitles, and the voiceover as audio.

    The opened audio reader is appended to *opened* so the caller can close it.
    """
    audio_clip = AudioFileClip(vo_path)
    opened.append(audio_clip)
    # The picture outlasts the voiceover by `tail_padding` so that a crossfade
    # into the next scene overlaps silence instead of speech.
    segment_dur = max(audio_clip.duration + tail_padding, min_duration)

    img = (ImageClip(img_path)
           .set_duration(segment_dur)
           .resize(height=VIDEO_HEIGHT)  # Resize to fit height
           .set_fps(VIDEO_FPS))
    img_zoomed = img.resize(1.2)  # Enlarge by 20% so the crop below has margin
    # Static centred crop to the output frame; centred in the composite in case
    # the image is narrower than the frame.
    img_animated = img_zoomed.fx(
        vfx.crop,
        width=VIDEO_WIDTH,
        height=VIDEO_HEIGHT,
        x_center=img_zoomed.w / 2,
        y_center=img_zoomed.h / 2,
    ).set_position('center')

    layers = [img_animated]
    try:
        txt = (TextClip(script_text, fontsize=30, color='yellow', font='Arial-Unicode-MS',
                        bg_color='rgba(0,0,0,0.5)', size=(int(VIDEO_WIDTH * 0.9), None),
                        method='caption', align='South')
               .set_duration(audio_clip.duration)  # Sync with actual voiceover length
               .set_start(0)
               .set_position(('center', 'bottom')))
        layers.append(txt)
    except Exception as e:
        # Subtitles are optional: keep the scene rather than dropping it.
        print(f"Warning: could not render subtitles, continuing without them: {e}")

    return CompositeVideoClip(layers, size=(VIDEO_WIDTH, VIDEO_HEIGHT)).set_audio(audio_clip)


def create_video(image_paths, voiceover_audio_paths, script_segments, background_music_path=None):
    """Assemble the final MP4 from per-scene images, voiceovers and subtitles.

    Args:
        image_paths: One image file per scene.
        voiceover_audio_paths: One audio file per scene.
        script_segments: One dict per scene; its ``'voiceover'`` text becomes
            the subtitle.
        background_music_path: Optional audio file mixed in at 15% volume.

    Returns:
        ``(status_message, output_path)``; ``output_path`` is ``None`` on failure.

    If the three lists differ in length, all are truncated to the shortest.
    Scenes that fail to build are skipped. Consecutive scenes are joined with
    a 0.5 s crossfade (``crossfadein`` on every scene after the first, plus
    negative padding). All opened clips are closed before returning.
    """
    min_segment_duration = 2.0  # Minimum duration for a scene
    crossfade_duration = 0.5

    image_paths = list(image_paths or [])
    voiceover_audio_paths = list(voiceover_audio_paths or [])
    script_segments = list(script_segments or [])

    num_segments = min(len(image_paths), len(voiceover_audio_paths), len(script_segments))
    if not (len(image_paths) == len(voiceover_audio_paths) == len(script_segments)):
        print(f"Warning: Mismatch in number of images ({len(image_paths)}), voiceovers ({len(voiceover_audio_paths)}), or script segments ({len(script_segments)}). Adjusting.")
    if num_segments == 0:
        return "Error: Not enough assets to create video segments.", None
    image_paths = image_paths[:num_segments]
    voiceover_audio_paths = voiceover_audio_paths[:num_segments]
    script_segments = script_segments[:num_segments]

    opened = []            # File-backed clips that must be closed
    video_clips_list = []
    final_vid = None
    try:
        for i, (img_path, vo_path, segment) in enumerate(
                zip(image_paths, voiceover_audio_paths, script_segments)):
            try:
                video_clips_list.append(_build_video_segment(
                    img_path, vo_path, segment['voiceover'],
                    min_segment_duration, crossfade_duration, opened))
            except Exception as e:
                print(f"Error processing segment {i+1} with image {img_path} and audio {vo_path}: {e}")
                continue  # Skip problematic segment

        if not video_clips_list:
            return "Error: No video segments could be created.", None

        try:
            if len(video_clips_list) > 1:
                faded = [video_clips_list[0]] + [
                    clip.crossfadein(crossfade_duration) for clip in video_clips_list[1:]
                ]
                final_vid = concatenate_videoclips(faded, method="compose", padding=-crossfade_duration)
            else:
                final_vid = concatenate_videoclips(video_clips_list, method="compose")

            if background_music_path and os.path.exists(background_music_path):
                music_source = AudioFileClip(background_music_path)
                opened.append(music_source)
                music = music_source.volumex(0.15)  # Lower volume
                if music.duration > final_vid.duration:
                    music = music.subclip(0, final_vid.duration)

                # If concatenation produced no audio track, rebuild one from the scenes.
                if final_vid.audio is None:
                    voice_tracks = [vc.audio for vc in video_clips_list if vc.audio]
                    if voice_tracks:
                        final_vid = final_vid.set_audio(concatenate_audioclips(voice_tracks))

                if final_vid.audio:
                    final_vid = final_vid.set_audio(CompositeAudioClip([final_vid.audio, music]))
                else:
                    print("Warning: Final video has no primary audio track to mix music with.")
                    final_vid = final_vid.set_audio(music)  # Use only music if no VOs
        except Exception as e:
            return f"Error assembling video: {e}", None

        os.makedirs(TEMP_DIR, exist_ok=True)
        output_filepath = os.path.join(TEMP_DIR, "final_output_video.mp4")
        try:
            final_vid.write_videofile(output_filepath, codec="libx264", audio_codec="aac",
                                      fps=VIDEO_FPS, threads=4, preset='medium')
        except Exception as e:
            return f"Error writing final video: {e}", None
        return f"Video created: {output_filepath}", output_filepath
    finally:
        # Composite clips have no `.reader`, so rely on close() only and never
        # let cleanup replace the return value with an exception.
        _close_quietly([final_vid])
        _close_quietly(video_clips_list)
        _close_quietly(opened)
469
+
470
+
471
+ # --- Main Gradio Function ---
472
def _reset_temp_workspace():
    """Delete TEMP_DIR and recreate its images/ and audio_segments/ folders."""
    shutil.rmtree(TEMP_DIR, ignore_errors=True)
    for sub in ("images", "audio_segments"):
        os.makedirs(os.path.join(TEMP_DIR, sub), exist_ok=True)


def _is_gemini_error(reply):
    """Return True when *reply* is an error message from query_gemini rather than model output."""
    # Matches both the "Error: ..." and "Error calling Gemini API: ..." wordings.
    return (not reply) or reply.startswith(("Error:", "Error calling Gemini API"))


def _ensure_default_image():
    """Return the path of a black fallback frame, creating it if needed; None if Pillow is missing."""
    default_img_path = os.path.join(TEMP_DIR, "images", "default_img.jpg")
    if os.path.exists(default_img_path):
        return default_img_path
    try:
        from PIL import Image
    except ImportError:
        return None
    os.makedirs(os.path.dirname(default_img_path), exist_ok=True)
    Image.new('RGB', (VIDEO_WIDTH, VIDEO_HEIGHT), color='black').save(default_img_path)
    return default_img_path


def generate_youtube_video(niche_input, progress=gr.Progress(track_tqdm=True)):
    """Run the pipeline: ideas -> selection -> script -> voiceovers -> images -> music -> video.

    Args:
        niche_input: Niche or topic typed by the user.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        ``(video_path, log_text)``; ``video_path`` is ``None`` when the run
        stopped early, and ``log_text`` explains why.

    Only scenes whose voiceover was actually produced are carried forward, so
    image, subtitle and audio N always belong to the same scene. Workspace
    cleanup and the fallback image are implemented by helpers in this block
    because ``cleanup_temp_files`` and ``get_placeholder_images``, which the
    earlier version called, are not defined in this file.
    """
    missing = []
    if not GEMINI_API_KEY: missing.append("Gemini API Key")
    if not YOUTUBE_API_KEY: missing.append("YouTube API Key")
    if not parler_model: missing.append("Parler-TTS models")
    if not youtube_service: missing.append("YouTube service")
    if not gemini_model: missing.append("Gemini service")
    if missing:
        return None, f"ERROR: Required services/API keys not initialized: {', '.join(missing)}. Please check your .env file and console logs."

    if not niche_input or not niche_input.strip():
        return None, "ERROR: Please enter a video niche or topic."

    _reset_temp_workspace()
    log_messages = ["Process Started...\n"]

    # 1. Ideas
    progress(0.05, desc="Generating video ideas...")
    log_messages.append("1. Generating Video Ideas...")
    raw_ideas_text = query_gemini(get_idea_generation_prompt_template(niche_input))
    if _is_gemini_error(raw_ideas_text):
        log_messages.append(raw_ideas_text or "Error: Empty response from Gemini.")
        return None, "\n".join(log_messages)

    parsed_ideas = parse_generated_ideas(raw_ideas_text)
    if not parsed_ideas:
        log_messages.append("Error: No ideas parsed from Gemini response.")
        return None, "\n".join(log_messages)
    log_messages.append(f"Generated {len(parsed_ideas)} ideas.")

    # 2. Selection
    progress(0.15, desc="Selecting viral idea...")
    log_messages.append("\n2. Selecting Most Viral Idea...")
    ideas_for_selection_prompt = "\n---\n".join(
        [f"Title: {i['title']}\nDescription: {i['description']}" for i in parsed_ideas])
    selected_title_raw = query_gemini(get_viral_selection_prompt_template(ideas_for_selection_prompt))
    if _is_gemini_error(selected_title_raw):
        log_messages.append(f"Error selecting idea: {selected_title_raw}. Using first idea.")
        chosen_idea = parsed_ideas[0]
    else:
        # Drop the echoed label plus any surrounding markdown/quotes before comparing.
        selected_title = selected_title_raw.replace("Most Viral Title:", "").strip().strip("*\"'` ").strip()
        chosen_idea = next(
            (idea for idea in parsed_ideas if idea["title"].strip().lower() == selected_title.lower()),
            parsed_ideas[0])
    log_messages.append(f"Chosen Idea: '{chosen_idea['title']}'")

    # 3. Script
    progress(0.25, desc="Generating script...")
    log_messages.append(f"\n3. Generating Script for '{chosen_idea['title']}'...")
    raw_script_text = query_gemini(
        get_script_generation_prompt_template(chosen_idea['title'], chosen_idea['description']))
    if _is_gemini_error(raw_script_text):
        log_messages.append(raw_script_text or "Error: Empty response from Gemini.")
        return None, "\n".join(log_messages)

    script_segments = parse_generated_script(raw_script_text)
    if not script_segments:
        log_messages.append("Error: No script segments parsed.")
        return None, "\n".join(log_messages)
    log_messages.append(f"Script generated with {len(script_segments)} segments (limited to {MAX_SCRIPT_SEGMENTS_FOR_DEMO} for demo).")

    # 4. Voiceovers
    progress(0.40, desc="Generating voiceovers...")
    log_messages.append("\n4. Generating Voiceovers...")
    voiceover_paths = []
    voiced_segments = []  # Scenes that really have audio, in the same order as voiceover_paths
    for i, segment in enumerate(progress.tqdm(script_segments, desc="TTS Progress")):
        vo_text = segment['voiceover']
        if not vo_text:
            continue  # Skip if no voiceover text
        path = text_to_speech(vo_text, SPEAKER_DESCRIPTION_FOR_TTS, f"segment_{i+1}_audio.wav")
        # isfile() also rejects a non-path value such as an error string.
        if path and os.path.isfile(path):
            voiceover_paths.append(path)
            voiced_segments.append(segment)
            log_messages.append(f" - Voiceover for segment {i+1} created.")
        else:
            log_messages.append(f" - Failed voiceover for segment {i+1}.")
    if len(voiceover_paths) < len(script_segments):
        log_messages.append("Warning: Not all voiceovers could be generated.")
    if not voiceover_paths:
        log_messages.append("Error: No voiceovers could be generated.")
        return None, "\n".join(log_messages)  # Critical failure if NO voiceovers

    # 5. Images (one per voiced scene)
    progress(0.60, desc="Fetching images...")
    log_messages.append("\n5. Fetching Images...")
    all_image_paths_for_video = []
    for i, segment in enumerate(progress.tqdm(voiced_segments, desc="Image Fetching")):
        keywords = segment['image_keywords'] or [chosen_idea['title']]  # Fallback to title
        try:
            img_path_list = fetch_unsplash_images(keywords, num_images=IMAGES_PER_SEGMENT)
        except Exception as e:
            log_messages.append(f" - Image fetch failed for scene {i+1}: {e}")
            img_path_list = []

        if img_path_list:
            all_image_paths_for_video.append(img_path_list[0])  # Take the first image found
            log_messages.append(f" - Image for scene {i+1} using keywords '{', '.join(keywords)}' fetched: {os.path.basename(img_path_list[0])}")
            continue

        log_messages.append(f" - No image found for scene {i+1} with keywords '{', '.join(keywords)}'. Using placeholder.")
        default_img_path = _ensure_default_image()
        if default_img_path is None:
            log_messages.append("PIL/Pillow not installed, cannot create dummy image.")
            return None, "\n".join(log_messages)  # Can't proceed without images
        all_image_paths_for_video.append(default_img_path)

    # 6. Music (optional: a failure here is logged and the video is built without it)
    progress(0.75, desc="Finding background music...")
    log_messages.append("\n6. Finding Background Music...")
    music_search_keywords = chosen_idea.get("keywords", []) + [niche_input, "cinematic", "calm"]
    music_status, music_file_path = find_and_download_music(music_search_keywords)
    log_messages.append(f" - {music_status}")

    # 7. Assembly
    progress(0.85, desc="Assembling video...")
    log_messages.append("\n7. Assembling Video...")
    try:
        video_status, final_video_path = create_video(
            all_image_paths_for_video, voiceover_paths, voiced_segments, music_file_path)
    except Exception as e:
        video_status, final_video_path = f"Error assembling video: {e}", None
    log_messages.append(f" - {video_status}")

    if not final_video_path:
        return None, "\n".join(log_messages)

    progress(1.0, desc="Process Complete!")
    log_messages.append("\nProcess Complete! Video ready.")
    return final_video_path, "\n".join(log_messages)
612
+
613
+
614
+ # --- Gradio UI ---
615
# --- Gradio UI ---
# Custom styling; the last rule hides the default Gradio footer.
css = """
.gradio-container { font-family: 'Roboto', sans-serif; }
.gr-button { background-color: #FF7F50; color: white; border-radius: 8px; }
.gr-button:hover { background-color: #FF6347; }
footer {display: none !important;}
"""

with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="orange", secondary_hue="red")) as demo:
    # Page header
    gr.Markdown(
        """
<div style="text-align: center;">
<img src="https://i.imgur.com/J20hQ9h.png" alt="RoboNuggets Logo" style="width:100px; height:auto; margin-bottom: 5px;">
<h1>AI YouTube Video Creator (R28 LongForm Style)</h1>
<p>Automate your YouTube content creation! Enter a niche, and let AI handle the rest.</p>
</div>
"""
    )

    # Input: the niche/topic that drives the whole pipeline
    with gr.Row():
        niche_textbox = gr.Textbox(
            label="Enter Video Niche or Specific Topic",
            placeholder="e.g., 'The Philosophy of Stoicism for Modern Life', 'Beginner's Guide to Urban Gardening'",
            value="The Stoic Lion: Finding Calm in Chaos",
        )

    create_button = gr.Button("✨ Create Video ✨", variant="primary")

    # Outputs: running log and the rendered video
    with gr.Accordion("📊 Process Log & Output", open=True):
        log_output = gr.Textbox(
            label="Log",
            lines=15,
            interactive=False,
            placeholder="Process updates will appear here...",
        )
        video_output = gr.Video(label="Generated Video")

    # generate_youtube_video returns (video_path, log_text), matching the outputs order.
    create_button.click(
        fn=generate_youtube_video,
        inputs=[niche_textbox],
        outputs=[video_output, log_output],
    )

    # Page footer
    gr.Markdown(
        """
---
*Powered by RoboNuggets AI*
*(Note: This is a demo. Image scraping from Unsplash can be unreliable. Ensure API keys are set in .env)*
"""
    )

if __name__ == "__main__":
    # Make sure the working folders exist before the first request arrives.
    for _subdir in ("images", "audio_segments"):
        os.makedirs(os.path.join(TEMP_DIR, _subdir), exist_ok=True)

    print("Starting Gradio App...")
    # share=False keeps the app local; share=True would expose a public link.
    demo.launch(debug=True, share=False)