testdeep123 committed on
Commit
59b7f47
·
verified ·
1 Parent(s): c760fa6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +651 -862
app.py CHANGED
@@ -1,1085 +1,874 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
- import shutil # Added for directory cleanup
3
- import requests
4
- import io
5
- import time
6
- import re
7
  import random
8
- import tempfile # Added for use in create_clip
9
- import math
10
  import cv2
 
 
 
 
 
 
 
 
 
 
 
 
11
  import numpy as np
12
- import soundfile as sf
13
- import torch
14
- import gradio as gr
15
- import pysrt
16
  from bs4 import BeautifulSoup
 
17
  from urllib.parse import quote
18
- from PIL import Image, ImageDraw, ImageFont
19
  from gtts import gTTS
20
- from pydub import AudioSegment
21
- from pydub.generators import Sine
22
 
23
# Import moviepy components. The whole app depends on them, so a missing
# install is fatal. ColorClip and concatenate_audioclips are imported here
# because resize_to_fill() and add_background_music() reference them.
try:
    from moviepy.editor import (
        VideoFileClip, AudioFileClip, ImageClip, ColorClip,
        concatenate_videoclips, concatenate_audioclips,
        CompositeVideoClip, TextClip, CompositeAudioClip
    )
    import moviepy.video.fx.all as vfx
    import moviepy.config as mpy_config
    # TextClip renders through ImageMagick; point moviepy at the binary.
    # If running locally, ensure ImageMagick is installed and in your PATH.
    # If on Hugging Face Spaces, add 'imagemagick' to a packages.txt file.
    try:
        mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})  # Common Linux path
        print("ImageMagick path set.")
    except Exception as e:
        # Best effort: captions may fail later, but the rest of the app still works.
        print(f"Warning: Could not configure ImageMagick path. TextClip might fail. Error: {e}")
except ImportError:
    print("Error: moviepy library not found. Please install it using 'pip install moviepy'.")
    # Raise SystemExit directly: the bare exit() helper is injected by the
    # `site` module (not guaranteed to exist) and would report success (0).
    raise SystemExit(1)

# Import Kokoro (optional). When unavailable, `pipeline` is None and
# generate_tts() falls back to gTTS.
try:
    from kokoro import KPipeline
    # NOTE(review): 'en' is a placeholder language code; confirm it is a value
    # Kokoro accepts, otherwise initialisation fails and gTTS is always used.
    pipeline = KPipeline(lang_code='en')
    print("Kokoro Pipeline Initialized.")
except ImportError:
    print("Warning: Kokoro library not found. TTS generation will rely solely on gTTS.")
    pipeline = None
except Exception as e:
    print(f"Warning: Failed to initialize Kokoro Pipeline. TTS generation will rely solely on gTTS. Error: {e}")
    pipeline = None


# Global Configuration
# --- IMPORTANT: Replace placeholders with your actual keys or use environment variables ---
PEXELS_API_KEY = os.getenv('PEXELS_API_KEY', 'YOUR_PEXELS_API_KEY_HERE')
OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY', 'YOUR_OPENROUTER_API_KEY_HERE')
# --- ---

if PEXELS_API_KEY == 'YOUR_PEXELS_API_KEY_HERE' or OPENROUTER_API_KEY == 'YOUR_OPENROUTER_API_KEY_HERE':
    print("\n*** WARNING: API keys are not set. Please set PEXELS_API_KEY and OPENROUTER_API_KEY environment variables or replace the placeholders in the script. ***\n")

OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"  # Or choose another model
TEMP_FOLDER = "temp_video_processing"
OUTPUT_VIDEO_FILENAME = "final_video.mp4"
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
BACKGROUND_MUSIC_PATH = "background_music.mp3"  # Ensure this file exists

# --- Global variables to be set by Gradio input ---
TARGET_RESOLUTION = (1920, 1080)  # Default, will be updated
CAPTION_COLOR = "white"  # Default, will be updated
 
 
 
81
 
82
- # Helper Functions
 
 
 
83
 
84
def generate_script(user_input):
    """Request a short documentary script from the OpenRouter chat API.

    Args:
        user_input: Topic (or full script) interpolated into the prompt.

    Returns:
        The script text with anything before the first ``[`` removed and
        surrounding whitespace stripped, or ``None`` when the API key is not
        configured, the request fails, or the response has no choices.
    """
    key_is_placeholder = OPENROUTER_API_KEY == 'YOUR_OPENROUTER_API_KEY_HERE'
    if not OPENROUTER_API_KEY or key_is_placeholder:
        print("Error: OpenRouter API Key not configured.")
        return None

    prompt = f"""Short Documentary Script Generator Instructions:
If I say "use this," just output the script exactly as I gave it.
If I only give topics, generate a script based on them.
If I provide a full script, rewrite it without any changes.
Make everything short, simple, humorous, funny, and act serious but humorous.
Don't say anything off-topic.
Always include a funny statement to subscribe based on the video topic at the end.
Use normal conversational text like a normal person talking, avoid AI phrases, make statements humanized and normal conversational.
Always talk negative views, don't really want any positive statements.
The full script will be one video about one topic, so keep that in mind.

Formatting Rules:
Title in Square Brackets: Each section starts with a one-word title inside [ ] (max two words if necessary). This title will be used as a search term for Pexels footage.
Casual & Funny Narration: Each section has 5-15 words of narration. Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).
No Special Formatting: No bold, italics, or special characters. You are an assistant AI; your task is to create the script. You aren't a chatbot. So, don't write extra text.
Generalized Search Terms: If a term is too specific, make it more general for Pexels search.
Scene-Specific Writing: Each section describes only what should be shown in the video.
Output Only the Script: No extra text, just the script.

Example Output:
[North Korea]
Top 5 unknown facts about North Korea, maybe.
[Invisibility]
North Korea’s internet speed is so fast… it’s basically dial-up from 1998.
[Leadership]
Kim Jong-un once won an election with 100% votes… because who would vote against him?
[Magic]
North Korea discovered unicorns. They're delicious, apparently.
[Warning]
Subscribe now, or Kim Jong-un might send you a strongly worded letter.
[Freedom]
North Korean citizens enjoy unparalleled freedom... to agree with the government.

Now here is the Topic/script: {user_input}
"""

    request_headers = {
        'Authorization': f'Bearer {OPENROUTER_API_KEY}',
        'HTTP-Referer': 'https://your-app-name.hf.space',  # Optional: your app's URL if deployed
        'X-Title': 'AI Documentary Maker'  # Optional
    }
    payload = {
        'model': OPENROUTER_MODEL,
        'messages': [{'role': 'user', 'content': prompt}],
        'temperature': 0.5,
        'max_tokens': 1000
    }

    try:
        response = requests.post(
            'https://openrouter.ai/api/v1/chat/completions',
            headers=request_headers,
            json=payload,
            timeout=45
        )
        response.raise_for_status()  # 4xx / 5xx become RequestException
        response_data = response.json()

        has_choices = 'choices' in response_data and len(response_data['choices']) > 0
        if not has_choices:
            print(f"Error: No choices found in OpenRouter response. Response: {response_data}")
            return None

        raw_script = response_data['choices'][0]['message']['content']
        # Drop any preamble the model put before the first bracketed title.
        without_preamble = re.sub(r'^.*?\n*\[', '[', raw_script, flags=re.DOTALL)
        script_content = without_preamble.strip()
        print(f"Generated Script:\n{script_content}")
        return script_content
    except requests.exceptions.RequestException as e:
        print(f"Error calling OpenRouter API: {e}")
        return None
    except Exception as e:
        print(f"An unexpected error occurred during script generation: {e}")
        return None
162
 
163
def parse_script(script_text):
    """Parse a bracket-titled script into alternating media / TTS elements.

    The script is expected to look like::

        [Title]
        Narration for that title.

    A title line may also carry narration after the closing bracket, and
    narration may span several lines (joined with single spaces).

    Sections are kept in an ordered list rather than a dict keyed by title, so
    a script that repeats a title (for example two ``[Warning]`` sections)
    keeps every section instead of silently overwriting the earlier one.

    Args:
        script_text: Raw script text; falsy values yield an empty list.

    Returns:
        A list with, per section, a ``{"type": "media", ...}`` dict followed by
        a ``{"type": "tts", ...}`` dict. Returns ``[]`` on any parsing error.
    """
    if not script_text:
        return []

    title_line = re.compile(r'^\[([^\]]+)\](.*)')
    sections = []  # ordered (title, narration) pairs; duplicate titles allowed
    current_title = None
    current_text = ""

    try:
        for raw_line in script_text.splitlines():
            line = raw_line.strip()
            if not line:  # Skip empty lines
                continue
            match = title_line.match(line)
            if match:
                # A new title closes the section that was being collected.
                if current_title is not None and current_text:
                    sections.append((current_title, current_text.strip()))
                current_title = match.group(1).strip()
                current_text = match.group(2).strip()
            elif current_title:
                current_text += " " + line

        # Save the last section
        if current_title is not None and current_text:
            sections.append((current_title, current_text.strip()))

        if not sections:
            print("Warning: Script parsing resulted in no sections.")
            # Fallback: treat the first non-empty line as the title and the
            # remaining lines as its narration.
            lines = [l.strip() for l in script_text.splitlines() if l.strip()]
            if len(lines) >= 2:
                print("Attempting basic fallback parsing.")
                title = lines[0].replace('[', '').replace(']', '')
                sections.append((title, ' '.join(lines[1:])))

        print(f"Parsed Sections: {sections}")

        elements = []
        for title, narration in sections:
            if not title or not narration:
                print(f"Skipping empty section: Title='{title}', Narration='{narration}'")
                continue
            # Rough duration: 2s base + 0.4s per word, clamped to [3s, 10s].
            word_count = len(narration.split())
            duration = min(10.0, max(3.0, 2.0 + word_count * 0.4))
            elements.append({"type": "media", "prompt": title, "effects": "random"})
            elements.append({"type": "tts", "text": narration, "voice": "en", "duration": duration})

        if not elements:
            print("Error: No elements created after parsing.")
        return elements
    except Exception as e:
        print(f"Error parsing script: {e}\nScript content was:\n{script_text}")
        return []
223
 
224
def search_pexels(query, api_key, media_type="videos"):
    """Search Pexels and return the URL of one randomly chosen result.

    Args:
        query: Search term.
        api_key: Pexels API key; empty or placeholder keys abort the search.
        media_type: ``"videos"`` for video search; any other value (the file
            uses ``"photos"``) is treated as a photo search.

    Returns:
        A media URL string, or ``None`` when the key is missing or nothing
        was found on the first three result pages.
    """
    if not api_key or api_key == 'YOUR_PEXELS_API_KEY_HERE':
        print("Error: Pexels API Key not configured.")
        return None

    is_video = media_type == "videos"
    # Pexels exposes video search under /videos/search but photo search under
    # /v1/search; interpolating media_type into the path breaks photo lookups.
    base_url = (
        "https://api.pexels.com/videos/search"
        if is_video
        else "https://api.pexels.com/v1/search"
    )
    headers = {'Authorization': api_key}
    orientation = "landscape" if TARGET_RESOLUTION[0] > TARGET_RESOLUTION[1] else "portrait"

    # Prefer HD; ask for "fhd" too when the target is 1920 px or larger on
    # either side (often not available, so "hd" is always accepted as well).
    target_quality = "hd"
    if TARGET_RESOLUTION[0] >= 1920 or TARGET_RESOLUTION[1] >= 1920:
        target_quality = "fhd"
    accepted_qualities = {target_quality, "hd"}

    results = []
    for page in range(1, 4):  # Check first 3 pages
        try:
            params = {"query": query, "per_page": 15, "page": page, "orientation": orientation}
            response = requests.get(base_url, headers=headers, params=params, timeout=15)
            response.raise_for_status()
            data = response.json()

            if is_video:
                for item in data.get("videos", []):
                    video_files = item.get("video_files", [])
                    link = next(
                        (f.get("link") for f in video_files if f.get("quality") in accepted_qualities),
                        None,
                    )
                    if not link and video_files:
                        # No file of the wanted quality: take the first one listed.
                        link = video_files[0].get("link")
                    if link:
                        results.append(link)
            else:
                for item in data.get("photos", []):
                    # Original size; resizing happens later in the pipeline.
                    link = item.get("src", {}).get("original")
                    if link:
                        results.append(link)
        except requests.exceptions.RequestException as e:
            # A single failing page must not abort the whole search.
            print(f"Warning: Pexels API request failed for '{query}' (page {page}, {media_type}): {e}")
            continue
        except Exception as e:
            print(f"Warning: Unexpected error during Pexels search for '{query}': {e}")
            continue

    if not results:
        print(f"Warning: No Pexels {media_type} found for query: '{query}'")
        return None

    print(f"Found {len(results)} Pexels {media_type} for '{query}'. Choosing one randomly.")
    return random.choice(results)
289
 
290
def search_google_images(query):
    """Scrape a Google Images result page and return one candidate image URL.

    Scraping is fragile: Google changes its markup often, so this may stop
    finding images at any time.

    Args:
        query: Search term (URL-quoted before use).

    Returns:
        One URL picked at random from the first ten accepted ``<img>``
        sources, or ``None`` when none qualify or the request fails.
    """
    print(f"Attempting Google Image search for: '{query}' (Use with caution)")

    def _is_candidate(src):
        # Accept only absolute URLs that are not Google's own assets ...
        if not src or not src.startswith("http"):
            return False
        if "gstatic" in src or "googlelogo" in src:
            return False
        # ... and that look like an image file (or a data URI).
        has_image_ext = re.search(r'\.(jpg|jpeg|png|webp)$', src, re.IGNORECASE)
        return bool(has_image_ext or src.startswith('data:image'))

    try:
        search_url = f"https://www.google.com/search?q={quote(query)}&tbm=isch&safe=active"
        response = requests.get(search_url, headers={"User-Agent": USER_AGENT}, timeout=15)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        sources = (img.get("src") or img.get("data-src") for img in soup.find_all("img"))
        # Keep at most the first 10 candidates.
        image_urls = [src for src in sources if _is_candidate(src)][:10]

        if not image_urls:
            print(f"Warning: No suitable Google Images found for query: '{query}'")
            return None

        print(f"Found {len(image_urls)} potential Google Images for '{query}'. Choosing one.")
        return random.choice(image_urls)
    except requests.exceptions.RequestException as e:
        print(f"Warning: Google Image search failed for '{query}': {e}")
        return None
    except Exception as e:
        print(f"Warning: Error parsing Google Image search results for '{query}': {e}")
        return None
327
 
328
def download_media(media_url, filename):
    """Download an image or video to *filename* and sanity-check the result.

    Images (.png/.jpg/.jpeg/.webp) are verified with PIL and re-saved as JPEG
    data when not already in RGB mode. Videos (.mp4/.mov/.avi) smaller than
    1 KiB are treated as failed downloads.

    Args:
        media_url: URL to fetch.
        filename: Destination path; its extension selects the validation.

    Returns:
        *filename* on success, ``None`` on any failure (the partial or invalid
        file is removed).
    """
    try:
        headers = {"User-Agent": USER_AGENT}
        # The context manager releases the streamed connection even when
        # raise_for_status() or the file write raises.
        with requests.get(media_url, headers=headers, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(filename, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        print(f"Successfully downloaded media to {filename}")

        lowered = filename.lower()
        if lowered.endswith(('.png', '.jpg', '.jpeg', '.webp')):
            try:
                # verify() leaves the image object unusable, so the file is
                # opened a second time for the mode check / conversion.
                with Image.open(filename) as probe:
                    probe.verify()

                converted = None
                with Image.open(filename) as img:
                    if img.mode != 'RGB':
                        print(f"Converting image {filename} to RGB.")
                        converted = img.convert('RGB')
                # Save only after the source handle is closed so the file is
                # not rewritten while still open for reading.
                if converted is not None:
                    converted.save(filename, "JPEG")  # Save as JPEG for compatibility
                    converted.close()
            except (IOError, SyntaxError, Image.UnidentifiedImageError) as img_e:
                print(f"Warning: Downloaded file {filename} is not a valid image or is corrupted: {img_e}. Removing.")
                os.remove(filename)
                return None
        elif lowered.endswith(('.mp4', '.mov', '.avi')):
            # A tiny "video" is almost certainly an error page, not media.
            if os.path.getsize(filename) < 1024:
                print(f"Warning: Downloaded video file {filename} is suspiciously small. Removing.")
                os.remove(filename)
                return None

        return filename
    except requests.exceptions.RequestException as e:
        print(f"Error downloading media from {media_url}: {e}")
        if os.path.exists(filename):
            os.remove(filename)
        return None
    except Exception as e:
        print(f"An unexpected error occurred during media download: {e}")
        if os.path.exists(filename):
            os.remove(filename)
        return None
373
 
374
def generate_media(prompt, current_index=0, total_segments=1):
    """Find and download a visual asset (video or image) for *prompt*.

    Strategy, in order:
      1. Pexels video
      2. Pexels image
      3. Google image, only for "newsy" or long (more than 4 words) prompts
      4. A generic Pexels image from a shuffled list of fallback terms

    Downloaded files are named after the prompt *and* ``current_index`` so two
    segments that share a title do not overwrite each other's media.

    Args:
        prompt: Search term, normally the section title.
        current_index: Position of the segment; used to keep file names unique.
        total_segments: Unused here; kept for interface compatibility.

    Returns:
        ``{"path": ..., "asset_type": "video" | "image"}`` or ``None`` when
        every strategy failed.
    """
    safe_prompt = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')
    if not safe_prompt:
        safe_prompt = f"media_{current_index}"  # Fallback filename
    file_stem = f"{safe_prompt}_{current_index}"
    print(f"\n--- Generating Media for Prompt: '{prompt}' ---")

    # Downloads fail outright if the temp directory is missing.
    os.makedirs(TEMP_FOLDER, exist_ok=True)

    def _fetch(url, file_name, asset_type, ok_msg, fail_msg):
        """Download *url* into TEMP_FOLDER; return the asset dict or None."""
        target = os.path.join(TEMP_FOLDER, file_name)
        if download_media(url, target):
            print(ok_msg)
            return {"path": target, "asset_type": asset_type}
        print(fail_msg)
        return None

    # 1. Try Pexels Video
    video_url = search_pexels(prompt, PEXELS_API_KEY, media_type="videos")
    if video_url:
        asset = _fetch(
            video_url, f"{file_stem}_video.mp4", "video",
            f"Using Pexels video for '{prompt}'",
            f"Failed to download Pexels video for '{prompt}'.",
        )
        if asset:
            return asset

    # 2. Try Pexels Image
    image_url = search_pexels(prompt, PEXELS_API_KEY, media_type="photos")
    if image_url:
        asset = _fetch(
            image_url, f"{file_stem}_image.jpg", "image",
            f"Using Pexels image for '{prompt}'",
            f"Failed to download Pexels image for '{prompt}'.",
        )
        if asset:
            return asset

    # 3. Try Google Image (heuristic: newsy or very specific prompts)
    lowered = prompt.lower()
    if "news" in lowered or "breaking" in lowered or len(prompt.split()) > 4:
        google_image_url = search_google_images(prompt)
        if google_image_url:
            asset = _fetch(
                google_image_url, f"{file_stem}_google_image.jpg", "image",
                f"Using Google image for '{prompt}' as fallback.",
                f"Failed to download Google image for '{prompt}'.",
            )
            if asset:
                return asset

    # 4. Fallback to generic Pexels images
    print(f"Could not find specific media for '{prompt}'. Using generic fallback.")
    fallback_terms = ["abstract", "technology", "texture", "nature", "cityscape"]
    random.shuffle(fallback_terms)  # Try the fallbacks in a different order each time
    for term in fallback_terms:
        fallback_url = search_pexels(term, PEXELS_API_KEY, media_type="photos")
        if fallback_url:
            asset = _fetch(
                fallback_url, f"fallback_{term}_{current_index}.jpg", "image",
                f"Using fallback Pexels image ('{term}')",
                f"Failed to download fallback Pexels image ('{term}')",
            )
            if asset:
                return asset

    print(f"Error: Failed to generate any media for prompt: '{prompt}'")
    return None  # Failed to get any media
433
-
434
def _tts_segment_to_numpy(segment):
    """Return a TTS audio segment as a non-empty NumPy array, else None."""
    if segment is None:
        return None
    if hasattr(segment, "detach"):
        # torch.Tensor: detach from the graph and move to CPU before conversion.
        segment = segment.detach().cpu().numpy()
    samples = np.asarray(segment)
    return samples if samples.size > 0 else None


def generate_tts(text, voice="en"):
    """Synthesize *text* to a WAV file, trying Kokoro, then gTTS, then silence.

    Kokoro segments are converted to NumPy arrays before validation, so
    segments delivered as torch tensors are used instead of being discarded
    (which previously forced every request onto the gTTS fallback).

    Args:
        text: Narration to speak.
        voice: ``"en"`` maps to Kokoro voice ``af_heart``; other values are
            passed through to Kokoro, and used as the gTTS language code.

    Returns:
        Path of the generated WAV file inside TEMP_FOLDER, or ``None`` when
        even the silent fallback could not be written.
    """
    safe_text = re.sub(r'[^\w\s-]', '', text[:15]).strip().replace(' ', '_')
    if not safe_text:
        safe_text = f"tts_{random.randint(1000, 9999)}"
    file_path = os.path.join(TEMP_FOLDER, f"{safe_text}.wav")

    # Attempt Kokoro first if available
    if pipeline:
        try:
            print(f"Generating TTS with Kokoro for: '{text[:30]}...'")
            kokoro_voice = 'af_heart' if voice == 'en' else voice
            generator = pipeline(text, voice=kokoro_voice, speed=0.95, split_pattern=r'\n+|[.!?]+')
            audio_segments = [audio for _, _, audio in generator]

            if not audio_segments:
                raise ValueError("Kokoro returned no audio segments.")

            valid_segments = [
                samples
                for samples in map(_tts_segment_to_numpy, audio_segments)
                if samples is not None
            ]
            if not valid_segments:
                raise ValueError("Kokoro returned empty or invalid audio segments.")

            full_audio = np.concatenate(valid_segments)

            # soundfile expects float32; integer output is also scaled to [-1, 1].
            if full_audio.dtype != np.float32:
                full_audio = full_audio.astype(np.float32)
                max_val = np.max(np.abs(full_audio))
                if max_val > 1.0:
                    full_audio /= max_val

            sf.write(file_path, full_audio, 24000)  # Kokoro typically outputs at 24kHz
            print(f"Kokoro TTS generated successfully: {file_path}")
            return file_path
        except Exception as e:
            print(f"Warning: Kokoro TTS failed: {e}. Falling back to gTTS.")
            # Fall through to gTTS

    # Fallback to gTTS
    try:
        print(f"Generating TTS with gTTS for: '{text[:30]}...'")
        tts = gTTS(text=text, lang=voice, slow=False)  # voice doubles as the language code
        # gTTS only writes MP3; convert to WAV for consistency with moviepy.
        mp3_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.mp3")
        tts.save(mp3_path)
        audio = AudioSegment.from_mp3(mp3_path)
        audio.export(file_path, format="wav")
        os.remove(mp3_path)  # Clean up temporary mp3
        print(f"gTTS TTS generated successfully: {file_path}")
        return file_path
    except Exception as e:
        print(f"Error: gTTS also failed: {e}. Generating silence.")
        # Final fallback: silence of roughly the spoken length so the video
        # timeline still lines up.
        try:
            words = text.split()
            duration_seconds = min(10.0, max(3.0, 2.0 + len(words) * 0.4))
            samplerate = 24000  # Match Kokoro's typical rate
            num_samples = int(duration_seconds * samplerate)
            silence = np.zeros(num_samples, dtype=np.float32)
            sf.write(file_path, silence, samplerate)
            print(f"Generated silence fallback: {file_path} ({duration_seconds:.1f}s)")
            return file_path
        except Exception as silence_e:
            print(f"Error: Failed even to generate silence: {silence_e}")
            return None  # Complete failure
505
 
506
def apply_kenburns_effect(clip, target_resolution, effect_type="random"):
    """Apply a Ken Burns (slow zoom / pan) effect to an image clip.

    The image is first scaled to cover the target frame with a 20% margin.
    Each output frame is then a crop of that enlarged image, resized to the
    target resolution, with the crop window eased from a start to an end state.

    A larger crop window shows more of the image (zoomed out); a smaller one
    shows less (zoomed in). "zoom-in" therefore starts with the larger window
    and ends with the smaller one, and "zoom-out" does the reverse.

    Args:
        clip: moviepy clip (normally an ImageClip) with a duration set.
        target_resolution: ``(width, height)`` of the output frames.
        effect_type: One of "zoom-in", "zoom-out", "slow-zoom", "pan-left",
            "pan-right", "pan-up", "pan-down", or "random". Unknown values
            produce a static centred crop.

    Returns:
        A new clip whose frames have the target resolution.
    """
    target_w, target_h = target_resolution

    # Ensure the clip has usable dimensions before computing aspect ratios.
    if not hasattr(clip, 'w') or not hasattr(clip, 'h') or clip.w == 0 or clip.h == 0:
        print("Warning: Clip dimensions not found for Ken Burns effect. Using target resolution.")
        try:
            frame = clip.get_frame(0)
            clip.w, clip.h = frame.shape[1], frame.shape[0]
        except Exception:
            clip.w, clip.h = target_w, target_h  # Fallback

    # Scale so the image covers the target area while keeping its aspect ratio.
    clip_aspect = clip.w / clip.h
    target_aspect = target_w / target_h
    if clip_aspect > target_aspect:  # Image is wider than target
        new_height = target_h
        new_width = int(new_height * clip_aspect)
    else:  # Image is taller than target
        new_width = target_w
        new_height = int(new_width / clip_aspect)

    # Enlarge by the zoom margin so there is room to zoom and pan.
    base_scale = 1.20
    zoom_width = int(new_width * base_scale)
    zoom_height = int(new_height * base_scale)

    duration = clip.duration

    # PIL's Lanczos resize is tried first; moviepy's resize is the fallback.
    try:
        pil_img = Image.fromarray(clip.get_frame(0))
        resized_pil = pil_img.resize((zoom_width, zoom_height), Image.Resampling.LANCZOS)
        clip = ImageClip(np.array(resized_pil)).set_duration(duration)
        clip.w, clip.h = zoom_width, zoom_height
    except Exception as pil_e:
        print(f"Warning: PIL resize failed ({pil_e}). Using moviepy resize.")
        clip = clip.resize(newsize=(zoom_width, zoom_height))

    src_w, src_h = clip.w, clip.h

    if effect_type == "random":
        effect_type = random.choice(
            ["zoom-in", "zoom-out", "pan-left", "pan-right", "pan-up", "pan-down", "slow-zoom"]
        )
    print(f"Applying Ken Burns effect: {effect_type}")

    mid_x, mid_y = src_w / 2, src_h / 2
    left_x, right_x = target_w / 2, src_w - target_w / 2
    top_y, bottom_y = target_h / 2, src_h - target_h / 2
    wide = 1 / base_scale  # zoom value whose crop spans the enlarged margin

    # effect -> (start_zoom, end_zoom, start_center, end_center).
    # The crop size is target / zoom, so a smaller zoom value is a wider view.
    presets = {
        "zoom-in": (wide, 1.0, (mid_x, mid_y), (mid_x, mid_y)),
        "zoom-out": (1.0, wide, (mid_x, mid_y), (mid_x, mid_y)),
        "slow-zoom": (1 / 1.05, 1.0, (mid_x, mid_y), (mid_x, mid_y)),  # Very subtle zoom in
        "pan-left": (1.0, 1.0, (left_x, mid_y), (right_x, mid_y)),
        "pan-right": (1.0, 1.0, (right_x, mid_y), (left_x, mid_y)),
        "pan-up": (1.0, 1.0, (mid_x, top_y), (mid_x, bottom_y)),
        "pan-down": (1.0, 1.0, (mid_x, bottom_y), (mid_x, top_y)),
    }
    static = (1.0, 1.0, (mid_x, mid_y), (mid_x, mid_y))
    start_zoom, end_zoom, start_center, end_center = presets.get(effect_type, static)
    start_center_x, start_center_y = start_center
    end_center_x, end_center_y = end_center

    def transform_frame(get_frame, t):
        frame = get_frame(t)
        # Cosine ease-in / ease-out; a missing or zero duration keeps the start state.
        ratio = 0.5 - 0.5 * math.cos(math.pi * t / duration) if duration else 0

        current_zoom = start_zoom + (end_zoom - start_zoom) * ratio
        # Never crop more than the source frame provides.
        crop_w = min(int(target_w / current_zoom), src_w)
        crop_h = min(int(target_h / current_zoom), src_h)

        current_center_x = start_center_x + (end_center_x - start_center_x) * ratio
        current_center_y = start_center_y + (end_center_y - start_center_y) * ratio

        # Clamp the centre so the crop window stays inside the image.
        min_center_x = crop_w / 2
        max_center_x = src_w - crop_w / 2
        min_center_y = crop_h / 2
        max_center_y = src_h - crop_h / 2
        current_center_x = max(min_center_x, min(current_center_x, max_center_x))
        current_center_y = max(min_center_y, min(current_center_y, max_center_y))

        # cv2 needs a C-contiguous array.
        if not frame.flags['C_CONTIGUOUS']:
            frame = np.ascontiguousarray(frame)

        try:
            # getRectSubPix takes a floating-point centre, so the crop window
            # can move by sub-pixel amounts between frames.
            cropped_frame = cv2.getRectSubPix(frame, (crop_w, crop_h), (current_center_x, current_center_y))
            return cv2.resize(cropped_frame, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)
        except cv2.error as cv2_err:
            print(f"Error during cv2 operation in Ken Burns: {cv2_err}")
            print(f"Frame shape: {frame.shape}, Crop W/H: {crop_w}/{crop_h}, Center X/Y: {current_center_x}/{current_center_y}")
            # Fallback: the uncropped frame squeezed to the target size.
            return cv2.resize(frame, (target_w, target_h), interpolation=cv2.INTER_LINEAR)

    # Apply the transformation to the clip (and its mask, if it has one).
    return clip.fl(transform_frame, apply_to=['mask'])
634
-
635
 
636
def resize_to_fill(clip, target_resolution):
    """Resize *clip* so it completely fills *target_resolution*, cropping overflow.

    The clip is scaled along the dimension that makes it cover the target and
    the excess is cropped equally from both sides of the other dimension.

    Args:
        clip: moviepy clip exposing ``w``, ``h``, ``duration``, ``resize`` and
            ``crop``. A ``filename`` attribute is used for log messages when
            present but is not required.
        target_resolution: ``(width, height)`` to fill.

    Returns:
        A clip of exactly the target size. A zero-sized input yields a black
        ColorClip of the target size with the same duration.
    """
    target_w, target_h = target_resolution
    clip_w, clip_h = clip.w, clip.h
    # Not every clip type carries a filename (e.g. image or composite clips).
    label = getattr(clip, "filename", None) or "<unnamed clip>"

    if clip_w == 0 or clip_h == 0:
        print("Warning: Clip has zero dimensions before resize_to_fill. Cannot resize.")
        # Imported here so this function does not depend on the module-level
        # moviepy import list including ColorClip.
        from moviepy.editor import ColorClip
        return ColorClip(size=target_resolution, color=(0, 0, 0), duration=clip.duration)

    clip_aspect = clip_w / clip_h
    target_aspect = target_w / target_h

    if math.isclose(clip_aspect, target_aspect, rel_tol=1e-3):
        # Aspect ratios are close enough, just resize
        print(f"Resizing video clip {label} to {target_resolution} (aspect match).")
        return clip.resize(newsize=target_resolution)

    if clip_aspect > target_aspect:
        # Wider than the target: match the height, crop the width.
        print(f"Resizing video clip {label} to height {target_h} and cropping width.")
        clip = clip.resize(height=target_h)
        crop_amount = (clip.w - target_w) / 2
        if crop_amount < 0:  # Rounding left the clip too narrow to crop
            print("Warning: Negative crop amount calculated in resize_to_fill (width). Resizing only.")
            return clip.resize(newsize=target_resolution)
        return clip.crop(x1=crop_amount, width=target_w)

    # Taller than the target: match the width, crop the height.
    print(f"Resizing video clip {label} to width {target_w} and cropping height.")
    clip = clip.resize(width=target_w)
    crop_amount = (clip.h - target_h) / 2
    if crop_amount < 0:  # Rounding left the clip too short to crop
        print("Warning: Negative crop amount calculated in resize_to_fill (height). Resizing only.")
        return clip.resize(newsize=target_resolution)
    return clip.crop(y1=crop_amount, height=target_h)
674
-
675
-
676
def add_background_music(final_video, bg_music_path=BACKGROUND_MUSIC_PATH, bg_music_volume=0.08):
    """Mix a looped or trimmed background track under the video's existing audio.

    Args:
        final_video: Video clip to receive the music.
        bg_music_path: Path of the music file; a missing file is skipped with a warning.
        bg_music_volume: Volume multiplier applied to the music track.

    Returns:
        The video with the mixed audio track, or the unchanged ``final_video``
        when the music is skipped or any step fails.
    """
    if not os.path.exists(bg_music_path):
        print(f"Warning: Background music file not found at {bg_music_path}. Skipping.")
        return final_video

    # Check the video first so no audio reader is opened when it cannot be used.
    if final_video.duration is None or final_video.duration <= 0:
        print("Warning: Final video has no duration. Cannot add background music.")
        return final_video

    bg_music = None
    try:
        print("Adding background music...")
        bg_music = AudioFileClip(bg_music_path)

        if bg_music.duration is None or bg_music.duration <= 0:
            print("Warning: Background music has no duration. Skipping.")
            bg_music.close()  # release the file reader instead of leaking it
            return final_video

        # Repeat the track often enough to cover the video, then trim to the exact length.
        if bg_music.duration < final_video.duration:
            # Imported here so the loop works regardless of which moviepy
            # names the module header pulls in.
            from moviepy.editor import concatenate_audioclips
            loops_needed = math.ceil(final_video.duration / bg_music.duration)
            print(f"Looping background music {loops_needed} times.")
            bg_music = concatenate_audioclips([bg_music] * loops_needed)

        bg_music = bg_music.subclip(0, final_video.duration)
        bg_music = bg_music.volumex(bg_music_volume)

        video_audio = final_video.audio
        if video_audio:
            print("Mixing existing audio with background music.")
            mixed_audio = CompositeAudioClip([video_audio, bg_music])
        else:
            print("No existing audio found. Using only background music.")
            mixed_audio = bg_music

        final_video = final_video.set_audio(mixed_audio)
        print("Background music added successfully.")
        return final_video

    except Exception as e:
        print(f"Error adding background music: {e}")
        # The music is not attached to anything on this path, so release it.
        if bg_music is not None:
            try:
                bg_music.close()
            except Exception as close_e:
                print(f"Warning: could not close background music clip: {close_e}")
        # Return the original video rather than crashing the whole render.
        return final_video
726
 
727
-
728
  def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, narration_text=None, segment_index=0):
729
- """Creates a single video clip segment with media, audio, and optional captions."""
730
- print(f"\n--- Creating Clip Segment {segment_index} ---")
731
- print(f"Media: {media_path} ({asset_type})")
732
- print(f"TTS: {tts_path}")
733
- print(f"Narration: '{narration_text[:50]}...'")
734
-
735
  try:
736
- # Validate inputs
737
- if not media_path or not os.path.exists(media_path):
738
- print(f"Error: Media path not found or invalid: {media_path}")
739
- return None
740
- if not tts_path or not os.path.exists(tts_path):
741
- print(f"Error: TTS path not found or invalid: {tts_path}")
742
- # Attempt to use media without audio? Or fail? Let's fail for now.
743
  return None
744
 
745
- # Load audio first to determine duration
746
- audio_clip = AudioFileClip(tts_path)
747
- # Add slight fade out to avoid abrupt cuts
748
- audio_clip = audio_clip.audio_fadeout(0.2)
749
- target_duration = audio_clip.duration
750
- if target_duration is None or target_duration <= 0.1: # Check for valid duration
751
- print(f"Warning: Audio clip {tts_path} has invalid duration ({target_duration}). Estimating 3 seconds.")
752
- target_duration = 3.0 # Fallback duration
753
- # Recreate audio clip with fixed duration if possible? Or just use the duration.
754
- audio_clip = audio_clip.set_duration(target_duration)
755
-
756
 
757
- print(f"Audio Duration: {target_duration:.2f}s")
758
-
759
- # --- Create Video/Image Clip ---
760
- clip = None
761
  if asset_type == "video":
762
- try:
763
- clip = VideoFileClip(media_path, target_resolution=TARGET_RESOLUTION[::-1]) # Provide target res hint
764
- # Ensure video has audio track removed initially if we overlay TTS fully
765
- clip = clip.without_audio()
766
-
767
- # Resize/Crop to fill target resolution
768
- clip = resize_to_fill(clip, TARGET_RESOLUTION)
769
-
770
- # Loop or cut video to match audio duration
771
- if clip.duration < target_duration:
772
- print(f"Looping video (duration {clip.duration:.2f}s) to match audio.")
773
- clip = clip.loop(duration=target_duration)
774
- else:
775
- # Start from a random point if video is longer? Or just take the start?
776
- start_time = 0
777
- # Optional: random start time if video is much longer
778
- # if clip.duration > target_duration + 2:
779
- # start_time = random.uniform(0, clip.duration - target_duration)
780
- print(f"Subclipping video from {start_time:.2f}s to {start_time + target_duration:.2f}s.")
781
- clip = clip.subclip(start_time, start_time + target_duration)
782
-
783
- # Add fade in/out for smoother transitions
784
- clip = clip.fadein(0.3).fadeout(0.3)
785
-
786
- except Exception as video_e:
787
- print(f"Error processing video file {media_path}: {video_e}")
788
- # Fallback to a black screen?
789
- clip = ColorClip(size=TARGET_RESOLUTION, color=(0,0,0), duration=target_duration)
790
-
791
  elif asset_type == "image":
792
- try:
793
- # Use tempfile for converted image if needed (handled in download now)
794
- # Load image clip
795
- clip = ImageClip(media_path).set_duration(target_duration)
796
-
797
- # Apply Ken Burns effect
798
- clip = apply_kenburns_effect(clip, TARGET_RESOLUTION, effect_type=effects or "random")
799
-
800
- # Fades are good for images too
801
- clip = clip.fadein(0.3).fadeout(0.3)
802
-
803
- except Exception as img_e:
804
- print(f"Error processing image file {media_path}: {img_e}")
805
- # Fallback to a grey screen?
806
- clip = ColorClip(size=TARGET_RESOLUTION, color=(50,50,50), duration=target_duration)
807
  else:
808
- print(f"Error: Unknown asset type '{asset_type}'")
809
- return None # Unknown type
810
-
811
- # Ensure clip has the correct duration after processing
812
- clip = clip.set_duration(target_duration)
813
 
814
- # --- Add Captions ---
815
- subtitle_clips = []
816
  if narration_text and CAPTION_COLOR != "transparent":
817
- print("Adding captions...")
818
  try:
819
- # Simple word splitting for timing (can be improved with proper SRT/timing info)
820
  words = narration_text.split()
821
- words_per_chunk = 5 # Adjust number of words per caption line
822
- chunks = [' '.join(words[i:i+words_per_chunk]) for i in range(0, len(words), words_per_chunk)]
823
- if not chunks: chunks = [narration_text] # Handle empty or short text
824
-
825
- chunk_duration = target_duration / len(chunks) if len(chunks) > 0 else target_duration
826
-
827
- # Calculate font size based on resolution (heuristic)
828
- font_size = int(TARGET_RESOLUTION[1] / 25) # Adjust divisor as needed
829
-
830
- # Position captions towards the bottom
831
- subtitle_y_position = int(TARGET_RESOLUTION[1] * 0.85) # Lower position
 
 
832
 
833
  for i, chunk_text in enumerate(chunks):
834
  start_time = i * chunk_duration
835
- # Ensure end time doesn't exceed clip duration
836
- end_time = min((i + 1) * chunk_duration, target_duration)
837
- # Avoid zero-duration captions
838
- if end_time <= start_time: end_time = start_time + 0.1
839
-
840
- # Create TextClip for the chunk
841
- # Ensure font is available in the environment (Arial is common, but might need install)
842
- # Added stroke for better visibility
843
  txt_clip = TextClip(
844
  chunk_text,
845
- fontsize=font_size,
846
- font='Arial-Bold', # Ensure this font is available or choose another like 'Liberation-Sans-Bold'
847
  color=CAPTION_COLOR,
848
- bg_color='rgba(0, 0, 0, 0.5)', # Slightly darker background
849
- method='caption', # Wraps text
850
  align='center',
851
- stroke_color='black', # Black stroke
852
- stroke_width=max(1, font_size // 20), # Stroke width relative to font size
853
- size=(TARGET_RESOLUTION[0] * 0.85, None) # Limit width
854
- ).set_start(start_time).set_duration(end_time - start_time).set_position(('center', subtitle_y_position))
855
-
856
  subtitle_clips.append(txt_clip)
857
 
858
- # Composite the main clip with subtitles
859
- if subtitle_clips:
860
- clip = CompositeVideoClip([clip] + subtitle_clips, size=TARGET_RESOLUTION)
861
- print(f"Added {len(subtitle_clips)} caption segments.")
 
 
 
 
 
 
 
862
 
863
- except Exception as caption_e:
864
- # This often happens if ImageMagick or fonts are missing/misconfigured
865
- print(f"ERROR: Failed to create captions: {caption_e}")
866
- print("Check if ImageMagick is installed and configured, and if the font (e.g., Arial-Bold) is available.")
867
- # Continue without captions if they fail
868
-
869
- # Set the audio track
870
  clip = clip.set_audio(audio_clip)
871
-
872
- print(f"Clip Segment {segment_index} created successfully.")
873
  return clip
 
 
 
874
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
875
  except Exception as e:
876
- print(f"FATAL ERROR creating clip segment {segment_index}: {e}")
877
- import traceback
878
- traceback.print_exc() # Print detailed traceback for debugging
879
- # Return a short, silent black clip to avoid crashing the concatenation
880
- return ColorClip(size=TARGET_RESOLUTION, color=(0,0,0), duration=1.0).set_audio(None)
881
-
882
-
883
- # Main Gradio Function
884
def generate_video(video_concept, resolution_choice, caption_option):
    """Generate the complete video for a concept; entry point used by the Gradio UI.

    Sets the module-level TARGET_RESOLUTION and CAPTION_COLOR from the UI
    choices, recreates TEMP_FOLDER, generates and parses the script, builds one
    clip per media/narration pair, concatenates them, adds background music and
    writes OUTPUT_VIDEO_FILENAME.

    Args:
        video_concept: Topic or full script text.
        resolution_choice: "Short (9:16)" selects 1080x1920; anything else 1920x1080.
        caption_option: "Yes" enables white captions; anything else disables them.

    Returns:
        OUTPUT_VIDEO_FILENAME on success, otherwise a string starting with "Error:".
    """
    global TARGET_RESOLUTION, CAPTION_COLOR

    print("\n\n--- Starting Video Generation ---")
    print(f"Concept: {video_concept}")
    print(f"Resolution: {resolution_choice}")
    print(f"Captions: {caption_option}")

    if resolution_choice == "Short (9:16)":
        TARGET_RESOLUTION = (1080, 1920)
    else:
        TARGET_RESOLUTION = (1920, 1080)
    # "transparent" is the sentinel create_clip checks to skip captions.
    CAPTION_COLOR = "white" if caption_option == "Yes" else "transparent"

    # --- Cleanup and Setup ---
    if os.path.exists(TEMP_FOLDER):
        print(f"Removing existing temp folder: {TEMP_FOLDER}")
        shutil.rmtree(TEMP_FOLDER, ignore_errors=True)
    try:
        os.makedirs(TEMP_FOLDER, exist_ok=True)
        print(f"Created temp folder: {TEMP_FOLDER}")
    except OSError as e:
        print(f"Error creating temp folder {TEMP_FOLDER}: {e}")
        return f"Error: Could not create temporary directory. Check permissions. {e}"

    clips = []
    final_video = None
    try:
        # --- Script Generation ---
        print("Generating script...")
        script = generate_script(video_concept)
        if not script:
            print("Error: Failed to generate script.")
            return "Error: Failed to generate script from AI. Please try a different concept or check API keys."

        # --- Script Parsing ---
        print("Parsing script...")
        elements = parse_script(script)
        if not elements:
            print("Error: Failed to parse script into elements.")
            return "Error: Failed to parse the generated script. The script might be malformed."

        # The parser emits alternating media / tts elements; pair them up.
        paired_elements = []
        for i in range(0, len(elements), 2):
            pair = elements[i:i + 2]
            if len(pair) == 2 and pair[0]['type'] == 'media' and pair[1]['type'] == 'tts':
                paired_elements.append((pair[0], pair[1]))
            else:
                print(f"Warning: Skipping mismatched elements at index {i}")

        if not paired_elements:
            print("Error: No valid media/TTS pairs found after parsing.")
            return "Error: Could not find valid [Title]/Narration pairs in the script."
        print(f"Found {len(paired_elements)} pairs of media prompts and narrations.")

        # --- Clip Generation Loop ---
        total_segments = len(paired_elements)
        for idx, (media_elem, tts_elem) in enumerate(paired_elements):
            print(f"\nProcessing Segment {idx+1}/{total_segments}: Prompt='{media_elem['prompt']}'")

            media_asset = generate_media(media_elem['prompt'], current_index=idx, total_segments=total_segments)
            if not media_asset or not media_asset.get('path'):
                print(f"Warning: Failed to generate media for '{media_elem['prompt']}'. Skipping segment.")
                continue

            tts_path = generate_tts(tts_elem['text'], tts_elem['voice'])
            if not tts_path:
                print(f"Warning: Failed to generate TTS for segment {idx}. Skipping segment.")
                continue

            clip = create_clip(
                media_path=media_asset['path'],
                asset_type=media_asset['asset_type'],
                tts_path=tts_path,
                duration=tts_elem['duration'],
                effects=media_elem.get('effects', 'random'),
                narration_text=tts_elem['text'],
                segment_index=idx
            )
            if clip:
                clips.append(clip)
            else:
                print(f"Warning: Failed to create clip for segment {idx}. Skipping.")

        # --- Final Video Assembly ---
        if not clips:
            print("Error: No clips were successfully created.")
            return "Error: Failed to create any video segments. Check logs for media/TTS/clip creation errors."

        print(f"\nConcatenating {len(clips)} video clips...")
        try:
            final_video = concatenate_videoclips(clips, method="compose")
        except Exception as concat_e:
            print(f"Error during video concatenation: {concat_e}")
            return f"Error: Failed to combine video segments: {concat_e}"

        # --- Add Background Music ---
        final_video = add_background_music(final_video, bg_music_volume=0.08)

        # --- Write Output File ---
        print(f"Writing final video to {OUTPUT_VIDEO_FILENAME}...")
        try:
            final_video.write_videofile(
                OUTPUT_VIDEO_FILENAME,
                codec='libx264',
                audio_codec='aac',
                fps=24,
                preset='medium',
                threads=4,
                logger='bar'
            )
            print("Final video written successfully.")
        except Exception as write_e:
            print(f"Error writing final video file: {write_e}")
            return f"Error: Failed to write the final video file: {write_e}"

        print("--- Video Generation Complete ---")
        return OUTPUT_VIDEO_FILENAME

    finally:
        # Single cleanup point for every exit path. Closing releases the file
        # handles held by the clips; failures here must never replace the
        # value being returned.
        for clip in clips:
            try:
                clip.close()
            except Exception as close_e:
                print(f"Warning: could not close clip: {close_e}")
        if final_video is not None:
            try:
                final_video.close()
            except Exception as close_e:
                print(f"Warning: could not close final video: {close_e}")
        print(f"Cleaning up temporary folder: {TEMP_FOLDER}")
        shutil.rmtree(TEMP_FOLDER, ignore_errors=True)
1038
 
1039
- # --- Gradio Interface Definition ---
1040
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page header shown above the controls.
    gr.Markdown(
        """
        # 🎬 AI Documentary Video Generator 🎥
        Enter a concept or topic, and the AI will generate a short, humorous documentary-style video.
        Configure API keys (Pexels, OpenRouter) and ensure `background_music.mp3` exists before running.
        """
    )

    with gr.Row():
        # Left column: the inputs and the trigger button.
        with gr.Column(scale=2):
            video_concept = gr.Textbox(
                lines=4,
                label="Video Concept / Topic / Script",
                placeholder="e.g., 'The secret life of squirrels', 'Why cats secretly judge us', or paste a full script starting with [Title]...",
            )
            with gr.Row():
                resolution = gr.Dropdown(
                    choices=["Full HD (16:9)", "Short (9:16)"],
                    value="Full HD (16:9)",
                    label="Resolution",
                )
                caption_option = gr.Dropdown(
                    choices=["Yes", "No"],
                    value="Yes",
                    label="Add Captions",
                )
            generate_btn = gr.Button(value="✨ Generate Video ✨", variant="primary")

        # Right column: the result player and a read-only status box.
        with gr.Column(scale=3):
            output_video = gr.Video(label="Generated Video")
            # This box is created but is not among the click outputs below.
            status_message = gr.Textbox(label="Status", interactive=False)

    # Run the generator with the three inputs; its result feeds the video player.
    generate_btn.click(
        generate_video,
        [video_concept, resolution, caption_option],
        [output_video],
    )

# Start the app only when this file is executed directly.
if __name__ == "__main__":
    # Warn at startup when the optional background music file is absent.
    if not os.path.exists(BACKGROUND_MUSIC_PATH):
        print(f"\n*** WARNING: Background music file '{BACKGROUND_MUSIC_PATH}' not found. Background music will be skipped. ***\n")
    demo.launch(debug=True)
 
1
# Install necessary packages.
# NOTE(review): the "!" lines are notebook shell escapes, not Python; they only
# work when this code runs in a notebook cell.
!pip install transformers==4.49.0
!pip install moviepy gTTS requests pydub pillow
!pip cache purge
!apt-get install imagemagick -y
# The specifier is quoted: unquoted, the shell reads ">=0.3.4" as an output
# redirection and installs an unpinned kokoro.
!pip install "kokoro>=0.3.4" soundfile
# "-qq" is an option of apt-get, not part of the command name.
!apt-get -qq -y install espeak-ng > /dev/null 2>&1
!pip install pysrt
!pip install gradio  # Added Gradio installation
10
+
11
+ # Import necessary libraries
12
+ from kokoro import KPipeline
13
+ from IPython.display import display, Audio
14
+ import soundfile as sf
15
+ import torch
16
+ from IPython.display import display, Audio, HTML
17
+ import soundfile as sf
18
  import os
19
+ from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip
20
+ from PIL import Image
21
+ import tempfile
 
 
22
  import random
 
 
23
  import cv2
24
+ import math
25
+ import os, requests, io, time, re, random
26
+ from moviepy.editor import (
27
+ VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
28
+ CompositeVideoClip, TextClip
29
+ )
30
+ import moviepy.video.fx.all as vfx
31
+ import moviepy.config as mpy_config
32
+ from pydub import AudioSegment
33
+ from pydub.generators import Sine
34
+ from google.colab import files
35
+ from PIL import Image, ImageDraw, ImageFont
36
  import numpy as np
 
 
 
 
37
  from bs4 import BeautifulSoup
38
+ import base64
39
  from urllib.parse import quote
40
+ import pysrt
41
  from gtts import gTTS
42
+ import gradio as gr # Import Gradio
 
43
 
44
+ # Initialize Kokoro TTS pipeline (using American English)
45
+ pipeline = KPipeline(lang_code='a') # Use voice 'af_heart' for American English
46
+ # Ensure ImageMagick binary is set
47
+ mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
48
+
49
# ---------------- Global Configuration ---------------- #
# SECURITY: API credentials are read from the environment instead of being
# committed to the source file. Set PEXELS_API_KEY and OPENROUTER_API_KEY
# before starting the app.
# NOTE(review): the keys that used to be hard-coded here should be treated as
# leaked and rotated.
PEXELS_API_KEY = os.environ.get("PEXELS_API_KEY", "")
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
OUTPUT_VIDEO_FILENAME = "final_video.mp4"
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"

# ---------------- Helper Functions ---------------- #

# Per-run settings: declared here so they exist as module globals, and
# assigned for each generation run.
TARGET_RESOLUTION = None
CAPTION_COLOR = None
TEMP_FOLDER = None
67
 
68
  def generate_script(user_input):
69
+ """Generate documentary script with proper OpenRouter handling."""
 
 
 
 
70
  headers = {
71
  'Authorization': f'Bearer {OPENROUTER_API_KEY}',
72
+ 'HTTP-Referer': 'https://your-domain.com',
73
+ 'X-Title': 'AI Documentary Maker'
74
  }
75
+
76
+ prompt = f"""Short Documentary Script GeneratorInstructions:
77
+
78
  If I say "use this," just output the script exactly as I gave it.
79
  If I only give topics, generate a script based on them.
80
+ If I provide a full script, rewrite it without any changes. Make everything short simple and humarous funny and act as serious but humarous. And don't say anything off topic. Also alway say a funny statement to subscribe based on the video topic at the end. Use normal conversational text like a normal person talking and avoid AI phase make the statements humanize and normal conversational
81
+ And always talk negative views don't really wants any positive statement and the full script will be one video about one topic so keep in mind
 
 
 
 
 
 
82
  Formatting Rules:
83
+
84
+
85
+ Title in Square Brackets:
86
+
87
+
88
+ Each section starts with a one-word title inside [ ] (max two words if necessary).
89
+ This title will be used as a search term for Pexels footage.
90
+
91
+
92
+
93
+ Casual & Funny Narration:
94
+
95
+
96
+ Each section has 5-10 words of narration.
97
+ Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).
98
+
99
+
100
+
101
+ No Special Formatting:
102
+
103
+
104
+ No bold, italics, or special characters. You are a assistant AI your task is to create script. You aren't a chatbot. So, don't write extra text
105
+
106
+
107
+
108
+ Generalized Search Terms:
109
+
110
+
111
+ If a term is too specific, make it more general for Pexels search.
112
+
113
+
114
+
115
+ Scene-Specific Writing:
116
+
117
+
118
+ Each section describes only what should be shown in the video.
119
+
120
+
121
+
122
+ Output Only the Script, and also make it funny and humarous and helirous and also add to subscribe with a funny statement like subscribe now or .....
123
+
124
+
125
+ No extra text, just the script.
126
+
127
+
128
 
129
  Example Output:
130
  [North Korea]
131
+
132
+ Top 5 unknown facts about North Korea.
133
+
134
  [Invisibility]
135
+
136
+ North Korea’s internet speed is so fast… it doesn’t exist.
137
+
138
  [Leadership]
139
+
140
+ Kim Jong-un once won an election with 100% votes… against himself.
141
+
142
  [Magic]
143
+
144
+ North Korea discovered time travel. That’s why their news is always from the past.
145
+
146
  [Warning]
147
+
148
+ Subscribe now, or Kim Jong-un will send you a free one-way ticket… to North Korea.
149
+
150
  [Freedom]
 
151
 
152
+ North Korean citizens can do anything… as long as it's government-approved.
153
+ Now here is the Topic/scrip: {user_input}
154
  """
155
+
156
  data = {
157
  'model': OPENROUTER_MODEL,
158
  'messages': [{'role': 'user', 'content': prompt}],
159
+ 'temperature': 0.4,
160
+ 'max_tokens': 5000
161
  }
162
+
163
  try:
164
  response = requests.post(
165
  'https://openrouter.ai/api/v1/chat/completions',
166
  headers=headers,
167
  json=data,
168
+ timeout=30
169
  )
170
+
171
+ if response.status_code == 200:
172
+ response_data = response.json()
173
+ if 'choices' in response_data and len(response_data['choices']) > 0:
174
+ return response_data['choices'][0]['message']['content']
175
+ else:
176
+ print("Unexpected response format:", response_data)
177
+ return None
 
178
  else:
179
+ print(f"API Error {response.status_code}: {response.text}")
180
  return None
181
+
 
 
182
  except Exception as e:
183
+ print(f"Request failed: {str(e)}")
184
  return None
185
 
186
  def parse_script(script_text):
187
+ """
188
+ Parse the generated script into a list of elements.
189
+ For each section, create two elements:
190
+ - A 'media' element using the section title as the visual prompt.
191
+ - A 'tts' element with the narration text, voice info, and computed duration.
192
+ """
193
  sections = {}
194
  current_title = None
195
  current_text = ""
196
+
197
  try:
198
  for line in script_text.splitlines():
199
  line = line.strip()
200
+ if line.startswith("[") and "]" in line:
201
+ bracket_start = line.find("[")
202
+ bracket_end = line.find("]", bracket_start)
203
+ if bracket_start != -1 and bracket_end != -1:
204
+ if current_title is not None:
205
+ sections[current_title] = current_text.strip()
206
+ current_title = line[bracket_start+1:bracket_end]
207
+ current_text = line[bracket_end+1:].strip()
208
+ elif current_title:
209
+ current_text += line + " "
210
+
211
+ if current_title:
 
 
 
212
  sections[current_title] = current_text.strip()
213
 
214
  elements = []
 
 
 
 
 
 
 
 
 
 
 
 
215
  for title, narration in sections.items():
216
  if not title or not narration:
 
217
  continue
218
+
219
+ media_element = {"type": "media", "prompt": title, "effects": "fade-in"}
 
220
  words = narration.split()
221
+ duration = max(3, len(words) * 0.5)
222
+ tts_element = {"type": "tts", "text": narration, "voice": "en", "duration": duration}
 
223
  elements.append(media_element)
224
  elements.append(tts_element)
225
 
 
 
226
  return elements
227
  except Exception as e:
228
+ print(f"Error parsing script: {e}")
229
  return []
230
 
231
def search_pexels_videos(query, pexels_api_key):
    """Search Pexels for videos matching ``query`` and return one random HD link.

    Up to three result pages are requested, each with up to three attempts and
    an exponential backoff that restarts for every page.

    Args:
        query: Search term.
        pexels_api_key: Value sent in the ``Authorization`` header.

    Returns:
        The link of a randomly chosen HD video file, or None when no page
        yielded one.
    """
    headers = {'Authorization': pexels_api_key}
    base_url = "https://api.pexels.com/videos/search"
    num_pages = 3
    videos_per_page = 15
    max_retries = 3

    all_videos = []
    for page in range(1, num_pages + 1):
        # Restart the backoff for each page so one slow page does not inflate
        # the waits of the following ones.
        retry_delay = 1
        params = {"query": query, "per_page": videos_per_page, "page": page}

        for attempt in range(max_retries):
            try:
                response = requests.get(base_url, headers=headers, params=params, timeout=10)

                if response.status_code == 200:
                    videos = response.json().get("videos", [])
                    if not videos:
                        print(f"No videos found on page {page}.")
                    for video in videos:
                        # First HD rendition that actually has a link; entries
                        # without one would later be handed to the downloader as None.
                        hd_link = next(
                            (
                                video_file.get("link")
                                for video_file in video.get("video_files", [])
                                if video_file.get("quality") == "hd" and video_file.get("link")
                            ),
                            None,
                        )
                        if hd_link:
                            all_videos.append(hd_link)
                    break

                if response.status_code == 429:
                    print(f"Rate limit hit (attempt {attempt+1}/{max_retries}).")
                else:
                    print(f"Error fetching videos: {response.status_code} {response.text}")

            except requests.exceptions.RequestException as e:
                print(f"Request exception: {e}")

            # No point in waiting when there is no further attempt for this page.
            if attempt == max_retries - 1:
                break
            print(f"Retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)
            retry_delay *= 2

    if not all_videos:
        print("No suitable videos found after searching all pages.")
        return None

    random_video = random.choice(all_videos)
    print(f"Selected random video from {len(all_videos)} HD videos")
    return random_video
296
 
297
def search_pexels_images(query, pexels_api_key):
    """Look up landscape photos on Pexels and return the URL of a random hit.

    Args:
        query: Search term.
        pexels_api_key: Value sent in the ``Authorization`` header.

    Returns:
        The "original" source URL of one of the returned photos, or None when
        the search has no results or every attempt fails.
    """
    endpoint = "https://api.pexels.com/v1/search"
    auth_headers = {'Authorization': pexels_api_key}
    search_params = {"query": query, "per_page": 5, "orientation": "landscape"}

    max_retries = 3
    retry_delay = 1

    for attempt in range(max_retries):
        try:
            response = requests.get(endpoint, headers=auth_headers, params=search_params, timeout=10)

            if response.status_code == 200:
                photos = response.json().get("photos", [])
                if not photos:
                    print(f"No images found for query: {query}")
                    return None
                chosen = random.choice(photos[:5])
                return chosen.get("src", {}).get("original")

            if response.status_code == 429:
                # Rate limited: always wait, even on the final attempt.
                print(f"Rate limit hit (attempt {attempt+1}/{max_retries}). Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
                retry_delay *= 2
                continue

            print(f"Error fetching images: {response.status_code} {response.text}")

        except requests.exceptions.RequestException as e:
            print(f"Request exception: {e}")

        # Shared backoff for HTTP errors and request exceptions; skipped after the last try.
        if attempt < max_retries - 1:
            print(f"Retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)
            retry_delay *= 2

    print(f"No Pexels images found for query: {query} after all attempts")
    return None
 
 
 
 
341
 
342
  def search_google_images(query):
343
+ """Search for images on Google Images (for news-related queries)"""
 
344
  try:
345
+ search_url = f"https://www.google.com/search?q={quote(query)}&tbm=isch"
346
  headers = {"User-Agent": USER_AGENT}
347
+ response = requests.get(search_url, headers=headers, timeout=10)
 
348
  soup = BeautifulSoup(response.text, "html.parser")
349
 
 
 
 
 
350
  img_tags = soup.find_all("img")
351
+ image_urls = []
352
  for img in img_tags:
353
+ src = img.get("src", "")
354
+ if src.startswith("http") and "gstatic" not in src:
355
+ image_urls.append(src)
 
 
 
 
 
356
 
357
  if image_urls:
358
+ return random.choice(image_urls[:5]) if len(image_urls) >= 5 else image_urls[0]
 
359
  else:
360
+ print(f"No Google Images found for query: {query}")
361
  return None
 
 
 
362
  except Exception as e:
363
+ print(f"Error in Google Images search: {e}")
364
  return None
365
 
366
def download_image(image_url, filename):
    """Download an image to ``filename``, validate it and normalise it to RGB.

    Args:
        image_url: URL of the image to fetch.
        filename: Local path the image is written to.

    Returns:
        ``filename`` when the download succeeded and the file is a valid image;
        None otherwise. A partially written or invalid file is removed.
    """
    try:
        headers = {"User-Agent": USER_AGENT}
        print(f"Downloading image from: {image_url} to {filename}")
        # The context manager releases the streamed connection on every path.
        with requests.get(image_url, headers=headers, stream=True, timeout=15) as response:
            response.raise_for_status()
            with open(filename, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        print(f"Image downloaded successfully to: {filename}")

        try:
            with Image.open(filename) as img:
                img.verify()
            # verify() leaves the image object unusable, so the file is reopened.
            with Image.open(filename) as img:
                rgb_image = img.convert('RGB') if img.mode != 'RGB' else None
            # Save only after the source handle is closed, because the
            # destination is the same file.
            if rgb_image is not None:
                rgb_image.save(filename)
            print(f"Image validated and processed: {filename}")
            return filename
        except Exception as e_validate:
            print(f"Downloaded file is not a valid image: {e_validate}")
            if os.path.exists(filename):
                os.remove(filename)
            return None

    except requests.exceptions.RequestException as e_download:
        print(f"Image download error: {e_download}")
        if os.path.exists(filename):
            os.remove(filename)
        return None
    except Exception as e_general:
        print(f"General error during image processing: {e_general}")
        if os.path.exists(filename):
            os.remove(filename)
        return None
405
+
406
def download_video(video_url, filename):
    """Download a video from a URL to a local file.

    Args:
        video_url: URL of the video to fetch.
        filename: Local path the video is written to.

    Returns:
        ``filename`` on success, or ``None`` on any error (a partially
        written file is removed).
    """
    try:
        # Use the response as a context manager so the streamed connection
        # is released even if the transfer fails half-way.
        with requests.get(video_url, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(filename, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        print(f"Video downloaded successfully to: {filename}")
        return filename
    except Exception as e:
        print(f"Video download error: {e}")
        if os.path.exists(filename):
            os.remove(filename)
        return None
421
 
422
def generate_media(prompt, user_image=None, current_index=0, total_segments=1):
    """
    Find and download a visual asset for ``prompt``.

    Lookup order: Google Images when the prompt mentions "news", then (with
    25% probability) a Pexels video, then a Pexels image, and finally Pexels
    images for a fixed list of generic fallback terms.

    ``user_image``, ``current_index`` and ``total_segments`` are accepted but
    not used by this function.

    Returns a dict ``{'path': <file_path>, 'asset_type': 'video' or 'image'}``,
    or ``None`` when every strategy fails.
    """
    safe_prompt = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')

    def _asset(path, kind):
        return {"path": path, "asset_type": kind}

    # Strategy 1: news prompts go to Google Images first.
    if "news" in prompt.lower():
        print(f"News-related query detected: {prompt}. Using Google Images...")
        news_target = os.path.join(TEMP_FOLDER, f"{safe_prompt}_news.jpg")
        news_url = search_google_images(prompt)
        if not news_url:
            print(f"Google Images search failed for prompt: {prompt}")
        else:
            saved_news = download_image(news_url, news_target)
            if saved_news:
                print(f"News image saved to {saved_news}")
                return _asset(saved_news, "image")

    # Strategy 2: roughly one segment in four tries a stock video.
    if random.random() < 0.25:
        video_target = os.path.join(TEMP_FOLDER, f"{safe_prompt}_video.mp4")
        clip_url = search_pexels_videos(prompt, PEXELS_API_KEY)
        if not clip_url:
            print(f"Pexels video search failed for prompt: {prompt}")
        else:
            saved_video = download_video(clip_url, video_target)
            if saved_video:
                print(f"Video asset saved to {saved_video}")
                return _asset(saved_video, "video")

    # Strategy 3: stock image for the prompt itself.
    image_target = os.path.join(TEMP_FOLDER, f"{safe_prompt}.jpg")
    picture_url = search_pexels_images(prompt, PEXELS_API_KEY)
    if picture_url:
        saved_image = download_image(picture_url, image_target)
        if not saved_image:
            print(f"Pexels image download failed for prompt: {prompt}")
        else:
            print(f"Image asset saved to {saved_image}")
            return _asset(saved_image, "image")

    # Strategy 4: generic stock images, tried in order.
    for term in ["nature", "people", "landscape", "technology", "business"]:
        print(f"Trying fallback image search with term: {term}")
        generic_target = os.path.join(TEMP_FOLDER, f"fallback_{term}.jpg")
        generic_url = search_pexels_images(term, PEXELS_API_KEY)
        if not generic_url:
            print(f"Fallback image search failed for term: {term}")
            continue
        saved_generic = download_image(generic_url, generic_target)
        if not saved_generic:
            print(f"Fallback image download failed for term: {term}")
            continue
        print(f"Fallback image saved to {saved_generic}")
        return _asset(saved_generic, "image")

    print(f"Failed to generate visual asset for prompt: {prompt}")
    return None
480
+
481
def generate_silent_audio(duration, sample_rate=24000):
    """Generate a silent WAV audio file lasting 'duration' seconds.

    Args:
        duration: Length of the silence in seconds; negative values are
            treated as zero.
        sample_rate: Sample rate of the written file in Hz.

    Returns:
        Path of the WAV file created inside ``TEMP_FOLDER``.
    """
    import uuid

    num_samples = max(0, int(duration * sample_rate))
    silence = np.zeros(num_samples, dtype=np.float32)
    # A timestamp alone has one-second granularity, so two calls within the
    # same second used to overwrite each other's file; the random suffix
    # makes every path unique.
    unique_name = f"silent_{int(time.time())}_{uuid.uuid4().hex[:8]}.wav"
    silent_path = os.path.join(TEMP_FOLDER, unique_name)
    sf.write(silent_path, silence, sample_rate)
    print(f"Silent audio generated: {silent_path}")
    return silent_path
489
+
490
def generate_tts(text, voice):
    """
    Generate TTS audio using Kokoro, falling back to gTTS or silent audio if needed.

    Args:
        text: Narration text to synthesise.
        voice: Voice identifier; ``'en'`` is mapped to the Kokoro voice
            ``'af_heart'``, any other value is passed to Kokoro unchanged.

    Returns:
        Path of a WAV file inside ``TEMP_FOLDER``. The file is reused when
        the same text/voice pair was already synthesised.
    """
    import hashlib

    safe_text = re.sub(r'[^\w\s-]', '', text[:10]).strip().replace(' ', '_')
    # The readable 10-character prefix is not unique: two narrations that
    # start with the same words would share one cache file. A digest of
    # the full text and voice keeps the cache key collision-free.
    digest = hashlib.sha1(f"{voice}\0{text}".encode("utf-8")).hexdigest()[:12]
    file_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}_{digest}.wav")

    if os.path.exists(file_path):
        print(f"Using cached TTS for text '{text[:10]}...'")
        return file_path

    try:
        kokoro_voice = 'af_heart' if voice == 'en' else voice
        generator = pipeline(text, voice=kokoro_voice, speed=0.9, split_pattern=r'\n+')
        audio_segments = [audio for _gs, _ps, audio in generator]
        if not audio_segments:
            # Route an empty result to the gTTS fallback explicitly.
            raise ValueError("Kokoro produced no audio")
        full_audio = np.concatenate(audio_segments) if len(audio_segments) > 1 else audio_segments[0]
        sf.write(file_path, full_audio, 24000)
        print(f"TTS audio saved to {file_path} (Kokoro)")
        return file_path
    except Exception as e:
        print(f"Error with Kokoro TTS: {e}")
        try:
            print("Falling back to gTTS...")
            tts = gTTS(text=text, lang='en')
            mp3_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}_{digest}.mp3")
            tts.save(mp3_path)
            audio = AudioSegment.from_mp3(mp3_path)
            audio.export(file_path, format="wav")
            os.remove(mp3_path)
            print(f"Fallback TTS saved to {file_path} (gTTS)")
            return file_path
        except Exception as fallback_error:
            print(f"Both TTS methods failed: {fallback_error}")
            # Roughly half a second per word, never shorter than 3 seconds.
            return generate_silent_audio(duration=max(3, len(text.split()) * 0.5))
526
 
527
def apply_kenburns_effect(clip, target_resolution, effect_type=None):
    """Animate a clip with one smooth zoom or pan (Ken Burns style).

    The clip is scaled to cover ``target_resolution``, enlarged by a further
    15%, and then a moving crop window is resampled to the target size for
    every frame.

    Args:
        clip: Clip exposing ``w``, ``h``, ``duration``, ``resize`` and ``fl``.
        target_resolution: ``(width, height)`` of the output frames.
        effect_type: One of ``"zoom-in"``, ``"zoom-out"``, ``"pan-left"``,
            ``"pan-right"``, ``"up-left"``; ``None`` or ``"random"`` picks
            one at random.

    Returns:
        The result of ``clip.fl`` with the per-frame transform applied.

    Raises:
        ValueError: If ``effect_type`` is not a supported effect.
    """
    target_w, target_h = target_resolution
    source_aspect = clip.w / clip.h
    frame_aspect = target_w / target_h

    # Scale so the clip covers the whole target frame.
    if source_aspect > frame_aspect:
        fit_h = target_h
        fit_w = int(fit_h * source_aspect)
    else:
        fit_w = target_w
        fit_h = int(fit_w / source_aspect)
    fitted = clip.resize(newsize=(fit_w, fit_h))

    # Enlarge once more so the crop window has room to travel.
    overscan = 1.15
    canvas_w = int(fit_w * overscan)
    canvas_h = int(fit_h * overscan)
    canvas = fitted.resize(newsize=(canvas_w, canvas_h))

    slack_x = canvas_w - target_w
    slack_y = canvas_h - target_h

    if effect_type is None or effect_type == "random":
        effect_type = random.choice(["zoom-in", "zoom-out", "pan-left", "pan-right", "up-left"])

    middle = (canvas_w / 2, canvas_h / 2)
    pan_row = (slack_y // 2) + target_h / 2
    near_x = target_w / 2
    far_x = slack_x + target_w / 2

    # effect name -> (start zoom, end zoom, start centre, end centre)
    motions = {
        "zoom-in": (0.9, 1.1, middle, middle),
        "zoom-out": (1.1, 0.9, middle, middle),
        "pan-left": (1.0, 1.0, (far_x, pan_row), (near_x, pan_row)),
        "pan-right": (1.0, 1.0, (near_x, pan_row), (far_x, pan_row)),
        "up-left": (
            1.0,
            1.0,
            (slack_x + target_w / 2, slack_y + target_h / 2),
            (target_w / 2, target_h / 2),
        ),
    }
    try:
        zoom_from, zoom_to, (from_x, from_y), (to_x, to_y) = motions[effect_type]
    except (KeyError, TypeError):
        raise ValueError(f"Unsupported effect_type: {effect_type}") from None

    def transform_frame(get_frame, t):
        frame = get_frame(t)
        progress = t / canvas.duration if canvas.duration > 0 else 0
        # Cosine ease-in/ease-out so the motion starts and stops gently.
        eased = 0.5 - 0.5 * math.cos(math.pi * progress)

        zoom = zoom_from + (zoom_to - zoom_from) * eased
        crop_w = int(target_w / zoom)
        crop_h = int(target_h / zoom)

        centre_x = from_x + (to_x - from_x) * eased
        centre_y = from_y + (to_y - from_y) * eased

        # Keep the crop window fully inside the enlarged frame.
        half_w = crop_w / 2
        half_h = crop_h / 2
        centre_x = max(half_w, min(centre_x, canvas_w - half_w))
        centre_y = max(half_h, min(centre_y, canvas_h - half_h))

        window = cv2.getRectSubPix(frame, (crop_w, crop_h), (centre_x, centre_y))
        return cv2.resize(window, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)

    return canvas.fl(transform_frame)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
601
 
602
def resize_to_fill(clip, target_resolution):
    """Scale a clip to cover the target resolution, then centre-crop the excess.

    Args:
        clip: Clip exposing ``w``, ``h``, ``resize`` and ``crop``.
        target_resolution: ``(width, height)`` to fill.

    Returns:
        The resized and cropped clip.
    """
    target_w, target_h = target_resolution
    wider_than_target = (clip.w / clip.h) > (target_w / target_h)

    if not wider_than_target:
        # Match the width and trim equal bands from top and bottom.
        clip = clip.resize(width=target_w)
        trim = (clip.h - target_h) / 2
        return clip.crop(x1=0, x2=clip.w, y1=trim, y2=clip.h - trim)

    # Match the height and trim equal bands from left and right.
    clip = clip.resize(height=target_h)
    trim = (clip.w - target_w) / 2
    return clip.crop(x1=trim, x2=clip.w - trim, y1=0, y2=clip.h)
618
+
619
def find_mp3_files():
    """Walk the current directory tree and return the first ``.mp3`` path found.

    Every match is logged; ``None`` is returned when there is no match.
    """
    matches = []
    for folder, _subdirs, names in os.walk('.'):
        for name in names:
            if not name.endswith('.mp3'):
                continue
            located = os.path.join(folder, name)
            matches.append(located)
            print(f"Found MP3 file: {located}")
    if not matches:
        return None
    return matches[0]
629
+
630
def add_background_music(final_video, bg_music_volume=0.08):
    """Mix the first MP3 found by ``find_mp3_files`` under the video's audio.

    The track is looped when shorter than the video, cut to the video's
    duration and scaled by ``bg_music_volume``. The video is returned
    unchanged when no MP3 is found or when any step fails.
    """
    try:
        track_path = find_mp3_files()
        if not (track_path and os.path.exists(track_path)):
            print("No MP3 files found, skipping background music")
            return final_video

        print(f"Adding background music from: {track_path}")
        track = AudioFileClip(track_path)
        if track.duration < final_video.duration:
            # Repeat the track often enough to cover the whole video.
            repeats = math.ceil(final_video.duration / track.duration)
            track = concatenate_audioclips([track] * repeats)
        track = track.subclip(0, final_video.duration).volumex(bg_music_volume)

        narration = final_video.audio
        final_video = final_video.set_audio(CompositeAudioClip([narration, track]))
        print("Background music added successfully")
        return final_video
    except Exception as e:
        print(f"Error adding background music: {e}")
        print("Continuing without background music")
        return final_video
654
 
 
655
def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, narration_text=None, segment_index=0):
    """Create a video clip with synchronized subtitles and narration.

    Args:
        media_path: Path of the image or video used as the visual.
        asset_type: ``"video"`` or ``"image"``; anything else yields ``None``.
        tts_path: Path of the narration audio; its length drives the clip
            length (audio duration plus 0.2 s).
        duration: Accepted for interface compatibility; not used here.
        effects: Accepted for interface compatibility; not used here.
        narration_text: Text shown as subtitles in chunks of up to five
            words, unless ``CAPTION_COLOR`` is ``"transparent"``.
        segment_index: Index used only in log output.

    Returns:
        The assembled clip with audio attached, or ``None`` on failure.
    """
    try:
        print(f"Creating clip #{segment_index} with asset_type: {asset_type}, media_path: {media_path}")
        if not os.path.exists(media_path) or not os.path.exists(tts_path):
            print("Missing media or TTS file")
            return None

        audio_clip = AudioFileClip(tts_path).audio_fadeout(0.2)
        audio_duration = audio_clip.duration
        target_duration = audio_duration + 0.2

        if asset_type == "video":
            clip = VideoFileClip(media_path)
            clip = resize_to_fill(clip, TARGET_RESOLUTION)
            if clip.duration < target_duration:
                clip = clip.loop(duration=target_duration)
            else:
                clip = clip.subclip(0, target_duration)
        elif asset_type == "image":
            # The context manager closes the handle even if conversion fails.
            with Image.open(media_path) as img:
                if img.mode != 'RGB':
                    # Keep the converted copy inside TEMP_FOLDER so it is
                    # removed with the other temporary files instead of
                    # being left behind in the system temp directory.
                    scratch_dir = TEMP_FOLDER if os.path.isdir(TEMP_FOLDER) else None
                    with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False, dir=scratch_dir) as temp:
                        img.convert('RGB').save(temp.name)
                        media_path = temp.name
            clip = ImageClip(media_path).set_duration(target_duration)
            clip = apply_kenburns_effect(clip, TARGET_RESOLUTION)
            clip = clip.fadein(0.3).fadeout(0.3)
        else:
            audio_clip.close()
            return None

        # Whitespace-only narration has no words, so it gets no subtitles
        # (and no division by zero when computing the chunk duration).
        words = narration_text.split() if narration_text else []
        if words and CAPTION_COLOR != "transparent":
            try:
                chunks = [' '.join(words[i:i + 5]) for i in range(0, len(words), 5)]

                # Subtitles share the narration time evenly between chunks.
                chunk_duration = audio_duration / len(chunks)
                subtitle_clips = []
                subtitle_y_position = int(TARGET_RESOLUTION[1] * 0.70)

                for i, chunk_text in enumerate(chunks):
                    start_time = i * chunk_duration
                    end_time = (i + 1) * chunk_duration
                    txt_clip = TextClip(
                        chunk_text,
                        fontsize=45,
                        font='Arial-Bold',
                        color=CAPTION_COLOR,
                        bg_color='rgba(0, 0, 0, 0.25)',
                        method='caption',
                        align='center',
                        stroke_width=2,
                        stroke_color=CAPTION_COLOR,
                        size=(int(TARGET_RESOLUTION[0] * 0.8), None)
                    ).set_start(start_time).set_end(end_time)
                    txt_clip = txt_clip.set_position(('center', subtitle_y_position))
                    subtitle_clips.append(txt_clip)

                clip = CompositeVideoClip([clip] + subtitle_clips)
            except Exception as sub_error:
                print(f"Subtitle error: {sub_error}")
                try:
                    # Simpler single caption spanning the whole clip.
                    txt_clip = TextClip(
                        narration_text,
                        fontsize=28,
                        color=CAPTION_COLOR,
                        align='center',
                        size=(int(TARGET_RESOLUTION[0] * 0.7), None)
                    ).set_position(('center', int(TARGET_RESOLUTION[1] / 3))).set_duration(clip.duration)
                    clip = CompositeVideoClip([clip, txt_clip])
                except Exception as fallback_error:
                    # Text rendering is unavailable; keep the clip without
                    # subtitles rather than dropping the whole segment.
                    print(f"Subtitle error: {fallback_error}")

        clip = clip.set_audio(audio_clip)
        print(f"Clip created: {clip.duration:.1f}s")
        return clip
    except Exception as e:
        print(f"Error in create_clip: {str(e)}")
        return None
740
 
741
def fix_imagemagick_policy():
    """Fix ImageMagick security policies.

    Looks for a ``policy.xml`` in a fixed list of locations, backs it up to
    ``<path>.bak`` and rewrites it with ``sudo sed`` so that entries with
    ``rights="none"`` become ``rights="read|write"``.

    NOTE(review): this loosens ImageMagick's policy for the whole system and
    requires ``sudo``; confirm that is acceptable for the deployment.

    Returns:
        ``True`` when every command succeeded, ``False`` when no policy file
        was found or any command failed.
    """
    import subprocess

    try:
        print("Attempting to fix ImageMagick security policies...")
        policy_paths = [
            "/etc/ImageMagick-6/policy.xml",
            "/etc/ImageMagick-7/policy.xml",
            "/etc/ImageMagick/policy.xml",
            "/usr/local/etc/ImageMagick-7/policy.xml"
        ]
        found_policy = next((path for path in policy_paths if os.path.exists(path)), None)
        if not found_policy:
            print("No policy.xml found. Using alternative subtitle method.")
            return False
        print(f"Modifying policy file at {found_policy}")

        sed_scripts = [
            's/rights="none"/rights="read|write"/g',
            # Raw string: "\*" is a regex escape for sed, not a Python escape.
            # The pattern consumes the tag's closing ">", so the replacement
            # re-closes the tag to keep the XML well-formed.
            r's/<policy domain="path" pattern="@\*"[^>]*>/<policy domain="path" pattern="@*" rights="read|write"\/>/g',
            's/<policy domain="coder" rights="none" pattern="PDF"[^>]*>/<!-- <policy domain="coder" rights="none" pattern="PDF"> -->/g',
        ]
        commands = [["sudo", "cp", found_policy, f"{found_policy}.bak"]]
        commands.extend(["sudo", "sed", "-i", script, found_policy] for script in sed_scripts)

        # Argument lists avoid shell parsing; check=True turns a failed
        # command into an exception so success is not reported falsely.
        for command in commands:
            subprocess.run(command, check=True)

        print("ImageMagick policies updated successfully.")
        return True
    except Exception as e:
        print(f"Error fixing policies: {e}")
        return False
765
+
766
+ # ---------------- Main Function with Gradio Integration ---------------- #
767
def generate_video(user_input, resolution, caption_option):
    """Generate a video based on user input via Gradio.

    Sets the module-level ``TARGET_RESOLUTION``, ``CAPTION_COLOR`` and
    ``TEMP_FOLDER``, generates and parses a script, builds one clip per
    (media, narration) pair, concatenates them, adds background music and
    writes the result to ``OUTPUT_VIDEO_FILENAME``.

    Args:
        user_input: The video concept entered by the user.
        resolution: ``"Short"`` selects 1080x1920; any other value selects
            1920x1080.
        caption_option: ``"Yes"`` enables white captions; any other value
            disables them.

    Returns:
        ``OUTPUT_VIDEO_FILENAME`` on success, or ``None`` when the script
        could not be generated or parsed, or no clip could be created.
    """
    global TARGET_RESOLUTION, CAPTION_COLOR, TEMP_FOLDER
    import shutil

    # Set resolution ("Full" and unknown values share the landscape default)
    TARGET_RESOLUTION = (1080, 1920) if resolution == "Short" else (1920, 1080)

    # Set caption color
    CAPTION_COLOR = "white" if caption_option == "Yes" else "transparent"

    # Create a unique temporary folder
    TEMP_FOLDER = tempfile.mkdtemp()

    try:
        # Fix ImageMagick policy
        fix_success = fix_imagemagick_policy()
        if not fix_success:
            print("Will use alternative methods if needed")

        print("Generating script from API...")
        script = generate_script(user_input)
        if not script:
            print("Failed to generate script.")
            return None
        print("Generated Script:\n", script)

        elements = parse_script(script)
        if not elements:
            print("Failed to parse script into elements.")
            return None
        print(f"Parsed {len(elements)//2} script segments.")

        # Elements alternate media/narration; a trailing unpaired element
        # is dropped.
        paired_elements = list(zip(elements[0::2], elements[1::2]))
        if not paired_elements:
            print("No valid script segments found.")
            return None

        clips = []
        for idx, (media_elem, tts_elem) in enumerate(paired_elements):
            print(f"\nProcessing segment {idx+1}/{len(paired_elements)} with prompt: '{media_elem['prompt']}'")
            media_asset = generate_media(media_elem['prompt'], current_index=idx, total_segments=len(paired_elements))
            if not media_asset:
                print(f"Skipping segment {idx+1} due to missing media asset.")
                continue

            tts_path = generate_tts(tts_elem['text'], tts_elem['voice'])
            if not tts_path:
                print(f"Skipping segment {idx+1} due to TTS generation failure.")
                continue

            clip = create_clip(
                media_path=media_asset['path'],
                asset_type=media_asset['asset_type'],
                tts_path=tts_path,
                duration=tts_elem.get('duration'),
                effects=media_elem.get('effects', 'fade-in'),
                narration_text=tts_elem['text'],
                segment_index=idx
            )
            if clip:
                clips.append(clip)
            else:
                print(f"Clip creation failed for segment {idx+1}.")

        if not clips:
            print("No clips were successfully created.")
            return None

        print("\nConcatenating clips...")
        final_video = concatenate_videoclips(clips, method="compose")
        final_video = add_background_music(final_video, bg_music_volume=0.08)

        print(f"Exporting final video to {OUTPUT_VIDEO_FILENAME}...")
        final_video.write_videofile(OUTPUT_VIDEO_FILENAME, codec='libx264', fps=24, preset='veryfast')
        print(f"Final video saved as {OUTPUT_VIDEO_FILENAME}")

        return OUTPUT_VIDEO_FILENAME
    finally:
        # Clean up on every exit path, including exceptions raised while
        # rendering, so temporary folders do not accumulate between runs.
        print("Cleaning up temporary files...")
        shutil.rmtree(TEMP_FOLDER, ignore_errors=True)
        print("Temporary files removed.")
859
 
860
+ # ---------------- Gradio Interface ---------------- #
861
# Gradio UI: the three inputs map positionally onto
# generate_video(user_input, resolution, caption_option), and the returned
# file path is shown in the video output component.
iface = gr.Interface(
    fn=generate_video,
    inputs=[
        gr.Textbox(label="Video Concept", placeholder="Enter your video concept here..."),
        # "Full" and "Short" are the values generate_video checks for.
        gr.Radio(["Full", "Short"], label="Resolution", value="Full"),
        # "Yes" enables captions; any other value disables them.
        gr.Radio(["Yes", "No"], label="Captions", value="Yes")
    ],
    outputs=gr.Video(label="Generated Video"),
    title="AI Documentary Video Generator",
    description="Create a funny documentary-style video based on your concept. Note: Generation may take several minutes on CPU."
)

# Launch the interface. This runs at import time; share=True asks Gradio
# for a public share link.
iface.launch(share=True)