testdeep123 commited on
Commit
45faa4c
·
verified ·
1 Parent(s): 97ed4cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +564 -229
app.py CHANGED
@@ -1,5 +1,45 @@
1
- # app.py
2
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import soundfile as sf
4
  import torch
5
  from IPython.display import display, Audio, HTML
@@ -14,12 +54,13 @@ import math
14
  import os, requests, io, time, re, random
15
  from moviepy.editor import (
16
  VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
17
- CompositeVideoClip, TextClip, CompositeAudioClip
18
  )
19
  import moviepy.video.fx.all as vfx
20
  import moviepy.config as mpy_config
21
  from pydub import AudioSegment
22
  from pydub.generators import Sine
 
23
  from PIL import Image, ImageDraw, ImageFont
24
  import numpy as np
25
  from bs4 import BeautifulSoup
@@ -27,42 +68,54 @@ import base64
27
  from urllib.parse import quote
28
  import pysrt
29
  from gtts import gTTS
30
- import shutil
31
- import webbrowser # This won't work in HF Spaces, but keep for local testing reference
32
- import sys
33
 
34
- # --- API Keys (Embed directly as requested for private space) ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
36
  OPENROUTER_API_KEY = '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'
37
  OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
38
-
39
- # --- Global Configuration ---
40
  TEMP_FOLDER = "temp_video_processing"
41
  OUTPUT_VIDEO_FILENAME = "final_video.mp4"
42
- USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
43
 
44
- # --- Initialize Kokoro TTS pipeline (if compatible with HF CPU) ---
45
- # NOTE: Kokoro might be too resource-intensive for free CPU spaces.
46
- # If it causes issues, you might need to remove it and rely solely on gTTS.
47
- try:
48
- from kokoro import KPipeline
49
- pipeline = KPipeline(lang_code='a') # Use voice 'af_heart' for American English
50
- print("Kokoro TTS pipeline initialized.")
51
- except ImportError:
52
- print("Kokoro library not found or failed to initialize. Will rely on gTTS.")
53
- pipeline = None
54
- except Exception as e:
55
- print(f"Error initializing Kokoro: {e}. Will rely on gTTS.")
56
- pipeline = None
57
-
58
-
59
- # Ensure ImageMagick binary is set (might need adjustment for HF Spaces)
60
- # This path might differ in the HF environment.
61
- # If this causes issues, you might need to use a Dockerfile or configure the space differently.
62
- mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
63
 
64
- # --- Helper Functions (from original script) ---
 
 
65
 
 
66
  def generate_script(user_input):
67
  """Generate documentary script with proper OpenRouter handling."""
68
  headers = {
@@ -166,7 +219,9 @@ Now here is the Topic/scrip: {user_input}
166
  timeout=30
167
  )
168
 
169
- print("API Response Status:", response.status_code)
 
 
170
  if response.status_code == 200:
171
  response_data = response.json()
172
  if 'choices' in response_data and len(response_data['choices']) > 0:
@@ -197,25 +252,28 @@ def parse_script(script_text):
197
  for line in script_text.splitlines():
198
  line = line.strip()
199
  if line.startswith("[") and "]" in line:
 
200
  bracket_start = line.find("[")
201
  bracket_end = line.find("]", bracket_start)
202
  if bracket_start != -1 and bracket_end != -1:
203
  if current_title is not None:
204
  sections[current_title] = current_text.strip()
205
  current_title = line[bracket_start+1:bracket_end]
206
- current_text = line[bracket_end+1:].strip()
207
  elif current_title:
208
  current_text += line + " "
209
 
 
210
  if current_title:
211
  sections[current_title] = current_text.strip()
212
 
213
  elements = []
214
  for title, narration in sections.items():
215
- if not title or not narration:
216
  continue
217
 
218
  media_element = {"type": "media", "prompt": title, "effects": "fade-in"}
 
219
  words = narration.split()
220
  duration = max(3, len(words) * 0.5)
221
  tts_element = {"type": "tts", "text": narration, "voice": "en", "duration": duration}
@@ -231,10 +289,13 @@ def search_pexels_videos(query, pexels_api_key):
231
  """Search for a video on Pexels by query and return a random HD video."""
232
  headers = {'Authorization': pexels_api_key}
233
  base_url = "https://api.pexels.com/videos/search"
234
- num_pages = 3
235
  videos_per_page = 15
 
 
236
  max_retries = 3
237
  retry_delay = 1
 
238
  search_query = query
239
  all_videos = []
240
 
@@ -247,35 +308,50 @@ def search_pexels_videos(query, pexels_api_key):
247
  if response.status_code == 200:
248
  data = response.json()
249
  videos = data.get("videos", [])
 
250
  if not videos:
251
- break
 
 
 
252
  for video in videos:
253
  video_files = video.get("video_files", [])
254
  for file in video_files:
255
- if file.get("quality") == "hd":
256
  all_videos.append(file.get("link"))
257
- break
258
- break
259
- elif response.status_code == 429:
 
 
 
260
  time.sleep(retry_delay)
261
  retry_delay *= 2
262
  else:
 
263
  if attempt < max_retries - 1:
 
264
  time.sleep(retry_delay)
265
  retry_delay *= 2
266
  else:
267
  break
 
268
  except requests.exceptions.RequestException as e:
 
269
  if attempt < max_retries - 1:
 
270
  time.sleep(retry_delay)
271
  retry_delay *= 2
272
  else:
273
  break
274
 
275
  if all_videos:
 
276
  random_video = random.choice(all_videos)
 
277
  return random_video
278
  else:
 
279
  return None
280
 
281
  def search_pexels_images(query, pexels_api_key):
@@ -283,33 +359,46 @@ def search_pexels_images(query, pexels_api_key):
283
  headers = {'Authorization': pexels_api_key}
284
  url = "https://api.pexels.com/v1/search"
285
  params = {"query": query, "per_page": 5, "orientation": "landscape"}
 
 
286
  max_retries = 3
287
  retry_delay = 1
288
 
289
  for attempt in range(max_retries):
290
  try:
291
  response = requests.get(url, headers=headers, params=params, timeout=10)
 
292
  if response.status_code == 200:
293
  data = response.json()
294
  photos = data.get("photos", [])
295
  if photos:
 
296
  photo = random.choice(photos[:min(5, len(photos))])
297
  img_url = photo.get("src", {}).get("original")
298
  return img_url
299
  else:
 
300
  return None
301
- elif response.status_code == 429:
 
 
302
  time.sleep(retry_delay)
303
  retry_delay *= 2
304
  else:
 
305
  if attempt < max_retries - 1:
 
306
  time.sleep(retry_delay)
307
  retry_delay *= 2
 
308
  except requests.exceptions.RequestException as e:
 
309
  if attempt < max_retries - 1:
 
310
  time.sleep(retry_delay)
311
  retry_delay *= 2
312
 
 
313
  return None
314
 
315
  def search_google_images(query):
@@ -319,15 +408,21 @@ def search_google_images(query):
319
  headers = {"User-Agent": USER_AGENT}
320
  response = requests.get(search_url, headers=headers, timeout=10)
321
  soup = BeautifulSoup(response.text, "html.parser")
 
 
322
  img_tags = soup.find_all("img")
 
 
323
  image_urls = []
324
  for img in img_tags:
325
  src = img.get("src", "")
326
  if src.startswith("http") and "gstatic" not in src:
327
  image_urls.append(src)
 
328
  if image_urls:
329
  return random.choice(image_urls[:5]) if len(image_urls) >= 5 else image_urls[0]
330
  else:
 
331
  return None
332
  except Exception as e:
333
  print(f"Error in Google Images search: {e}")
@@ -337,24 +432,33 @@ def download_image(image_url, filename):
337
  """Download an image from a URL to a local file with enhanced error handling."""
338
  try:
339
  headers = {"User-Agent": USER_AGENT}
 
340
  response = requests.get(image_url, headers=headers, stream=True, timeout=15)
341
  response.raise_for_status()
 
342
  with open(filename, 'wb') as f:
343
  for chunk in response.iter_content(chunk_size=8192):
344
  f.write(chunk)
 
 
 
 
345
  try:
346
  img = Image.open(filename)
347
- img.verify()
 
348
  img = Image.open(filename)
349
  if img.mode != 'RGB':
350
  img = img.convert('RGB')
351
  img.save(filename)
 
352
  return filename
353
  except Exception as e_validate:
354
  print(f"Downloaded file is not a valid image: {e_validate}")
355
  if os.path.exists(filename):
356
  os.remove(filename)
357
  return None
 
358
  except requests.exceptions.RequestException as e_download:
359
  print(f"Image download error: {e_download}")
360
  if os.path.exists(filename):
@@ -374,6 +478,7 @@ def download_video(video_url, filename):
374
  with open(filename, 'wb') as f:
375
  for chunk in response.iter_content(chunk_size=8192):
376
  f.write(chunk)
 
377
  return filename
378
  except Exception as e:
379
  print(f"Video download error: {e}")
@@ -387,44 +492,55 @@ def generate_media(prompt, user_image=None, current_index=0, total_segments=1):
387
  For news-related queries, use Google Images.
388
  Returns a dict: {'path': <file_path>, 'asset_type': 'video' or 'image'}.
389
  """
 
390
  safe_prompt = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')
391
 
 
392
  if "news" in prompt.lower():
 
393
  image_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}_news.jpg")
394
  image_url = search_google_images(prompt)
395
  if image_url:
396
  downloaded_image = download_image(image_url, image_file)
397
  if downloaded_image:
 
398
  return {"path": downloaded_image, "asset_type": "image"}
399
  else:
400
  print(f"Google Images search failed for prompt: {prompt}")
401
 
402
- if random.random() < 0.25:
 
403
  video_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}_video.mp4")
404
  video_url = search_pexels_videos(prompt, PEXELS_API_KEY)
405
  if video_url:
406
  downloaded_video = download_video(video_url, video_file)
407
  if downloaded_video:
 
408
  return {"path": downloaded_video, "asset_type": "video"}
409
  else:
410
  print(f"Pexels video search failed for prompt: {prompt}")
411
 
 
412
  image_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}.jpg")
413
  image_url = search_pexels_images(prompt, PEXELS_API_KEY)
414
  if image_url:
415
  downloaded_image = download_image(image_url, image_file)
416
  if downloaded_image:
 
417
  return {"path": downloaded_image, "asset_type": "image"}
418
  else:
419
  print(f"Pexels image download failed for prompt: {prompt}")
420
 
 
421
  fallback_terms = ["nature", "people", "landscape", "technology", "business"]
422
  for term in fallback_terms:
 
423
  fallback_file = os.path.join(TEMP_FOLDER, f"fallback_{term}.jpg")
424
  fallback_url = search_pexels_images(term, PEXELS_API_KEY)
425
  if fallback_url:
426
  downloaded_fallback = download_image(fallback_url, fallback_file)
427
  if downloaded_fallback:
 
428
  return {"path": downloaded_fallback, "asset_type": "image"}
429
  else:
430
  print(f"Fallback image download failed for term: {term}")
@@ -434,10 +550,73 @@ def generate_media(prompt, user_image=None, current_index=0, total_segments=1):
434
  print(f"Failed to generate visual asset for prompt: {prompt}")
435
  return None
436
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
437
  def generate_silent_audio(duration, sample_rate=24000):
438
  """
439
  Generate a silent WAV audio file lasting 'duration' seconds.
440
  """
 
 
441
  num_samples = int(duration * sample_rate)
442
  silence = np.zeros(num_samples, dtype=np.float32)
443
  silent_path = os.path.join(TEMP_FOLDER, f"silent_{int(time.time())}.wav")
@@ -457,67 +636,77 @@ def generate_tts(text, voice):
457
  print(f"Using cached TTS for text '{text[:10]}...'")
458
  return file_path
459
 
460
- # Try Kokoro first
461
- if pipeline:
 
 
 
 
 
 
 
 
 
 
462
  try:
463
- kokoro_voice = 'af_heart' if voice == 'en' else voice
464
- generator = pipeline(text, voice=kokoro_voice, speed=0.9, split_pattern=r'\n+')
465
- audio_segments = []
466
- for i, (gs, ps, audio) in enumerate(generator):
467
- audio_segments.append(audio)
468
- full_audio = np.concatenate(audio_segments) if len(audio_segments) > 1 else audio_segments[0]
469
- sf.write(file_path, full_audio, 24000)
470
- print(f"TTS audio saved to {file_path} (Kokoro)")
 
471
  return file_path
472
- except Exception as e:
473
- print(f"Error with Kokoro TTS: {e}")
 
 
474
 
475
- # Fallback to gTTS
476
- try:
477
- print("Falling back to gTTS...")
478
- tts = gTTS(text=text, lang='en')
479
- mp3_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.mp3")
480
- tts.save(mp3_path)
481
- audio = AudioSegment.from_mp3(mp3_path)
482
- audio.export(file_path, format="wav")
483
- if os.path.exists(mp3_path):
484
- os.remove(mp3_path)
485
- print(f"Fallback TTS saved to {file_path} (gTTS)")
486
- return file_path
487
- except Exception as fallback_error:
488
- print(f"Both TTS methods failed: {fallback_error}")
489
- # Generate silent audio as fallback
490
- return generate_silent_audio(duration=max(3, len(text.split()) * 0.5))
491
 
492
  def apply_kenburns_effect(clip, target_resolution, effect_type=None):
493
  """
494
  Apply a smooth Ken Burns effect with a single, clean movement pattern.
495
  """
 
496
  target_w, target_h = target_resolution
 
 
 
497
  clip_aspect = clip.w / clip.h
498
  target_aspect = target_w / target_h
499
 
500
- if clip_aspect > target_aspect:
 
501
  new_height = target_h
502
  new_width = int(new_height * clip_aspect)
503
- else:
504
  new_width = target_w
505
  new_height = int(new_width / clip_aspect)
506
 
 
507
  clip = clip.resize(newsize=(new_width, new_height))
508
 
 
509
  base_scale = 1.15
510
  new_width = int(new_width * base_scale)
511
  new_height = int(new_height * base_scale)
512
  clip = clip.resize(newsize=(new_width, new_height))
513
 
 
 
514
  max_offset_x = new_width - target_w
515
  max_offset_y = new_height - target_h
516
 
 
517
  available_effects = ["zoom-in", "zoom-out", "pan-left", "pan-right", "up-left"]
 
 
518
  if effect_type is None or effect_type == "random":
519
  effect_type = random.choice(available_effects)
520
 
 
521
  if effect_type == "zoom-in":
522
  start_zoom = 0.9
523
  end_zoom = 1.1
@@ -546,18 +735,23 @@ def apply_kenburns_effect(clip, target_resolution, effect_type=None):
546
  else:
547
  raise ValueError(f"Unsupported effect_type: {effect_type}")
548
 
 
549
  def transform_frame(get_frame, t):
550
  frame = get_frame(t)
 
551
  ratio = t / clip.duration if clip.duration > 0 else 0
552
- ratio = 0.5 - 0.5 * math.cos(math.pi * ratio)
553
 
 
554
  current_zoom = start_zoom + (end_zoom - start_zoom) * ratio
555
  crop_w = int(target_w / current_zoom)
556
  crop_h = int(target_h / current_zoom)
557
 
 
558
  current_center_x = start_center[0] + (end_center[0] - start_center[0]) * ratio
559
  current_center_y = start_center[1] + (end_center[1] - start_center[1]) * ratio
560
 
 
561
  min_center_x = crop_w / 2
562
  max_center_x = new_width - crop_w / 2
563
  min_center_y = crop_h / 2
@@ -565,96 +759,103 @@ def apply_kenburns_effect(clip, target_resolution, effect_type=None):
565
  current_center_x = max(min_center_x, min(current_center_x, max_center_x))
566
  current_center_y = max(min_center_y, min(current_center_y, max_center_y))
567
 
568
- # Ensure frame is numpy array and correct type for cv2
569
- if isinstance(frame, Image.Image):
570
- frame = np.array(frame)
571
- if frame.dtype != np.uint8:
572
- frame = frame.astype(np.uint8)
573
-
574
- # Ensure frame has 3 channels for color images
575
- if len(frame.shape) == 2: # Grayscale
576
- frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR)
577
- elif frame.shape[2] == 4: # RGBA
578
- frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2BGR)
579
-
580
- # Ensure crop dimensions are valid
581
- crop_w = max(1, crop_w)
582
- crop_h = max(1, crop_h)
583
-
584
- # Ensure center point is valid for cv2.getRectSubPix
585
- current_center_x = max(0, min(current_center_x, frame.shape[1] - 1))
586
- current_center_y = max(0, min(current_center_y, frame.shape[0] - 1))
587
-
588
  cropped_frame = cv2.getRectSubPix(frame, (crop_w, crop_h), (current_center_x, current_center_y))
589
  resized_frame = cv2.resize(cropped_frame, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)
590
 
591
  return resized_frame
592
 
 
593
  return clip.fl(transform_frame)
594
 
 
 
 
 
 
 
595
  def resize_to_fill(clip, target_resolution):
596
  """
597
  Resize and crop a clip to fill the target resolution while maintaining aspect ratio.
 
 
 
 
 
 
 
598
  """
599
  target_w, target_h = target_resolution
600
  clip_aspect = clip.w / clip.h
601
  target_aspect = target_w / target_h
602
 
603
  if clip_aspect > target_aspect:
 
604
  clip = clip.resize(height=target_h)
605
  crop_amount = (clip.w - target_w) / 2
606
  clip = clip.crop(x1=crop_amount, x2=clip.w - crop_amount, y1=0, y2=clip.h)
607
  else:
 
608
  clip = clip.resize(width=target_w)
609
  crop_amount = (clip.h - target_h) / 2
610
  clip = clip.crop(x1=0, x2=clip.w, y1=crop_amount, y2=clip.h - crop_amount)
611
 
612
  return clip
613
 
 
614
  def find_mp3_files():
615
  """
616
- Search for a background music file (e.g., background_music.mp3) in the current directory.
617
  Returns the path to the first MP3 file found or None if none is found.
618
  """
619
- # Look for a specific file name first
620
- bg_music_path = "background_music.mp3"
621
- if os.path.exists(bg_music_path):
622
- print(f"Found background music file: {bg_music_path}")
623
- return bg_music_path
624
-
625
- # If not found, search for any mp3 in the current directory
626
- for file in os.listdir('.'):
627
- if file.endswith('.mp3'):
628
- print(f"Found background music file: {file}")
629
- return file
630
-
631
- print("No background music file found in the current directory.")
632
  return None
633
 
 
634
  def add_background_music(final_video, bg_music_volume=0.08):
635
  """Add background music to the final video using any MP3 file found in directories."""
636
  try:
 
637
  bg_music_path = find_mp3_files()
 
638
  if bg_music_path and os.path.exists(bg_music_path):
639
  print(f"Adding background music from: {bg_music_path}")
 
640
  bg_music = AudioFileClip(bg_music_path)
 
 
641
  if bg_music.duration < final_video.duration:
642
  loops_needed = math.ceil(final_video.duration / bg_music.duration)
643
  bg_segments = [bg_music] * loops_needed
644
  bg_music = concatenate_audioclips(bg_segments)
 
 
645
  bg_music = bg_music.subclip(0, final_video.duration)
 
 
646
  bg_music = bg_music.volumex(bg_music_volume)
647
 
 
648
  video_audio = final_video.audio
649
- if video_audio:
650
- mixed_audio = CompositeAudioClip([video_audio, bg_music])
651
- else:
652
- mixed_audio = bg_music
653
 
 
654
  final_video = final_video.set_audio(mixed_audio)
655
  print("Background music added successfully")
656
  else:
657
- print("No suitable background music file found, skipping background music")
658
 
659
  return final_video
660
 
@@ -663,7 +864,10 @@ def add_background_music(final_video, bg_music_volume=0.08):
663
  print("Continuing without background music")
664
  return final_video
665
 
666
- def create_clip(media_path, asset_type, tts_path, duration, effects, narration_text, segment_index, target_resolution, caption_color):
 
 
 
667
  """Create a video clip with synchronized subtitles and properly timed narration."""
668
  try:
669
  print(f"Creating clip #{segment_index} with asset_type: {asset_type}, media_path: {media_path}")
@@ -672,13 +876,15 @@ def create_clip(media_path, asset_type, tts_path, duration, effects, narration_t
672
  print("Missing media or TTS file")
673
  return None
674
 
 
675
  audio_clip = AudioFileClip(tts_path).audio_fadeout(0.2)
676
  audio_duration = audio_clip.duration
677
  target_duration = audio_duration + 0.2
678
 
 
679
  if asset_type == "video":
680
  clip = VideoFileClip(media_path)
681
- clip = resize_to_fill(clip, target_resolution)
682
  if clip.duration < target_duration:
683
  clip = clip.loop(duration=target_duration)
684
  else:
@@ -692,62 +898,72 @@ def create_clip(media_path, asset_type, tts_path, duration, effects, narration_t
692
  img.close()
693
 
694
  clip = ImageClip(media_path).set_duration(target_duration)
695
- clip = apply_kenburns_effect(clip, target_resolution)
696
  clip = clip.fadein(0.3).fadeout(0.3)
697
  else:
698
  return None
699
 
700
- # Add subtitles
701
- if narration_text and caption_color != "transparent":
702
  try:
 
703
  words = narration_text.split()
704
  chunks = []
705
  current_chunk = []
 
 
706
  for word in words:
707
  current_chunk.append(word)
708
- if len(current_chunk) >= 5:
709
  chunks.append(' '.join(current_chunk))
710
  current_chunk = []
 
 
711
  if current_chunk:
712
  chunks.append(' '.join(current_chunk))
713
 
714
- chunk_duration = audio_duration / len(chunks) if len(chunks) > 0 else audio_duration
 
715
  subtitle_clips = []
716
 
717
- subtitle_y_position = int(target_resolution[1] * 0.70)
 
718
 
719
  for i, chunk_text in enumerate(chunks):
720
  start_time = i * chunk_duration
721
  end_time = (i + 1) * chunk_duration
722
 
 
723
  txt_clip = TextClip(
724
  chunk_text,
725
  fontsize=45,
726
  font='Arial-Bold',
727
- color=caption_color,
728
  bg_color='rgba(0, 0, 0, 0.25)',
729
  method='caption',
730
  align='center',
731
- stroke_width=2,
732
- stroke_color=caption_color,
733
- size=(target_resolution[0] * 0.8, None)
734
  ).set_start(start_time).set_end(end_time)
735
 
 
736
  txt_clip = txt_clip.set_position(('center', subtitle_y_position))
737
  subtitle_clips.append(txt_clip)
738
 
 
739
  clip = CompositeVideoClip([clip] + subtitle_clips)
740
 
741
  except Exception as sub_error:
742
  print(f"Subtitle error: {sub_error}")
743
- # Fallback to simpler subtitle if needed
744
  txt_clip = TextClip(
745
  narration_text,
746
  fontsize=28,
747
- color=caption_color,
748
  align='center',
749
- size=(target_resolution[0] * 0.7, None)
750
- ).set_position(('center', int(target_resolution[1] / 3))).set_duration(clip.duration)
751
  clip = CompositeVideoClip([clip, txt_clip])
752
 
753
  clip = clip.set_audio(audio_clip)
@@ -758,140 +974,259 @@ def create_clip(media_path, asset_type, tts_path, duration, effects, narration_t
758
  print(f"Error in create_clip: {str(e)}")
759
  return None
760
 
761
- # --- Main Video Generation Function ---
762
- def generate_video(user_input, resolution_choice, caption_option, caption_color_input, progress=gr.Progress()):
763
- """
764
- Main function to orchestrate video generation based on Gradio inputs.
765
- """
766
- progress(0, desc="Starting video generation...")
767
 
768
- # Set target resolution
769
- if resolution_choice == "Full":
770
- target_resolution = (1920, 1080)
771
- elif resolution_choice == "Short":
772
- target_resolution = (1080, 1920)
773
- else:
774
- return "Invalid resolution choice.", None
775
 
776
- # Set caption color
777
- caption_color = caption_color_input if caption_option == "Yes" else "transparent"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
778
 
779
- # Create temporary folder
780
- if os.path.exists(TEMP_FOLDER):
781
- shutil.rmtree(TEMP_FOLDER)
782
- os.makedirs(TEMP_FOLDER)
783
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
784
  try:
785
- progress(0.1, desc="Generating script...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
786
  script = generate_script(user_input)
787
  if not script:
788
- return "Failed to generate script.", None
789
- print("Generated Script:\n", script)
790
-
791
- progress(0.2, desc="Parsing script...")
792
  elements = parse_script(script)
793
  if not elements:
794
- return "Failed to parse script into elements.", None
795
- print(f"Parsed {len(elements)//2} script segments.")
796
-
797
  paired_elements = []
798
  for i in range(0, len(elements), 2):
799
- if i + 1 < len(elements):
800
  paired_elements.append((elements[i], elements[i+1]))
801
-
802
- if not paired_elements:
803
- return "No valid script segments found.", None
804
-
 
805
  clips = []
806
- total_segments = len(paired_elements)
807
  for idx, (media_elem, tts_elem) in enumerate(paired_elements):
808
- progress(0.3 + (idx * 0.5 / total_segments), desc=f"Processing segment {idx+1}/{total_segments}...")
809
- print(f"\nProcessing segment {idx+1}/{total_segments} with prompt: '{media_elem['prompt']}'")
810
-
811
- media_asset = generate_media(media_elem['prompt'], current_index=idx, total_segments=total_segments)
812
  if not media_asset:
813
- print(f"Skipping segment {idx+1} due to missing media asset.")
814
  continue
815
-
816
  tts_path = generate_tts(tts_elem['text'], tts_elem['voice'])
817
  if not tts_path:
818
- print(f"Skipping segment {idx+1} due to TTS generation failure.")
819
  continue
820
-
821
  clip = create_clip(
822
  media_path=media_asset['path'],
823
  asset_type=media_asset['asset_type'],
824
  tts_path=tts_path,
825
- duration=tts_elem['duration'],
826
- effects=media_elem.get('effects', 'fade-in'),
827
  narration_text=tts_elem['text'],
828
- segment_index=idx,
829
- target_resolution=target_resolution,
830
- caption_color=caption_color
831
  )
832
  if clip:
833
  clips.append(clip)
834
- else:
835
- print(f"Clip creation failed for segment {idx+1}.")
836
-
837
  if not clips:
838
- return "No clips were successfully created.", None
839
-
840
- progress(0.8, desc="Concatenating clips...")
841
  final_video = concatenate_videoclips(clips, method="compose")
842
-
843
- progress(0.9, desc="Adding background music...")
844
- final_video = add_background_music(final_video, bg_music_volume=0.08)
845
-
846
- progress(0.95, desc="Exporting final video...")
847
- output_path = os.path.join(TEMP_FOLDER, OUTPUT_VIDEO_FILENAME)
848
- final_video.write_videofile(output_path, codec='libx264', fps=24, preset='veryfast')
849
-
850
- progress(1.0, desc="Video generation complete.")
851
- return "Video generated successfully!", output_path
852
-
 
 
853
  except Exception as e:
854
- print(f"An error occurred: {e}")
855
- return f"An error occurred: {e}", None
856
-
857
  finally:
858
- # Clean up temporary files
859
- if os.path.exists(TEMP_FOLDER):
860
- shutil.rmtree(TEMP_FOLDER)
861
- print("Temporary files removed.")
862
-
863
- # --- Gradio Interface ---
864
- with gr.Blocks() as demo:
865
- gr.Markdown("# AI Documentary Video Generator")
866
- gr.Markdown("Enter a concept, choose settings, and generate a short documentary video.")
867
-
868
- with gr.Row():
869
- user_input = gr.Textbox(label="Video Concept", placeholder="e.g., The secret life of squirrels")
870
- resolution_choice = gr.Radio(["Full", "Short"], label="Target Resolution", value="Short")
871
-
872
  with gr.Row():
873
- caption_option = gr.Radio(["Yes", "No"], label="Add Captions?", value="Yes")
874
- caption_color_input = gr.Textbox(label="Caption Color (e.g., white, yellow)", value="white", visible=True)
875
-
876
- # Update caption color visibility based on caption option
877
- caption_option.change(
878
- lambda x: gr.update(visible=x == "Yes"),
879
- inputs=caption_option,
880
- outputs=caption_color_input
881
- )
882
-
883
- generate_button = gr.Button("Generate Video")
884
- status_output = gr.Textbox(label="Status", interactive=False)
885
- video_output = gr.Video(label="Generated Video")
886
-
887
- generate_button.click(
888
- fn=generate_video,
889
- inputs=[user_input, resolution_choice, caption_option, caption_color_input],
890
- outputs=[status_output, video_output]
891
  )
892
 
893
  if __name__ == "__main__":
894
- # Ensure TEMP_FOLDER exists before starting
895
- if not os.path.exists(TEMP_FOLDER):
896
- os.makedirs(TEMP_FOLDER)
897
- demo.launch()
 
1
+ import os
2
  import gradio as gr
3
+ from kokoro import KPipeline
4
+ from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip, CompositeVideoClip, TextClip, concatenate_videoclips
5
+ from PIL import Image
6
+ import tempfile
7
+ import random
8
+ import cv2
9
+ import math
10
+ import requests
11
+ import re
12
+ import time
13
+ import pydub
14
+ import pysrt
15
+ from gtts import gTTS
16
+ import numpy as np
17
+ import soundfile as sf
18
+
19
+ # Initialize Kokoro TTS pipeline
20
+ pipeline = KPipeline(lang_code='a')
21
+
22
+ # API Constants
23
+ PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
24
+ OPENROUTER_API_KEY = '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'
25
+ OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
26
+ TEMP_FOLDER = "temp_video_processing"
27
+ os.makedirs(TEMP_FOLDER, exist_ok=True)
28
+
29
+ # --------------- ORIGINAL FUNCTIONS (UNMODIFIED) --------------- #
30
+
31
+
32
+ !pip install transformers==4.49.0
33
+ !pip install moviepy gTTS requests pydub pillow
34
+ !pip cache purge
35
+ !apt-get install imagemagick -y
36
+ !pip install kokoro>=0.3.4 soundfile
37
+ !apt-get-qq -y install espeak-ng > /dev/null 2>&1
38
+ !pip install pysrt
39
+
40
+
41
+ from kokoro import KPipeline
42
+ from IPython.display import display, Audio
43
  import soundfile as sf
44
  import torch
45
  from IPython.display import display, Audio, HTML
 
54
  import os, requests, io, time, re, random
55
  from moviepy.editor import (
56
  VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
57
+ CompositeVideoClip, TextClip
58
  )
59
  import moviepy.video.fx.all as vfx
60
  import moviepy.config as mpy_config
61
  from pydub import AudioSegment
62
  from pydub.generators import Sine
63
+ from google.colab import files
64
  from PIL import Image, ImageDraw, ImageFont
65
  import numpy as np
66
  from bs4 import BeautifulSoup
 
68
  from urllib.parse import quote
69
  import pysrt
70
  from gtts import gTTS
 
 
 
71
 
72
+ # Initialize Kokoro TTS pipeline (using American English, adjust lang_code as needed)
73
+ pipeline = KPipeline(lang_code='a') # Use voice 'af_heart' for American English
74
+ # Ensure ImageMagick binary is set (to avoid "unset" errors)
75
+ mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
76
+
77
+ # ---------------- Global Configuration ---------------- #
78
+
79
+ TARGET_RESOLUTION_INPUT = input("RESOLUTION:")
80
+
81
+ CLIPS_AMMOUNT = int(input("Clips:"))
82
+
83
+ CAPTION_OPTION = input("Caption Yes/No:")
84
+
85
+
86
+ if CAPTION_OPTION == "Yes":
87
+ CAPTION_COLOR = "white"
88
+ else:
89
+ CAPTION_COLOR = "transparent"
90
+
91
+
92
+ if TARGET_RESOLUTION_INPUT == "Full":
93
+ TARGET_RESOLUTION = (1920, 1080)
94
+ elif TARGET_RESOLUTION_INPUT == "Short":
95
+ TARGET_RESOLUTION = (1080, 1920)
96
+
97
+
98
+
99
+
100
+
101
+
102
+
103
+
104
+
105
+
106
  PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
107
  OPENROUTER_API_KEY = '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'
108
  OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
 
 
109
  TEMP_FOLDER = "temp_video_processing"
110
  OUTPUT_VIDEO_FILENAME = "final_video.mp4"
 
111
 
112
+ USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
+ # Create temporary folder if it doesn't exist
115
+ if not os.path.exists(TEMP_FOLDER):
116
+ os.makedirs(TEMP_FOLDER)
117
 
118
+ # ---------------- Helper Functions ---------------- #
119
  def generate_script(user_input):
120
  """Generate documentary script with proper OpenRouter handling."""
121
  headers = {
 
219
  timeout=30
220
  )
221
 
222
+ # Debug: Print raw response
223
+ print("API Response:", response.text)
224
+
225
  if response.status_code == 200:
226
  response_data = response.json()
227
  if 'choices' in response_data and len(response_data['choices']) > 0:
 
252
  for line in script_text.splitlines():
253
  line = line.strip()
254
  if line.startswith("[") and "]" in line:
255
+ # Extract content between first [ and first ]
256
  bracket_start = line.find("[")
257
  bracket_end = line.find("]", bracket_start)
258
  if bracket_start != -1 and bracket_end != -1:
259
  if current_title is not None:
260
  sections[current_title] = current_text.strip()
261
  current_title = line[bracket_start+1:bracket_end]
262
+ current_text = line[bracket_end+1:].strip() # Get any text after the bracket
263
  elif current_title:
264
  current_text += line + " "
265
 
266
+ # Don't forget the last section
267
  if current_title:
268
  sections[current_title] = current_text.strip()
269
 
270
  elements = []
271
  for title, narration in sections.items():
272
+ if not title or not narration: # Skip empty sections
273
  continue
274
 
275
  media_element = {"type": "media", "prompt": title, "effects": "fade-in"}
276
+ # Duration: at least 3 sec, or 0.5 sec per word
277
  words = narration.split()
278
  duration = max(3, len(words) * 0.5)
279
  tts_element = {"type": "tts", "text": narration, "voice": "en", "duration": duration}
 
289
  """Search for a video on Pexels by query and return a random HD video."""
290
  headers = {'Authorization': pexels_api_key}
291
  base_url = "https://api.pexels.com/videos/search"
292
+ num_pages = 3 # Search through first 3 pages
293
  videos_per_page = 15
294
+
295
+ # Add retry mechanism
296
  max_retries = 3
297
  retry_delay = 1
298
+
299
  search_query = query
300
  all_videos = []
301
 
 
308
  if response.status_code == 200:
309
  data = response.json()
310
  videos = data.get("videos", [])
311
+
312
  if not videos:
313
+ print(f"No videos found on page {page}.")
314
+ break # No videos on this page, move to the next
315
+
316
+ # Collect all HD videos
317
  for video in videos:
318
  video_files = video.get("video_files", [])
319
  for file in video_files:
320
+ if file.get("quality") == "hd": # Only collect HD quality
321
  all_videos.append(file.get("link"))
322
+ break # Only add one file per video
323
+
324
+ break # Success, exit retry loop
325
+
326
+ elif response.status_code == 429: # Rate limit
327
+ print(f"Rate limit hit (attempt {attempt+1}/{max_retries}). Retrying in {retry_delay} seconds...")
328
  time.sleep(retry_delay)
329
  retry_delay *= 2
330
  else:
331
+ print(f"Error fetching videos: {response.status_code} {response.text}")
332
  if attempt < max_retries - 1:
333
+ print(f"Retrying in {retry_delay} seconds...")
334
  time.sleep(retry_delay)
335
  retry_delay *= 2
336
  else:
337
  break
338
+
339
  except requests.exceptions.RequestException as e:
340
+ print(f"Request exception: {e}")
341
  if attempt < max_retries - 1:
342
+ print(f"Retrying in {retry_delay} seconds...")
343
  time.sleep(retry_delay)
344
  retry_delay *= 2
345
  else:
346
  break
347
 
348
  if all_videos:
349
+ # Select a random video from the collected ones
350
  random_video = random.choice(all_videos)
351
+ print(f"Selected random video from {len(all_videos)} HD videos")
352
  return random_video
353
  else:
354
+ print("No suitable videos found after searching all pages.")
355
  return None
356
 
357
  def search_pexels_images(query, pexels_api_key):
 
359
  headers = {'Authorization': pexels_api_key}
360
  url = "https://api.pexels.com/v1/search"
361
  params = {"query": query, "per_page": 5, "orientation": "landscape"}
362
+
363
+ # Add retry mechanism
364
  max_retries = 3
365
  retry_delay = 1
366
 
367
  for attempt in range(max_retries):
368
  try:
369
  response = requests.get(url, headers=headers, params=params, timeout=10)
370
+
371
  if response.status_code == 200:
372
  data = response.json()
373
  photos = data.get("photos", [])
374
  if photos:
375
+ # Get a random image from the first 5 results (if available)
376
  photo = random.choice(photos[:min(5, len(photos))])
377
  img_url = photo.get("src", {}).get("original")
378
  return img_url
379
  else:
380
+ print(f"No images found for query: {query}")
381
  return None
382
+
383
+ elif response.status_code == 429: # Rate limit
384
+ print(f"Rate limit hit (attempt {attempt+1}/{max_retries}). Retrying in {retry_delay} seconds...")
385
  time.sleep(retry_delay)
386
  retry_delay *= 2
387
  else:
388
+ print(f"Error fetching images: {response.status_code} {response.text}")
389
  if attempt < max_retries - 1:
390
+ print(f"Retrying in {retry_delay} seconds...")
391
  time.sleep(retry_delay)
392
  retry_delay *= 2
393
+
394
  except requests.exceptions.RequestException as e:
395
+ print(f"Request exception: {e}")
396
  if attempt < max_retries - 1:
397
+ print(f"Retrying in {retry_delay} seconds...")
398
  time.sleep(retry_delay)
399
  retry_delay *= 2
400
 
401
+ print(f"No Pexels images found for query: {query} after all attempts")
402
  return None
403
 
404
  def search_google_images(query):
 
408
  headers = {"User-Agent": USER_AGENT}
409
  response = requests.get(search_url, headers=headers, timeout=10)
410
  soup = BeautifulSoup(response.text, "html.parser")
411
+
412
+ # Look for image elements or JSON data containing image URLs
413
  img_tags = soup.find_all("img")
414
+
415
+ # Filter out small images (icons, etc.)
416
  image_urls = []
417
  for img in img_tags:
418
  src = img.get("src", "")
419
  if src.startswith("http") and "gstatic" not in src:
420
  image_urls.append(src)
421
+
422
  if image_urls:
423
  return random.choice(image_urls[:5]) if len(image_urls) >= 5 else image_urls[0]
424
  else:
425
+ print(f"No Google Images found for query: {query}")
426
  return None
427
  except Exception as e:
428
  print(f"Error in Google Images search: {e}")
 
432
  """Download an image from a URL to a local file with enhanced error handling."""
433
  try:
434
  headers = {"User-Agent": USER_AGENT}
435
+ print(f"Downloading image from: {image_url} to {filename}")
436
  response = requests.get(image_url, headers=headers, stream=True, timeout=15)
437
  response.raise_for_status()
438
+
439
  with open(filename, 'wb') as f:
440
  for chunk in response.iter_content(chunk_size=8192):
441
  f.write(chunk)
442
+
443
+ print(f"Image downloaded successfully to: {filename}")
444
+
445
+ # Validate the image
446
  try:
447
  img = Image.open(filename)
448
+ img.verify() # Verify it's an actual image
449
+ # If it passes verification, reopen and convert to RGB if needed
450
  img = Image.open(filename)
451
  if img.mode != 'RGB':
452
  img = img.convert('RGB')
453
  img.save(filename)
454
+ print(f"Image validated and processed: {filename}")
455
  return filename
456
  except Exception as e_validate:
457
  print(f"Downloaded file is not a valid image: {e_validate}")
458
  if os.path.exists(filename):
459
  os.remove(filename)
460
  return None
461
+
462
  except requests.exceptions.RequestException as e_download:
463
  print(f"Image download error: {e_download}")
464
  if os.path.exists(filename):
 
478
  with open(filename, 'wb') as f:
479
  for chunk in response.iter_content(chunk_size=8192):
480
  f.write(chunk)
481
+ print(f"Video downloaded successfully to: {filename}")
482
  return filename
483
  except Exception as e:
484
  print(f"Video download error: {e}")
 
492
  For news-related queries, use Google Images.
493
  Returns a dict: {'path': <file_path>, 'asset_type': 'video' or 'image'}.
494
  """
495
+ # Make prompt URL-safe and a valid filename
496
  safe_prompt = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')
497
 
498
+ # For news-related queries, use Google Images
499
  if "news" in prompt.lower():
500
+ print(f"News-related query detected: {prompt}. Using Google Images...")
501
  image_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}_news.jpg")
502
  image_url = search_google_images(prompt)
503
  if image_url:
504
  downloaded_image = download_image(image_url, image_file)
505
  if downloaded_image:
506
+ print(f"News image saved to {downloaded_image}")
507
  return {"path": downloaded_image, "asset_type": "image"}
508
  else:
509
  print(f"Google Images search failed for prompt: {prompt}")
510
 
511
+ # Try video first (with reduced frequency for better media mix)
512
+ if random.random() < 0.25: # 25% chance of using a video
513
  video_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}_video.mp4")
514
  video_url = search_pexels_videos(prompt, PEXELS_API_KEY)
515
  if video_url:
516
  downloaded_video = download_video(video_url, video_file)
517
  if downloaded_video:
518
+ print(f"Video asset saved to {downloaded_video}")
519
  return {"path": downloaded_video, "asset_type": "video"}
520
  else:
521
  print(f"Pexels video search failed for prompt: {prompt}")
522
 
523
+ # Fallback or primary choice for images
524
  image_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}.jpg")
525
  image_url = search_pexels_images(prompt, PEXELS_API_KEY)
526
  if image_url:
527
  downloaded_image = download_image(image_url, image_file)
528
  if downloaded_image:
529
+ print(f"Image asset saved to {downloaded_image}")
530
  return {"path": downloaded_image, "asset_type": "image"}
531
  else:
532
  print(f"Pexels image download failed for prompt: {prompt}")
533
 
534
+ # Last resort: try to find a stock image for common terms
535
  fallback_terms = ["nature", "people", "landscape", "technology", "business"]
536
  for term in fallback_terms:
537
+ print(f"Trying fallback image search with term: {term}")
538
  fallback_file = os.path.join(TEMP_FOLDER, f"fallback_{term}.jpg")
539
  fallback_url = search_pexels_images(term, PEXELS_API_KEY)
540
  if fallback_url:
541
  downloaded_fallback = download_image(fallback_url, fallback_file)
542
  if downloaded_fallback:
543
+ print(f"Fallback image saved to {downloaded_fallback}")
544
  return {"path": downloaded_fallback, "asset_type": "image"}
545
  else:
546
  print(f"Fallback image download failed for term: {term}")
 
550
  print(f"Failed to generate visual asset for prompt: {prompt}")
551
  return None
552
 
553
+ # ---------------- TTS Function Using Kokoro ---------------- #
554
+
555
def generate_tts(text, voice):
    """
    Generate TTS audio for ``text`` and return the path of a WAV file.

    Kokoro (the module-level ``pipeline``) is tried first. If it raises or
    yields no audio, gTTS is used and its MP3 output is converted to WAV
    with pydub. Results are cached in TEMP_FOLDER.

    Args:
        text: Narration to synthesize.
        voice: Kokoro voice name; ``'en'`` is mapped to ``'af_heart'``.

    Returns:
        Path to the WAV file, or None when both engines fail.
    """
    import hashlib  # local import so the block needs no new file-level import

    # The readable prefix alone is not unique: two narrations that start with
    # the same 10 characters used to share one cache file. A digest of the
    # full text (and voice) makes the cache key collision-safe.
    safe_text = re.sub(r'[^\w\s-]', '', text[:10]).strip().replace(' ', '_')
    digest = hashlib.sha1(f"{voice}\n{text}".encode("utf-8")).hexdigest()[:12]
    file_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}_{digest}.wav")

    if os.path.exists(file_path):
        print(f"Using cached TTS for text '{text[:10]}...'")
        return file_path

    try:
        # Map voice 'en' to Kokoro's American English voice.
        kokoro_voice = 'af_heart' if voice == 'en' else voice
        generator = pipeline(text, voice=kokoro_voice, speed=0.8, split_pattern=r'\n+')
        audio_segments = [audio for _gs, _ps, audio in generator]
        if not audio_segments:
            raise RuntimeError("Kokoro returned no audio segments")

        if len(audio_segments) > 1:
            full_audio = np.concatenate(audio_segments)
        else:
            full_audio = audio_segments[0]

        sf.write(file_path, full_audio, 24000)  # Save as WAV at 24000 Hz
        print(f"TTS audio saved to {file_path} (Kokoro)")
        return file_path
    except Exception as e:
        print(f"Error generating TTS with Kokoro: {e}")

    # Fallback to gTTS if Kokoro fails.
    mp3_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}_{digest}.mp3")
    try:
        print("Falling back to gTTS...")
        tts = gTTS(text=text, lang='en')
        tts.save(mp3_path)

        # Convert mp3 to wav using pydub.
        audio = AudioSegment.from_mp3(mp3_path)
        audio.export(file_path, format="wav")

        print(f"Fallback TTS saved to {file_path} (gTTS)")
        return file_path
    except Exception as fallback_error:
        print(f"Fallback TTS with gTTS also failed: {fallback_error}")
        # Do not leave a partial WAV behind: it would be served as a cache hit.
        if os.path.exists(file_path):
            os.remove(file_path)
        return None
    finally:
        # The intermediate MP3 is removed on success and on failure alike.
        if os.path.exists(mp3_path):
            os.remove(mp3_path)
610
+
611
+
612
+
613
+
614
  def generate_silent_audio(duration, sample_rate=24000):
615
  """
616
  Generate a silent WAV audio file lasting 'duration' seconds.
617
  """
618
+ import numpy as np
619
+ import soundfile as sf
620
  num_samples = int(duration * sample_rate)
621
  silence = np.zeros(num_samples, dtype=np.float32)
622
  silent_path = os.path.join(TEMP_FOLDER, f"silent_{int(time.time())}.wav")
 
636
  print(f"Using cached TTS for text '{text[:10]}...'")
637
  return file_path
638
 
639
+ try:
640
+ kokoro_voice = 'af_heart' if voice == 'en' else voice
641
+ generator = pipeline(text, voice=kokoro_voice, speed=0.9, split_pattern=r'\n+')
642
+ audio_segments = []
643
+ for i, (gs, ps, audio) in enumerate(generator):
644
+ audio_segments.append(audio)
645
+ full_audio = np.concatenate(audio_segments) if len(audio_segments) > 1 else audio_segments[0]
646
+ sf.write(file_path, full_audio, 24000)
647
+ print(f"TTS audio saved to {file_path} (Kokoro)")
648
+ return file_path
649
+ except Exception as e:
650
+ print(f"Error with Kokoro TTS: {e}")
651
  try:
652
+ print("Falling back to gTTS...")
653
+ from gtts import gTTS
654
+ tts = gTTS(text=text, lang='en')
655
+ mp3_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.mp3")
656
+ tts.save(mp3_path)
657
+ audio = AudioSegment.from_mp3(mp3_path)
658
+ audio.export(file_path, format="wav")
659
+ os.remove(mp3_path)
660
+ print(f"Fallback TTS saved to {file_path} (gTTS)")
661
  return file_path
662
+ except Exception as fallback_error:
663
+ print(f"Both TTS methods failed: {fallback_error}")
664
+ # Generate silent audio as fallback
665
+ return generate_silent_audio(duration=max(3, len(text.split()) * 0.5))
666
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
667
 
668
  def apply_kenburns_effect(clip, target_resolution, effect_type=None):
669
  """
670
  Apply a smooth Ken Burns effect with a single, clean movement pattern.
671
  """
672
+ # Unpack target resolution
673
  target_w, target_h = target_resolution
674
+
675
+ # First ensure the image fills the target resolution by resizing it properly
676
+ # Calculate the aspect ratio of the original clip
677
  clip_aspect = clip.w / clip.h
678
  target_aspect = target_w / target_h
679
 
680
+ # Resize to fill the entire frame
681
+ if clip_aspect > target_aspect: # Image is wider than the target frame
682
  new_height = target_h
683
  new_width = int(new_height * clip_aspect)
684
+ else: # Image is taller than the target frame
685
  new_width = target_w
686
  new_height = int(new_width / clip_aspect)
687
 
688
+ # Resize the clip to ensure it fills the target resolution
689
  clip = clip.resize(newsize=(new_width, new_height))
690
 
691
+ # Now apply the base_scale for Ken Burns effect
692
  base_scale = 1.15
693
  new_width = int(new_width * base_scale)
694
  new_height = int(new_height * base_scale)
695
  clip = clip.resize(newsize=(new_width, new_height))
696
 
697
+ # Rest of your function stays the same...
698
+ # Calculate maximum offsets for panning
699
  max_offset_x = new_width - target_w
700
  max_offset_y = new_height - target_h
701
 
702
+ # Define available effects
703
  available_effects = ["zoom-in", "zoom-out", "pan-left", "pan-right", "up-left"]
704
+
705
+ # Randomly select an effect if not specified
706
  if effect_type is None or effect_type == "random":
707
  effect_type = random.choice(available_effects)
708
 
709
+ # Set effect parameters
710
  if effect_type == "zoom-in":
711
  start_zoom = 0.9
712
  end_zoom = 1.1
 
735
  else:
736
  raise ValueError(f"Unsupported effect_type: {effect_type}")
737
 
738
+ # Define the transformation function for each frame
739
  def transform_frame(get_frame, t):
740
  frame = get_frame(t)
741
+ # Smooth interpolation using cosine easing
742
  ratio = t / clip.duration if clip.duration > 0 else 0
743
+ ratio = 0.5 - 0.5 * math.cos(math.pi * ratio) # Ease in/out
744
 
745
+ # Calculate current zoom and crop size
746
  current_zoom = start_zoom + (end_zoom - start_zoom) * ratio
747
  crop_w = int(target_w / current_zoom)
748
  crop_h = int(target_h / current_zoom)
749
 
750
+ # Calculate current center with floating-point precision
751
  current_center_x = start_center[0] + (end_center[0] - start_center[0]) * ratio
752
  current_center_y = start_center[1] + (end_center[1] - start_center[1]) * ratio
753
 
754
+ # Clamp center to keep the crop within image bounds
755
  min_center_x = crop_w / 2
756
  max_center_x = new_width - crop_w / 2
757
  min_center_y = crop_h / 2
 
759
  current_center_x = max(min_center_x, min(current_center_x, max_center_x))
760
  current_center_y = max(min_center_y, min(current_center_y, max_center_y))
761
 
762
+ # Crop with subpixel accuracy and resize
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
763
  cropped_frame = cv2.getRectSubPix(frame, (crop_w, crop_h), (current_center_x, current_center_y))
764
  resized_frame = cv2.resize(cropped_frame, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)
765
 
766
  return resized_frame
767
 
768
+ # Apply the transformation to the clip
769
  return clip.fl(transform_frame)
770
 
771
+
772
+
773
+
774
+
775
+ # Define target resolution (e.g., 1920x1080 for Full HD)
776
+
777
def resize_to_fill(clip, target_resolution):
    """
    Scale a clip so it covers the target frame, then centre-crop the overflow.

    Args:
        clip: Object exposing ``w``, ``h``, ``resize`` and ``crop``
            (a MoviePy VideoClip or ImageClip).
        target_resolution: ``(width, height)`` of the frame to fill.

    Returns:
        The resized and cropped clip.
    """
    width_goal, height_goal = target_resolution
    source_ratio = clip.w / clip.h
    wanted_ratio = width_goal / height_goal

    if source_ratio <= wanted_ratio:
        # Same shape or taller than the frame: match the width, trim top and bottom.
        fitted = clip.resize(width=width_goal)
        trim = (fitted.h - height_goal) / 2
        return fitted.crop(x1=0, x2=fitted.w, y1=trim, y2=fitted.h - trim)

    # Wider than the frame: match the height, trim left and right.
    fitted = clip.resize(height=height_goal)
    trim = (fitted.w - width_goal) / 2
    return fitted.crop(x1=trim, x2=fitted.w - trim, y1=0, y2=fitted.h)
804
 
805
+ # Function to find any MP3 file in the directory tree
806
def find_mp3_files():
    """
    Search the current directory tree for MP3 files.

    The walk is sorted so that the "first" file is the same on every run and
    every filesystem, and the extension match is case-insensitive so files
    such as ``TRACK.MP3`` are found too. Every match is logged.

    Returns:
        Path to the first MP3 file found, or None if none is found.
    """
    mp3_files = []

    for root, dirs, files in os.walk('.'):
        dirs.sort()  # in-place sort steers os.walk into a deterministic order
        for file_name in sorted(files):
            if file_name.lower().endswith('.mp3'):
                mp3_path = os.path.join(root, file_name)
                mp3_files.append(mp3_path)
                print(f"Found MP3 file: {mp3_path}")

    return mp3_files[0] if mp3_files else None
825
 
826
+ # Update the add_background_music function to use the first found MP3
827
  def add_background_music(final_video, bg_music_volume=0.08):
828
  """Add background music to the final video using any MP3 file found in directories."""
829
  try:
830
+ # Find MP3 files
831
  bg_music_path = find_mp3_files()
832
+
833
  if bg_music_path and os.path.exists(bg_music_path):
834
  print(f"Adding background music from: {bg_music_path}")
835
+ # Load the background music
836
  bg_music = AudioFileClip(bg_music_path)
837
+
838
+ # Loop the music if shorter than the video
839
  if bg_music.duration < final_video.duration:
840
  loops_needed = math.ceil(final_video.duration / bg_music.duration)
841
  bg_segments = [bg_music] * loops_needed
842
  bg_music = concatenate_audioclips(bg_segments)
843
+
844
+ # Trim if longer than the video
845
  bg_music = bg_music.subclip(0, final_video.duration)
846
+
847
+ # Set volume to 8%
848
  bg_music = bg_music.volumex(bg_music_volume)
849
 
850
+ # Mix the background music with the existing audio
851
  video_audio = final_video.audio
852
+ mixed_audio = CompositeAudioClip([video_audio, bg_music])
 
 
 
853
 
854
+ # Set the mixed audio to the final video
855
  final_video = final_video.set_audio(mixed_audio)
856
  print("Background music added successfully")
857
  else:
858
+ print("No MP3 files found, skipping background music")
859
 
860
  return final_video
861
 
 
864
  print("Continuing without background music")
865
  return final_video
866
 
867
+ # Clip assembly: create_clip builds one narrated segment and positions its subtitles.
869
+
870
+ def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, narration_text=None, segment_index=0):
871
  """Create a video clip with synchronized subtitles and properly timed narration."""
872
  try:
873
  print(f"Creating clip #{segment_index} with asset_type: {asset_type}, media_path: {media_path}")
 
876
  print("Missing media or TTS file")
877
  return None
878
 
879
+ # Load and process audio
880
  audio_clip = AudioFileClip(tts_path).audio_fadeout(0.2)
881
  audio_duration = audio_clip.duration
882
  target_duration = audio_duration + 0.2
883
 
884
+ # Process visual asset
885
  if asset_type == "video":
886
  clip = VideoFileClip(media_path)
887
+ clip = resize_to_fill(clip, TARGET_RESOLUTION)
888
  if clip.duration < target_duration:
889
  clip = clip.loop(duration=target_duration)
890
  else:
 
898
  img.close()
899
 
900
  clip = ImageClip(media_path).set_duration(target_duration)
901
+ clip = apply_kenburns_effect(clip, TARGET_RESOLUTION)
902
  clip = clip.fadein(0.3).fadeout(0.3)
903
  else:
904
  return None
905
 
906
+ # Add subtitles with shorter chunks (4-5 words per line) and position at 2/3 of screen height
907
+ if narration_text and CAPTION_COLOR != "transparent":
908
  try:
909
+ # Create SRT-style subtitles
910
  words = narration_text.split()
911
  chunks = []
912
  current_chunk = []
913
+
914
+ # Create chunks of 4-5 words for better readability
915
  for word in words:
916
  current_chunk.append(word)
917
+ if len(current_chunk) >= 5: # Maximum 5 words per chunk
918
  chunks.append(' '.join(current_chunk))
919
  current_chunk = []
920
+
921
+ # Add the last chunk if it exists
922
  if current_chunk:
923
  chunks.append(' '.join(current_chunk))
924
 
925
+ # Calculate timing for each chunk based on audio duration
926
+ chunk_duration = audio_duration / len(chunks)
927
  subtitle_clips = []
928
 
929
+ # Position subtitles 70% of the way down the frame (lower third) rather than at the very bottom
930
+ subtitle_y_position = int(TARGET_RESOLUTION[1] * 0.70)
931
 
932
  for i, chunk_text in enumerate(chunks):
933
  start_time = i * chunk_duration
934
  end_time = (i + 1) * chunk_duration
935
 
936
+ # Create text clip for this chunk
937
  txt_clip = TextClip(
938
  chunk_text,
939
  fontsize=45,
940
  font='Arial-Bold',
941
+ color=CAPTION_COLOR,
942
  bg_color='rgba(0, 0, 0, 0.25)',
943
  method='caption',
944
  align='center',
945
+ stroke_width=2, # Light shadow
946
+ stroke_color=CAPTION_COLOR, # Must be set if stroke is used
947
+ size=(TARGET_RESOLUTION[0] * 0.8, None) # Width for better readability
948
  ).set_start(start_time).set_end(end_time)
949
 
950
+
951
  txt_clip = txt_clip.set_position(('center', subtitle_y_position))
952
  subtitle_clips.append(txt_clip)
953
 
954
+ # Combine all subtitle chunks with the main clip
955
  clip = CompositeVideoClip([clip] + subtitle_clips)
956
 
957
  except Exception as sub_error:
958
  print(f"Subtitle error: {sub_error}")
959
+ # Fallback to a simpler method if the chunk approach fails
960
  txt_clip = TextClip(
961
  narration_text,
962
  fontsize=28,
963
+ color=CAPTION_COLOR,
964
  align='center',
965
+ size=(TARGET_RESOLUTION[0] * 0.7, None)
966
+ ).set_position(('center', int(TARGET_RESOLUTION[2] / 3))).set_duration(clip.duration)
967
  clip = CompositeVideoClip([clip, txt_clip])
968
 
969
  clip = clip.set_audio(audio_clip)
 
974
  print(f"Error in create_clip: {str(e)}")
975
  return None
976
 
 
 
 
 
 
 
977
 
 
 
 
 
 
 
 
978
 
979
def fix_imagemagick_policy():
    """
    Relax ImageMagick's ``policy.xml`` so MoviePy's TextClip can render text.

    The first existing policy file from a list of known locations is backed
    up to ``<path>.bak`` and rewritten in place with ``sudo sed``.

    SECURITY NOTE(review): the first rule turns every ``rights="none"`` entry
    into ``rights="read|write"``, which disables all of ImageMagick's coder
    and path restrictions. Only run this in a throw-away environment.

    Returns:
        True only if the backup and every edit command exited with status 0;
        False if no policy file exists, a command failed, or an error occurred.
    """
    import subprocess  # local import so the block needs no new file-level import

    try:
        print("Attempting to fix ImageMagick security policies...")

        # Known policy.xml locations, checked in order.
        policy_paths = [
            "/etc/ImageMagick-6/policy.xml",
            "/etc/ImageMagick-7/policy.xml",
            "/etc/ImageMagick/policy.xml",
            "/usr/local/etc/ImageMagick-7/policy.xml"
        ]

        found_policy = next((path for path in policy_paths if os.path.exists(path)), None)
        if not found_policy:
            print("No policy.xml found. Using alternative subtitle method.")
            return False

        print(f"Modifying policy file at {found_policy}")

        backup_path = f"{found_policy}.bak"

        # Raw strings: sed must receive the backslashes exactly as written.
        # The second rule's pattern consumes the tag's closing ">", so its
        # replacement re-adds "/>" to keep the XML well-formed.
        sed_scripts = [
            r's/rights="none"/rights="read|write"/g',
            r's/<policy domain="path" pattern="@\*"[^>]*>/<policy domain="path" pattern="@*" rights="read|write"\/>/g',
            r's/<policy domain="coder" rights="none" pattern="PDF"[^>]*>/<!-- <policy domain="coder" rights="none" pattern="PDF"> -->/g',
        ]

        # Argument lists (no shell) avoid quoting problems with the path.
        commands = [["sudo", "cp", found_policy, backup_path]]
        commands += [["sudo", "sed", "-i", script, found_policy] for script in sed_scripts]

        for command in commands:
            status = subprocess.run(command, check=False).returncode
            if status != 0:
                # Previously the exit status was ignored and success was
                # reported even when sudo/sed had failed.
                print(f"Error fixing policies: '{' '.join(command[:3])}' exited with status {status}")
                return False

        print("ImageMagick policies updated successfully.")
        return True

    except Exception as e:
        # Includes OSError when sudo/sed/cp are not installed.
        print(f"Error fixing policies: {e}")
        return False
1019
+
1020
 
 
 
 
 
1021
 
1022
+
1023
+
1024
+
1025
+ # ---------------- Main Function ---------------- #
1026
+
1027
+ import os
1028
+ import shutil
1029
+ import webbrowser
1030
+
1031
def main_fixed():
    """
    Command-line entry point: turn a typed concept into ``OUTPUT_VIDEO_FILENAME``.

    Steps: relax the ImageMagick policy, read the concept from stdin, generate
    and parse the script, build one clip per (media, narration) pair,
    concatenate, add background music, export, and try to open the result.

    Returns None in every case; failures are reported on stdout. When stdin is
    not interactive the function returns immediately so the rest of the module
    can still run.
    """
    # Fix ImageMagick policy first.
    fix_success = fix_imagemagick_policy()
    if not fix_success:
        print("Will use alternative methods if needed")

    os.makedirs(TEMP_FOLDER, exist_ok=True)

    try:
        user_input = input("Enter your video concept: ")
    except (EOFError, OSError):
        # No terminal attached (e.g. a hosted deployment): nothing to do here.
        print("No interactive input available; skipping command-line generation.")
        return

    clips = []
    final_video = None
    try:
        # generate_script talks to OpenRouter (see OPENROUTER_* settings).
        print("Generating script from OpenRouter API...")
        script = generate_script(user_input)
        if not script:
            print("Failed to generate script.")
            return
        print("Generated Script:\n", script)
        elements = parse_script(script)
        if not elements:
            print("Failed to parse script into elements.")
            return
        print(f"Parsed {len(elements)//2} script segments.")

        # Elements alternate media, tts, media, tts, ...; a trailing unpaired
        # element is dropped by zip.
        paired_elements = list(zip(elements[0::2], elements[1::2]))

        # Honour the clip limit requested at start-up, as run_pipeline does.
        if CLIPS_AMMOUNT > 0:
            paired_elements = paired_elements[:CLIPS_AMMOUNT]

        if not paired_elements:
            print("No valid script segments found. Exiting.")
            return

        for idx, (media_elem, tts_elem) in enumerate(paired_elements):
            print(f"\nProcessing segment {idx+1}/{len(paired_elements)} with prompt: '{media_elem['prompt']}'")

            # NOTE(review): run_pipeline calls generate_media(prompt) only;
            # confirm its signature accepts these keyword arguments.
            media_asset = generate_media(media_elem['prompt'], current_index=idx, total_segments=len(paired_elements))
            if not media_asset:
                print(f"Skipping segment {idx+1} due to missing media asset.")
                continue

            tts_path = generate_tts(tts_elem['text'], tts_elem['voice'])
            if not tts_path:
                print(f"Skipping segment {idx+1} due to TTS generation failure.")
                continue

            clip = create_clip(
                media_path=media_asset['path'],
                asset_type=media_asset['asset_type'],
                tts_path=tts_path,
                duration=tts_elem['duration'],
                effects=media_elem.get('effects', 'fade-in'),
                narration_text=tts_elem['text'],
                segment_index=idx
            )
            if clip:
                clips.append(clip)
            else:
                print(f"Clip creation failed for segment {idx+1}.")

        if not clips:
            print("No clips were successfully created. Exiting.")
            return

        print("\nConcatenating clips...")
        final_video = concatenate_videoclips(clips, method="compose")

        # Add background music before exporting.
        final_video = add_background_music(final_video, bg_music_volume=0.08)

        print(f"Exporting final video to {OUTPUT_VIDEO_FILENAME} with veryfast rendering preset...")
        final_video.write_videofile(OUTPUT_VIDEO_FILENAME, codec='libx264', fps=24, preset='veryfast')
        print(f"Final video saved as {OUTPUT_VIDEO_FILENAME}")

        # Auto-open the video file (acts as an auto-download/view feature).
        try:
            webbrowser.open(OUTPUT_VIDEO_FILENAME)
            print("Video is being opened for download/viewing.")
        except Exception as e:
            print("Failed to auto-download/open the video:", e)
    finally:
        # Release readers/file handles held by the clips.
        for clip in clips:
            try:
                clip.close()
            except Exception as close_error:
                print(f"Failed to close clip: {close_error}")
        if final_video is not None:
            try:
                final_video.close()
            except Exception as close_error:
                print(f"Failed to close final video: {close_error}")

        # Clean up on every exit path, then recreate the empty folder so
        # later code that writes into TEMP_FOLDER keeps working.
        print("Cleaning up temporary files...")
        shutil.rmtree(TEMP_FOLDER, ignore_errors=True)
        os.makedirs(TEMP_FOLDER, exist_ok=True)
        print("Temporary files removed.")
1124
+
1125
+ if __name__ == "__main__":
1126
+ main_fixed()
1127
+ # --------------- GRADIO INTERFACE --------------- #
1128
+
1129
def run_pipeline(user_input, resolution, clip_amount, caption_enabled):
    """
    Gradio callback: generate a documentary video for ``user_input``.

    Args:
        user_input: Topic or script text.
        resolution: Dropdown value; anything starting with "Full" selects
            1920x1080, everything else selects 1080x1920.
        clip_amount: Maximum number of segments to render.
        caption_enabled: Whether subtitles are drawn.

    Returns:
        Path of the rendered MP4 file.

    Raises:
        gr.Error: If any stage fails; the message carries the cause.
    """
    global TARGET_RESOLUTION, CAPTION_COLOR

    # The dropdown supplies "Full (1920x1080)" / "Short (1080x1920)", so an
    # exact comparison with "Full" never matched and output was always portrait.
    wants_landscape = str(resolution).startswith("Full")
    TARGET_RESOLUTION = (1920, 1080) if wants_landscape else (1080, 1920)
    CAPTION_COLOR = "white" if caption_enabled else "transparent"
    clip_limit = int(clip_amount)

    # The folder may have been removed by an earlier run.
    os.makedirs(TEMP_FOLDER, exist_ok=True)

    clips = []
    final_video = None
    try:
        script = generate_script(user_input)
        if not script:
            raise gr.Error("Failed to generate script")

        elements = parse_script(script)
        if not elements:
            raise gr.Error("Failed to parse script")

        # Elements alternate media, tts, media, tts, ...; keep complete pairs
        # only and cap them at the requested amount.
        paired_elements = list(zip(elements[0::2], elements[1::2]))[:clip_limit]

        for idx, (media_elem, tts_elem) in enumerate(paired_elements):
            media_asset = generate_media(media_elem['prompt'])
            if not media_asset:
                continue

            tts_path = generate_tts(tts_elem['text'], tts_elem['voice'])
            if not tts_path:
                continue

            clip = create_clip(
                media_path=media_asset['path'],
                asset_type=media_asset['asset_type'],
                tts_path=tts_path,
                narration_text=tts_elem['text'],
                segment_index=idx
            )
            if clip:
                clips.append(clip)

        if not clips:
            raise gr.Error("Failed to create any video clips")

        final_video = concatenate_videoclips(clips, method="compose")
        final_video = add_background_music(final_video)

        output_path = "final_video.mp4"
        final_video.write_videofile(output_path, codec='libx264', fps=24, preset='veryfast', threads=4)

        return output_path

    except Exception as e:
        raise gr.Error(f"Error: {str(e)}") from e

    finally:
        # Release clip resources on success and on failure.
        for clip in clips:
            try:
                clip.close()
            except Exception as close_error:
                print(f"Failed to close clip: {close_error}")
        if final_video is not None:
            try:
                final_video.close()
            except Exception as close_error:
                print(f"Failed to close final video: {close_error}")

        # Empty the temp folder without letting a cleanup problem replace the
        # real error (missing folder, sub-directory, locked file).
        if os.path.isdir(TEMP_FOLDER):
            for entry in os.listdir(TEMP_FOLDER):
                entry_path = os.path.join(TEMP_FOLDER, entry)
                try:
                    if os.path.isdir(entry_path):
                        shutil.rmtree(entry_path)
                    else:
                        os.remove(entry_path)
                except OSError as cleanup_error:
                    print(f"Could not remove {entry_path}: {cleanup_error}")
+
1203
+ # --------------- GRADIO UI --------------- #
1204
+
1205
+ with gr.Blocks(title="AI Documentary Maker", theme=gr.themes.Default()) as demo:
1206
+ gr.Markdown("""
1207
+ # 🎥 AI Documentary Maker
1208
+ Create viral documentary-style videos with AI!
1209
+ """)
1210
+
 
 
1211
  with gr.Row():
1212
+ with gr.Column(scale=1):
1213
+ user_input = gr.Textbox(label="Documentary Topic",
1214
+ placeholder="Enter your topic or script...")
1215
+ resolution = gr.Dropdown(["Full (1920x1080)", "Short (1080x1920)"],
1216
+ label="Video Format", value="Short (1080x1920)")
1217
+ clip_amount = gr.Slider(1, 10, value=5, step=1,
1218
+ label="Number of Clips")
1219
+ caption_enabled = gr.Checkbox(label="Enable Subtitles", value=True)
1220
+ generate_btn = gr.Button("Generate Video", variant="primary")
1221
+
1222
+ with gr.Column(scale=2):
1223
+ output_video = gr.Video(label="Generated Video", format="mp4")
1224
+
1225
+ generate_btn.click(
1226
+ fn=run_pipeline,
1227
+ inputs=[user_input, resolution, clip_amount, caption_enabled],
1228
+ outputs=output_video
 
1229
  )
1230
 
1231
  if __name__ == "__main__":
1232
+ demo.launch(server_name="0.0.0.0", server_port=7860)