Spaces:

testdeep123
/

video

Build error

App Files Files Community

testdeep123 committed on Apr 11

Commit

45faa4c

verified ·

1 Parent(s): 97ed4cf

Update app.py

Browse files

Files changed (1) hide show

app.py +564 -229

app.py CHANGED Viewed

@@ -1,5 +1,45 @@
-# app.py
 import gradio as gr
 import soundfile as sf
 import torch
 from IPython.display import display, Audio, HTML
@@ -14,12 +54,13 @@ import math
 import os, requests, io, time, re, random
 from moviepy.editor import (
     VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
-    CompositeVideoClip, TextClip, CompositeAudioClip
 )
 import moviepy.video.fx.all as vfx
 import moviepy.config as mpy_config
 from pydub import AudioSegment
 from pydub.generators import Sine
 from PIL import Image, ImageDraw, ImageFont
 import numpy as np
 from bs4 import BeautifulSoup
@@ -27,42 +68,54 @@ import base64
 from urllib.parse import quote
 import pysrt
 from gtts import gTTS
-import shutil
-import webbrowser # This won't work in HF Spaces, but keep for local testing reference
-import sys
-# --- API Keys (Embed directly as requested for private space) ---
 PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
 OPENROUTER_API_KEY = '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'
 OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
-# --- Global Configuration ---
 TEMP_FOLDER = "temp_video_processing"
 OUTPUT_VIDEO_FILENAME = "final_video.mp4"
-USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
-# --- Initialize Kokoro TTS pipeline (if compatible with HF CPU) ---
-# NOTE: Kokoro might be too resource-intensive for free CPU spaces.
-# If it causes issues, you might need to remove it and rely solely on gTTS.
-try:
-    from kokoro import KPipeline
-    pipeline = KPipeline(lang_code='a')  # Use voice 'af_heart' for American English
-    print("Kokoro TTS pipeline initialized.")
-except ImportError:
-    print("Kokoro library not found or failed to initialize. Will rely on gTTS.")
-    pipeline = None
-except Exception as e:
-    print(f"Error initializing Kokoro: {e}. Will rely on gTTS.")
-    pipeline = None
-# Ensure ImageMagick binary is set (might need adjustment for HF Spaces)
-# This path might differ in the HF environment.
-# If this causes issues, you might need to use a Dockerfile or configure the space differently.
-mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
-# --- Helper Functions (from original script) ---
 def generate_script(user_input):
     """Generate documentary script with proper OpenRouter handling."""
     headers = {
@@ -166,7 +219,9 @@ Now here is the Topic/scrip: {user_input}
             timeout=30
         )
-        print("API Response Status:", response.status_code)
         if response.status_code == 200:
             response_data = response.json()
             if 'choices' in response_data and len(response_data['choices']) > 0:
@@ -197,25 +252,28 @@ def parse_script(script_text):
         for line in script_text.splitlines():
             line = line.strip()
             if line.startswith("[") and "]" in line:
                 bracket_start = line.find("[")
                 bracket_end = line.find("]", bracket_start)
                 if bracket_start != -1 and bracket_end != -1:
                     if current_title is not None:
                         sections[current_title] = current_text.strip()
                     current_title = line[bracket_start+1:bracket_end]
-                    current_text = line[bracket_end+1:].strip()
             elif current_title:
                 current_text += line + " "
         if current_title:
             sections[current_title] = current_text.strip()
         elements = []
         for title, narration in sections.items():
-            if not title or not narration:
                 continue
             media_element = {"type": "media", "prompt": title, "effects": "fade-in"}
             words = narration.split()
             duration = max(3, len(words) * 0.5)
             tts_element = {"type": "tts", "text": narration, "voice": "en", "duration": duration}
@@ -231,10 +289,13 @@ def search_pexels_videos(query, pexels_api_key):
     """Search for a video on Pexels by query and return a random HD video."""
     headers = {'Authorization': pexels_api_key}
     base_url = "https://api.pexels.com/videos/search"
-    num_pages = 3
     videos_per_page = 15
     max_retries = 3
     retry_delay = 1
     search_query = query
     all_videos = []
@@ -247,35 +308,50 @@ def search_pexels_videos(query, pexels_api_key):
                 if response.status_code == 200:
                     data = response.json()
                     videos = data.get("videos", [])
                     if not videos:
-                        break
                     for video in videos:
                         video_files = video.get("video_files", [])
                         for file in video_files:
-                            if file.get("quality") == "hd":
                                 all_videos.append(file.get("link"))
-                                break
-                    break
-                elif response.status_code == 429:
                     time.sleep(retry_delay)
                     retry_delay *= 2
                 else:
                     if attempt < max_retries - 1:
                         time.sleep(retry_delay)
                         retry_delay *= 2
                     else:
                         break
             except requests.exceptions.RequestException as e:
                 if attempt < max_retries - 1:
                     time.sleep(retry_delay)
                     retry_delay *= 2
                 else:
                     break
     if all_videos:
         random_video = random.choice(all_videos)
         return random_video
     else:
         return None
 def search_pexels_images(query, pexels_api_key):
@@ -283,33 +359,46 @@ def search_pexels_images(query, pexels_api_key):
     headers = {'Authorization': pexels_api_key}
     url = "https://api.pexels.com/v1/search"
     params = {"query": query, "per_page": 5, "orientation": "landscape"}
     max_retries = 3
     retry_delay = 1
     for attempt in range(max_retries):
         try:
             response = requests.get(url, headers=headers, params=params, timeout=10)
             if response.status_code == 200:
                 data = response.json()
                 photos = data.get("photos", [])
                 if photos:
                     photo = random.choice(photos[:min(5, len(photos))])
                     img_url = photo.get("src", {}).get("original")
                     return img_url
                 else:
                     return None
-            elif response.status_code == 429:
                 time.sleep(retry_delay)
                 retry_delay *= 2
             else:
                 if attempt < max_retries - 1:
                     time.sleep(retry_delay)
                     retry_delay *= 2
         except requests.exceptions.RequestException as e:
             if attempt < max_retries - 1:
                 time.sleep(retry_delay)
                 retry_delay *= 2
     return None
 def search_google_images(query):
@@ -319,15 +408,21 @@ def search_google_images(query):
         headers = {"User-Agent": USER_AGENT}
         response = requests.get(search_url, headers=headers, timeout=10)
         soup = BeautifulSoup(response.text, "html.parser")
         img_tags = soup.find_all("img")
         image_urls = []
         for img in img_tags:
             src = img.get("src", "")
             if src.startswith("http") and "gstatic" not in src:
                 image_urls.append(src)
         if image_urls:
             return random.choice(image_urls[:5]) if len(image_urls) >= 5 else image_urls[0]
         else:
             return None
     except Exception as e:
         print(f"Error in Google Images search: {e}")
@@ -337,24 +432,33 @@ def download_image(image_url, filename):
     """Download an image from a URL to a local file with enhanced error handling."""
     try:
         headers = {"User-Agent": USER_AGENT}
         response = requests.get(image_url, headers=headers, stream=True, timeout=15)
         response.raise_for_status()
         with open(filename, 'wb') as f:
             for chunk in response.iter_content(chunk_size=8192):
                 f.write(chunk)
         try:
             img = Image.open(filename)
-            img.verify()
             img = Image.open(filename)
             if img.mode != 'RGB':
                 img = img.convert('RGB')
                 img.save(filename)
             return filename
         except Exception as e_validate:
             print(f"Downloaded file is not a valid image: {e_validate}")
             if os.path.exists(filename):
                 os.remove(filename)
             return None
     except requests.exceptions.RequestException as e_download:
         print(f"Image download error: {e_download}")
         if os.path.exists(filename):
@@ -374,6 +478,7 @@ def download_video(video_url, filename):
         with open(filename, 'wb') as f:
             for chunk in response.iter_content(chunk_size=8192):
                 f.write(chunk)
         return filename
     except Exception as e:
         print(f"Video download error: {e}")
@@ -387,44 +492,55 @@ def generate_media(prompt, user_image=None, current_index=0, total_segments=1):
     For news-related queries, use Google Images.
     Returns a dict: {'path': <file_path>, 'asset_type': 'video' or 'image'}.
     """
     safe_prompt = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')
     if "news" in prompt.lower():
         image_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}_news.jpg")
         image_url = search_google_images(prompt)
         if image_url:
             downloaded_image = download_image(image_url, image_file)
             if downloaded_image:
                 return {"path": downloaded_image, "asset_type": "image"}
         else:
             print(f"Google Images search failed for prompt: {prompt}")
-    if random.random() < 0.25:
         video_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}_video.mp4")
         video_url = search_pexels_videos(prompt, PEXELS_API_KEY)
         if video_url:
             downloaded_video = download_video(video_url, video_file)
             if downloaded_video:
                 return {"path": downloaded_video, "asset_type": "video"}
         else:
             print(f"Pexels video search failed for prompt: {prompt}")
     image_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}.jpg")
     image_url = search_pexels_images(prompt, PEXELS_API_KEY)
     if image_url:
         downloaded_image = download_image(image_url, image_file)
         if downloaded_image:
             return {"path": downloaded_image, "asset_type": "image"}
         else:
             print(f"Pexels image download failed for prompt: {prompt}")
     fallback_terms = ["nature", "people", "landscape", "technology", "business"]
     for term in fallback_terms:
         fallback_file = os.path.join(TEMP_FOLDER, f"fallback_{term}.jpg")
         fallback_url = search_pexels_images(term, PEXELS_API_KEY)
         if fallback_url:
             downloaded_fallback = download_image(fallback_url, fallback_file)
             if downloaded_fallback:
                 return {"path": downloaded_fallback, "asset_type": "image"}
             else:
                 print(f"Fallback image download failed for term: {term}")
@@ -434,10 +550,73 @@ def generate_media(prompt, user_image=None, current_index=0, total_segments=1):
     print(f"Failed to generate visual asset for prompt: {prompt}")
     return None
 def generate_silent_audio(duration, sample_rate=24000):
     """
     Generate a silent WAV audio file lasting 'duration' seconds.
     """
     num_samples = int(duration * sample_rate)
     silence = np.zeros(num_samples, dtype=np.float32)
     silent_path = os.path.join(TEMP_FOLDER, f"silent_{int(time.time())}.wav")
@@ -457,67 +636,77 @@ def generate_tts(text, voice):
         print(f"Using cached TTS for text '{text[:10]}...'")
         return file_path
-    # Try Kokoro first
-    if pipeline:
         try:
-            kokoro_voice = 'af_heart' if voice == 'en' else voice
-            generator = pipeline(text, voice=kokoro_voice, speed=0.9, split_pattern=r'\n+')
-            audio_segments = []
-            for i, (gs, ps, audio) in enumerate(generator):
-                audio_segments.append(audio)
-            full_audio = np.concatenate(audio_segments) if len(audio_segments) > 1 else audio_segments[0]
-            sf.write(file_path, full_audio, 24000)
-            print(f"TTS audio saved to {file_path} (Kokoro)")
             return file_path
-        except Exception as e:
-            print(f"Error with Kokoro TTS: {e}")
-    # Fallback to gTTS
-    try:
-        print("Falling back to gTTS...")
-        tts = gTTS(text=text, lang='en')
-        mp3_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.mp3")
-        tts.save(mp3_path)
-        audio = AudioSegment.from_mp3(mp3_path)
-        audio.export(file_path, format="wav")
-        if os.path.exists(mp3_path):
-            os.remove(mp3_path)
-        print(f"Fallback TTS saved to {file_path} (gTTS)")
-        return file_path
-    except Exception as fallback_error:
-        print(f"Both TTS methods failed: {fallback_error}")
-        # Generate silent audio as fallback
-        return generate_silent_audio(duration=max(3, len(text.split()) * 0.5))
 def apply_kenburns_effect(clip, target_resolution, effect_type=None):
     """
     Apply a smooth Ken Burns effect with a single, clean movement pattern.
     """
     target_w, target_h = target_resolution
     clip_aspect = clip.w / clip.h
     target_aspect = target_w / target_h
-    if clip_aspect > target_aspect:
         new_height = target_h
         new_width = int(new_height * clip_aspect)
-    else:
         new_width = target_w
         new_height = int(new_width / clip_aspect)
     clip = clip.resize(newsize=(new_width, new_height))
     base_scale = 1.15
     new_width = int(new_width * base_scale)
     new_height = int(new_height * base_scale)
     clip = clip.resize(newsize=(new_width, new_height))
     max_offset_x = new_width - target_w
     max_offset_y = new_height - target_h
     available_effects = ["zoom-in", "zoom-out", "pan-left", "pan-right", "up-left"]
     if effect_type is None or effect_type == "random":
         effect_type = random.choice(available_effects)
     if effect_type == "zoom-in":
         start_zoom = 0.9
         end_zoom = 1.1
@@ -546,18 +735,23 @@ def apply_kenburns_effect(clip, target_resolution, effect_type=None):
     else:
         raise ValueError(f"Unsupported effect_type: {effect_type}")
     def transform_frame(get_frame, t):
         frame = get_frame(t)
         ratio = t / clip.duration if clip.duration > 0 else 0
-        ratio = 0.5 - 0.5 * math.cos(math.pi * ratio)
         current_zoom = start_zoom + (end_zoom - start_zoom) * ratio
         crop_w = int(target_w / current_zoom)
         crop_h = int(target_h / current_zoom)
         current_center_x = start_center[0] + (end_center[0] - start_center[0]) * ratio
         current_center_y = start_center[1] + (end_center[1] - start_center[1]) * ratio
         min_center_x = crop_w / 2
         max_center_x = new_width - crop_w / 2
         min_center_y = crop_h / 2
@@ -565,96 +759,103 @@ def apply_kenburns_effect(clip, target_resolution, effect_type=None):
         current_center_x = max(min_center_x, min(current_center_x, max_center_x))
         current_center_y = max(min_center_y, min(current_center_y, max_center_y))
-        # Ensure frame is numpy array and correct type for cv2
-        if isinstance(frame, Image.Image):
-            frame = np.array(frame)
-        if frame.dtype != np.uint8:
-             frame = frame.astype(np.uint8)
-        # Ensure frame has 3 channels for color images
-        if len(frame.shape) == 2: # Grayscale
-            frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR)
-        elif frame.shape[2] == 4: # RGBA
-            frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2BGR)
-        # Ensure crop dimensions are valid
-        crop_w = max(1, crop_w)
-        crop_h = max(1, crop_h)
-        # Ensure center point is valid for cv2.getRectSubPix
-        current_center_x = max(0, min(current_center_x, frame.shape[1] - 1))
-        current_center_y = max(0, min(current_center_y, frame.shape[0] - 1))
         cropped_frame = cv2.getRectSubPix(frame, (crop_w, crop_h), (current_center_x, current_center_y))
         resized_frame = cv2.resize(cropped_frame, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)
         return resized_frame
     return clip.fl(transform_frame)
 def resize_to_fill(clip, target_resolution):
     """
     Resize and crop a clip to fill the target resolution while maintaining aspect ratio.

     The clip is scaled along one dimension so that it matches the target on
     that dimension, then the overflow on the other dimension is cropped away
     in equal amounts from both sides (a centered crop).

     Args:
         clip: Object exposing ``w``, ``h``, ``resize`` and ``crop``
             (presumably a MoviePy clip -- confirm against callers).
         target_resolution: ``(width, height)`` pair of the desired output size.

     Returns:
         The resized and cropped clip returned by ``clip.crop``.
     """
     target_w, target_h = target_resolution
     clip_aspect = clip.w / clip.h
     target_aspect = target_w / target_h
     if clip_aspect > target_aspect:
         # Clip is proportionally wider than the target: match the height,
         # then trim the surplus width equally from the left and right edges.
         clip = clip.resize(height=target_h)
         crop_amount = (clip.w - target_w) / 2
         clip = clip.crop(x1=crop_amount, x2=clip.w - crop_amount, y1=0, y2=clip.h)
     else:
         # Clip is proportionally taller than (or equal to) the target: match
         # the width, then trim the surplus height equally from top and bottom.
         clip = clip.resize(width=target_w)
         crop_amount = (clip.h - target_h) / 2
         clip = clip.crop(x1=0, x2=clip.w, y1=crop_amount, y2=clip.h - crop_amount)
     return clip
 def find_mp3_files():
     """
-    Search for a background music file (e.g., background_music.mp3) in the current directory.
     Returns the path to the first MP3 file found or None if none is found.
     """
-    # Look for a specific file name first
-    bg_music_path = "background_music.mp3"
-    if os.path.exists(bg_music_path):
-        print(f"Found background music file: {bg_music_path}")
-        return bg_music_path
-    # If not found, search for any mp3 in the current directory
-    for file in os.listdir('.'):
-        if file.endswith('.mp3'):
-            print(f"Found background music file: {file}")
-            return file
-    print("No background music file found in the current directory.")
     return None
 def add_background_music(final_video, bg_music_volume=0.08):
     """Add background music to the final video using any MP3 file found in directories."""
     try:
         bg_music_path = find_mp3_files()
         if bg_music_path and os.path.exists(bg_music_path):
             print(f"Adding background music from: {bg_music_path}")
             bg_music = AudioFileClip(bg_music_path)
             if bg_music.duration < final_video.duration:
                 loops_needed = math.ceil(final_video.duration / bg_music.duration)
                 bg_segments = [bg_music] * loops_needed
                 bg_music = concatenate_audioclips(bg_segments)
             bg_music = bg_music.subclip(0, final_video.duration)
             bg_music = bg_music.volumex(bg_music_volume)
             video_audio = final_video.audio
-            if video_audio:
-                mixed_audio = CompositeAudioClip([video_audio, bg_music])
-            else:
-                mixed_audio = bg_music
             final_video = final_video.set_audio(mixed_audio)
             print("Background music added successfully")
         else:
-            print("No suitable background music file found, skipping background music")
         return final_video
@@ -663,7 +864,10 @@ def add_background_music(final_video, bg_music_volume=0.08):
         print("Continuing without background music")
         return final_video
-def create_clip(media_path, asset_type, tts_path, duration, effects, narration_text, segment_index, target_resolution, caption_color):
     """Create a video clip with synchronized subtitles and properly timed narration."""
     try:
         print(f"Creating clip #{segment_index} with asset_type: {asset_type}, media_path: {media_path}")
@@ -672,13 +876,15 @@ def create_clip(media_path, asset_type, tts_path, duration, effects, narration_t
             print("Missing media or TTS file")
             return None
         audio_clip = AudioFileClip(tts_path).audio_fadeout(0.2)
         audio_duration = audio_clip.duration
         target_duration = audio_duration + 0.2
         if asset_type == "video":
             clip = VideoFileClip(media_path)
-            clip = resize_to_fill(clip, target_resolution)
             if clip.duration < target_duration:
                 clip = clip.loop(duration=target_duration)
             else:
@@ -692,62 +898,72 @@ def create_clip(media_path, asset_type, tts_path, duration, effects, narration_t
                 img.close()
             clip = ImageClip(media_path).set_duration(target_duration)
-            clip = apply_kenburns_effect(clip, target_resolution)
             clip = clip.fadein(0.3).fadeout(0.3)
         else:
             return None
-        # Add subtitles
-        if narration_text and caption_color != "transparent":
             try:
                 words = narration_text.split()
                 chunks = []
                 current_chunk = []
                 for word in words:
                     current_chunk.append(word)
-                    if len(current_chunk) >= 5:
                         chunks.append(' '.join(current_chunk))
                         current_chunk = []
                 if current_chunk:
                     chunks.append(' '.join(current_chunk))
-                chunk_duration = audio_duration / len(chunks) if len(chunks) > 0 else audio_duration
                 subtitle_clips = []
-                subtitle_y_position = int(target_resolution[1] * 0.70)
                 for i, chunk_text in enumerate(chunks):
                     start_time = i * chunk_duration
                     end_time = (i + 1) * chunk_duration
                     txt_clip = TextClip(
                         chunk_text,
                         fontsize=45,
                         font='Arial-Bold',
-                        color=caption_color,
                         bg_color='rgba(0, 0, 0, 0.25)',
                         method='caption',
                         align='center',
-                        stroke_width=2,
-                        stroke_color=caption_color,
-                        size=(target_resolution[0] * 0.8, None)
                     ).set_start(start_time).set_end(end_time)
                     txt_clip = txt_clip.set_position(('center', subtitle_y_position))
                     subtitle_clips.append(txt_clip)
                 clip = CompositeVideoClip([clip] + subtitle_clips)
             except Exception as sub_error:
                 print(f"Subtitle error: {sub_error}")
-                # Fallback to simpler subtitle if needed
                 txt_clip = TextClip(
                     narration_text,
                     fontsize=28,
-                    color=caption_color,
                     align='center',
-                    size=(target_resolution[0] * 0.7, None)
-                ).set_position(('center', int(target_resolution[1] / 3))).set_duration(clip.duration)
                 clip = CompositeVideoClip([clip, txt_clip])
         clip = clip.set_audio(audio_clip)
@@ -758,140 +974,259 @@ def create_clip(media_path, asset_type, tts_path, duration, effects, narration_t
         print(f"Error in create_clip: {str(e)}")
         return None
-# --- Main Video Generation Function ---
-def generate_video(user_input, resolution_choice, caption_option, caption_color_input, progress=gr.Progress()):
-    """
-    Main function to orchestrate video generation based on Gradio inputs.
-    """
-    progress(0, desc="Starting video generation...")
-    # Set target resolution
-    if resolution_choice == "Full":
-        target_resolution = (1920, 1080)
-    elif resolution_choice == "Short":
-        target_resolution = (1080, 1920)
-    else:
-        return "Invalid resolution choice.", None
-    # Set caption color
-    caption_color = caption_color_input if caption_option == "Yes" else "transparent"
-    # Create temporary folder
-    if os.path.exists(TEMP_FOLDER):
-        shutil.rmtree(TEMP_FOLDER)
-    os.makedirs(TEMP_FOLDER)
     try:
-        progress(0.1, desc="Generating script...")
         script = generate_script(user_input)
         if not script:
-            return "Failed to generate script.", None
-        print("Generated Script:\n", script)
-        progress(0.2, desc="Parsing script...")
         elements = parse_script(script)
         if not elements:
-            return "Failed to parse script into elements.", None
-        print(f"Parsed {len(elements)//2} script segments.")
         paired_elements = []
         for i in range(0, len(elements), 2):
-            if i + 1 < len(elements):
                 paired_elements.append((elements[i], elements[i+1]))
-        if not paired_elements:
-            return "No valid script segments found.", None
         clips = []
-        total_segments = len(paired_elements)
         for idx, (media_elem, tts_elem) in enumerate(paired_elements):
-            progress(0.3 + (idx * 0.5 / total_segments), desc=f"Processing segment {idx+1}/{total_segments}...")
-            print(f"\nProcessing segment {idx+1}/{total_segments} with prompt: '{media_elem['prompt']}'")
-            media_asset = generate_media(media_elem['prompt'], current_index=idx, total_segments=total_segments)
             if not media_asset:
-                print(f"Skipping segment {idx+1} due to missing media asset.")
                 continue
             tts_path = generate_tts(tts_elem['text'], tts_elem['voice'])
             if not tts_path:
-                print(f"Skipping segment {idx+1} due to TTS generation failure.")
                 continue
             clip = create_clip(
                 media_path=media_asset['path'],
                 asset_type=media_asset['asset_type'],
                 tts_path=tts_path,
-                duration=tts_elem['duration'],
-                effects=media_elem.get('effects', 'fade-in'),
                 narration_text=tts_elem['text'],
-                segment_index=idx,
-                target_resolution=target_resolution,
-                caption_color=caption_color
             )
             if clip:
                 clips.append(clip)
-            else:
-                print(f"Clip creation failed for segment {idx+1}.")
         if not clips:
-            return "No clips were successfully created.", None
-        progress(0.8, desc="Concatenating clips...")
         final_video = concatenate_videoclips(clips, method="compose")
-        progress(0.9, desc="Adding background music...")
-        final_video = add_background_music(final_video, bg_music_volume=0.08)
-        progress(0.95, desc="Exporting final video...")
-        output_path = os.path.join(TEMP_FOLDER, OUTPUT_VIDEO_FILENAME)
-        final_video.write_videofile(output_path, codec='libx264', fps=24, preset='veryfast')
-        progress(1.0, desc="Video generation complete.")
-        return "Video generated successfully!", output_path
     except Exception as e:
-        print(f"An error occurred: {e}")
-        return f"An error occurred: {e}", None
     finally:
-        # Clean up temporary files
-        if os.path.exists(TEMP_FOLDER):
-            shutil.rmtree(TEMP_FOLDER)
-        print("Temporary files removed.")
-# --- Gradio Interface ---
-with gr.Blocks() as demo:
-    gr.Markdown("# AI Documentary Video Generator")
-    gr.Markdown("Enter a concept, choose settings, and generate a short documentary video.")
-    with gr.Row():
-        user_input = gr.Textbox(label="Video Concept", placeholder="e.g., The secret life of squirrels")
-        resolution_choice = gr.Radio(["Full", "Short"], label="Target Resolution", value="Short")
     with gr.Row():
-        caption_option = gr.Radio(["Yes", "No"], label="Add Captions?", value="Yes")
-        caption_color_input = gr.Textbox(label="Caption Color (e.g., white, yellow)", value="white", visible=True)
-    # Update caption color visibility based on caption option
-    caption_option.change(
-        lambda x: gr.update(visible=x == "Yes"),
-        inputs=caption_option,
-        outputs=caption_color_input
-    )
-    generate_button = gr.Button("Generate Video")
-    status_output = gr.Textbox(label="Status", interactive=False)
-    video_output = gr.Video(label="Generated Video")
-    generate_button.click(
-        fn=generate_video,
-        inputs=[user_input, resolution_choice, caption_option, caption_color_input],
-        outputs=[status_output, video_output]
     )
 if __name__ == "__main__":
-    # Ensure TEMP_FOLDER exists before starting
-    if not os.path.exists(TEMP_FOLDER):
-        os.makedirs(TEMP_FOLDER)
-    demo.launch()

+import os
 import gradio as gr
+from kokoro import KPipeline
+from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip, CompositeVideoClip, TextClip, concatenate_videoclips
+from PIL import Image
+import tempfile
+import random
+import cv2
+import math
+import requests
+import re
+import time
+import pydub
+import pysrt
+from gtts import gTTS
+import numpy as np
+import soundfile as sf
+# Initialize Kokoro TTS pipeline
+pipeline = KPipeline(lang_code='a')
+# API Constants
+PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
+OPENROUTER_API_KEY = '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'
+OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
+TEMP_FOLDER = "temp_video_processing"
+os.makedirs(TEMP_FOLDER, exist_ok=True)
+# --------------- ORIGINAL FUNCTIONS (UNMODIFIED) --------------- #
+!pip install transformers==4.49.0
+!pip install moviepy gTTS requests pydub pillow
+!pip cache purge
+!apt-get install imagemagick -y
+!pip install kokoro>=0.3.4 soundfile
+!apt-get-qq -y install espeak-ng > /dev/null 2>&1
+!pip install pysrt
+from kokoro import KPipeline
+from IPython.display import display, Audio
 import soundfile as sf
 import torch
 from IPython.display import display, Audio, HTML
 import os, requests, io, time, re, random
 from moviepy.editor import (
     VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
+    CompositeVideoClip, TextClip
 )
 import moviepy.video.fx.all as vfx
 import moviepy.config as mpy_config
 from pydub import AudioSegment
 from pydub.generators import Sine
+from google.colab import files
 from PIL import Image, ImageDraw, ImageFont
 import numpy as np
 from bs4 import BeautifulSoup
 from urllib.parse import quote
 import pysrt
 from gtts import gTTS
+# Initialize Kokoro TTS pipeline (using American English, adjust lang_code as needed)
+pipeline = KPipeline(lang_code='a')  # Use voice 'af_heart' for American English
+# Ensure ImageMagick binary is set (to avoid "unset" errors)
+mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
+# ---------------- Global Configuration ---------------- #
+TARGET_RESOLUTION_INPUT = input("RESOLUTION:")
+CLIPS_AMMOUNT = int(input("Clips:"))
+CAPTION_OPTION = input("Caption Yes/No:")
+if CAPTION_OPTION == "Yes":
+  CAPTION_COLOR = "white"
+else:
+  CAPTION_COLOR = "transparent"
+if TARGET_RESOLUTION_INPUT == "Full":
+  TARGET_RESOLUTION = (1920, 1080)
+elif TARGET_RESOLUTION_INPUT == "Short":
+  TARGET_RESOLUTION = (1080, 1920)
 PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
 OPENROUTER_API_KEY = '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'
 OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
 TEMP_FOLDER = "temp_video_processing"
 OUTPUT_VIDEO_FILENAME = "final_video.mp4"
+USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+# Create temporary folder if it doesn't exist
+if not os.path.exists(TEMP_FOLDER):
+    os.makedirs(TEMP_FOLDER)
+# ---------------- Helper Functions ---------------- #
 def generate_script(user_input):
     """Generate documentary script with proper OpenRouter handling."""
     headers = {
             timeout=30
         )
+        # Debug: Print raw response
+        print("API Response:", response.text)
         if response.status_code == 200:
             response_data = response.json()
             if 'choices' in response_data and len(response_data['choices']) > 0:
         for line in script_text.splitlines():
             line = line.strip()
             if line.startswith("[") and "]" in line:
+                # Extract content between first [ and first ]
                 bracket_start = line.find("[")
                 bracket_end = line.find("]", bracket_start)
                 if bracket_start != -1 and bracket_end != -1:
                     if current_title is not None:
                         sections[current_title] = current_text.strip()
                     current_title = line[bracket_start+1:bracket_end]
+                    current_text = line[bracket_end+1:].strip()  # Get any text after the bracket
             elif current_title:
                 current_text += line + " "
+        # Don't forget the last section
         if current_title:
             sections[current_title] = current_text.strip()
         elements = []
         for title, narration in sections.items():
+            if not title or not narration:  # Skip empty sections
                 continue
             media_element = {"type": "media", "prompt": title, "effects": "fade-in"}
+            # Duration: at least 3 sec, or 0.5 sec per word
             words = narration.split()
             duration = max(3, len(words) * 0.5)
             tts_element = {"type": "tts", "text": narration, "voice": "en", "duration": duration}
     """Search for a video on Pexels by query and return a random HD video."""
     headers = {'Authorization': pexels_api_key}
     base_url = "https://api.pexels.com/videos/search"
+    num_pages = 3  # Search through first 3 pages
     videos_per_page = 15
+    # Add retry mechanism
     max_retries = 3
     retry_delay = 1
     search_query = query
     all_videos = []
                 if response.status_code == 200:
                     data = response.json()
                     videos = data.get("videos", [])
                     if not videos:
+                        print(f"No videos found on page {page}.")
+                        break  # No videos on this page, move to the next
+                    # Collect all HD videos
                     for video in videos:
                         video_files = video.get("video_files", [])
                         for file in video_files:
+                            if file.get("quality") == "hd":  # Only collect HD quality
                                 all_videos.append(file.get("link"))
+                                break  # Only add one file per video
+                    break  # Success, exit retry loop
+                elif response.status_code == 429:  # Rate limit
+                    print(f"Rate limit hit (attempt {attempt+1}/{max_retries}). Retrying in {retry_delay} seconds...")
                     time.sleep(retry_delay)
                     retry_delay *= 2
                 else:
+                    print(f"Error fetching videos: {response.status_code} {response.text}")
                     if attempt < max_retries - 1:
+                        print(f"Retrying in {retry_delay} seconds...")
                         time.sleep(retry_delay)
                         retry_delay *= 2
                     else:
                         break
             except requests.exceptions.RequestException as e:
+                print(f"Request exception: {e}")
                 if attempt < max_retries - 1:
+                    print(f"Retrying in {retry_delay} seconds...")
                     time.sleep(retry_delay)
                     retry_delay *= 2
                 else:
                     break
     if all_videos:
+        # Select a random video from the collected ones
         random_video = random.choice(all_videos)
+        print(f"Selected random video from {len(all_videos)} HD videos")
         return random_video
     else:
+        print("No suitable videos found after searching all pages.")
         return None
 def search_pexels_images(query, pexels_api_key):
     headers = {'Authorization': pexels_api_key}
     url = "https://api.pexels.com/v1/search"
     params = {"query": query, "per_page": 5, "orientation": "landscape"}
+    # Add retry mechanism
     max_retries = 3
     retry_delay = 1
     for attempt in range(max_retries):
         try:
             response = requests.get(url, headers=headers, params=params, timeout=10)
             if response.status_code == 200:
                 data = response.json()
                 photos = data.get("photos", [])
                 if photos:
+                    # Get a random image from the first 5 results (if available)
                     photo = random.choice(photos[:min(5, len(photos))])
                     img_url = photo.get("src", {}).get("original")
                     return img_url
                 else:
+                    print(f"No images found for query: {query}")
                     return None
+            elif response.status_code == 429:  # Rate limit
+                print(f"Rate limit hit (attempt {attempt+1}/{max_retries}). Retrying in {retry_delay} seconds...")
                 time.sleep(retry_delay)
                 retry_delay *= 2
             else:
+                print(f"Error fetching images: {response.status_code} {response.text}")
                 if attempt < max_retries - 1:
+                    print(f"Retrying in {retry_delay} seconds...")
                     time.sleep(retry_delay)
                     retry_delay *= 2
         except requests.exceptions.RequestException as e:
+            print(f"Request exception: {e}")
             if attempt < max_retries - 1:
+                print(f"Retrying in {retry_delay} seconds...")
                 time.sleep(retry_delay)
                 retry_delay *= 2
+    print(f"No Pexels images found for query: {query} after all attempts")
     return None
 def search_google_images(query):
         headers = {"User-Agent": USER_AGENT}
         response = requests.get(search_url, headers=headers, timeout=10)
         soup = BeautifulSoup(response.text, "html.parser")
+        # Look for image elements or JSON data containing image URLs
         img_tags = soup.find_all("img")
+        # Filter out small images (icons, etc.)
         image_urls = []
         for img in img_tags:
             src = img.get("src", "")
             if src.startswith("http") and "gstatic" not in src:
                 image_urls.append(src)
         if image_urls:
             return random.choice(image_urls[:5]) if len(image_urls) >= 5 else image_urls[0]
         else:
+            print(f"No Google Images found for query: {query}")
             return None
     except Exception as e:
         print(f"Error in Google Images search: {e}")
     """Download an image from a URL to a local file with enhanced error handling."""
     try:
         headers = {"User-Agent": USER_AGENT}
+        print(f"Downloading image from: {image_url} to {filename}")
         response = requests.get(image_url, headers=headers, stream=True, timeout=15)
         response.raise_for_status()
         with open(filename, 'wb') as f:
             for chunk in response.iter_content(chunk_size=8192):
                 f.write(chunk)
+        print(f"Image downloaded successfully to: {filename}")
+        # Validate the image
         try:
             img = Image.open(filename)
+            img.verify()  # Verify it's an actual image
+            # If it passes verification, reopen and convert to RGB if needed
             img = Image.open(filename)
             if img.mode != 'RGB':
                 img = img.convert('RGB')
                 img.save(filename)
+            print(f"Image validated and processed: {filename}")
             return filename
         except Exception as e_validate:
             print(f"Downloaded file is not a valid image: {e_validate}")
             if os.path.exists(filename):
                 os.remove(filename)
             return None
     except requests.exceptions.RequestException as e_download:
         print(f"Image download error: {e_download}")
         if os.path.exists(filename):
         with open(filename, 'wb') as f:
             for chunk in response.iter_content(chunk_size=8192):
                 f.write(chunk)
+        print(f"Video downloaded successfully to: {filename}")
         return filename
     except Exception as e:
         print(f"Video download error: {e}")
     For news-related queries, use Google Images.
     Returns a dict: {'path': <file_path>, 'asset_type': 'video' or 'image'}.
     """
+    # Make prompt URL-safe and a valid filename
     safe_prompt = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')
+    # For news-related queries, use Google Images
     if "news" in prompt.lower():
+        print(f"News-related query detected: {prompt}. Using Google Images...")
         image_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}_news.jpg")
         image_url = search_google_images(prompt)
         if image_url:
             downloaded_image = download_image(image_url, image_file)
             if downloaded_image:
+                print(f"News image saved to {downloaded_image}")
                 return {"path": downloaded_image, "asset_type": "image"}
         else:
             print(f"Google Images search failed for prompt: {prompt}")
+    # Try video first (with reduced frequency for better media mix)
+    if random.random() < 0.25:  # 25% chance of using a video
         video_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}_video.mp4")
         video_url = search_pexels_videos(prompt, PEXELS_API_KEY)
         if video_url:
             downloaded_video = download_video(video_url, video_file)
             if downloaded_video:
+                print(f"Video asset saved to {downloaded_video}")
                 return {"path": downloaded_video, "asset_type": "video"}
         else:
             print(f"Pexels video search failed for prompt: {prompt}")
+    # Fallback or primary choice for images
     image_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}.jpg")
     image_url = search_pexels_images(prompt, PEXELS_API_KEY)
     if image_url:
         downloaded_image = download_image(image_url, image_file)
         if downloaded_image:
+            print(f"Image asset saved to {downloaded_image}")
             return {"path": downloaded_image, "asset_type": "image"}
         else:
             print(f"Pexels image download failed for prompt: {prompt}")
+    # Last resort: try to find a stock image for common terms
     fallback_terms = ["nature", "people", "landscape", "technology", "business"]
     for term in fallback_terms:
+        print(f"Trying fallback image search with term: {term}")
         fallback_file = os.path.join(TEMP_FOLDER, f"fallback_{term}.jpg")
         fallback_url = search_pexels_images(term, PEXELS_API_KEY)
         if fallback_url:
             downloaded_fallback = download_image(fallback_url, fallback_file)
             if downloaded_fallback:
+                print(f"Fallback image saved to {downloaded_fallback}")
                 return {"path": downloaded_fallback, "asset_type": "image"}
             else:
                 print(f"Fallback image download failed for term: {term}")
     print(f"Failed to generate visual asset for prompt: {prompt}")
     return None
+# ---------------- TTS Function Using Kokoro ---------------- #
def generate_tts(text, voice):
    """
    Generate narration audio for ``text`` and return the path of a WAV file.

    The module-level Kokoro ``pipeline`` is tried first. If it raises, gTTS is
    used instead and its MP3 output is converted to WAV with pydub. Results
    are cached in TEMP_FOLDER and reused on later calls.

    Args:
        text: Narration text to synthesize.
        voice: Voice identifier; 'en' is mapped to Kokoro's 'af_heart'.

    Returns:
        Path to the WAV file, or None if both engines fail.
    """
    import hashlib  # local import: only needed to build the cache key

    # The cache key has to identify the whole request. The file name used to be
    # derived from the first 10 characters only, so two narrations that shared
    # an opening silently reused the first one's audio.
    safe_text = re.sub(r'[^\w\s-]', '', text[:10]).strip().replace(' ', '_')
    digest = hashlib.sha1(f"{voice}\x00{text}".encode("utf-8")).hexdigest()[:12]
    base_name = f"tts_{safe_text}_{digest}"
    file_path = os.path.join(TEMP_FOLDER, f"{base_name}.wav")

    # If file already exists, reuse it
    if os.path.exists(file_path):
        print(f"Using cached TTS for text '{text[:10]}...'")
        return file_path

    try:
        # Map voice 'en' to kokoro's American English voice.
        kokoro_voice = 'af_heart' if voice == 'en' else voice
        generator = pipeline(text, voice=kokoro_voice, speed=0.8, split_pattern=r'\n+')
        audio_segments = [audio for _gs, _ps, audio in generator]
        if not audio_segments:
            # Make the empty-result case explicit instead of an opaque IndexError.
            raise ValueError("Kokoro produced no audio segments")
        if len(audio_segments) > 1:
            full_audio = np.concatenate(audio_segments)
        else:
            full_audio = audio_segments[0]
        sf.write(file_path, full_audio, 24000)  # Save as WAV at 24000 Hz
        print(f"TTS audio saved to {file_path} (Kokoro)")
        return file_path
    except Exception as e:
        print(f"Error generating TTS with Kokoro: {e}")

    # Fallback to gTTS if Kokoro fails
    mp3_path = os.path.join(TEMP_FOLDER, f"{base_name}.mp3")
    try:
        print("Falling back to gTTS...")
        tts = gTTS(text=text, lang='en')
        tts.save(mp3_path)
        # Convert mp3 to wav using pydub
        audio = AudioSegment.from_mp3(mp3_path)
        audio.export(file_path, format="wav")
        print(f"Fallback TTS saved to {file_path} (gTTS)")
        return file_path
    except Exception as fallback_error:
        print(f"Fallback TTS with gTTS also failed: {fallback_error}")
        # Do not leave a partial WAV behind: it would be served from the cache.
        if os.path.exists(file_path):
            os.remove(file_path)
        return None
    finally:
        # Remove the temporary MP3 on success and on failure alike, so stray
        # files do not accumulate in TEMP_FOLDER.
        if os.path.exists(mp3_path):
            os.remove(mp3_path)
 def generate_silent_audio(duration, sample_rate=24000):
     """
     Generate a silent WAV audio file lasting 'duration' seconds.
     """
+    import numpy as np
+    import soundfile as sf
     num_samples = int(duration * sample_rate)
     silence = np.zeros(num_samples, dtype=np.float32)
     silent_path = os.path.join(TEMP_FOLDER, f"silent_{int(time.time())}.wav")
         print(f"Using cached TTS for text '{text[:10]}...'")
         return file_path
+    try:
+        kokoro_voice = 'af_heart' if voice == 'en' else voice
+        generator = pipeline(text, voice=kokoro_voice, speed=0.9, split_pattern=r'\n+')
+        audio_segments = []
+        for i, (gs, ps, audio) in enumerate(generator):
+            audio_segments.append(audio)
+        full_audio = np.concatenate(audio_segments) if len(audio_segments) > 1 else audio_segments[0]
+        sf.write(file_path, full_audio, 24000)
+        print(f"TTS audio saved to {file_path} (Kokoro)")
+        return file_path
+    except Exception as e:
+        print(f"Error with Kokoro TTS: {e}")
         try:
+            print("Falling back to gTTS...")
+            from gtts import gTTS
+            tts = gTTS(text=text, lang='en')
+            mp3_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.mp3")
+            tts.save(mp3_path)
+            audio = AudioSegment.from_mp3(mp3_path)
+            audio.export(file_path, format="wav")
+            os.remove(mp3_path)
+            print(f"Fallback TTS saved to {file_path} (gTTS)")
             return file_path
+        except Exception as fallback_error:
+            print(f"Both TTS methods failed: {fallback_error}")
+            # Generate silent audio as fallback
+            return generate_silent_audio(duration=max(3, len(text.split()) * 0.5))
 def apply_kenburns_effect(clip, target_resolution, effect_type=None):
     """
     Apply a smooth Ken Burns effect with a single, clean movement pattern.
     """
+    # Unpack target resolution
     target_w, target_h = target_resolution
+    # First ensure the image fills the target resolution by resizing it properly
+    # Calculate the aspect ratio of the original clip
     clip_aspect = clip.w / clip.h
     target_aspect = target_w / target_h
+    # Resize to fill the entire frame
+    if clip_aspect > target_aspect:  # Image is wider than the target frame
         new_height = target_h
         new_width = int(new_height * clip_aspect)
+    else:  # Image is taller than the target frame
         new_width = target_w
         new_height = int(new_width / clip_aspect)
+    # Resize the clip to ensure it fills the target resolution
     clip = clip.resize(newsize=(new_width, new_height))
+    # Now apply the base_scale for Ken Burns effect
     base_scale = 1.15
     new_width = int(new_width * base_scale)
     new_height = int(new_height * base_scale)
     clip = clip.resize(newsize=(new_width, new_height))
+    # Rest of your function stays the same...
+    # Calculate maximum offsets for panning
     max_offset_x = new_width - target_w
     max_offset_y = new_height - target_h
+    # Define available effects
     available_effects = ["zoom-in", "zoom-out", "pan-left", "pan-right", "up-left"]
+    # Randomly select an effect if not specified
     if effect_type is None or effect_type == "random":
         effect_type = random.choice(available_effects)
+    # Set effect parameters
     if effect_type == "zoom-in":
         start_zoom = 0.9
         end_zoom = 1.1
     else:
         raise ValueError(f"Unsupported effect_type: {effect_type}")
+    # Define the transformation function for each frame
     def transform_frame(get_frame, t):
         frame = get_frame(t)
+        # Smooth interpolation using cosine easing
         ratio = t / clip.duration if clip.duration > 0 else 0
+        ratio = 0.5 - 0.5 * math.cos(math.pi * ratio)  # Ease in/out
+        # Calculate current zoom and crop size
         current_zoom = start_zoom + (end_zoom - start_zoom) * ratio
         crop_w = int(target_w / current_zoom)
         crop_h = int(target_h / current_zoom)
+        # Calculate current center with floating-point precision
         current_center_x = start_center[0] + (end_center[0] - start_center[0]) * ratio
         current_center_y = start_center[1] + (end_center[1] - start_center[1]) * ratio
+        # Clamp center to keep the crop within image bounds
         min_center_x = crop_w / 2
         max_center_x = new_width - crop_w / 2
         min_center_y = crop_h / 2
         current_center_x = max(min_center_x, min(current_center_x, max_center_x))
         current_center_y = max(min_center_y, min(current_center_y, max_center_y))
+        # Crop with subpixel accuracy and resize
         cropped_frame = cv2.getRectSubPix(frame, (crop_w, crop_h), (current_center_x, current_center_y))
         resized_frame = cv2.resize(cropped_frame, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)
         return resized_frame
+    # Apply the transformation to the clip
     return clip.fl(transform_frame)
+# Fit a clip to the target resolution (e.g., 1920x1080 for Full HD)
 def resize_to_fill(clip, target_resolution):
     """
     Resize and crop a clip to fill the target resolution while maintaining aspect ratio.
+    Args:
+        clip: MoviePy VideoClip or ImageClip object.
+        target_resolution: Tuple of (width, height) to resize to.
+    Returns:
+        Resized and cropped clip.
     """
     target_w, target_h = target_resolution
     clip_aspect = clip.w / clip.h
     target_aspect = target_w / target_h
     if clip_aspect > target_aspect:
+        # Clip is wider than target; resize to target height and crop width
         clip = clip.resize(height=target_h)
         crop_amount = (clip.w - target_w) / 2
         clip = clip.crop(x1=crop_amount, x2=clip.w - crop_amount, y1=0, y2=clip.h)
     else:
+        # Clip is taller than target; resize to target width and crop height
         clip = clip.resize(width=target_w)
         crop_amount = (clip.h - target_h) / 2
         clip = clip.crop(x1=0, x2=clip.w, y1=crop_amount, y2=clip.h - crop_amount)
     return clip
+# Function to find any MP3 file in the directory tree
 def find_mp3_files():
     """
+    Search for any MP3 files in the current directory and subdirectories.
     Returns the path to the first MP3 file found or None if none is found.
     """
+    mp3_files = []
+    # Walk through all directories starting from current directory
+    for root, dirs, files in os.walk('.'):
+        for file in files:
+            if file.endswith('.mp3'):
+                mp3_path = os.path.join(root, file)
+                mp3_files.append(mp3_path)
+                print(f"Found MP3 file: {mp3_path}")
+    if mp3_files:
+        # Return the first MP3 file found
+        return mp3_files[0]
     return None
+# Background music: add_background_music mixes in the first MP3 returned by find_mp3_files
 def add_background_music(final_video, bg_music_volume=0.08):
     """Add background music to the final video using any MP3 file found in directories."""
     try:
+        # Find MP3 files
         bg_music_path = find_mp3_files()
         if bg_music_path and os.path.exists(bg_music_path):
             print(f"Adding background music from: {bg_music_path}")
+            # Load the background music
             bg_music = AudioFileClip(bg_music_path)
+            # Loop the music if shorter than the video
             if bg_music.duration < final_video.duration:
                 loops_needed = math.ceil(final_video.duration / bg_music.duration)
                 bg_segments = [bg_music] * loops_needed
                 bg_music = concatenate_audioclips(bg_segments)
+            # Trim if longer than the video
             bg_music = bg_music.subclip(0, final_video.duration)
+            # Set volume to 8%
             bg_music = bg_music.volumex(bg_music_volume)
+            # Mix the background music with the existing audio
             video_audio = final_video.audio
+            mixed_audio = CompositeAudioClip([video_audio, bg_music])
+            # Set the mixed audio to the final video
             final_video = final_video.set_audio(mixed_audio)
             print("Background music added successfully")
         else:
+            print("No MP3 files found, skipping background music")
         return final_video
         print("Continuing without background music")
         return final_video
+# create_clip builds one narrated segment from a media asset and its TTS audio.
+# Subtitle chunking and positioning are handled in the subtitle section of create_clip.
+def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, narration_text=None, segment_index=0):
     """Create a video clip with synchronized subtitles and properly timed narration."""
     try:
         print(f"Creating clip #{segment_index} with asset_type: {asset_type}, media_path: {media_path}")
             print("Missing media or TTS file")
             return None
+        # Load and process audio
         audio_clip = AudioFileClip(tts_path).audio_fadeout(0.2)
         audio_duration = audio_clip.duration
         target_duration = audio_duration + 0.2
+        # Process visual asset
         if asset_type == "video":
             clip = VideoFileClip(media_path)
+            clip = resize_to_fill(clip, TARGET_RESOLUTION)
             if clip.duration < target_duration:
                 clip = clip.loop(duration=target_duration)
             else:
                 img.close()
             clip = ImageClip(media_path).set_duration(target_duration)
+            clip = apply_kenburns_effect(clip, TARGET_RESOLUTION)
             clip = clip.fadein(0.3).fadeout(0.3)
         else:
             return None
+        # Add subtitles with shorter chunks (4-5 words per line) and position at 2/3 of screen height
+        if narration_text and CAPTION_COLOR != "transparent":
             try:
+                # Create SRT-style subtitles
                 words = narration_text.split()
                 chunks = []
                 current_chunk = []
+                # Create chunks of 4-5 words for better readability
                 for word in words:
                     current_chunk.append(word)
+                    if len(current_chunk) >= 5:  # Maximum 5 words per chunk
                         chunks.append(' '.join(current_chunk))
                         current_chunk = []
+                # Add the last chunk if it exists
                 if current_chunk:
                     chunks.append(' '.join(current_chunk))
+                # Calculate timing for each chunk based on audio duration
+                chunk_duration = audio_duration / len(chunks)
                 subtitle_clips = []
+                # Position subtitles at 70% of the screen height (lower third) instead of the very bottom
+                subtitle_y_position = int(TARGET_RESOLUTION[1] * 0.70)
                 for i, chunk_text in enumerate(chunks):
                     start_time = i * chunk_duration
                     end_time = (i + 1) * chunk_duration
+                    # Create text clip for this chunk
                     txt_clip = TextClip(
                         chunk_text,
                         fontsize=45,
                         font='Arial-Bold',
+                        color=CAPTION_COLOR,
                         bg_color='rgba(0, 0, 0, 0.25)',
                         method='caption',
                         align='center',
+                        stroke_width=2,  # Light shadow
+                        stroke_color=CAPTION_COLOR,  # Must be set if stroke is used
+                        size=(TARGET_RESOLUTION[0] * 0.8, None)  # Width for better readability
                     ).set_start(start_time).set_end(end_time)
                     txt_clip = txt_clip.set_position(('center', subtitle_y_position))
                     subtitle_clips.append(txt_clip)
+                # Combine all subtitle chunks with the main clip
                 clip = CompositeVideoClip([clip] + subtitle_clips)
             except Exception as sub_error:
                 print(f"Subtitle error: {sub_error}")
+                # Fallback to a simpler method if the chunk approach fails
                 txt_clip = TextClip(
                     narration_text,
                     fontsize=28,
+                    color=CAPTION_COLOR,
                     align='center',
+                    size=(TARGET_RESOLUTION[0] * 0.7, None)
+                ).set_position(('center', int(TARGET_RESOLUTION[2] / 3))).set_duration(clip.duration)
                 clip = CompositeVideoClip([clip, txt_clip])
         clip = clip.set_audio(audio_clip)
         print(f"Error in create_clip: {str(e)}")
         return None
def fix_imagemagick_policy():
    """Relax ImageMagick's policy.xml so restricted operations are permitted.

    The first existing policy file among the known locations is backed up to
    ``<path>.bak`` and then rewritten in place with ``sudo sed``.

    Returns:
        True only when the backup and every rewrite command exit with status 0.
        False when no policy file exists or any command fails or cannot run.
    """
    import subprocess  # local import: commands are executed without a shell

    print("Attempting to fix ImageMagick security policies...")

    # Find all possible policy.xml locations
    policy_paths = [
        "/etc/ImageMagick-6/policy.xml",
        "/etc/ImageMagick-7/policy.xml",
        "/etc/ImageMagick/policy.xml",
        "/usr/local/etc/ImageMagick-7/policy.xml"
    ]
    found_policy = next((path for path in policy_paths if os.path.exists(path)), None)
    if not found_policy:
        print("No policy.xml found. Using alternative subtitle method.")
        return False

    print(f"Modifying policy file at {found_policy}")

    sed_expressions = [
        # NOTE(security): grants read|write to EVERY policy that was "none",
        # not only the ones needed for text rendering.
        r's/rights="none"/rights="read|write"/g',
        # The replacement now ends with "/>". It used to drop the tag
        # terminator, which left malformed XML wherever the pattern matched.
        r's/<policy domain="path" pattern="@\*"[^>]*>/<policy domain="path" pattern="@*" rights="read|write"\/>/g',
        r's/<policy domain="coder" rights="none" pattern="PDF"[^>]*>/<!-- <policy domain="coder" rights="none" pattern="PDF"> -->/g',
    ]

    # Backup first; the policy file is only touched if the backup succeeded.
    commands = [["sudo", "cp", found_policy, f"{found_policy}.bak"]]
    commands.extend(["sudo", "sed", "-i", expr, found_policy] for expr in sed_expressions)

    try:
        for command in commands:
            completed = subprocess.run(command, check=False)
            if completed.returncode != 0:
                # Previously exit codes were ignored and success was reported
                # even when sudo/sed failed.
                print(f"Error fixing policies: '{' '.join(command[:2])}' exited with status {completed.returncode}")
                return False
    except OSError as e:
        # Raised when the executable (e.g. sudo) is not available.
        print(f"Error fixing policies: {e}")
        return False

    print("ImageMagick policies updated successfully.")
    return True
+# ---------------- Main Function ---------------- #
+import os
+import shutil
+import webbrowser
def main_fixed():
    """Run the interactive pipeline: concept -> script -> clips -> final video.

    Steps, in order: patch the ImageMagick policy, make sure TEMP_FOLDER
    exists, prompt for a concept, generate and parse the script, build one
    clip per (media, tts) pair, concatenate the clips, mix in background music,
    export to OUTPUT_VIDEO_FILENAME, try to open the result, and delete
    TEMP_FOLDER. Returns None; it stops early (without cleanup) when the
    script, the parsed elements, the pairs or the clips come back empty.
    """
    # Fix ImageMagick policy first
    if not fix_imagemagick_policy():
        print("Will use alternative methods if needed")

    # Create temp folder if not exists
    if not os.path.exists(TEMP_FOLDER):
        os.makedirs(TEMP_FOLDER)

    concept = input("Enter your video concept: ")
    print("Generating script from Gemini API...")
    script = generate_script(concept)
    if not script:
        print("Failed to generate script.")
        return
    print("Generated Script:\n", script)

    elements = parse_script(script)
    if not elements:
        print("Failed to parse script into elements.")
        return
    print(f"Parsed {len(elements)//2} script segments.")

    # Elements alternate media / tts; a trailing unpaired element is dropped.
    paired_elements = [
        (elements[pos], elements[pos + 1])
        for pos in range(0, len(elements) - 1, 2)
    ]
    if not paired_elements:
        print("No valid script segments found. Exiting.")
        return

    segment_total = len(paired_elements)
    clips = []
    for number, (media_elem, tts_elem) in enumerate(paired_elements, start=1):
        prompt = media_elem['prompt']
        print(f"\nProcessing segment {number}/{segment_total} with prompt: '{prompt}'")

        # Visual asset (video or image) for this prompt
        media_asset = generate_media(prompt, current_index=number - 1, total_segments=segment_total)
        if not media_asset:
            print(f"Skipping segment {number} due to missing media asset.")
            continue

        # Narration audio
        tts_path = generate_tts(tts_elem['text'], tts_elem['voice'])
        if not tts_path:
            print(f"Skipping segment {number} due to TTS generation failure.")
            continue

        # Combine asset and narration into one clip
        clip = create_clip(
            media_path=media_asset['path'],
            asset_type=media_asset['asset_type'],
            tts_path=tts_path,
            duration=tts_elem['duration'],
            effects=media_elem.get('effects', 'fade-in'),
            narration_text=tts_elem['text'],
            segment_index=number - 1
        )
        if not clip:
            print(f"Clip creation failed for segment {number}.")
            continue
        clips.append(clip)

    if not clips:
        print("No clips were successfully created. Exiting.")
        return

    # Join the clips, then add background music before exporting
    print("\nConcatenating clips...")
    final_video = add_background_music(
        concatenate_videoclips(clips, method="compose"),
        bg_music_volume=0.08,
    )

    print(f"Exporting final video to {OUTPUT_VIDEO_FILENAME} with veryfast rendering preset...")
    final_video.write_videofile(OUTPUT_VIDEO_FILENAME, codec='libx264', fps=24, preset='veryfast')
    print(f"Final video saved as {OUTPUT_VIDEO_FILENAME}")

    # Best effort: open the exported file for viewing/download
    try:
        webbrowser.open(OUTPUT_VIDEO_FILENAME)
        print("Video is being opened for download/viewing.")
    except Exception as e:
        print("Failed to auto-download/open the video:", e)

    # Remove temporary files so they don't interfere with the next run
    print("Cleaning up temporary files...")
    shutil.rmtree(TEMP_FOLDER)
    print("Temporary files removed.")
+# Script entry point: call main_fixed() when this file is executed directly.
+# NOTE(review): a second `__main__` guard at the end of the file launches the
+# Gradio app, so running the file as a script executes main_fixed() first,
+# before the UI below is built and launched -- confirm this is intended.
+if __name__ == "__main__":
+    main_fixed()
+# --------------- GRADIO INTERFACE --------------- #
+def run_pipeline(user_input, resolution, clip_amount, caption_enabled):
+    global TARGET_RESOLUTION, CAPTION_COLOR
+    # Set parameters based on user input
+    TARGET_RESOLUTION = (1920, 1080) if resolution == "Full" else (1080, 1920)
+    CAPTION_COLOR = "white" if caption_enabled else "transparent"
+    CLIPS_AMMOUNT = int(clip_amount)
+    try:
+        # Generate script
         script = generate_script(user_input)
         if not script:
+            raise gr.Error("Failed to generate script")
+        # Parse elements
         elements = parse_script(script)
         if not elements:
+            raise gr.Error("Failed to parse script")
+        # Process elements
         paired_elements = []
         for i in range(0, len(elements), 2):
+            if i+1 < len(elements):
                 paired_elements.append((elements[i], elements[i+1]))
+        # Limit clips
+        paired_elements = paired_elements[:CLIPS_AMMOUNT]
+        # Create clips
         clips = []
         for idx, (media_elem, tts_elem) in enumerate(paired_elements):
+            media_asset = generate_media(media_elem['prompt'])
             if not media_asset:
                 continue
             tts_path = generate_tts(tts_elem['text'], tts_elem['voice'])
             if not tts_path:
                 continue
             clip = create_clip(
                 media_path=media_asset['path'],
                 asset_type=media_asset['asset_type'],
                 tts_path=tts_path,
                 narration_text=tts_elem['text'],
+                segment_index=idx
             )
             if clip:
                 clips.append(clip)
+        # Create final video
         if not clips:
+            raise gr.Error("Failed to create any video clips")
         final_video = concatenate_videoclips(clips, method="compose")
+        final_video = add_background_music(final_video)
+        # Save output
+        output_path = "final_video.mp4"
+        final_video.write_videofile(output_path, codec='libx264', fps=24, preset='veryfast', threads=4)
+        # Cleanup
+        for clip in clips:
+            clip.close()
+        final_video.close()
+        return output_path
     except Exception as e:
+        raise gr.Error(f"Error: {str(e)}")
     finally:
+        # Clean temporary files
+        for f in os.listdir(TEMP_FOLDER):
+            os.remove(os.path.join(TEMP_FOLDER, f))
+# --------------- GRADIO UI --------------- #
+with gr.Blocks(title="AI Documentary Maker", theme=gr.themes.Default()) as demo:
+    gr.Markdown("""
+    # 🎥 AI Documentary Maker
+    Create viral documentary-style videos with AI!
+    """)
     with gr.Row():
+        with gr.Column(scale=1):
+            user_input = gr.Textbox(label="Documentary Topic",
+                                   placeholder="Enter your topic or script...")
+            resolution = gr.Dropdown(["Full (1920x1080)", "Short (1080x1920)"],
+                                    label="Video Format", value="Short (1080x1920)")
+            clip_amount = gr.Slider(1, 10, value=5, step=1,
+                                   label="Number of Clips")
+            caption_enabled = gr.Checkbox(label="Enable Subtitles", value=True)
+            generate_btn = gr.Button("Generate Video", variant="primary")
+        with gr.Column(scale=2):
+            output_video = gr.Video(label="Generated Video", format="mp4")
+    generate_btn.click(
+        fn=run_pipeline,
+        inputs=[user_input, resolution, clip_amount, caption_enabled],
+        outputs=output_video
     )
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)