Spaces:

testdeep123
/

video

Build error

App Files Community

testdeep123 committed on Apr 11

Commit

b082bff

verified ·

1 Parent(s): 2bb6ef9

Update app.py

Browse files

Files changed (1) hide show

app.py +153 -926

app.py CHANGED Viewed

@@ -1,216 +1,90 @@
 import os
-import gradio as gr
-from kokoro import KPipeline
-from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip, CompositeVideoClip, TextClip, concatenate_videoclips
-from PIL import Image
-import tempfile
-import random
-import cv2
-import math
 import requests
 import re
-import time
-import pydub
-import pysrt
-from gtts import gTTS
-import numpy as np
-import soundfile as sf
-# Initialize Kokoro TTS pipeline
-pipeline = KPipeline(lang_code='a')
-# API Constants
-PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
-OPENROUTER_API_KEY = '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'
-OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
-TEMP_FOLDER = "temp_video_processing"
-os.makedirs(TEMP_FOLDER, exist_ok=True)
-# --------------- ORIGINAL FUNCTIONS (UNMODIFIED) --------------- #
-!pip install transformers==4.49.0
-!pip install moviepy gTTS requests pydub pillow
-!pip cache purge
-!apt-get install imagemagick -y
-!pip install kokoro>=0.3.4 soundfile
-!apt-get-qq -y install espeak-ng > /dev/null 2>&1
-!pip install pysrt
-from kokoro import KPipeline
-from IPython.display import display, Audio
-import soundfile as sf
-import torch
-from IPython.display import display, Audio, HTML
-import soundfile as sf
-import os
-from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip
-from PIL import Image
-import tempfile
 import random
-import cv2
 import math
-import os, requests, io, time, re, random
 from moviepy.editor import (
     VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
-    CompositeVideoClip, TextClip
 )
 import moviepy.video.fx.all as vfx
 import moviepy.config as mpy_config
 from pydub import AudioSegment
-from pydub.generators import Sine
-from google.colab import files
 from PIL import Image, ImageDraw, ImageFont
 import numpy as np
 from bs4 import BeautifulSoup
-import base64
 from urllib.parse import quote
 import pysrt
-from gtts import gTTS
-# Initialize Kokoro TTS pipeline (using American English, adjust lang_code as needed)
-pipeline = KPipeline(lang_code='a')  # Use voice 'af_heart' for American English
-# Ensure ImageMagick binary is set (to avoid "unset" errors)
-mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
-# ---------------- Global Configuration ---------------- #
-TARGET_RESOLUTION_INPUT = input("RESOLUTION:")
-CLIPS_AMMOUNT = int(input("Clips:"))
-CAPTION_OPTION = input("Caption Yes/No:")
-if CAPTION_OPTION == "Yes":
-  CAPTION_COLOR = "white"
-else:
-  CAPTION_COLOR = "transparent"
-if TARGET_RESOLUTION_INPUT == "Full":
-  TARGET_RESOLUTION = (1920, 1080)
-elif TARGET_RESOLUTION_INPUT == "Short":
-  TARGET_RESOLUTION = (1080, 1920)
 PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
 OPENROUTER_API_KEY = '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'
 OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
 TEMP_FOLDER = "temp_video_processing"
 OUTPUT_VIDEO_FILENAME = "final_video.mp4"
 USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
-# Create temporary folder if it doesn't exist
-if not os.path.exists(TEMP_FOLDER):
-    os.makedirs(TEMP_FOLDER)
-# ---------------- Helper Functions ---------------- #
 def generate_script(user_input):
-    """Generate documentary script with proper OpenRouter handling."""
     headers = {
         'Authorization': f'Bearer {OPENROUTER_API_KEY}',
         'HTTP-Referer': 'https://your-domain.com',
         'X-Title': 'AI Documentary Maker'
     }
     prompt = f"""Short Documentary Script GeneratorInstructions:
 If I say "use this," just output the script exactly as I gave it.
 If I only give topics, generate a script based on them.
 If I provide a full script, rewrite it without any changes. Make everything short simple and humarous funny and act as serious but humarous. And don't say anything off topic. Also alway say a funny statement to subscribe based on the video topic at the end. Use normal conversational text like a normal person talking and avoid AI phase make the statements humanize and normal conversational
 And always talk negative views don't really wants any positive statement and the full script will be one video about one topic so keep in mind
 Formatting Rules:
 Title in Square Brackets:
 Each section starts with a one-word title inside [ ] (max two words if necessary).
 This title will be used as a search term for Pexels footage.
 Casual & Funny Narration:
 Each section has 5-10 words of narration.
 Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).
 No Special Formatting:
 No bold, italics, or special characters. You are a assistant AI your task is to create script. You aren't a chatbot. So, don't write extra text
 Generalized Search Terms:
 If a term is too specific, make it more general for Pexels search.
 Scene-Specific Writing:
 Each section describes only what should be shown in the video.
 Output Only the Script, and also make it funny and humarous and helirous and also add to subscribe with a funny statement like subscribe now or .....
 No extra text, just the script.
 Example Output:
 [North Korea]
 Top 5 unknown facts about North Korea.
 [Invisibility]
 North Korea’s internet speed is so fast… it doesn’t exist.
 [Leadership]
 Kim Jong-un once won an election with 100% votes… against himself.
 [Magic]
 North Korea discovered time travel. That’s why their news is always from the past.
 [Warning]
 Subscribe now, or Kim Jong-un will send you a free one-way ticket… to North Korea.
 [Freedom]
 North Korean citizens can do anything… as long as it's government-approved.
 Now here is the Topic/scrip: {user_input}
 """
     data = {
         'model': OPENROUTER_MODEL,
         'messages': [{'role': 'user', 'content': prompt}],
         'temperature': 0.4,
         'max_tokens': 5000
     }
     try:
         response = requests.post(
             'https://openrouter.ai/api/v1/chat/completions',
@@ -218,677 +92,275 @@ Now here is the Topic/scrip: {user_input}
             json=data,
             timeout=30
         )
-        # Debug: Print raw response
-        print("API Response:", response.text)
         if response.status_code == 200:
             response_data = response.json()
             if 'choices' in response_data and len(response_data['choices']) > 0:
                 return response_data['choices'][0]['message']['content']
-            else:
-                print("Unexpected response format:", response_data)
-                return None
-        else:
-            print(f"API Error {response.status_code}: {response.text}")
-            return None
-    except Exception as e:
-        print(f"Request failed: {str(e)}")
         return None
 def parse_script(script_text):
-    """
-    Parse the generated script into a list of elements.
-    For each section, create two elements:
-      - A 'media' element using the section title as the visual prompt.
-      - A 'tts' element with the narration text, voice info, and computed duration.
-    """
     sections = {}
     current_title = None
     current_text = ""
     try:
         for line in script_text.splitlines():
             line = line.strip()
             if line.startswith("[") and "]" in line:
-                # Extract content between first [ and first ]
                 bracket_start = line.find("[")
                 bracket_end = line.find("]", bracket_start)
                 if bracket_start != -1 and bracket_end != -1:
                     if current_title is not None:
                         sections[current_title] = current_text.strip()
                     current_title = line[bracket_start+1:bracket_end]
-                    current_text = line[bracket_end+1:].strip()  # Get any text after the bracket
             elif current_title:
                 current_text += line + " "
-        # Don't forget the last section
         if current_title:
             sections[current_title] = current_text.strip()
         elements = []
         for title, narration in sections.items():
-            if not title or not narration:  # Skip empty sections
                 continue
             media_element = {"type": "media", "prompt": title, "effects": "fade-in"}
-            # Duration: at least 3 sec, or 0.5 sec per word
             words = narration.split()
             duration = max(3, len(words) * 0.5)
             tts_element = {"type": "tts", "text": narration, "voice": "en", "duration": duration}
             elements.append(media_element)
             elements.append(tts_element)
         return elements
-    except Exception as e:
-        print(f"Error parsing script: {e}")
         return []
 def search_pexels_videos(query, pexels_api_key):
-    """Search for a video on Pexels by query and return a random HD video."""
     headers = {'Authorization': pexels_api_key}
     base_url = "https://api.pexels.com/videos/search"
-    num_pages = 3  # Search through first 3 pages
     videos_per_page = 15
-    # Add retry mechanism
-    max_retries = 3
-    retry_delay = 1
-    search_query = query
     all_videos = []
     for page in range(1, num_pages + 1):
-        for attempt in range(max_retries):
-            try:
-                params = {"query": search_query, "per_page": videos_per_page, "page": page}
-                response = requests.get(base_url, headers=headers, params=params, timeout=10)
-                if response.status_code == 200:
-                    data = response.json()
-                    videos = data.get("videos", [])
-                    if not videos:
-                        print(f"No videos found on page {page}.")
-                        break  # No videos on this page, move to the next
-                    # Collect all HD videos
-                    for video in videos:
-                        video_files = video.get("video_files", [])
-                        for file in video_files:
-                            if file.get("quality") == "hd":  # Only collect HD quality
-                                all_videos.append(file.get("link"))
-                                break  # Only add one file per video
-                    break  # Success, exit retry loop
-                elif response.status_code == 429:  # Rate limit
-                    print(f"Rate limit hit (attempt {attempt+1}/{max_retries}). Retrying in {retry_delay} seconds...")
-                    time.sleep(retry_delay)
-                    retry_delay *= 2
-                else:
-                    print(f"Error fetching videos: {response.status_code} {response.text}")
-                    if attempt < max_retries - 1:
-                        print(f"Retrying in {retry_delay} seconds...")
-                        time.sleep(retry_delay)
-                        retry_delay *= 2
-                    else:
-                        break
-            except requests.exceptions.RequestException as e:
-                print(f"Request exception: {e}")
-                if attempt < max_retries - 1:
-                    print(f"Retrying in {retry_delay} seconds...")
-                    time.sleep(retry_delay)
-                    retry_delay *= 2
-                else:
-                    break
-    if all_videos:
-        # Select a random video from the collected ones
-        random_video = random.choice(all_videos)
-        print(f"Selected random video from {len(all_videos)} HD videos")
-        return random_video
-    else:
-        print("No suitable videos found after searching all pages.")
-        return None
 def search_pexels_images(query, pexels_api_key):
-    """Search for an image on Pexels by query."""
     headers = {'Authorization': pexels_api_key}
     url = "https://api.pexels.com/v1/search"
     params = {"query": query, "per_page": 5, "orientation": "landscape"}
-    # Add retry mechanism
-    max_retries = 3
-    retry_delay = 1
-    for attempt in range(max_retries):
-        try:
-            response = requests.get(url, headers=headers, params=params, timeout=10)
-            if response.status_code == 200:
-                data = response.json()
-                photos = data.get("photos", [])
-                if photos:
-                    # Get a random image from the first 5 results (if available)
-                    photo = random.choice(photos[:min(5, len(photos))])
-                    img_url = photo.get("src", {}).get("original")
-                    return img_url
-                else:
-                    print(f"No images found for query: {query}")
-                    return None
-            elif response.status_code == 429:  # Rate limit
-                print(f"Rate limit hit (attempt {attempt+1}/{max_retries}). Retrying in {retry_delay} seconds...")
-                time.sleep(retry_delay)
-                retry_delay *= 2
-            else:
-                print(f"Error fetching images: {response.status_code} {response.text}")
-                if attempt < max_retries - 1:
-                    print(f"Retrying in {retry_delay} seconds...")
-                    time.sleep(retry_delay)
-                    retry_delay *= 2
-        except requests.exceptions.RequestException as e:
-            print(f"Request exception: {e}")
-            if attempt < max_retries - 1:
-                print(f"Retrying in {retry_delay} seconds...")
-                time.sleep(retry_delay)
-                retry_delay *= 2
-    print(f"No Pexels images found for query: {query} after all attempts")
-    return None
 def search_google_images(query):
-    """Search for images on Google Images (for news-related queries)"""
     try:
         search_url = f"https://www.google.com/search?q={quote(query)}&tbm=isch"
         headers = {"User-Agent": USER_AGENT}
         response = requests.get(search_url, headers=headers, timeout=10)
         soup = BeautifulSoup(response.text, "html.parser")
-        # Look for image elements or JSON data containing image URLs
         img_tags = soup.find_all("img")
-        # Filter out small images (icons, etc.)
-        image_urls = []
-        for img in img_tags:
-            src = img.get("src", "")
-            if src.startswith("http") and "gstatic" not in src:
-                image_urls.append(src)
-        if image_urls:
-            return random.choice(image_urls[:5]) if len(image_urls) >= 5 else image_urls[0]
-        else:
-            print(f"No Google Images found for query: {query}")
-            return None
-    except Exception as e:
-        print(f"Error in Google Images search: {e}")
         return None
 def download_image(image_url, filename):
-    """Download an image from a URL to a local file with enhanced error handling."""
     try:
         headers = {"User-Agent": USER_AGENT}
-        print(f"Downloading image from: {image_url} to {filename}")
         response = requests.get(image_url, headers=headers, stream=True, timeout=15)
         response.raise_for_status()
         with open(filename, 'wb') as f:
             for chunk in response.iter_content(chunk_size=8192):
                 f.write(chunk)
-        print(f"Image downloaded successfully to: {filename}")
-        # Validate the image
-        try:
-            img = Image.open(filename)
-            img.verify()  # Verify it's an actual image
-            # If it passes verification, reopen and convert to RGB if needed
-            img = Image.open(filename)
-            if img.mode != 'RGB':
-                img = img.convert('RGB')
-                img.save(filename)
-            print(f"Image validated and processed: {filename}")
-            return filename
-        except Exception as e_validate:
-            print(f"Downloaded file is not a valid image: {e_validate}")
-            if os.path.exists(filename):
-                os.remove(filename)
-            return None
-    except requests.exceptions.RequestException as e_download:
-        print(f"Image download error: {e_download}")
-        if os.path.exists(filename):
-            os.remove(filename)
-        return None
-    except Exception as e_general:
-        print(f"General error during image processing: {e_general}")
         if os.path.exists(filename):
             os.remove(filename)
         return None
 def download_video(video_url, filename):
-    """Download a video from a URL to a local file."""
     try:
         response = requests.get(video_url, stream=True, timeout=30)
         response.raise_for_status()
         with open(filename, 'wb') as f:
             for chunk in response.iter_content(chunk_size=8192):
                 f.write(chunk)
-        print(f"Video downloaded successfully to: {filename}")
         return filename
-    except Exception as e:
-        print(f"Video download error: {e}")
         if os.path.exists(filename):
             os.remove(filename)
         return None
-def generate_media(prompt, user_image=None, current_index=0, total_segments=1):
-    """
-    Generate a visual asset by first searching for a video or using a specific search strategy.
-    For news-related queries, use Google Images.
-    Returns a dict: {'path': <file_path>, 'asset_type': 'video' or 'image'}.
-    """
-    # Make prompt URL-safe and a valid filename
     safe_prompt = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')
-    # For news-related queries, use Google Images
     if "news" in prompt.lower():
-        print(f"News-related query detected: {prompt}. Using Google Images...")
         image_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}_news.jpg")
         image_url = search_google_images(prompt)
-        if image_url:
-            downloaded_image = download_image(image_url, image_file)
-            if downloaded_image:
-                print(f"News image saved to {downloaded_image}")
-                return {"path": downloaded_image, "asset_type": "image"}
-        else:
-            print(f"Google Images search failed for prompt: {prompt}")
-    # Try video first (with reduced frequency for better media mix)
-    if random.random() < 0.25:  # 25% chance of using a video
         video_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}_video.mp4")
         video_url = search_pexels_videos(prompt, PEXELS_API_KEY)
-        if video_url:
-            downloaded_video = download_video(video_url, video_file)
-            if downloaded_video:
-                print(f"Video asset saved to {downloaded_video}")
-                return {"path": downloaded_video, "asset_type": "video"}
-        else:
-            print(f"Pexels video search failed for prompt: {prompt}")
-    # Fallback or primary choice for images
     image_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}.jpg")
     image_url = search_pexels_images(prompt, PEXELS_API_KEY)
-    if image_url:
-        downloaded_image = download_image(image_url, image_file)
-        if downloaded_image:
-            print(f"Image asset saved to {downloaded_image}")
-            return {"path": downloaded_image, "asset_type": "image"}
-        else:
-            print(f"Pexels image download failed for prompt: {prompt}")
-    # Last resort: try to find a stock image for common terms
     fallback_terms = ["nature", "people", "landscape", "technology", "business"]
     for term in fallback_terms:
-        print(f"Trying fallback image search with term: {term}")
         fallback_file = os.path.join(TEMP_FOLDER, f"fallback_{term}.jpg")
         fallback_url = search_pexels_images(term, PEXELS_API_KEY)
-        if fallback_url:
-            downloaded_fallback = download_image(fallback_url, fallback_file)
-            if downloaded_fallback:
-                print(f"Fallback image saved to {downloaded_fallback}")
-                return {"path": downloaded_fallback, "asset_type": "image"}
-            else:
-                print(f"Fallback image download failed for term: {term}")
-        else:
-            print(f"Fallback image search failed for term: {term}")
-    print(f"Failed to generate visual asset for prompt: {prompt}")
     return None
-# ---------------- TTS Function Using Kokoro ---------------- #
-def generate_tts(text, voice):
-    """
-    Generate TTS audio using Kokoro and save to a WAV file.
-    Uses the global Kokoro pipeline.
-    Falls back to gTTS if Kokoro fails.
-    """
-    # Create a safe filename
-    safe_text = re.sub(r'[^\w\s-]', '', text[:10]).strip().replace(' ', '_')
-    file_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.wav")
-    # If file already exists, reuse it
-    if os.path.exists(file_path):
-        print(f"Using cached TTS for text '{text[:10]}...'")
-        return file_path
-    try:
-        # Map voice 'en' to kokoro's American English voice.
-        kokoro_voice = 'af_heart' if voice == 'en' else voice
-        generator = pipeline(text, voice=kokoro_voice, speed=0.8, split_pattern=r'\n+')
-        audio_segments = []
-        for i, (gs, ps, audio) in enumerate(generator):
-            audio_segments.append(audio)
-        if len(audio_segments) > 1:
-            full_audio = np.concatenate(audio_segments)
-        else:
-            full_audio = audio_segments[0]
-        sf.write(file_path, full_audio, 24000)  # Save as WAV at 24000 Hz
-        print(f"TTS audio saved to {file_path} (Kokoro)")
-        return file_path
-    except Exception as e:
-        print(f"Error generating TTS with Kokoro: {e}")
-        # Fallback to gTTS if Kokoro fails
-        try:
-            print("Falling back to gTTS...")
-            tts = gTTS(text=text, lang='en')
-            mp3_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.mp3")
-            tts.save(mp3_path)
-            # Convert mp3 to wav using pydub
-            audio = AudioSegment.from_mp3(mp3_path)
-            audio.export(file_path, format="wav")
-            if os.path.exists(mp3_path):
-                os.remove(mp3_path)  # Clean up the temporary mp3
-            print(f"Fallback TTS saved to {file_path} (gTTS)")
-            return file_path
-        except Exception as fallback_error:
-            print(f"Fallback TTS with gTTS also failed: {fallback_error}")
-            return None
-def generate_silent_audio(duration, sample_rate=24000):
-    """
-    Generate a silent WAV audio file lasting 'duration' seconds.
-    """
-    import numpy as np
-    import soundfile as sf
-    num_samples = int(duration * sample_rate)
-    silence = np.zeros(num_samples, dtype=np.float32)
-    silent_path = os.path.join(TEMP_FOLDER, f"silent_{int(time.time())}.wav")
-    sf.write(silent_path, silence, sample_rate)
-    print(f"Silent audio generated: {silent_path}")
-    return silent_path
 def generate_tts(text, voice):
-    """
-    Generate TTS audio using Kokoro, and if that fails, try gTTS.
-    If both fail, generate silent audio as a fallback.
-    """
-    safe_text = re.sub(r'[^\w\s-]', '', text[:10]).strip().replace(' ', '_')
-    file_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.wav")
     if os.path.exists(file_path):
-        print(f"Using cached TTS for text '{text[:10]}...'")
         return file_path
     try:
         kokoro_voice = 'af_heart' if voice == 'en' else voice
         generator = pipeline(text, voice=kokoro_voice, speed=0.9, split_pattern=r'\n+')
-        audio_segments = []
-        for i, (gs, ps, audio) in enumerate(generator):
-            audio_segments.append(audio)
         full_audio = np.concatenate(audio_segments) if len(audio_segments) > 1 else audio_segments[0]
         sf.write(file_path, full_audio, 24000)
-        print(f"TTS audio saved to {file_path} (Kokoro)")
         return file_path
-    except Exception as e:
-        print(f"Error with Kokoro TTS: {e}")
         try:
-            print("Falling back to gTTS...")
-            from gtts import gTTS
             tts = gTTS(text=text, lang='en')
             mp3_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.mp3")
             tts.save(mp3_path)
             audio = AudioSegment.from_mp3(mp3_path)
             audio.export(file_path, format="wav")
             os.remove(mp3_path)
-            print(f"Fallback TTS saved to {file_path} (gTTS)")
             return file_path
-        except Exception as fallback_error:
-            print(f"Both TTS methods failed: {fallback_error}")
-            # Generate silent audio as fallback
-            return generate_silent_audio(duration=max(3, len(text.split()) * 0.5))
 def apply_kenburns_effect(clip, target_resolution, effect_type=None):
-    """
-    Apply a smooth Ken Burns effect with a single, clean movement pattern.
-    """
-    # Unpack target resolution
     target_w, target_h = target_resolution
-    # First ensure the image fills the target resolution by resizing it properly
-    # Calculate the aspect ratio of the original clip
     clip_aspect = clip.w / clip.h
     target_aspect = target_w / target_h
-    # Resize to fill the entire frame
-    if clip_aspect > target_aspect:  # Image is wider than the target frame
         new_height = target_h
         new_width = int(new_height * clip_aspect)
-    else:  # Image is taller than the target frame
         new_width = target_w
         new_height = int(new_width / clip_aspect)
-    # Resize the clip to ensure it fills the target resolution
     clip = clip.resize(newsize=(new_width, new_height))
-    # Now apply the base_scale for Ken Burns effect
     base_scale = 1.15
     new_width = int(new_width * base_scale)
     new_height = int(new_height * base_scale)
     clip = clip.resize(newsize=(new_width, new_height))
-    # Rest of your function stays the same...
-    # Calculate maximum offsets for panning
     max_offset_x = new_width - target_w
     max_offset_y = new_height - target_h
-    # Define available effects
     available_effects = ["zoom-in", "zoom-out", "pan-left", "pan-right", "up-left"]
-    # Randomly select an effect if not specified
-    if effect_type is None or effect_type == "random":
-        effect_type = random.choice(available_effects)
-    # Set effect parameters
     if effect_type == "zoom-in":
-        start_zoom = 0.9
-        end_zoom = 1.1
-        start_center = (new_width / 2, new_height / 2)
-        end_center = start_center
     elif effect_type == "zoom-out":
-        start_zoom = 1.1
-        end_zoom = 0.9
-        start_center = (new_width / 2, new_height / 2)
-        end_center = start_center
     elif effect_type == "pan-left":
-        start_zoom = 1.0
-        end_zoom = 1.0
         start_center = (max_offset_x + target_w / 2, (max_offset_y // 2) + target_h / 2)
         end_center = (target_w / 2, (max_offset_y // 2) + target_h / 2)
     elif effect_type == "pan-right":
-        start_zoom = 1.0
-        end_zoom = 1.0
         start_center = (target_w / 2, (max_offset_y // 2) + target_h / 2)
         end_center = (max_offset_x + target_w / 2, (max_offset_y // 2) + target_h / 2)
     elif effect_type == "up-left":
-        start_zoom = 1.0
-        end_zoom = 1.0
         start_center = (max_offset_x + target_w / 2, max_offset_y + target_h / 2)
         end_center = (target_w / 2, target_h / 2)
     else:
         raise ValueError(f"Unsupported effect_type: {effect_type}")
-    # Define the transformation function for each frame
     def transform_frame(get_frame, t):
         frame = get_frame(t)
-        # Smooth interpolation using cosine easing
-        ratio = t / clip.duration if clip.duration > 0 else 0
-        ratio = 0.5 - 0.5 * math.cos(math.pi * ratio)  # Ease in/out
-        # Calculate current zoom and crop size
         current_zoom = start_zoom + (end_zoom - start_zoom) * ratio
-        crop_w = int(target_w / current_zoom)
-        crop_h = int(target_h / current_zoom)
-        # Calculate current center with floating-point precision
         current_center_x = start_center[0] + (end_center[0] - start_center[0]) * ratio
         current_center_y = start_center[1] + (end_center[1] - start_center[1]) * ratio
-        # Clamp center to keep the crop within image bounds
-        min_center_x = crop_w / 2
-        max_center_x = new_width - crop_w / 2
-        min_center_y = crop_h / 2
-        max_center_y = new_height - crop_h / 2
         current_center_x = max(min_center_x, min(current_center_x, max_center_x))
         current_center_y = max(min_center_y, min(current_center_y, max_center_y))
-        # Crop with subpixel accuracy and resize
         cropped_frame = cv2.getRectSubPix(frame, (crop_w, crop_h), (current_center_x, current_center_y))
-        resized_frame = cv2.resize(cropped_frame, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)
-        return resized_frame
-    # Apply the transformation to the clip
     return clip.fl(transform_frame)
-# Define target resolution (e.g., 1920x1080 for Full HD)
 def resize_to_fill(clip, target_resolution):
-    """
-    Resize and crop a clip to fill the target resolution while maintaining aspect ratio.
-    Args:
-        clip: MoviePy VideoClip or ImageClip object.
-        target_resolution: Tuple of (width, height) to resize to.
-    Returns:
-        Resized and cropped clip.
-    """
     target_w, target_h = target_resolution
     clip_aspect = clip.w / clip.h
     target_aspect = target_w / target_h
     if clip_aspect > target_aspect:
-        # Clip is wider than target; resize to target height and crop width
         clip = clip.resize(height=target_h)
         crop_amount = (clip.w - target_w) / 2
         clip = clip.crop(x1=crop_amount, x2=clip.w - crop_amount, y1=0, y2=clip.h)
     else:
-        # Clip is taller than target; resize to target width and crop height
         clip = clip.resize(width=target_w)
         crop_amount = (clip.h - target_h) / 2
         clip = clip.crop(x1=0, x2=clip.w, y1=crop_amount, y2=clip.h - crop_amount)
     return clip
-# Function to find any MP3 file in the directory tree
-def find_mp3_files():
-    """
-    Search for any MP3 files in the current directory and subdirectories.
-    Returns the path to the first MP3 file found or None if none is found.
-    """
-    mp3_files = []
-    # Walk through all directories starting from current directory
-    for root, dirs, files in os.walk('.'):
-        for file in files:
-            if file.endswith('.mp3'):
-                mp3_path = os.path.join(root, file)
-                mp3_files.append(mp3_path)
-                print(f"Found MP3 file: {mp3_path}")
-    if mp3_files:
-        # Return the first MP3 file found
-        return mp3_files[0]
-    return None
-# Update the add_background_music function to use the first found MP3
 def add_background_music(final_video, bg_music_volume=0.08):
-    """Add background music to the final video using any MP3 file found in directories."""
-    try:
-        # Find MP3 files
-        bg_music_path = find_mp3_files()
-        if bg_music_path and os.path.exists(bg_music_path):
-            print(f"Adding background music from: {bg_music_path}")
-            # Load the background music
-            bg_music = AudioFileClip(bg_music_path)
-            # Loop the music if shorter than the video
-            if bg_music.duration < final_video.duration:
-                loops_needed = math.ceil(final_video.duration / bg_music.duration)
-                bg_segments = [bg_music] * loops_needed
-                bg_music = concatenate_audioclips(bg_segments)
-            # Trim if longer than the video
-            bg_music = bg_music.subclip(0, final_video.duration)
-            # Set volume to 8%
-            bg_music = bg_music.volumex(bg_music_volume)
-            # Mix the background music with the existing audio
-            video_audio = final_video.audio
-            mixed_audio = CompositeAudioClip([video_audio, bg_music])
-            # Set the mixed audio to the final video
-            final_video = final_video.set_audio(mixed_audio)
-            print("Background music added successfully")
-        else:
-            print("No MP3 files found, skipping background music")
-        return final_video
-    except Exception as e:
-        print(f"Error adding background music: {e}")
-        print("Continuing without background music")
-        return final_video
-# Update the subtitle positioning in the create_clip function
-# Find the section in create_clip that handles subtitles, and modify the positioning:
 def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, narration_text=None, segment_index=0):
-    """Create a video clip with synchronized subtitles and properly timed narration."""
     try:
-        print(f"Creating clip #{segment_index} with asset_type: {asset_type}, media_path: {media_path}")
         if not os.path.exists(media_path) or not os.path.exists(tts_path):
-            print("Missing media or TTS file")
             return None
-        # Load and process audio
         audio_clip = AudioFileClip(tts_path).audio_fadeout(0.2)
-        audio_duration = audio_clip.duration
-        target_duration = audio_duration + 0.2
-        # Process visual asset
         if asset_type == "video":
             clip = VideoFileClip(media_path)
             clip = resize_to_fill(clip, TARGET_RESOLUTION)
-            if clip.duration < target_duration:
-                clip = clip.loop(duration=target_duration)
-            else:
-                clip = clip.subclip(0, target_duration)
         elif asset_type == "image":
             img = Image.open(media_path)
             if img.mode != 'RGB':
@@ -896,190 +368,64 @@ def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, n
                     img.convert('RGB').save(temp.name)
                     media_path = temp.name
                 img.close()
             clip = ImageClip(media_path).set_duration(target_duration)
             clip = apply_kenburns_effect(clip, TARGET_RESOLUTION)
             clip = clip.fadein(0.3).fadeout(0.3)
         else:
             return None
-        # Add subtitles with shorter chunks (4-5 words per line) and position at 2/3 of screen height
         if narration_text and CAPTION_COLOR != "transparent":
-            try:
-                # Create SRT-style subtitles
-                words = narration_text.split()
-                chunks = []
-                current_chunk = []
-                # Create chunks of 4-5 words for better readability
-                for word in words:
-                    current_chunk.append(word)
-                    if len(current_chunk) >= 5:  # Maximum 5 words per chunk
-                        chunks.append(' '.join(current_chunk))
-                        current_chunk = []
-                # Add the last chunk if it exists
-                if current_chunk:
-                    chunks.append(' '.join(current_chunk))
-                # Calculate timing for each chunk based on audio duration
-                chunk_duration = audio_duration / len(chunks)
-                subtitle_clips = []
-                # Position subtitles at 1/3 of the screen height instead of bottom
-                subtitle_y_position = int(TARGET_RESOLUTION[1] * 0.70)
-                for i, chunk_text in enumerate(chunks):
-                    start_time = i * chunk_duration
-                    end_time = (i + 1) * chunk_duration
-                    # Create text clip for this chunk
-                    txt_clip = TextClip(
-                        chunk_text,
-                        fontsize=45,
-                        font='Arial-Bold',
-                        color=CAPTION_COLOR,
-                        bg_color='rgba(0, 0, 0, 0.25)',
-                        method='caption',
-                        align='center',
-                        stroke_width=2,  # Light shadow
-                        stroke_color=CAPTION_COLOR,  # Must be set if stroke is used
-                        size=(TARGET_RESOLUTION[0] * 0.8, None)  # Width for better readability
-                    ).set_start(start_time).set_end(end_time)
-                    txt_clip = txt_clip.set_position(('center', subtitle_y_position))
-                    subtitle_clips.append(txt_clip)
-                # Combine all subtitle chunks with the main clip
-                clip = CompositeVideoClip([clip] + subtitle_clips)
-            except Exception as sub_error:
-                print(f"Subtitle error: {sub_error}")
-                # Fallback to a simpler method if the chunk approach fails
                 txt_clip = TextClip(
-                    narration_text,
-                    fontsize=28,
                     color=CAPTION_COLOR,
                     align='center',
-                    size=(TARGET_RESOLUTION[0] * 0.7, None)
-                ).set_position(('center', int(TARGET_RESOLUTION[2] / 3))).set_duration(clip.duration)
-                clip = CompositeVideoClip([clip, txt_clip])
         clip = clip.set_audio(audio_clip)
-        print(f"Clip created: {clip.duration:.1f}s")
         return clip
-    except Exception as e:
-        print(f"Error in create_clip: {str(e)}")
         return None
def fix_imagemagick_policy():
    """Relax the ImageMagick security policy so text rendering can run.

    Looks for the first existing ``policy.xml`` among a fixed list of
    locations, backs it up to ``<path>.bak`` and rewrites it in place with
    ``sudo sed``.

    Returns:
        True when the backup and every ``sed`` command exited successfully,
        False when no policy file exists or any command failed.
    """
    # Local import: the rest of the file does not use subprocess.
    import subprocess

    print("Attempting to fix ImageMagick security policies...")
    policy_paths = [
        "/etc/ImageMagick-6/policy.xml",
        "/etc/ImageMagick-7/policy.xml",
        "/etc/ImageMagick/policy.xml",
        "/usr/local/etc/ImageMagick-7/policy.xml"
    ]
    found_policy = next((path for path in policy_paths if os.path.exists(path)), None)
    if not found_policy:
        print("No policy.xml found. Using alternative subtitle method.")
        return False
    print(f"Modifying policy file at {found_policy}")
    backup_path = f"{found_policy}.bak"
    # The sed programs are passed as single argv entries, so no shell quoting
    # or escaping is involved (the original interpolated them into os.system).
    # NOTE(review): the second expression's replacement omits the closing
    # "/>" of the tag it rewrites; kept as in the original - confirm intent.
    sed_expressions = [
        's/rights="none"/rights="read|write"/g',
        r's/<policy domain="path" pattern="@\*"[^>]*>/<policy domain="path" pattern="@*" rights="read|write"/g',
        's/<policy domain="coder" rights="none" pattern="PDF"[^>]*>/<!-- <policy domain="coder" rights="none" pattern="PDF"> -->/g',
    ]
    try:
        subprocess.run(["sudo", "cp", found_policy, backup_path], check=True)
        for expression in sed_expressions:
            subprocess.run(["sudo", "sed", "-i", expression, found_policy], check=True)
    except (OSError, subprocess.CalledProcessError) as e:
        # A failing command used to be ignored and reported as success.
        print(f"Error fixing policies: {e}")
        return False
    print("ImageMagick policies updated successfully.")
    return True
-# ---------------- Main Function ---------------- #
-import os
-import shutil
-import webbrowser
-def main_fixed():
-    # Fix ImageMagick policy first
-    fix_success = fix_imagemagick_policy()
-    if not fix_success:
-        print("Will use alternative methods if needed")
-    # Create temp folder if not exists
-    if not os.path.exists(TEMP_FOLDER):
-        os.makedirs(TEMP_FOLDER)
-    user_input = input("Enter your video concept: ")
-    print("Generating script from Gemini API...")
-    script = generate_script(user_input)
     if not script:
-        print("Failed to generate script.")
-        return
-    print("Generated Script:\n", script)
     elements = parse_script(script)
     if not elements:
-        print("Failed to parse script into elements.")
-        return
-    print(f"Parsed {len(elements)//2} script segments.")
-    # Pair media elements with their corresponding TTS elements
-    paired_elements = []
-    for i in range(0, len(elements), 2):
-        if i+1 < len(elements):
-            paired_elements.append((elements[i], elements[i+1]))
     if not paired_elements:
-        print("No valid script segments found. Exiting.")
-        return
-    # Process each paired segment to create video clips
     clips = []
     for idx, (media_elem, tts_elem) in enumerate(paired_elements):
-        print(f"\nProcessing segment {idx+1}/{len(paired_elements)} with prompt: '{media_elem['prompt']}'")
-        # Generate the visual asset (video or image) based on the prompt
         media_asset = generate_media(media_elem['prompt'], current_index=idx, total_segments=len(paired_elements))
         if not media_asset:
-            print(f"Skipping segment {idx+1} due to missing media asset.")
             continue
-        # Generate the TTS audio for the narration
         tts_path = generate_tts(tts_elem['text'], tts_elem['voice'])
         if not tts_path:
-            print(f"Skipping segment {idx+1} due to TTS generation failure.")
             continue
-        # Create the video clip using the media asset and the TTS audio
         clip = create_clip(
             media_path=media_asset['path'],
             asset_type=media_asset['asset_type'],
@@ -1091,142 +437,23 @@ def main_fixed():
         )
         if clip:
             clips.append(clip)
-        else:
-            print(f"Clip creation failed for segment {idx+1}.")
     if not clips:
-        print("No clips were successfully created. Exiting.")
-        return
-    # Concatenate all the clips into one final video
-    print("\nConcatenating clips...")
     final_video = concatenate_videoclips(clips, method="compose")
-    # Add background music before exporting
     final_video = add_background_music(final_video, bg_music_volume=0.08)
-    # Write the final video to a file with the veryfast preset
-    print(f"Exporting final video to {OUTPUT_VIDEO_FILENAME} with veryfast rendering preset...")
     final_video.write_videofile(OUTPUT_VIDEO_FILENAME, codec='libx264', fps=24, preset='veryfast')
-    print(f"Final video saved as {OUTPUT_VIDEO_FILENAME}")
-    # Auto-open the video file (acts as an auto-download/view feature)
-    try:
-        webbrowser.open(OUTPUT_VIDEO_FILENAME)
-        print("Video is being opened for download/viewing.")
-    except Exception as e:
-        print("Failed to auto-download/open the video:", e)
-    # Clean up temporary files so they don't interfere with the next run
-    print("Cleaning up temporary files...")
     shutil.rmtree(TEMP_FOLDER)
-    print("Temporary files removed.")
-if __name__ == "__main__":
-    main_fixed()
-# --------------- GRADIO INTERFACE --------------- #
def run_pipeline(user_input, resolution, clip_amount, caption_enabled):
    """Generate a documentary video for the Gradio interface.

    Args:
        user_input: Topic or script text passed to ``generate_script``.
        resolution: Video format label; any label starting with ``"Full"``
            selects 1920x1080, everything else selects 1080x1920.
        clip_amount: Maximum number of script segments to render.
        caption_enabled: Truthy to draw subtitles.

    Returns:
        Path of the rendered video file (``"final_video.mp4"``).

    Raises:
        gr.Error: When any stage fails; the message is shown in the UI.
    """
    global TARGET_RESOLUTION, CAPTION_COLOR
    # The dropdown sends labels such as "Full (1920x1080)", so an exact
    # comparison with "Full" never matched; compare on the prefix instead.
    is_landscape = str(resolution).startswith("Full")
    TARGET_RESOLUTION = (1920, 1080) if is_landscape else (1080, 1920)
    CAPTION_COLOR = "white" if caption_enabled else "transparent"
    max_clips = int(clip_amount)
    try:
        script = generate_script(user_input)
        if not script:
            raise gr.Error("Failed to generate script")
        elements = parse_script(script)
        if not elements:
            raise gr.Error("Failed to parse script")
        # Elements alternate media / tts; pair them up and cap the count.
        paired_elements = [
            (elements[i], elements[i + 1])
            for i in range(0, len(elements) - 1, 2)
        ][:max_clips]
        clips = []
        for idx, (media_elem, tts_elem) in enumerate(paired_elements):
            media_asset = generate_media(media_elem['prompt'])
            if not media_asset:
                continue
            tts_path = generate_tts(tts_elem['text'], tts_elem['voice'])
            if not tts_path:
                continue
            clip = create_clip(
                media_path=media_asset['path'],
                asset_type=media_asset['asset_type'],
                tts_path=tts_path,
                narration_text=tts_elem['text'],
                segment_index=idx
            )
            if clip:
                clips.append(clip)
        if not clips:
            raise gr.Error("Failed to create any video clips")
        final_video = concatenate_videoclips(clips, method="compose")
        final_video = add_background_music(final_video)
        output_path = "final_video.mp4"
        final_video.write_videofile(output_path, codec='libx264', fps=24, preset='veryfast', threads=4)
        for clip in clips:
            clip.close()
        final_video.close()
        return output_path
    except gr.Error:
        # Already a user-facing error; do not wrap it a second time.
        raise
    except Exception as e:
        raise gr.Error(f"Error: {str(e)}")
    finally:
        # Best-effort cleanup: a missing folder or an undeletable entry must
        # not replace the real error (or the successful result).
        if os.path.isdir(TEMP_FOLDER):
            for name in os.listdir(TEMP_FOLDER):
                try:
                    os.remove(os.path.join(TEMP_FOLDER, name))
                except OSError:
                    pass
-# --------------- GRADIO UI --------------- #
# Gradio UI: inputs in the left column, the rendered video in the right one.
with gr.Blocks(title="AI Documentary Maker", theme=gr.themes.Default()) as demo:
    gr.Markdown("""
    # 🎥 AI Documentary Maker
    Create viral documentary-style videos with AI!
    """)
    with gr.Row():
        with gr.Column(scale=1):
            user_input = gr.Textbox(label="Documentary Topic",
                                   placeholder="Enter your topic or script...")
            # NOTE(review): the choice labels include the pixel size, so the
            # handler receives e.g. "Full (1920x1080)" rather than "Full".
            resolution = gr.Dropdown(["Full (1920x1080)", "Short (1080x1920)"],
                                    label="Video Format", value="Short (1080x1920)")
            clip_amount = gr.Slider(1, 10, value=5, step=1,
                                   label="Number of Clips")
            caption_enabled = gr.Checkbox(label="Enable Subtitles", value=True)
            generate_btn = gr.Button("Generate Video", variant="primary")
        with gr.Column(scale=2):
            output_video = gr.Video(label="Generated Video", format="mp4")
    # Clicking the button passes the four inputs, in this order, to run_pipeline
    # and shows its return value in the video component.
    generate_btn.click(
        fn=run_pipeline,
        inputs=[user_input, resolution, clip_amount, caption_enabled],
        outputs=output_video
    )
# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)

 import os
+import shutil
 import requests
 import re
 import random
+import time
 import math
 from moviepy.editor import (
     VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
+    CompositeVideoClip, TextClip, CompositeAudioClip
 )
 import moviepy.video.fx.all as vfx
 import moviepy.config as mpy_config
 from pydub import AudioSegment
+from gtts import gTTS
 from PIL import Image, ImageDraw, ImageFont
 import numpy as np
 from bs4 import BeautifulSoup
 from urllib.parse import quote
 import pysrt
+import soundfile as sf
+from kokoro import KPipeline
+import cv2
+import gradio as gr
# Initialize Kokoro TTS pipeline
pipeline = KPipeline(lang_code='a')  # 'a' is from original code; adjust if needed
# Set ImageMagick binary
mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
# Global Configuration
# SECURITY: API keys are committed in source. Environment variables of the
# same name now take precedence; the literals remain only as a fallback so
# existing deployments keep working. Rotate these keys and drop the literals.
PEXELS_API_KEY = os.environ.get(
    'PEXELS_API_KEY',
    'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
)
OPENROUTER_API_KEY = os.environ.get(
    'OPENROUTER_API_KEY',
    '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'
)
OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
# Scratch directory for downloaded media and generated audio.
TEMP_FOLDER = "temp_video_processing"
OUTPUT_VIDEO_FILENAME = "final_video.mp4"
# Browser-like User-Agent sent with image search and download requests.
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+# Helper Functions
 def generate_script(user_input):
     headers = {
         'Authorization': f'Bearer {OPENROUTER_API_KEY}',
         'HTTP-Referer': 'https://your-domain.com',
         'X-Title': 'AI Documentary Maker'
     }
     prompt = f"""Short Documentary Script GeneratorInstructions:
 If I say "use this," just output the script exactly as I gave it.
 If I only give topics, generate a script based on them.
 If I provide a full script, rewrite it without any changes. Make everything short simple and humarous funny and act as serious but humarous. And don't say anything off topic. Also alway say a funny statement to subscribe based on the video topic at the end. Use normal conversational text like a normal person talking and avoid AI phase make the statements humanize and normal conversational
 And always talk negative views don't really wants any positive statement and the full script will be one video about one topic so keep in mind
 Formatting Rules:
 Title in Square Brackets:
 Each section starts with a one-word title inside [ ] (max two words if necessary).
 This title will be used as a search term for Pexels footage.
 Casual & Funny Narration:
 Each section has 5-10 words of narration.
 Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).
 No Special Formatting:
 No bold, italics, or special characters. You are a assistant AI your task is to create script. You aren't a chatbot. So, don't write extra text
 Generalized Search Terms:
 If a term is too specific, make it more general for Pexels search.
 Scene-Specific Writing:
 Each section describes only what should be shown in the video.
 Output Only the Script, and also make it funny and humarous and helirous and also add to subscribe with a funny statement like subscribe now or .....
 No extra text, just the script.
 Example Output:
 [North Korea]
 Top 5 unknown facts about North Korea.
 [Invisibility]
 North Korea’s internet speed is so fast… it doesn’t exist.
 [Leadership]
 Kim Jong-un once won an election with 100% votes… against himself.
 [Magic]
 North Korea discovered time travel. That’s why their news is always from the past.
 [Warning]
 Subscribe now, or Kim Jong-un will send you a free one-way ticket… to North Korea.
 [Freedom]
 North Korean citizens can do anything… as long as it's government-approved.
 Now here is the Topic/scrip: {user_input}
 """
     data = {
         'model': OPENROUTER_MODEL,
         'messages': [{'role': 'user', 'content': prompt}],
         'temperature': 0.4,
         'max_tokens': 5000
     }
     try:
         response = requests.post(
             'https://openrouter.ai/api/v1/chat/completions',
             json=data,
             timeout=30
         )
         if response.status_code == 200:
             response_data = response.json()
             if 'choices' in response_data and len(response_data['choices']) > 0:
                 return response_data['choices'][0]['message']['content']
+        return None
+    except Exception:
         return None
 def parse_script(script_text):
     sections = {}
     current_title = None
     current_text = ""
     try:
         for line in script_text.splitlines():
             line = line.strip()
             if line.startswith("[") and "]" in line:
                 bracket_start = line.find("[")
                 bracket_end = line.find("]", bracket_start)
                 if bracket_start != -1 and bracket_end != -1:
                     if current_title is not None:
                         sections[current_title] = current_text.strip()
                     current_title = line[bracket_start+1:bracket_end]
+                    current_text = line[bracket_end+1:].strip()
             elif current_title:
                 current_text += line + " "
         if current_title:
             sections[current_title] = current_text.strip()
         elements = []
         for title, narration in sections.items():
+            if not title or not narration:
                 continue
             media_element = {"type": "media", "prompt": title, "effects": "fade-in"}
             words = narration.split()
             duration = max(3, len(words) * 0.5)
             tts_element = {"type": "tts", "text": narration, "voice": "en", "duration": duration}
             elements.append(media_element)
             elements.append(tts_element)
         return elements
+    except Exception:
         return []
 def search_pexels_videos(query, pexels_api_key):
     headers = {'Authorization': pexels_api_key}
     base_url = "https://api.pexels.com/videos/search"
+    num_pages = 3
     videos_per_page = 15
     all_videos = []
     for page in range(1, num_pages + 1):
+        try:
+            params = {"query": query, "per_page": videos_per_page, "page": page}
+            response = requests.get(base_url, headers=headers, params=params, timeout=10)
+            if response.status_code == 200:
+                data = response.json()
+                videos = data.get("videos", [])
+                for video in videos:
+                    video_files = video.get("video_files", [])
+                    for file in video_files:
+                        if file.get("quality") == "hd":
+                            all_videos.append(file.get("link"))
+                            break
+        except Exception:
+            continue
+    return random.choice(all_videos) if all_videos else None
 def search_pexels_images(query, pexels_api_key):
     headers = {'Authorization': pexels_api_key}
     url = "https://api.pexels.com/v1/search"
     params = {"query": query, "per_page": 5, "orientation": "landscape"}
+    try:
+        response = requests.get(url, headers=headers, params=params, timeout=10)
+        if response.status_code == 200:
+            data = response.json()
+            photos = data.get("photos", [])
+            if photos:
+                photo = random.choice(photos[:min(5, len(photos))])
+                return photo.get("src", {}).get("original")
+        return None
+    except Exception:
+        return None
 def search_google_images(query):
     try:
         search_url = f"https://www.google.com/search?q={quote(query)}&tbm=isch"
         headers = {"User-Agent": USER_AGENT}
         response = requests.get(search_url, headers=headers, timeout=10)
         soup = BeautifulSoup(response.text, "html.parser")
         img_tags = soup.find_all("img")
+        image_urls = [img.get("src", "") for img in img_tags if img.get("src", "").startswith("http") and "gstatic" not in img.get("src", "")]
+        return random.choice(image_urls[:5]) if image_urls else None
+    except Exception:
         return None
 def download_image(image_url, filename):
     try:
         headers = {"User-Agent": USER_AGENT}
         response = requests.get(image_url, headers=headers, stream=True, timeout=15)
         response.raise_for_status()
         with open(filename, 'wb') as f:
             for chunk in response.iter_content(chunk_size=8192):
                 f.write(chunk)
+        img = Image.open(filename)
+        img.verify()
+        img = Image.open(filename)
+        if img.mode != 'RGB':
+            img = img.convert('RGB')
+            img.save(filename)
+        return filename
+    except Exception:
         if os.path.exists(filename):
             os.remove(filename)
         return None
 def download_video(video_url, filename):
     try:
         response = requests.get(video_url, stream=True, timeout=30)
         response.raise_for_status()
         with open(filename, 'wb') as f:
             for chunk in response.iter_content(chunk_size=8192):
                 f.write(chunk)
         return filename
+    except Exception:
         if os.path.exists(filename):
             os.remove(filename)
         return None
def generate_media(prompt, current_index=0, total_segments=1):
    """Fetch a visual asset for *prompt* into the temp folder.

    Order of attempts: a Google image when the prompt mentions "news", a
    Pexels video with 25% probability, a Pexels image, and finally Pexels
    images for a fixed list of generic terms.

    Returns:
        ``{"path": ..., "asset_type": "image" | "video"}`` for the first
        successful download, or None when every attempt fails.
        ``current_index`` and ``total_segments`` are accepted but not used.
    """
    def as_image(path):
        return {"path": path, "asset_type": "image"}

    slug = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')

    if "news" in prompt.lower():
        news_path = os.path.join(TEMP_FOLDER, f"{slug}_news.jpg")
        news_url = search_google_images(prompt)
        if news_url and download_image(news_url, news_path):
            return as_image(news_path)

    if random.random() < 0.25:
        clip_path = os.path.join(TEMP_FOLDER, f"{slug}_video.mp4")
        clip_url = search_pexels_videos(prompt, PEXELS_API_KEY)
        if clip_url and download_video(clip_url, clip_path):
            return {"path": clip_path, "asset_type": "video"}

    photo_path = os.path.join(TEMP_FOLDER, f"{slug}.jpg")
    photo_url = search_pexels_images(prompt, PEXELS_API_KEY)
    if photo_url and download_image(photo_url, photo_path):
        return as_image(photo_path)

    for generic in ("nature", "people", "landscape", "technology", "business"):
        generic_path = os.path.join(TEMP_FOLDER, f"fallback_{generic}.jpg")
        generic_url = search_pexels_images(generic, PEXELS_API_KEY)
        if generic_url and download_image(generic_url, generic_path):
            return as_image(generic_path)
    return None
 def generate_tts(text, voice):
+    safe_text = re.sub(r'[^\w\s-]', '', text[:10]).strip().replace(' ', '')
+    file_path = os.path.join(TEMP_FOLDER, f"tts{safe_text}.wav")
     if os.path.exists(file_path):
         return file_path
     try:
         kokoro_voice = 'af_heart' if voice == 'en' else voice
         generator = pipeline(text, voice=kokoro_voice, speed=0.9, split_pattern=r'\n+')
+        audio_segments = [audio for _, _, audio in generator]
         full_audio = np.concatenate(audio_segments) if len(audio_segments) > 1 else audio_segments[0]
         sf.write(file_path, full_audio, 24000)
         return file_path
+    except Exception:
         try:
             tts = gTTS(text=text, lang='en')
             mp3_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.mp3")
             tts.save(mp3_path)
             audio = AudioSegment.from_mp3(mp3_path)
             audio.export(file_path, format="wav")
             os.remove(mp3_path)
             return file_path
+        except Exception:
+            num_samples = int(max(3, len(text.split()) * 0.5) * 24000)
+            silence = np.zeros(num_samples, dtype=np.float32)
+            sf.write(file_path, silence, 24000)
+            return file_path
def apply_kenburns_effect(clip, target_resolution, effect_type=None):
    """Animate a clip with a slow zoom or pan and output target-size frames.

    Args:
        clip: Clip exposing ``w``, ``h``, ``duration``, ``resize`` and ``fl``.
        target_resolution: ``(width, height)`` of the output frames.
        effect_type: One of ``"zoom-in"``, ``"zoom-out"``, ``"pan-left"``,
            ``"pan-right"``, ``"up-left"``; a falsy value or ``"random"``
            picks one at random.

    Returns:
        The result of ``clip.fl(...)`` with the per-frame crop applied.

    Raises:
        ValueError: If *effect_type* is not one of the supported names.
    """
    target_w, target_h = target_resolution
    clip_aspect = clip.w / clip.h
    target_aspect = target_w / target_h
    # Scale so the clip covers the whole target while keeping its aspect ratio.
    if clip_aspect > target_aspect:
        new_height = target_h
        new_width = int(new_height * clip_aspect)
    else:
        new_width = target_w
        new_height = int(new_width / clip_aspect)
    clip = clip.resize(newsize=(new_width, new_height))
    # Enlarge by a further 15% so there is margin to pan and zoom inside.
    base_scale = 1.15
    new_width = int(new_width * base_scale)
    new_height = int(new_height * base_scale)
    clip = clip.resize(newsize=(new_width, new_height))
    # Spare pixels beyond the target size on each axis.
    max_offset_x = new_width - target_w
    max_offset_y = new_height - target_h
    available_effects = ["zoom-in", "zoom-out", "pan-left", "pan-right", "up-left"]
    effect_type = random.choice(available_effects) if not effect_type or effect_type == "random" else effect_type
    # Each effect is described by a start/end zoom factor and crop center.
    if effect_type == "zoom-in":
        start_zoom, end_zoom = 0.9, 1.1
        start_center = end_center = (new_width / 2, new_height / 2)
    elif effect_type == "zoom-out":
        start_zoom, end_zoom = 1.1, 0.9
        start_center = end_center = (new_width / 2, new_height / 2)
    elif effect_type == "pan-left":
        start_zoom = end_zoom = 1.0
        start_center = (max_offset_x + target_w / 2, (max_offset_y // 2) + target_h / 2)
        end_center = (target_w / 2, (max_offset_y // 2) + target_h / 2)
    elif effect_type == "pan-right":
        start_zoom = end_zoom = 1.0
        start_center = (target_w / 2, (max_offset_y // 2) + target_h / 2)
        end_center = (max_offset_x + target_w / 2, (max_offset_y // 2) + target_h / 2)
    elif effect_type == "up-left":
        start_zoom = end_zoom = 1.0
        start_center = (max_offset_x + target_w / 2, max_offset_y + target_h / 2)
        end_center = (target_w / 2, target_h / 2)
    else:
        raise ValueError(f"Unsupported effect_type: {effect_type}")
    def transform_frame(get_frame, t):
        # Crop the frame at time t around the interpolated center and zoom.
        frame = get_frame(t)
        # Cosine easing from 0 to 1 over the clip duration.
        ratio = 0.5 - 0.5 * math.cos(math.pi * t / clip.duration) if clip.duration > 0 else 0
        current_zoom = start_zoom + (end_zoom - start_zoom) * ratio
        # A larger zoom factor means a smaller crop window.
        crop_w, crop_h = int(target_w / current_zoom), int(target_h / current_zoom)
        current_center_x = start_center[0] + (end_center[0] - start_center[0]) * ratio
        current_center_y = start_center[1] + (end_center[1] - start_center[1]) * ratio
        # Clamp the center so the crop window stays inside the enlarged frame.
        min_center_x, max_center_x = crop_w / 2, new_width - crop_w / 2
        min_center_y, max_center_y = crop_h / 2, new_height - crop_h / 2
        current_center_x = max(min_center_x, min(current_center_x, max_center_x))
        current_center_y = max(min_center_y, min(current_center_y, max_center_y))
        cropped_frame = cv2.getRectSubPix(frame, (crop_w, crop_h), (current_center_x, current_center_y))
        # Bring the crop back to the exact output size.
        return cv2.resize(cropped_frame, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)
    return clip.fl(transform_frame)
 def resize_to_fill(clip, target_resolution):
     target_w, target_h = target_resolution
     clip_aspect = clip.w / clip.h
     target_aspect = target_w / target_h
     if clip_aspect > target_aspect:
         clip = clip.resize(height=target_h)
         crop_amount = (clip.w - target_w) / 2
         clip = clip.crop(x1=crop_amount, x2=clip.w - crop_amount, y1=0, y2=clip.h)
     else:
         clip = clip.resize(width=target_w)
         crop_amount = (clip.h - target_h) / 2
         clip = clip.crop(x1=0, x2=clip.w, y1=crop_amount, y2=clip.h - crop_amount)
     return clip
 def add_background_music(final_video, bg_music_volume=0.08):
+    bg_music_path = "background_music.mp3"
+    if os.path.exists(bg_music_path):
+        bg_music = AudioFileClip(bg_music_path)
+        if bg_music.duration < final_video.duration:
+            loops_needed = math.ceil(final_video.duration / bg_music.duration)
+            bg_segments = [bg_music] * loops_needed
+            bg_music = concatenate_audioclips(bg_segments)
+        bg_music = bg_music.subclip(0, final_video.duration)
+        bg_music = bg_music.volumex(bg_music_volume)
+        video_audio = final_video.audio
+        mixed_audio = CompositeAudioClip([video_audio, bg_music])
+        final_video = final_video.set_audio(mixed_audio)
+    return final_video
 def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, narration_text=None, segment_index=0):
     try:
         if not os.path.exists(media_path) or not os.path.exists(tts_path):
             return None
         audio_clip = AudioFileClip(tts_path).audio_fadeout(0.2)
+        target_duration = audio_clip.duration + 0.2
         if asset_type == "video":
             clip = VideoFileClip(media_path)
             clip = resize_to_fill(clip, TARGET_RESOLUTION)
+            clip = clip.loop(duration=target_duration) if clip.duration < target_duration else clip.subclip(0, target_duration)
         elif asset_type == "image":
             img = Image.open(media_path)
             if img.mode != 'RGB':
                     img.convert('RGB').save(temp.name)
                     media_path = temp.name
                 img.close()
             clip = ImageClip(media_path).set_duration(target_duration)
             clip = apply_kenburns_effect(clip, TARGET_RESOLUTION)
             clip = clip.fadein(0.3).fadeout(0.3)
         else:
             return None
+        subtitle_clips = []
         if narration_text and CAPTION_COLOR != "transparent":
+            words = narration_text.split()
+            chunks = [' '.join(words[i:i+5]) for i in range(0, len(words), 5)]
+            chunk_duration = audio_clip.duration / len(chunks)
+            subtitle_y_position = int(TARGET_RESOLUTION[1] * 0.70)
+            for i, chunk_text in enumerate(chunks):
+                start_time = i * chunk_duration
+                end_time = (i + 1) * chunk_duration
                 txt_clip = TextClip(
+                    chunk_text,
+                    fontsize=45,
+                    font='Arial-Bold',
                     color=CAPTION_COLOR,
+                    bg_color='rgba(0, 0, 0, 0.25)',
+                    method='caption',
                     align='center',
+                    stroke_width=2,
+                    stroke_color=CAPTION_COLOR,
+                    size=(TARGET_RESOLUTION[0] * 0.8, None)
+                ).set_start(start_time).set_end(end_time).set_position(('center', subtitle_y_position))
+                subtitle_clips.append(txt_clip)
+            clip = CompositeVideoClip([clip] + subtitle_clips)
         clip = clip.set_audio(audio_clip)
         return clip
+    except Exception:
         return None
+# Main Gradio Function
def generate_video(video_concept, resolution, caption_option):
    """Run the full pipeline for the Gradio button and return the video path.

    Args:
        video_concept: Topic or script text passed to ``generate_script``.
        resolution: ``"Full"`` for 1920x1080; any other value gives 1080x1920.
        caption_option: ``"Yes"`` to draw captions; any other value hides them.

    Returns:
        ``OUTPUT_VIDEO_FILENAME`` once the video has been written.

    Raises:
        gr.Error: When the script cannot be generated or parsed, or no clip
            could be created. (Returning the message as a string made the
            video output treat it as a file path.)
    """
    global TARGET_RESOLUTION, CAPTION_COLOR
    TARGET_RESOLUTION = (1920, 1080) if resolution == "Full" else (1080, 1920)
    CAPTION_COLOR = "white" if caption_option == "Yes" else "transparent"
    # Start every run from an empty temp folder.
    if os.path.exists(TEMP_FOLDER):
        shutil.rmtree(TEMP_FOLDER)
    os.makedirs(TEMP_FOLDER)
    try:
        script = generate_script(video_concept)
        if not script:
            raise gr.Error("Failed to generate script.")
        elements = parse_script(script)
        if not elements:
            raise gr.Error("Failed to parse script.")
        # Elements alternate media / tts; pair neighbours together.
        paired_elements = [(elements[i], elements[i+1]) for i in range(0, len(elements), 2) if i+1 < len(elements)]
        if not paired_elements:
            raise gr.Error("No valid script segments found.")
        clips = []
        for idx, (media_elem, tts_elem) in enumerate(paired_elements):
            media_asset = generate_media(media_elem['prompt'], current_index=idx, total_segments=len(paired_elements))
            if not media_asset:
                continue
            tts_path = generate_tts(tts_elem['text'], tts_elem['voice'])
            if not tts_path:
                continue
            clip = create_clip(
                media_path=media_asset['path'],
                asset_type=media_asset['asset_type'],
                # tts_path is a required argument and the narration text
                # drives the captions; both were missing from the call.
                tts_path=tts_path,
                duration=tts_elem.get('duration'),
                effects=media_elem.get('effects'),
                narration_text=tts_elem['text'],
                segment_index=idx
            )
            if clip:
                clips.append(clip)
        if not clips:
            raise gr.Error("No clips were successfully created.")
        final_video = concatenate_videoclips(clips, method="compose")
        final_video = add_background_music(final_video, bg_music_volume=0.08)
        final_video.write_videofile(OUTPUT_VIDEO_FILENAME, codec='libx264', fps=24, preset='veryfast')
        # Release the readers held by the clips before deleting their files.
        for clip in clips:
            clip.close()
        final_video.close()
        return OUTPUT_VIDEO_FILENAME
    finally:
        # Remove downloaded media and audio on success and on failure alike.
        shutil.rmtree(TEMP_FOLDER, ignore_errors=True)
+# Gradio Interface
# Gradio Interface: three inputs in one row, a button, and the video output.
with gr.Blocks() as demo:
    gr.Markdown("# AI Documentary Video Generator")
    with gr.Row():
        video_concept = gr.Textbox(label="Video Concept", placeholder="Enter your video concept here...")
        resolution = gr.Dropdown(["Full", "Short"], label="Resolution", value="Full")
        caption_option = gr.Dropdown(["Yes", "No"], label="Caption", value="Yes")
    generate_btn = gr.Button("Generate Video")
    output_video = gr.Video(label="Generated Video")
    # The inputs are passed to generate_video in this order.
    generate_btn.click(generate_video, inputs=[video_concept, resolution, caption_option], outputs=output_video)

# Launch only when executed as a script, so importing this module (for
# example to reuse the helper functions) does not start a web server.
if __name__ == "__main__":
    demo.launch()