testdeep123 commited on
Commit
0a38b03
·
verified ·
1 Parent(s): e7589a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +669 -362
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import gradio as gr
2
  import os
3
  import shutil
@@ -18,6 +19,7 @@ from moviepy.editor import (
18
  CompositeVideoClip, TextClip, CompositeAudioClip, ColorClip
19
  )
20
  import moviepy.video.fx.all as vfx
 
21
  from pydub import AudioSegment
22
  from PIL import Image, ImageDraw, ImageFont
23
  from bs4 import BeautifulSoup
@@ -26,30 +28,33 @@ from gtts import gTTS
26
  import logging
27
 
28
  # --- Configuration ---
29
- # IMPORTANT: Use Hugging Face Secrets for API keys in a real Space
30
- PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna' # Replace with your Pexels API Key
31
- OPENROUTER_API_KEY = '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' # Replace with your OpenRouter API Key
 
 
32
  OPENROUTER_MODEL = "mistralai/mistral-7b-instruct:free" # Using a known free model
33
  # OPENROUTER_MODEL = "mistralai/mistral-small-latest" # Or a small paid one if needed
34
 
35
- TEMP_FOLDER_BASE = "/tmp/ai_doc_generator"
36
  OUTPUT_VIDEO_FILENAME = "final_documentary.mp4"
37
  USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
38
- DEFAULT_FONT = "DejaVuSans.ttf" # A common font available in many Linux distros, adjust if needed
 
 
39
  BGM_FILE = "background_music.mp3" # Optional: Place a royalty-free mp3 here
40
  BGM_VOLUME = 0.1 # Background music volume multiplier (0.0 to 1.0)
41
 
42
  # --- Logging Setup ---
43
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
44
 
45
- # --- Kokoro TTS Initialization (Optional) ---
46
- KOKORO_ENABLED = False
47
- pipeline = None
48
  # try:
49
  # from kokoro import KPipeline
50
- # # Check for GPU availability if desired, default to CPU
51
- # device = 'cuda' if torch.cuda.is_available() else 'cpu'
52
- # pipeline = KPipeline(lang_code='a', device=device) # 'a' for multilingual? Check Kokoro docs
53
  # KOKORO_ENABLED = True
54
  # logging.info("✅ Kokoro TTS Initialized.")
55
  # except ImportError:
@@ -59,11 +64,15 @@ pipeline = None
59
  # logging.warning(f"⚠️ Error initializing Kokoro TTS: {e}. Using gTTS fallback.")
60
  # pipeline = None
61
 
62
- # --- Helper Functions ---
63
-
64
  def generate_script(topic, api_key, model):
65
  """Generates a documentary script using OpenRouter API."""
66
  logging.info(f"Generating script for topic: {topic}")
 
 
 
 
 
67
  prompt = f"""Create a short documentary script about '{topic}'.
68
  The script should be structured as a sequence of scenes and narrations.
69
  Each scene description should be enclosed in [SCENE: description] tags. The description should be concise and suggest visuals (e.g., 'drone shot of mountains', 'close up of a historical artifact', 'archival footage of protests').
@@ -80,7 +89,9 @@ Generate the script now:
80
  """
81
  headers = {
82
  "Authorization": f"Bearer {api_key}",
83
- "Content-Type": "application/json"
 
 
84
  }
85
  data = {
86
  "model": model,
@@ -88,23 +99,41 @@ Generate the script now:
88
  "max_tokens": 1000, # Adjust as needed
89
  }
90
  try:
91
- response = requests.post("https://openrouter.ai/api/v1/chat/completions", headers=headers, json=data, timeout=60)
 
 
 
 
92
  response.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)
93
  result = response.json()
 
 
 
 
 
 
94
  script_content = result['choices'][0]['message']['content']
95
  logging.info("✅ Script generated successfully.")
96
  # Basic validation
97
  if "[SCENE:" not in script_content or "[NARRATION:" not in script_content:
98
  logging.error("❌ Script generation failed: Output format incorrect.")
99
  logging.debug(f"Raw script output: {script_content}")
100
- return None
 
 
101
  return script_content
 
 
 
 
 
 
102
  except requests.exceptions.RequestException as e:
103
  logging.error(f"❌ Script generation failed: API request error: {e}")
104
  return None
105
- except (KeyError, IndexError) as e:
106
- logging.error(f"❌ Script generation failed: Unexpected API response format: {e}")
107
- logging.debug(f"Raw API response: {response.text}")
108
  return None
109
  except Exception as e:
110
  logging.error(f"❌ Script generation failed: An unexpected error occurred: {e}")
@@ -115,21 +144,43 @@ def parse_script(script_text):
115
  """Parses the generated script into scene prompts and narration text."""
116
  logging.info("Parsing script...")
117
  if not script_text:
 
118
  return None
119
 
120
- # Regex to find scene and narration blocks
121
  pattern = re.compile(r"\[SCENE:\s*(.*?)\s*\]\s*\[NARRATION:\s*(.*?)\s*\]", re.DOTALL | re.IGNORECASE)
122
  matches = pattern.findall(script_text)
123
 
124
  if not matches:
125
- logging.error("❌ Script parsing failed: No valid [SCENE]/[NARRATION] pairs found.")
126
- logging.debug(f"Script content for parsing: {script_text}")
127
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
 
129
  elements = []
130
  for scene_desc, narration_text in matches:
131
  scene_desc = scene_desc.strip()
132
- narration_text = narration_text.strip().replace('\n', ' ') # Clean up narration
133
  if scene_desc and narration_text:
134
  elements.append({"type": "scene", "prompt": scene_desc})
135
  elements.append({"type": "narration", "text": narration_text})
@@ -140,94 +191,121 @@ def parse_script(script_text):
140
  logging.info(f"✅ Script parsed into {len(elements)//2} scene/narration pairs.")
141
  return elements
142
 
143
- def search_pexels(query, api_key, media_type="videos", per_page=5):
144
  """Searches Pexels API for videos or photos."""
145
- if not api_key or api_key == "YOUR_PEXELS_API_KEY_HERE":
146
  logging.warning("⚠️ Pexels API key not configured. Skipping search.")
147
  return []
148
- logging.info(f"Searching Pexels {media_type} for: {query}")
149
  base_url = f"https://api.pexels.com/{media_type}/search"
150
  headers = {"Authorization": api_key}
151
- params = {"query": query, "per_page": per_page, "orientation": "landscape"} # Default landscape
 
 
152
 
153
  try:
154
- response = requests.get(base_url, headers=headers, params=params, timeout=20)
155
  response.raise_for_status()
156
  data = response.json()
157
 
158
  results = []
159
  media_key = 'videos' if media_type == 'videos' else 'photos'
160
  link_key = 'video_files' if media_type == 'videos' else 'src'
 
 
 
 
 
161
 
162
- for item in data.get(media_key, []):
163
  if media_type == 'videos':
164
- # Find HD or highest quality video link
165
- video_links = sorted(item.get(link_key, []), key=lambda x: x.get('width', 0), reverse=True)
166
  if video_links:
167
- # Prefer HD (1920x1080) or similar if available
168
- hd_link = next((link['link'] for link in video_links if link.get('quality') == 'hd' and link.get('width') == 1920), None)
169
  if hd_link:
170
- results.append({'url': hd_link, 'type': 'video'})
171
  elif video_links[0].get('link'): # Fallback to highest available
172
- results.append({'url': video_links[0]['link'], 'type': 'video'})
 
173
  else: # photos
174
- # Get large or original image link
175
  img_links = item.get(link_key, {})
176
- if img_links.get('large2x'):
177
- results.append({'url': img_links['large2x'], 'type': 'image'})
178
- elif img_links.get('large'):
179
- results.append({'url': img_links['large'], 'type': 'image'})
180
- elif img_links.get('original'):
181
- results.append({'url': img_links['original'], 'type': 'image'})
182
-
183
- logging.info(f"✅ Found {len(results)} Pexels {media_type} results.")
184
  return results
185
 
 
 
 
 
 
 
186
  except requests.exceptions.RequestException as e:
187
- logging.error(f"❌ Pexels API request error: {e}")
188
  return []
189
  except Exception as e:
190
- logging.error(f"❌ Error processing Pexels response: {e}")
191
  traceback.print_exc()
192
  return []
193
 
194
  def download_media(url, save_dir):
195
  """Downloads media (video or image) from a URL."""
196
- logging.info(f"Downloading media from: {url}")
197
  try:
198
- response = requests.get(url, stream=True, timeout=60, headers={'User-Agent': USER_AGENT})
199
  response.raise_for_status()
200
 
201
- # Try to get filename from URL or Content-Disposition
202
- filename = url.split('/')[-1].split('?')[0]
203
- if not filename or '.' not in filename: # Basic check for extension
204
- # Look for content-disposition header
205
- cd = response.headers.get('content-disposition')
206
- if cd:
207
- fname = re.findall('filename="?(.+)"?', cd)
208
- if fname:
209
- filename = fname[0]
210
- # If still no good filename, generate one based on type
211
- if not filename or '.' not in filename:
212
- content_type = response.headers.get('content-type', '').lower()
213
- ext = '.jpg' # default
214
- if 'video' in content_type:
215
- ext = '.mp4'
216
- elif 'jpeg' in content_type or 'jpg' in content_type:
217
- ext = '.jpg'
218
- elif 'png' in content_type:
219
- ext = '.png'
220
- filename = f"media_{int(time.time())}{ext}"
 
 
 
 
 
 
 
 
221
 
222
 
223
  save_path = os.path.join(save_dir, filename)
 
224
 
225
  with open(save_path, 'wb') as f:
226
- for chunk in response.iter_content(chunk_size=8192):
227
  f.write(chunk)
228
 
229
- logging.info(f"✅ Media downloaded successfully to: {save_path}")
 
 
 
 
 
230
  return save_path
 
 
 
231
  except requests.exceptions.RequestException as e:
232
  logging.error(f"❌ Media download failed: Request error: {e}")
233
  return None
@@ -237,159 +315,247 @@ def download_media(url, save_dir):
237
  return None
238
 
239
  def generate_tts(text, lang, save_dir, segment_index):
240
- """Generates TTS audio using Kokoro (if enabled) or gTTS."""
241
  filename = f"narration_{segment_index}.mp3"
242
  filepath = os.path.join(save_dir, filename)
243
- logging.info(f"Generating TTS for segment {segment_index}: '{text[:50]}...'")
 
 
 
 
 
 
 
244
 
245
  audio_duration = 0
246
  success = False
247
 
248
- # Try Kokoro first if enabled and initialized
249
- # if KOKORO_ENABLED and pipeline:
250
- # try:
251
- # logging.info("Attempting TTS generation with Kokoro...")
252
- # # Assuming Kokoro outputs a numpy array and sample rate
253
- # wav, sr = pipeline.tts(text=text)
254
- # sf.write(filepath, wav, sr)
255
- # audio_duration = len(wav) / sr
256
- # logging.info(f"✅ Kokoro TTS generated successfully ({audio_duration:.2f}s).")
257
- # success = True
258
- # except Exception as e:
259
- # logging.warning(f"⚠️ Kokoro TTS failed: {e}. Falling back to gTTS.")
260
-
261
- # Fallback to gTTS
262
- if not success:
263
  try:
264
- logging.info("Attempting TTS generation with gTTS...")
265
- tts = gTTS(text=text, lang=lang)
266
- tts.save(filepath)
267
- # Get duration using soundfile
268
- try:
269
- audio_info = sf.info(filepath)
270
- audio_duration = audio_info.duration
271
- except Exception as e_dur:
272
- logging.warning(f"⚠️ Could not get duration using soundfile ({e_dur}), trying pydub...")
273
- try:
274
- audio_seg = AudioSegment.from_mp3(filepath)
275
- audio_duration = len(audio_seg) / 1000.0
276
- except Exception as e_dur_pd:
277
- logging.error(f"❌ Failed to get duration with pydub as well ({e_dur_pd}). Setting duration to estimated.")
278
- # Estimate duration based on words (very rough)
279
- words_per_minute = 150
280
- num_words = len(text.split())
281
- audio_duration = (num_words / words_per_minute) * 60
282
- if audio_duration < 2: audio_duration = 2 # Minimum duration
283
-
284
- logging.info(f" gTTS generated successfully ({audio_duration:.2f}s).")
285
- success = True
286
- except Exception as e:
287
- logging.error(f"❌ gTTS failed: {e}")
288
- traceback.print_exc()
289
- success = False
 
 
 
 
 
 
 
290
 
291
  return filepath if success else None, audio_duration if success else 0
292
 
 
293
  def resize_media_to_fill(clip, target_size):
294
  """Resizes a MoviePy clip (video or image) to fill the target size, cropping if necessary."""
295
- # target_size = (width, height)
296
  target_w, target_h = target_size
 
 
 
 
297
  target_aspect = target_w / target_h
298
 
 
 
 
 
299
  clip_w, clip_h = clip.size
 
 
 
 
 
 
300
  clip_aspect = clip_w / clip_h
301
 
302
  if abs(clip_aspect - target_aspect) < 0.01: # Aspect ratios are close enough
303
- return clip.resize(width=target_w) # Or height=target_h
 
304
 
305
  if clip_aspect > target_aspect:
306
- # Clip is wider than target, resize to target height and crop width
307
  resized_clip = clip.resize(height=target_h)
308
- crop_width = resized_clip.w
309
- crop_x_center = crop_width / 2
310
- crop_x1 = int(crop_x_center - target_w / 2)
311
- crop_x2 = int(crop_x_center + target_w / 2)
312
- # Ensure crop coordinates are within bounds
313
- crop_x1 = max(0, crop_x1)
314
- crop_x2 = min(resized_clip.w, crop_x2)
315
- # Adjust if calculated width is slightly off due to rounding
316
  if crop_x2 - crop_x1 != target_w:
317
- crop_x2 = crop_x1 + target_w # Prioritize target width
 
 
 
318
 
319
- return resized_clip.fx(vfx.crop, x1=crop_x1, y1=0, x2=crop_x2, y2=target_h)
320
  else:
321
- # Clip is taller than target, resize to target width and crop height
322
  resized_clip = clip.resize(width=target_w)
323
- crop_height = resized_clip.h
324
- crop_y_center = crop_height / 2
325
- crop_y1 = int(crop_y_center - target_h / 2)
326
- crop_y2 = int(crop_y_center + target_h / 2)
327
- # Ensure crop coordinates are within bounds
328
- crop_y1 = max(0, crop_y1)
329
- crop_y2 = min(resized_clip.h, crop_y2)
330
- # Adjust if calculated height is slightly off
331
  if crop_y2 - crop_y1 != target_h:
332
  crop_y2 = crop_y1 + target_h
 
 
 
 
 
333
 
334
- return resized_clip.fx(vfx.crop, x1=0, y1=crop_y1, x2=target_w, y2=crop_y2)
335
 
 
 
 
 
 
336
 
337
- def apply_ken_burns(image_clip, duration, target_size, zoom_factor=1.1):
338
- """Applies a subtle zoom-out Ken Burns effect to an ImageClip."""
339
- # Ensure the input clip already matches the target size
340
  if image_clip.size != target_size:
341
- logging.warning("Applying Ken Burns to an image not matching target size, resizing first.")
342
  image_clip = resize_media_to_fill(image_clip, target_size)
343
 
 
 
 
 
 
344
  # Define the resize function based on time `t`
345
  def resize_func(t):
346
- # Zoom out: start at zoom_factor, end at 1.0
347
- current_zoom = 1 + (zoom_factor - 1) * (1 - t / duration)
 
 
 
 
 
 
348
  return current_zoom
349
 
350
- # Apply the resize effect over time
351
- # Need to center the zoom effect
352
  zoomed_clip = image_clip.fx(vfx.resize, resize_func)
353
- # Crop back to target size, centered
354
- final_clip = zoomed_clip.fx(vfx.crop, x_center=zoomed_clip.w/2, y_center=zoomed_clip.h/2, width=target_size[0], height=target_size[1])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
355
 
356
  return final_clip.set_duration(duration)
357
 
358
 
359
- def create_caption_clip(text, duration, clip_size, font_size=None, font_path=DEFAULT_FONT, color='white', stroke_color='black', stroke_width=1.5, position=('center', 'bottom'), margin=20):
360
- """Creates a MoviePy TextClip for captions with basic wrapping."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
  width, height = clip_size
362
- max_text_width = width * 0.8 # Allow text to occupy 80% of the width
 
363
 
364
  if font_size is None:
365
- font_size = max(20, int(height / 25)) # Dynamic font size based on height
 
 
366
 
367
- # Basic word wrapping
368
  try:
369
- # Attempt to load the font to estimate size
370
- pil_font = ImageFont.truetype(font_path, font_size)
371
  except IOError:
372
- logging.warning(f"Font '{font_path}' not found. Using MoviePy default.")
373
- pil_font = None # Use MoviePy default if specified font fails
 
374
 
375
  words = text.split()
376
  lines = []
377
  current_line = ""
378
- line_width_func = lambda txt: pil_font.getbbox(txt)[2] if pil_font else len(txt) * font_size * 0.6 # Estimate width
379
 
 
380
  for word in words:
381
  test_line = f"{current_line} {word}".strip()
382
- # Estimate width (this is approximate)
383
- if line_width_func(test_line) <= max_text_width:
 
384
  current_line = test_line
385
  else:
386
  if current_line: # Add the previous line if it wasn't empty
387
  lines.append(current_line)
388
  current_line = word # Start new line with the current word
389
  # Handle case where a single word is too long
390
- if line_width_func(current_line) > max_text_width:
391
- logging.warning(f"Word '{current_line}' is too long for caption width.")
392
- # Could implement character-level wrapping here if needed
 
 
 
 
 
 
 
 
393
 
394
  if current_line: # Add the last line
395
  lines.append(current_line)
@@ -402,126 +568,207 @@ def create_caption_clip(text, duration, clip_size, font_size=None, font_path=DEF
402
  wrapped_text,
403
  fontsize=font_size,
404
  color=color,
405
- font=font_path, # MoviePy might handle font lookup differently
406
  stroke_color=stroke_color,
407
  stroke_width=stroke_width,
408
- method='caption', # Use caption method for better wrapping if available
409
  size=(int(max_text_width), None), # Constrain width for wrapping
410
  align='center'
411
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
412
  except Exception as e:
413
- logging.error(f"Error creating TextClip (maybe font issue?): {e}. Using simpler TextClip.")
414
- # Fallback to simpler TextClip without stroke/specific font if needed
415
- caption = TextClip(wrapped_text, fontsize=font_size, color=color, method='caption', size=(int(max_text_width), None), align='center')
 
416
 
417
 
418
  # Set position with margin
419
- pos_x, pos_y = position
420
- final_pos = list(caption.pos(pos_x, pos_y)) # Get numeric position
 
421
 
422
- if 'bottom' in pos_y:
423
- final_pos[1] -= margin
424
- elif 'top' in pos_y:
425
- final_pos[1] += margin
426
- if 'right' in pos_x:
427
- final_pos[0] -= margin
428
- elif 'left' in pos_x:
429
- final_pos[0] += margin
 
 
430
 
431
- caption = caption.set_position(tuple(final_pos)).set_duration(duration)
432
- return caption
 
 
 
433
 
434
 
435
  def create_clip(media_path, media_type, audio_path, audio_duration, target_size, add_captions, narration_text, segment_index):
436
  """Creates a single video clip from media, audio, and optional captions."""
437
- logging.info(f"Creating clip {segment_index} - Type: {media_type}, Duration: {audio_duration:.2f}s")
 
 
 
438
 
439
  try:
440
- # Load Audio
 
441
  audio_clip = AudioFileClip(audio_path)
442
- # Verify audio duration (sometimes file reading is slightly off)
443
- if abs(audio_clip.duration - audio_duration) > 0.1:
444
- logging.warning(f"Audio file duration ({audio_clip.duration:.2f}s) differs from expected ({audio_duration:.2f}s). Using file duration.")
445
  audio_duration = audio_clip.duration
446
- # Ensure minimum duration to avoid issues
447
- if audio_duration < 0.1:
448
- logging.warning(f"Audio duration is very short ({audio_duration:.2f}s). Setting minimum 0.5s.")
449
  audio_duration = 0.5
 
450
  audio_clip = audio_clip.subclip(0, audio_duration)
 
451
 
452
-
453
- # Load Media (Video or Image)
454
  if media_type == 'video':
 
455
  try:
456
- video_clip = VideoFileClip(media_path, target_resolution=(target_size[1], target_size[0])) # height, width
 
 
457
  # Trim or loop video to match audio duration
458
- if video_clip.duration >= audio_duration:
459
- video_clip = video_clip.subclip(0, audio_duration)
460
  else:
461
- # Loop the video if it's shorter than the audio
462
- logging.warning(f"Video duration ({video_clip.duration:.2f}s) shorter than audio ({audio_duration:.2f}s). Looping video.")
463
- # video_clip = video_clip.fx(vfx.loop, duration=audio_duration) # Loop is simpler
464
- # Alternatively freeze last frame:
465
- num_loops = math.ceil(audio_duration / video_clip.duration)
466
- video_clip = concatenate_videoclips([video_clip] * num_loops).subclip(0, audio_duration)
467
-
468
-
469
- main_clip = resize_media_to_fill(video_clip, target_size)
 
 
 
470
 
471
  except Exception as e:
472
- logging.error(f"❌ Failed to load or process video file '{media_path}': {e}. Creating black clip.")
 
473
  main_clip = ColorClip(size=target_size, color=(0,0,0), duration=audio_duration)
474
 
475
  elif media_type == 'image':
 
476
  try:
477
- # Load image, resize to fill target, apply Ken Burns
478
  img_clip_base = ImageClip(media_path)
479
- img_clip_resized = resize_media_to_fill(img_clip_base, target_size)
480
- main_clip = apply_ken_burns(img_clip_resized, audio_duration, target_size)
481
-
482
  except Exception as e:
483
- logging.error(f"❌ Failed to load or process image file '{media_path}': {e}. Creating black clip.")
 
484
  main_clip = ColorClip(size=target_size, color=(0,0,0), duration=audio_duration)
485
- else:
486
- logging.error(f"❌ Unknown media type: {media_type}. Creating black clip.")
 
487
  main_clip = ColorClip(size=target_size, color=(0,0,0), duration=audio_duration)
488
 
489
- # Set duration definitively and add audio
490
- main_clip = main_clip.set_duration(audio_duration).set_audio(audio_clip)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
491
 
492
- # Add Captions if enabled
493
  if add_captions and narration_text:
494
- caption_clip = create_caption_clip(narration_text, audio_duration, target_size)
495
- final_clip = CompositeVideoClip([main_clip, caption_clip], size=target_size)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
496
  else:
497
- final_clip = main_clip
498
 
499
- logging.info(f"✅ Clip {segment_index} created successfully.")
500
- return final_clip
 
 
501
 
502
  except Exception as e:
503
- logging.error(f"❌ Failed to create clip {segment_index}: {e}")
504
  traceback.print_exc()
 
 
 
505
  return None
506
 
 
507
  def add_background_music(video_clip, music_file=BGM_FILE, volume=BGM_VOLUME):
508
  """Adds background music to the final video clip."""
509
  if not os.path.exists(music_file):
510
  logging.warning(f"Background music file '{music_file}' not found. Skipping BGM.")
511
  return video_clip
 
 
 
 
 
512
 
513
  logging.info(f"Adding background music from {music_file}")
 
 
 
514
  try:
515
  bgm_clip = AudioFileClip(music_file)
516
  video_duration = video_clip.duration
517
 
518
- # Loop or trim BGM to match video duration
519
  if bgm_clip.duration < video_duration:
520
- # Loop BGM - Use audio_loop fx
521
- bgm_clip = bgm_clip.fx(afx.audio_loop, duration=video_duration)
522
- # Alternative manual loop:
523
- # num_loops = math.ceil(video_duration / bgm_clip.duration)
524
- # bgm_clip = concatenate_audioclips([bgm_clip] * num_loops).subclip(0, video_duration)
525
  else:
526
  bgm_clip = bgm_clip.subclip(0, video_duration)
527
 
@@ -529,125 +776,164 @@ def add_background_music(video_clip, music_file=BGM_FILE, volume=BGM_VOLUME):
529
  bgm_clip = bgm_clip.volumex(volume)
530
 
531
  # Combine with existing audio
532
- original_audio = video_clip.audio
533
  if original_audio:
 
 
 
 
 
 
 
 
534
  combined_audio = CompositeAudioClip([original_audio, bgm_clip])
535
  else:
536
- # Handle case where video might not have narration audio (e.g., if all TTS failed)
537
  logging.warning("Video clip has no primary audio. Adding BGM only.")
538
  combined_audio = bgm_clip
539
 
540
- video_clip = video_clip.set_audio(combined_audio)
541
  logging.info("✅ Background music added.")
542
- return video_clip
 
 
 
 
 
543
 
544
  except Exception as e:
545
  logging.error(f"❌ Failed to add background music: {e}")
546
  traceback.print_exc()
547
- return video_clip # Return original clip on failure
 
 
 
548
 
549
 
550
  # --- Main Gradio Function ---
551
- def generate_video_process(topic, resolution_choice, add_captions_option, add_bgm_option, progress=gr.Progress()):
552
  """The main function called by Gradio to generate the video."""
553
  start_time = time.time()
554
- status_log = []
555
  temp_dir = None
556
  final_video_path = None
 
557
 
558
- # Create a unique temporary directory for this run
559
  try:
560
  temp_dir = tempfile.mkdtemp(prefix=TEMP_FOLDER_BASE + "_")
561
- status_log.append(f"Temporary directory created: {temp_dir}")
562
  logging.info(f"Using temp directory: {temp_dir}")
563
 
 
 
 
 
 
 
 
564
  # --- 1. Generate Script ---
565
  progress(0.1, desc="Generating script...")
566
- status_log.append("🔄 Generating script...")
567
  script = generate_script(topic, OPENROUTER_API_KEY, OPENROUTER_MODEL)
568
  if not script:
569
- status_log.append("❌ Script generation failed. Check API key and model.")
 
570
  return "\n".join(status_log), None
571
  status_log.append("✅ Script generated.")
572
- # status_log.append(f"Raw Script:\n{script[:500]}...") # Optional: Log snippet
573
 
574
  # --- 2. Parse Script ---
575
  progress(0.2, desc="Parsing script...")
576
- status_log.append("🔄 Parsing script...")
577
  elements = parse_script(script)
578
- if not elements:
579
- status_log.append("❌ Script parsing failed. Check script format.")
580
  return "\n".join(status_log), None
581
  num_segments = len(elements) // 2
582
  status_log.append(f"✅ Script parsed into {num_segments} segments.")
583
 
584
- # --- 3. Process Segments (Media Search, Download, TTS, Clip Creation) ---
585
- clips = []
586
- target_size = (1920, 1080) if resolution_choice == "Full HD (16:9)" else (1080, 1920) # W, H
587
- status_log.append(f"Target resolution: {target_size[0]}x{target_size[1]}")
588
-
589
  for i in range(0, len(elements), 2):
590
  segment_index = i // 2
591
- current_progress = 0.2 + (0.6 * (segment_index / num_segments))
592
- progress(current_progress, desc=f"Processing segment {segment_index + 1}/{num_segments}")
 
 
 
 
 
593
 
594
  scene_elem = elements[i]
595
  narration_elem = elements[i+1]
596
- scene_prompt = scene_elem['prompt']
597
- narration_text = narration_elem['text']
 
 
 
 
 
 
 
 
 
 
 
598
 
599
  status_log.append(f"\n--- Segment {segment_index + 1}/{num_segments} ---")
600
- status_log.append(f"Scene Prompt: {scene_prompt}")
601
- status_log.append(f"Narration: {narration_text[:100]}...")
602
 
603
  # 3a. Generate TTS
604
- status_log.append("🔄 Generating narration audio...")
605
  tts_path, tts_duration = generate_tts(narration_text, 'en', temp_dir, segment_index)
606
- if not tts_path or tts_duration <= 0.1: # Check for valid duration
607
- status_log.append(f"⚠️ TTS generation failed for segment {segment_index + 1}. Skipping segment.")
608
  logging.warning(f"Skipping segment {segment_index+1} due to TTS failure.")
609
  continue
610
- status_log.append(f"✅ Narration audio generated ({tts_duration:.2f}s): {os.path.basename(tts_path)}")
 
611
 
612
- # 3b. Search for Media
613
- status_log.append("🔄 Searching for media...")
614
  media_path = None
615
  media_type = None
616
 
617
- # Try Pexels Video first
618
- video_results = search_pexels(scene_prompt, PEXELS_API_KEY, media_type="videos")
619
  if video_results:
620
  selected_media = random.choice(video_results)
621
- status_log.append(f"📥 Downloading Pexels video: {selected_media['url']}")
622
  media_path = download_media(selected_media['url'], temp_dir)
623
  if media_path:
624
  media_type = 'video'
 
625
  else:
626
  status_log.append("⚠️ Video download failed.")
627
 
628
- # Try Pexels Image if video fails or not found
629
  if not media_path:
630
- status_log.append("🔄 No suitable video found/downloaded. Searching Pexels images...")
631
- image_results = search_pexels(scene_prompt, PEXELS_API_KEY, media_type="photos")
632
  if image_results:
633
  selected_media = random.choice(image_results)
634
- status_log.append(f"📥 Downloading Pexels image: {selected_media['url']}")
635
  media_path = download_media(selected_media['url'], temp_dir)
636
  if media_path:
637
  media_type = 'image'
 
638
  else:
639
  status_log.append("⚠️ Image download failed.")
640
 
641
- # Fallback: If no media found after searches
642
  if not media_path:
643
- status_log.append(f"⚠️ No suitable media found for '{scene_prompt}'. Using black screen.")
644
- media_type = 'color' # Special type for ColorClip
645
- media_path = None # No path needed for color clip
646
 
647
  # 3c. Create Clip
648
- status_log.append(f"🔄 Creating video clip for segment {segment_index + 1}...")
649
  clip = create_clip(
650
- media_path=media_path if media_type != 'color' else None, # Pass None if color
651
  media_type=media_type,
652
  audio_path=tts_path,
653
  audio_duration=tts_duration,
@@ -659,110 +945,141 @@ def generate_video_process(topic, resolution_choice, add_captions_option, add_bg
659
 
660
  if clip:
661
  clips.append(clip)
662
- status_log.append(f"✅ Clip {segment_index + 1} created.")
663
  else:
664
- status_log.append(f"❌ Failed to create clip for segment {segment_index + 1}. Skipping.")
665
  logging.error(f"Failed to create clip {segment_index+1}, skipping.")
666
 
667
 
668
  if not clips:
669
  status_log.append("\n❌ No valid clips were created. Cannot generate video.")
 
670
  return "\n".join(status_log), None
671
 
 
 
 
672
  # --- 4. Concatenate Clips ---
673
  progress(0.85, desc="Combining video clips...")
674
  status_log.append("\n🔄 Combining video clips...")
 
675
  try:
676
- final_clip = concatenate_videoclips(clips, method="compose")
 
677
  status_log.append("✅ Clips combined successfully.")
678
  except Exception as e:
679
  status_log.append(f"❌ Error concatenating clips: {e}")
680
  logging.error(f"Concatenation failed: {e}")
681
  traceback.print_exc()
682
- # Attempt cleanup even on error
683
- for clip in clips:
684
- clip.close()
685
- return "\n".join(status_log), None
686
-
687
 
688
  # --- 5. Add Background Music (Optional) ---
689
- if add_bgm_option:
690
  progress(0.9, desc="Adding background music...")
691
- status_log.append("🔄 Adding background music...")
692
  final_clip = add_background_music(final_clip, music_file=BGM_FILE, volume=BGM_VOLUME)
693
-
694
 
695
  # --- 6. Write Final Video ---
696
- progress(0.95, desc="Writing final video file...")
697
- status_log.append("🔄 Writing final video file (this may take time)...")
698
- output_path = os.path.join(temp_dir, OUTPUT_VIDEO_FILENAME)
699
- try:
700
- # Use 'medium' preset for better quality/size balance than 'ultrafast'
701
- # Use 'libx264' for wide compatibility. Adjust audio_codec if needed.
702
- # threads=4 can help speed up encoding on multi-core systems
703
- final_clip.write_videofile(
704
- output_path,
705
- codec='libx264',
706
- audio_codec='aac',
707
- fps=24,
708
- preset='medium',
709
- threads=4,
710
- logger='bar' # Use None for less verbose output, or 'bar' for progress
711
- )
712
- status_log.append(f"✅ Final video saved to: {output_path}")
713
- final_video_path = output_path # Set the path to be returned
714
- except Exception as e:
715
- status_log.append(f" Error writing final video file: {e}")
716
- logging.error(f"Final video write failed: {e}")
717
- traceback.print_exc()
718
- final_video_path = None # Ensure no path is returned on failure
719
- finally:
720
- # Ensure MoviePy resources are released
721
- final_clip.close()
722
- for clip in clips:
723
- try:
724
- clip.close()
725
- if clip.audio: clip.audio.close()
726
- except:
727
- pass # Ignore errors during cleanup
728
 
729
 
730
  except Exception as e:
731
- status_log.append(f"\n❌ An unexpected error occurred during video generation: {e}")
732
- logging.error("An unexpected error occurred in generate_video_process:")
733
  logging.error(traceback.format_exc())
734
  final_video_path = None # Ensure failure state
735
 
736
  finally:
737
  # --- 7. Cleanup ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
738
  if temp_dir and os.path.exists(temp_dir):
739
  try:
740
- shutil.rmtree(temp_dir)
741
- status_log.append(f"🧹 Temporary directory cleaned up: {temp_dir}")
742
- logging.info(f"Cleaned up temp directory: {temp_dir}")
743
- except Exception as e:
744
- status_log.append(f"⚠️ Error cleaning up temporary directory {temp_dir}: {e}")
745
- logging.warning(f"Cleanup failed for {temp_dir}: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
746
 
747
  end_time = time.time()
748
  total_time = end_time - start_time
749
- status_log.append(f"\n--- Generation Complete ---")
750
- status_log.append(f"Total time: {total_time:.2f} seconds")
751
 
752
  progress(1.0, desc="Finished!")
753
  return "\n".join(status_log), final_video_path
754
 
755
 
756
  # --- Gradio Interface Definition ---
757
- with gr.Blocks() as iface:
758
- gr.Markdown("# 🤖 AI Documentary Generator")
759
- gr.Markdown("Enter a topic, choose your settings, and let the AI create a short video documentary!")
760
 
761
  with gr.Row():
762
  with gr.Column(scale=1):
763
  topic_input = gr.Textbox(
764
  label="Video Topic",
765
- placeholder="e.g., The History of Coffee, The Secrets of the Deep Ocean, The Rise of Quantum Computing",
766
  lines=2
767
  )
768
  resolution_input = gr.Radio(
@@ -770,14 +1087,19 @@ with gr.Blocks() as iface:
770
  choices=["Short (9:16)", "Full HD (16:9)"],
771
  value="Short (9:16)"
772
  )
773
- captions_input = gr.Checkbox(label="Add Captions", value=True)
774
- bgm_input = gr.Checkbox(label=f"Add Background Music ({os.path.basename(BGM_FILE) if os.path.exists(BGM_FILE) else 'No BGM file found'})", value=True, interactive=os.path.exists(BGM_FILE))
 
 
 
 
 
775
 
776
- generate_button = gr.Button("Generate Video", variant="primary")
777
 
778
  with gr.Column(scale=2):
779
- status_output = gr.Textbox(label="Status Log", lines=15, interactive=False)
780
- video_output = gr.Video(label="Generated Video")
781
 
782
  generate_button.click(
783
  fn=generate_video_process,
@@ -787,28 +1109,21 @@ with gr.Blocks() as iface:
787
 
788
  gr.Examples(
789
  examples=[
790
- ["The lifecycle of a butterfly", "Short (9:16)", True, True],
791
- ["Ancient Roman Engineering", "Full HD (16:9)", True, False],
792
- ["The impact of social media", "Short (9:16)", False, True],
 
793
  ],
794
- inputs=[topic_input, resolution_input, captions_input, bgm_input]
 
795
  )
796
 
797
  # --- Launch the App ---
798
  if __name__ == "__main__":
799
- # Optional: Check for API keys on startup
800
- if not PEXELS_API_KEY or PEXELS_API_KEY == "YOUR_PEXELS_API_KEY_HERE":
801
- logging.warning("PEXELS_API_KEY is not set. Media search will be limited.")
802
- print("WARNING: PEXELS_API_KEY is not set. Media search will be limited.")
803
- if not OPENROUTER_API_KEY or OPENROUTER_API_KEY == "YOUR_OPENROUTER_API_KEY_HERE":
804
- logging.warning("OPENROUTER_API_KEY is not set. Script generation will fail.")
805
- print("WARNING: OPENROUTER_API_KEY is not set. Script generation will fail.")
806
-
807
- # Optional: Add a placeholder BGM file if it doesn't exist
808
- if not os.path.exists(BGM_FILE):
809
- logging.warning(f"Background music file '{BGM_FILE}' not found. Creating a silent placeholder.")
810
  try:
811
- # Create a short silent mp3 using pydub
812
  silent_segment = AudioSegment.silent(duration=1000) # 1 second silence
813
  silent_segment.export(BGM_FILE, format="mp3")
814
  logging.info(f"Created silent placeholder BGM file: {BGM_FILE}")
@@ -816,19 +1131,11 @@ if __name__ == "__main__":
816
  logging.error(f"Could not create placeholder BGM file: {e}")
817
 
818
 
819
- # Fix ImageMagick policy (attempt) - May need sudo/root privileges not available in all environments
820
- # def fix_imagemagick_policy():
821
- # policy_path = "/etc/ImageMagick-6/policy.xml" # Adjust path if needed
822
- # if os.path.exists(policy_path):
823
- # try:
824
- # # Use sed to modify the policy file (requires sed command)
825
- # os.system(f"sed -i 's/rights=\"none\" pattern=\"PS\"/rights=\"read|write\" pattern=\"PS\"/' {policy_path}")
826
- # os.system(f"sed -i 's/rights=\"none\" pattern=\"LABEL\"/rights=\"read|write\" pattern=\"LABEL\"/' {policy_path}")
827
- # os.system(f"sed -i 's/rights=\"none\" pattern=\"TEXT\"/rights=\"read|write\" pattern=\"TEXT\"/' {policy_path}") # Add TEXT pattern
828
- # logging.info(f"Attempted to update ImageMagick policy at {policy_path}")
829
- # except Exception as e:
830
- # logging.warning(f"Failed to automatically update ImageMagick policy: {e}. Manual adjustment might be needed if text rendering fails.")
831
- # fix_imagemagick_policy()
832
-
833
 
834
- iface.launch(debug=True, share=True) # Set share=True for public link if needed
 
 
1
+ # -*- coding: utf-8 -*-
2
  import gradio as gr
3
  import os
4
  import shutil
 
19
  CompositeVideoClip, TextClip, CompositeAudioClip, ColorClip
20
  )
21
  import moviepy.video.fx.all as vfx
22
+ import moviepy.audio.fx.all as afx # Import audio effects
23
  from pydub import AudioSegment
24
  from PIL import Image, ImageDraw, ImageFont
25
  from bs4 import BeautifulSoup
 
28
  import logging
29
 
30
  # --- Configuration ---
31
+ # WARNING: Hardcoding keys is generally discouraged due to security risks.
32
+ # Anyone who can see this code can use your keys.
33
+ PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
34
+ OPENROUTER_API_KEY = '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'
35
+
36
  OPENROUTER_MODEL = "mistralai/mistral-7b-instruct:free" # Using a known free model
37
  # OPENROUTER_MODEL = "mistralai/mistral-small-latest" # Or a small paid one if needed
38
 
39
+ TEMP_FOLDER_BASE = "/tmp/ai_doc_generator" # Use /tmp inside container
40
  OUTPUT_VIDEO_FILENAME = "final_documentary.mp4"
41
  USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
42
+ # Try a very common font likely available in the base python image or installed via apt
43
+ # If text fails, consider installing specific font packages in Dockerfile (e.g., fonts-freefont-ttf)
44
+ DEFAULT_FONT = "DejaVuSans.ttf" # Or try "FreeSans.ttf" if fonts-freefont-ttf is installed
45
  BGM_FILE = "background_music.mp3" # Optional: Place a royalty-free mp3 here
46
  BGM_VOLUME = 0.1 # Background music volume multiplier (0.0 to 1.0)
47
 
48
  # --- Logging Setup ---
49
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
50
 
51
+ # --- Kokoro TTS Initialization (Optional - Keep commented unless installed) ---
52
+ # KOKORO_ENABLED = False
53
+ # pipeline = None
54
  # try:
55
  # from kokoro import KPipeline
56
+ # device = 'cpu' # Default to CPU
57
+ # pipeline = KPipeline(lang_code='a', device=device)
 
58
  # KOKORO_ENABLED = True
59
  # logging.info("✅ Kokoro TTS Initialized.")
60
  # except ImportError:
 
64
  # logging.warning(f"⚠️ Error initializing Kokoro TTS: {e}. Using gTTS fallback.")
65
  # pipeline = None
66
 
67
+ # --- Helper Functions --- (Keep all helper functions from the previous version)
 
68
  def generate_script(topic, api_key, model):
69
  """Generates a documentary script using OpenRouter API."""
70
  logging.info(f"Generating script for topic: {topic}")
71
+ # Check if API key is placeholder or empty
72
+ if not api_key or "sk-or-v1-" not in api_key:
73
+ logging.error("❌ Script generation failed: OpenRouter API Key is missing or invalid.")
74
+ return None
75
+
76
  prompt = f"""Create a short documentary script about '{topic}'.
77
  The script should be structured as a sequence of scenes and narrations.
78
  Each scene description should be enclosed in [SCENE: description] tags. The description should be concise and suggest visuals (e.g., 'drone shot of mountains', 'close up of a historical artifact', 'archival footage of protests').
 
89
  """
90
  headers = {
91
  "Authorization": f"Bearer {api_key}",
92
+ "Content-Type": "application/json",
93
+ "HTTP-Referer": "http://localhost", # Some APIs require Referer
94
+ "X-Title": "AI Documentary Generator" # Optional custom title
95
  }
96
  data = {
97
  "model": model,
 
99
  "max_tokens": 1000, # Adjust as needed
100
  }
101
  try:
102
+ response = requests.post("https://openrouter.ai/api/v1/chat/completions", headers=headers, json=data, timeout=90) # Increased timeout
103
+ logging.debug(f"OpenRouter Request: Headers={headers}, Data={data}")
104
+ logging.debug(f"OpenRouter Response Status: {response.status_code}")
105
+ logging.debug(f"OpenRouter Response Body: {response.text[:500]}...") # Log beginning of response
106
+
107
  response.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)
108
  result = response.json()
109
+
110
+ if not result.get('choices') or not result['choices'][0].get('message') or not result['choices'][0]['message'].get('content'):
111
+ logging.error("❌ Script generation failed: Unexpected API response format (missing content).")
112
+ logging.debug(f"Full API response: {result}")
113
+ return None
114
+
115
  script_content = result['choices'][0]['message']['content']
116
  logging.info("✅ Script generated successfully.")
117
  # Basic validation
118
  if "[SCENE:" not in script_content or "[NARRATION:" not in script_content:
119
  logging.error("❌ Script generation failed: Output format incorrect.")
120
  logging.debug(f"Raw script output: {script_content}")
121
+ # Return the raw content anyway, maybe parsing can salvage something
122
+ # return None
123
+ return script_content # Let parsing try
124
  return script_content
125
+ except requests.exceptions.Timeout:
126
+ logging.error("❌ Script generation failed: API request timed out.")
127
+ return None
128
+ except requests.exceptions.HTTPError as e:
129
+ logging.error(f"❌ Script generation failed: HTTP error: {e.response.status_code} - {e.response.text}")
130
+ return None
131
  except requests.exceptions.RequestException as e:
132
  logging.error(f"❌ Script generation failed: API request error: {e}")
133
  return None
134
+ except (KeyError, IndexError, TypeError) as e:
135
+ logging.error(f"❌ Script generation failed: Error processing API response: {e}")
136
+ logging.debug(f"Raw API response text: {response.text}")
137
  return None
138
  except Exception as e:
139
  logging.error(f"❌ Script generation failed: An unexpected error occurred: {e}")
 
144
  """Parses the generated script into scene prompts and narration text."""
145
  logging.info("Parsing script...")
146
  if not script_text:
147
+ logging.error("❌ Script parsing failed: Input script text is empty.")
148
  return None
149
 
150
+ # Regex to find scene and narration blocks, more tolerant to whitespace variations
151
  pattern = re.compile(r"\[SCENE:\s*(.*?)\s*\]\s*\[NARRATION:\s*(.*?)\s*\]", re.DOTALL | re.IGNORECASE)
152
  matches = pattern.findall(script_text)
153
 
154
  if not matches:
155
+ # Try a simpler split if the strict pattern fails, maybe format was slightly off
156
+ logging.warning("⚠️ Strict [SCENE]/[NARRATION] parsing failed. Attempting fallback split.")
157
+ elements_temp = re.split(r'\[(SCENE|NARRATION):\s*', script_text, flags=re.IGNORECASE)
158
+ if len(elements_temp) > 1:
159
+ elements_temp = [el.strip().rstrip(']') for el in elements_temp if el and el.strip() not in ['SCENE', 'NARRATION']]
160
+ # Try to pair them up
161
+ paired_elements = []
162
+ for i in range(0, len(elements_temp) - 1, 2):
163
+ # Basic check if first looks like scene and second like narration
164
+ if len(elements_temp[i]) < 100 and len(elements_temp[i+1]) > 10: # Heuristic
165
+ paired_elements.append({"type": "scene", "prompt": elements_temp[i]})
166
+ paired_elements.append({"type": "narration", "text": elements_temp[i+1].replace('\n', ' ')})
167
+ if paired_elements:
168
+ logging.info(f"✅ Fallback parsing successful, found {len(paired_elements)//2} pairs.")
169
+ return paired_elements
170
+ else:
171
+ logging.error("❌ Fallback script parsing also failed.")
172
+ logging.debug(f"Script content for parsing: {script_text}")
173
+ return None
174
+ else:
175
+ logging.error("❌ Script parsing failed: No [SCENE]/[NARRATION] pairs found, and fallback split failed.")
176
+ logging.debug(f"Script content for parsing: {script_text}")
177
+ return None
178
+
179
 
180
  elements = []
181
  for scene_desc, narration_text in matches:
182
  scene_desc = scene_desc.strip()
183
+ narration_text = narration_text.strip().replace('\n', ' ').replace('"', "'") # Clean up narration, replace double quotes
184
  if scene_desc and narration_text:
185
  elements.append({"type": "scene", "prompt": scene_desc})
186
  elements.append({"type": "narration", "text": narration_text})
 
191
  logging.info(f"✅ Script parsed into {len(elements)//2} scene/narration pairs.")
192
  return elements
193
 
194
+ def search_pexels(query, api_key, media_type="videos", per_page=5, orientation="any"):
195
  """Searches Pexels API for videos or photos."""
196
+ if not api_key or api_key == "YOUR_PEXELS_API_KEY_HERE": # Check actual key too
197
  logging.warning("⚠️ Pexels API key not configured. Skipping search.")
198
  return []
199
+ logging.info(f"Searching Pexels {media_type} for: '{query}' (Orientation: {orientation})")
200
  base_url = f"https://api.pexels.com/{media_type}/search"
201
  headers = {"Authorization": api_key}
202
+ params = {"query": query, "per_page": per_page}
203
+ if orientation != "any":
204
+ params["orientation"] = orientation # landscape or portrait
205
 
206
  try:
207
+ response = requests.get(base_url, headers=headers, params=params, timeout=30) # Increased timeout
208
  response.raise_for_status()
209
  data = response.json()
210
 
211
  results = []
212
  media_key = 'videos' if media_type == 'videos' else 'photos'
213
  link_key = 'video_files' if media_type == 'videos' else 'src'
214
+ items = data.get(media_key, [])
215
+
216
+ if not items:
217
+ logging.info(f"No Pexels {media_type} results found for '{query}'.")
218
+ return []
219
 
220
+ for item in items:
221
  if media_type == 'videos':
222
+ video_links = sorted([vf for vf in item.get(link_key, []) if vf.get('link')], key=lambda x: x.get('width', 0), reverse=True)
 
223
  if video_links:
224
+ # Prefer HD (1920 or 1280 width) or highest quality
225
+ hd_link = next((link['link'] for link in video_links if link.get('width') in [1920, 1280]), None)
226
  if hd_link:
227
+ results.append({'url': hd_link, 'type': 'video', 'width': next(link['width'] for link in video_links if link['link'] == hd_link), 'height': next(link['height'] for link in video_links if link['link'] == hd_link)})
228
  elif video_links[0].get('link'): # Fallback to highest available
229
+ link_data = video_links[0]
230
+ results.append({'url': link_data['link'], 'type': 'video', 'width': link_data.get('width'), 'height': link_data.get('height')})
231
  else: # photos
 
232
  img_links = item.get(link_key, {})
233
+ # Prioritize larger sizes
234
+ chosen_url = img_links.get('large2x') or img_links.get('large') or img_links.get('original') or img_links.get('medium')
235
+ if chosen_url:
236
+ results.append({'url': chosen_url, 'type': 'image', 'width': item.get('width'), 'height': item.get('height')})
237
+
238
+ logging.info(f"✅ Found {len(results)} Pexels {media_type} results for '{query}'.")
 
 
239
  return results
240
 
241
+ except requests.exceptions.Timeout:
242
+ logging.error(f"❌ Pexels API request timed out for '{query}'.")
243
+ return []
244
+ except requests.exceptions.HTTPError as e:
245
+ logging.error(f"❌ Pexels API HTTP error for '{query}': {e.response.status_code} - {e.response.text}")
246
+ return []
247
  except requests.exceptions.RequestException as e:
248
+ logging.error(f"❌ Pexels API request error for '{query}': {e}")
249
  return []
250
  except Exception as e:
251
+ logging.error(f"❌ Error processing Pexels response for '{query}': {e}")
252
  traceback.print_exc()
253
  return []
254
 
255
  def download_media(url, save_dir):
256
  """Downloads media (video or image) from a URL."""
257
+ logging.info(f"Downloading media from: {url[:100]}...") # Log truncated URL
258
  try:
259
+ response = requests.get(url, stream=True, timeout=120, headers={'User-Agent': USER_AGENT}) # Increased timeout
260
  response.raise_for_status()
261
 
262
+ # Try to get filename
263
+ filename = None
264
+ cd = response.headers.get('content-disposition')
265
+ if cd:
266
+ fname = re.findall('filename="?(.+)"?', cd)
267
+ if fname:
268
+ filename = fname[0]
269
+
270
+ if not filename:
271
+ # Basic filename from URL path
272
+ filename = url.split('/')[-1].split('?')[0]
273
+ # Clean filename and ensure extension
274
+ filename = re.sub(r'[^\w\.\-]', '_', filename) # Replace invalid chars
275
+ if '.' not in filename[-5:]: # Check last 5 chars for extension
276
+ # Guess extension from content type
277
+ content_type = response.headers.get('content-type', '').lower()
278
+ ext = '.vid' # default video extension
279
+ if 'jpeg' in content_type or 'jpg' in content_type: ext = '.jpg'
280
+ elif 'png' in content_type: ext = '.png'
281
+ elif 'mp4' in content_type: ext = '.mp4'
282
+ elif 'video' in content_type: ext = '.mp4' # Guess mp4 for generic video
283
+ elif 'image' in content_type: ext = '.jpg' # Guess jpg for generic image
284
+ filename = f"media_{int(time.time())}{ext}"
285
+
286
+ # Ensure filename is not excessively long
287
+ if len(filename) > 100:
288
+ name, ext = os.path.splitext(filename)
289
+ filename = name[:95] + ext
290
 
291
 
292
  save_path = os.path.join(save_dir, filename)
293
+ logging.info(f"Saving media to: {save_path}")
294
 
295
  with open(save_path, 'wb') as f:
296
+ for chunk in response.iter_content(chunk_size=8192*4): # Larger chunk size
297
  f.write(chunk)
298
 
299
+ # Verify file size (basic check)
300
+ file_size = os.path.getsize(save_path)
301
+ if file_size < 1024: # Less than 1KB might indicate an issue
302
+ logging.warning(f"⚠️ Downloaded media file size is small ({file_size} bytes). Check file: {save_path}")
303
+
304
+ logging.info(f"✅ Media downloaded successfully ({file_size / 1024:.1f} KB).")
305
  return save_path
306
+ except requests.exceptions.Timeout:
307
+ logging.error(f"❌ Media download timed out: {url}")
308
+ return None
309
  except requests.exceptions.RequestException as e:
310
  logging.error(f"❌ Media download failed: Request error: {e}")
311
  return None
 
315
  return None
316
 
317
  def generate_tts(text, lang, save_dir, segment_index):
318
+ """Generates TTS audio using gTTS."""
319
  filename = f"narration_{segment_index}.mp3"
320
  filepath = os.path.join(save_dir, filename)
321
+ # Clean text for TTS - remove characters that might cause issues
322
+ text = re.sub(r'[\[\]\*#]', '', text) # Remove brackets, asterisks, hash
323
+ text = text.strip()
324
+ if not text:
325
+ logging.error(f"❌ TTS failed for segment {segment_index}: Text is empty after cleaning.")
326
+ return None, 0
327
+
328
+ logging.info(f"Generating TTS for segment {segment_index}: '{text[:60]}...'")
329
 
330
  audio_duration = 0
331
  success = False
332
 
333
+ try:
334
+ logging.info("Attempting TTS generation with gTTS...")
335
+ tts = gTTS(text=text, lang=lang, slow=False) # Use slow=False for normal speed
336
+ tts.save(filepath)
337
+ # Get duration using soundfile as primary method
 
 
 
 
 
 
 
 
 
 
338
  try:
339
+ audio_info = sf.info(filepath)
340
+ audio_duration = audio_info.duration
341
+ if audio_duration < 0.1: # Check for invalid duration from sf.info
342
+ raise ValueError("Soundfile reported near-zero duration")
343
+ except Exception as e_dur_sf:
344
+ logging.warning(f"⚠️ Could not get accurate duration using soundfile ({e_dur_sf}). Trying pydub...")
345
+ try:
346
+ # Ensure file is written before pydub tries to read
347
+ time.sleep(0.1)
348
+ audio_seg = AudioSegment.from_mp3(filepath)
349
+ audio_duration = len(audio_seg) / 1000.0
350
+ except Exception as e_dur_pd:
351
+ logging.error(f"❌ Failed to get duration with pydub as well ({e_dur_pd}). Estimating duration.")
352
+ # Estimate duration based on words (rough fallback)
353
+ words_per_minute = 140 # Adjusted estimate
354
+ num_words = len(text.split())
355
+ audio_duration = max(1.0, (num_words / words_per_minute) * 60) # Ensure at least 1 second
356
+
357
+ # Final duration sanity check
358
+ if audio_duration < 0.5:
359
+ logging.warning(f"⚠️ Calculated audio duration is very short ({audio_duration:.2f}s). Setting minimum to 1.0s.")
360
+ audio_duration = 1.0
361
+
362
+
363
+ logging.info(f"✅ gTTS generated successfully ({audio_duration:.2f}s).")
364
+ success = True
365
+ except gTTS.gTTSError as e_gtts:
366
+ logging.error(f"❌ gTTS API Error: {e_gtts}")
367
+ success = False
368
+ except Exception as e:
369
+ logging.error(f"❌ gTTS failed with unexpected error: {e}")
370
+ traceback.print_exc()
371
+ success = False
372
 
373
  return filepath if success else None, audio_duration if success else 0
374
 
375
+
376
  def resize_media_to_fill(clip, target_size):
377
  """Resizes a MoviePy clip (video or image) to fill the target size, cropping if necessary."""
 
378
  target_w, target_h = target_size
379
+ if target_w == 0 or target_h == 0:
380
+ logging.error("Target size cannot have zero dimensions.")
381
+ return clip # Return original clip
382
+
383
  target_aspect = target_w / target_h
384
 
385
+ # Ensure clip has size attribute
386
+ if not hasattr(clip, 'size'):
387
+ logging.error("Input clip does not have 'size' attribute.")
388
+ return clip
389
  clip_w, clip_h = clip.size
390
+ if clip_w == 0 or clip_h == 0:
391
+ logging.warning("Input clip has zero dimensions. Cannot resize.")
392
+ # Return a black clip of target size instead?
393
+ return ColorClip(size=target_size, color=(0,0,0), duration=clip.duration if hasattr(clip, 'duration') else 1)
394
+
395
+
396
  clip_aspect = clip_w / clip_h
397
 
398
  if abs(clip_aspect - target_aspect) < 0.01: # Aspect ratios are close enough
399
+ # Just resize to fit width, height should scale correctly
400
+ return clip.resize(width=target_w)
401
 
402
  if clip_aspect > target_aspect:
403
+ # Clip is wider than target: Resize based on height, then crop width
404
  resized_clip = clip.resize(height=target_h)
405
+ # Calculate crop coordinates
406
+ crop_x_center = resized_clip.w / 2
407
+ crop_x1 = max(0, int(crop_x_center - target_w / 2))
408
+ crop_x2 = min(resized_clip.w, int(crop_x_center + target_w / 2))
409
+ # Adjust width if rounding caused issues
 
 
 
410
  if crop_x2 - crop_x1 != target_w:
411
+ crop_x2 = crop_x1 + target_w
412
+ if crop_x2 > resized_clip.w: # Ensure it doesn't go out of bounds
413
+ crop_x2 = resized_clip.w
414
+ crop_x1 = max(0, crop_x2 - target_w)
415
 
416
+ return resized_clip.fx(vfx.crop, x1=crop_x1, y1=0, width=target_w, height=target_h)
417
  else:
418
+ # Clip is taller than target: Resize based on width, then crop height
419
  resized_clip = clip.resize(width=target_w)
420
+ # Calculate crop coordinates
421
+ crop_y_center = resized_clip.h / 2
422
+ crop_y1 = max(0, int(crop_y_center - target_h / 2))
423
+ crop_y2 = min(resized_clip.h, int(crop_y_center + target_h / 2))
424
+ # Adjust height if rounding caused issues
 
 
 
425
  if crop_y2 - crop_y1 != target_h:
426
  crop_y2 = crop_y1 + target_h
427
+ if crop_y2 > resized_clip.h: # Ensure it doesn't go out of bounds
428
+ crop_y2 = resized_clip.h
429
+ crop_y1 = max(0, crop_y2 - target_h)
430
+
431
+ return resized_clip.fx(vfx.crop, x1=0, y1=crop_y1, width=target_w, height=target_h)
432
 
 
433
 
434
+ def apply_ken_burns(image_clip, duration, target_size, zoom_factor=1.15, direction='zoom_out'):
435
+ """Applies Ken Burns effect (zoom in/out, simple pan) to an ImageClip."""
436
+ if not isinstance(image_clip, ImageClip):
437
+ logging.warning("Ken Burns effect can only be applied to ImageClips.")
438
+ return image_clip.set_duration(duration) # Just set duration if not image
439
 
440
+ # Ensure the input clip already matches the target size (or resize it)
 
 
441
  if image_clip.size != target_size:
442
+ logging.info("Applying Ken Burns: Resizing image to fill target size first.")
443
  image_clip = resize_media_to_fill(image_clip, target_size)
444
 
445
+ # Make sure the base clip has the correct duration before applying effects
446
+ image_clip = image_clip.set_duration(duration)
447
+
448
+ img_w, img_h = image_clip.size
449
+
450
  # Define the resize function based on time `t`
451
  def resize_func(t):
452
+ if direction == 'zoom_out':
453
+ # Zoom out: start at zoom_factor, end at 1.0
454
+ current_zoom = 1 + (zoom_factor - 1) * (1 - t / duration)
455
+ elif direction == 'zoom_in':
456
+ # Zoom in: start at 1.0, end at zoom_factor
457
+ current_zoom = 1 + (zoom_factor - 1) * (t / duration)
458
+ else: # No zoom
459
+ current_zoom = 1.0
460
  return current_zoom
461
 
462
+ # Apply zoom effect
 
463
  zoomed_clip = image_clip.fx(vfx.resize, resize_func)
464
+
465
+ # Simple Pan (optional, can be randomized)
466
+ # Example: Pan slightly horizontally
467
+ pan_intensity = 0.05 # Fraction of width/height to pan
468
+ start_x_offset = 0
469
+ end_x_offset = pan_intensity * img_w * random.choice([-1, 1]) # Pan left or right
470
+ start_y_offset = 0
471
+ end_y_offset = pan_intensity * img_h * random.choice([-1, 1]) # Pan up or down
472
+
473
+ def position_func(t):
474
+ current_x = start_x_offset + (end_x_offset - start_x_offset) * (t / duration)
475
+ current_y = start_y_offset + (end_y_offset - start_y_offset) * (t / duration)
476
+ # Position is relative to the zoomed clip's center
477
+ center_x = zoomed_clip.w / 2 - current_x
478
+ center_y = zoomed_clip.h / 2 - current_y
479
+ return (center_x - target_size[0]/2, center_y - target_size[1]/2) # Top-left corner for crop
480
+
481
+ # Apply cropping based on the calculated position
482
+ # Use a function for position to simulate pan
483
+ final_clip = zoomed_clip.fx(vfx.crop, x1=lambda t: position_func(t)[0], y1=lambda t: position_func(t)[1], width=target_size[0], height=target_size[1])
484
 
485
  return final_clip.set_duration(duration)
486
 
487
 
488
+ def find_font(preferred_font=DEFAULT_FONT):
489
+ """Tries to find a usable font file."""
490
+ # 1. Check if preferred font exists directly (e.g., uploaded)
491
+ if os.path.exists(preferred_font):
492
+ logging.info(f"Using specified font: {preferred_font}")
493
+ return preferred_font
494
+
495
+ # 2. Common system font paths (Linux)
496
+ font_paths = [
497
+ "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
498
+ "/usr/share/fonts/truetype/freefont/FreeSans.ttf",
499
+ "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
500
+ "/usr/share/fonts/truetype/msttcorefonts/Arial.ttf", # If installed
501
+ # Add more paths if needed
502
+ ]
503
+ for path in font_paths:
504
+ if os.path.exists(path):
505
+ logging.info(f"Found system font: {path}")
506
+ return path
507
+
508
+ # 3. Use MoviePy's default if nothing else is found
509
+ logging.warning(f"Could not find specified font '{preferred_font}' or common system fonts. Relying on MoviePy's default.")
510
+ return None # Let MoviePy use its internal default
511
+
512
+
513
+ def create_caption_clip(text, duration, clip_size, font_size=None, font_path=None, color='white', stroke_color='black', stroke_width=1.5, position=('center', 'bottom'), margin_percent=5):
514
+ """Creates a MoviePy TextClip for captions with wrapping and background."""
515
  width, height = clip_size
516
+ max_text_width = width * 0.85 # Allow text to occupy 85% of the width
517
+ margin = int(height * (margin_percent / 100)) # Margin based on percentage of height
518
 
519
  if font_size is None:
520
+ font_size = max(20, int(height / 28)) # Dynamic font size based on height
521
+
522
+ actual_font_path = find_font(font_path or DEFAULT_FONT)
523
 
524
+ # Use Pillow for reliable text wrapping (MoviePy's can be inconsistent)
525
  try:
526
+ pil_font = ImageFont.truetype(actual_font_path, font_size) if actual_font_path else ImageFont.load_default()
 
527
  except IOError:
528
+ logging.warning(f"Failed to load font '{actual_font_path}' with Pillow. Using default.")
529
+ pil_font = ImageFont.load_default()
530
+ font_size = 18 # Reset font size if using default
531
 
532
  words = text.split()
533
  lines = []
534
  current_line = ""
 
535
 
536
+ # Simple greedy word wrapping using Pillow's textlength
537
  for word in words:
538
  test_line = f"{current_line} {word}".strip()
539
+ # Use textlength for more accurate width calculation
540
+ line_width = pil_font.getlength(test_line)
541
+ if line_width <= max_text_width:
542
  current_line = test_line
543
  else:
544
  if current_line: # Add the previous line if it wasn't empty
545
  lines.append(current_line)
546
  current_line = word # Start new line with the current word
547
  # Handle case where a single word is too long
548
+ if pil_font.getlength(current_line) > max_text_width:
549
+ logging.warning(f"Word '{current_line}' might be too long for caption width.")
550
+ # Basic split for very long words (optional)
551
+ # while pil_font.getlength(current_line) > max_text_width:
552
+ # for i in range(len(current_line)-1, 0, -1):
553
+ # if pil_font.getlength(current_line[:i]) <= max_text_width:
554
+ # lines.append(current_line[:i] + '-')
555
+ # current_line = current_line[i:]
556
+ # break
557
+ # else: # Cannot split further
558
+ # break # Avoid infinite loop
559
 
560
  if current_line: # Add the last line
561
  lines.append(current_line)
 
568
  wrapped_text,
569
  fontsize=font_size,
570
  color=color,
571
+ font=actual_font_path if actual_font_path else 'Arial', # Provide a common fallback font name
572
  stroke_color=stroke_color,
573
  stroke_width=stroke_width,
574
+ method='caption', # Use caption method for potential internal wrapping
575
  size=(int(max_text_width), None), # Constrain width for wrapping
576
  align='center'
577
  )
578
+
579
+ # Add a semi-transparent background for better readability
580
+ bg_color = (0, 0, 0) # Black background
581
+ bg_opacity = 0.5
582
+ # Create a ColorClip slightly larger than the text
583
+ txt_width, txt_height = caption.size
584
+ bg_padding = int(font_size * 0.3) # Padding around text
585
+ bg_clip = ColorClip(
586
+ size=(txt_width + 2 * bg_padding, txt_height + 2 * bg_padding),
587
+ color=bg_color,
588
+ ismask=False,
589
+ duration=duration
590
+ ).set_opacity(bg_opacity)
591
+
592
+ # Composite text on background
593
+ caption_with_bg = CompositeVideoClip([
594
+ bg_clip.set_position('center'),
595
+ caption.set_position('center')
596
+ ], size=bg_clip.size).set_duration(duration)
597
+
598
+
599
  except Exception as e:
600
+ logging.error(f"Error creating TextClip (maybe font issue?): {e}. Using simple TextClip.")
601
+ traceback.print_exc()
602
+ # Fallback to simpler TextClip without stroke/bg if needed
603
+ caption_with_bg = TextClip(wrapped_text, fontsize=font_size, color=color, method='caption', size=(int(max_text_width), None), align='center').set_duration(duration)
604
 
605
 
606
  # Set position with margin
607
+ # MoviePy position can be tricky, calculate manually
608
+ final_pos = ['center', 'center'] # Default
609
+ caption_w, caption_h = caption_with_bg.size
610
 
611
+ if isinstance(position, tuple) or isinstance(position, list):
612
+ pos_x, pos_y = position
613
+ # Horizontal positioning
614
+ if 'left' in pos_x: final_pos[0] = margin
615
+ elif 'right' in pos_x: final_pos[0] = width - caption_w - margin
616
+ else: final_pos[0] = (width - caption_w) / 2 # Center default
617
+ # Vertical positioning
618
+ if 'top' in pos_y: final_pos[1] = margin
619
+ elif 'bottom' in pos_y: final_pos[1] = height - caption_h - margin
620
+ else: final_pos[1] = (height - caption_h) / 2 # Center default
621
 
622
+ # Ensure positions are integers
623
+ final_pos = (int(final_pos[0]), int(final_pos[1]))
624
+
625
+ caption_with_bg = caption_with_bg.set_position(final_pos).set_duration(duration)
626
+ return caption_with_bg
627
 
628
 
629
def create_clip(media_path, media_type, audio_path, audio_duration, target_size, add_captions, narration_text, segment_index):
    """Build one timed segment: visual media + narration audio + optional captions.

    Args:
        media_path: Path to the downloaded video/image, or None for 'color'.
        media_type: 'video', 'image', or anything else (treated as black screen).
        audio_path: Path to the narration audio file for this segment.
        audio_duration: Expected narration length in seconds (re-measured from file).
        target_size: (width, height) of the output frame.
        add_captions: Whether to overlay the narration text as captions.
        narration_text: Text used for the caption overlay.
        segment_index: Zero-based segment number (for logging only).

    Returns:
        The composited MoviePy clip, or None if the segment could not be built.
    """
    logging.info(f"--- Creating Clip {segment_index + 1} ---")
    logging.info(f"Type: {media_type}, Audio Duration: {audio_duration:.2f}s, Target Size: {target_size}")
    visual = None
    narration = None

    try:
        # --- Narration audio ---
        logging.info("Loading audio...")
        narration = AudioFileClip(audio_path)
        # Trust the file's measured duration when it disagrees noticeably
        # with the caller's estimate (tolerance 0.2s).
        if abs(narration.duration - audio_duration) > 0.2:
            logging.warning(f"Audio file duration ({narration.duration:.2f}s) differs significantly from expected ({audio_duration:.2f}s). Using file duration.")
            audio_duration = narration.duration
        # Enforce a floor so downstream clips never get a near-zero duration.
        if audio_duration < 0.5:
            logging.warning(f"Audio duration is very short ({audio_duration:.2f}s). Clamping to 0.5s.")
            audio_duration = 0.5
        narration = narration.subclip(0, audio_duration)
        logging.info("Audio loaded.")

        # --- Visual track (video, image, or plain color) ---
        if media_type == 'video':
            logging.info(f"Loading video: {media_path}")
            try:
                # target_resolution is (height, width); audio from the file is discarded.
                raw_video = VideoFileClip(media_path, audio=False, target_resolution=(target_size[1], target_size[0]))
                if raw_video.duration >= audio_duration:
                    timed_video = raw_video.subclip(0, audio_duration)
                else:
                    # Shorter footage than narration: loop it to cover the audio.
                    logging.info(f"Video duration ({raw_video.duration:.2f}s) shorter than audio ({audio_duration:.2f}s). Looping video.")
                    timed_video = raw_video.fx(vfx.loop, duration=audio_duration)
                visual = resize_media_to_fill(timed_video, target_size)
                logging.info("Video processed.")
                # NOTE: raw_video is intentionally not closed here; the timed
                # clip still shares its reader.
            except Exception as e:
                logging.error(f"❌ Failed to load/process video '{media_path}': {e}. Using black clip.")
                traceback.print_exc()
                visual = ColorClip(size=target_size, color=(0, 0, 0), duration=audio_duration)

        elif media_type == 'image':
            logging.info(f"Loading image: {media_path}")
            try:
                still = ImageClip(media_path)
                # Ken Burns handles resizing and sets the duration.
                visual = apply_ken_burns(still, audio_duration, target_size)
                logging.info("Image processed with Ken Burns effect.")
            except Exception as e:
                logging.error(f"❌ Failed to load/process image '{media_path}': {e}. Using black clip.")
                traceback.print_exc()
                visual = ColorClip(size=target_size, color=(0, 0, 0), duration=audio_duration)

        else:
            # 'color' or any unexpected media type falls back to black.
            logging.info(f"Media type is '{media_type}'. Using black background.")
            visual = ColorClip(size=target_size, color=(0, 0, 0), duration=audio_duration)

        # --- Attach narration to the visual track ---
        if visual is not None and narration is not None:
            visual = visual.set_duration(audio_duration).set_audio(narration)
            logging.info("Audio attached to visual clip.")
        elif visual is not None:
            logging.warning("Audio clip was not loaded successfully. Video will be silent.")
            visual = visual.set_duration(audio_duration)
        else:
            logging.error("❌ Failed to create main visual clip. Skipping segment.")
            if narration:
                narration.close()
            return None

        # --- Optional caption overlay ---
        composed = visual
        if add_captions and narration_text:
            logging.info("Adding captions...")
            try:
                caption_layer = create_caption_clip(
                    narration_text,
                    audio_duration,
                    target_size,
                    font_path=DEFAULT_FONT,
                )
                composed = CompositeVideoClip([visual, caption_layer], size=target_size)
                logging.info("Captions added.")
            except Exception as e:
                # Captions are best-effort: fall back to the bare visual clip.
                logging.error(f"❌ Failed to create or composite captions: {e}")
                traceback.print_exc()
                composed = visual
        else:
            logging.info("Captions disabled or no narration text.")

        logging.info(f"✅ Clip {segment_index + 1} created successfully.")
        return composed

    except Exception as e:
        logging.error(f"❌ Failed to create clip {segment_index + 1}: {e}")
        traceback.print_exc()
        # Release whatever was opened before the failure.
        if visual and hasattr(visual, 'close'):
            visual.close()
        if narration and hasattr(narration, 'close'):
            narration.close()
        return None
748
+
749
def add_background_music(video_clip, music_file=BGM_FILE, volume=BGM_VOLUME):
    """Mix looping background music under the clip's existing narration audio.

    Args:
        video_clip: Final concatenated video clip (may or may not carry audio).
        music_file: Path to an mp3 to use as BGM; skipped if missing/tiny.
        volume: Gain multiplier applied to the BGM track (0.0-1.0).

    Returns:
        A new clip with the combined audio, or the original clip unchanged
        when the music file is unusable or mixing fails.
    """
    if not os.path.exists(music_file):
        logging.warning(f"Background music file '{music_file}' not found. Skipping BGM.")
        return video_clip
    # A sub-1KB mp3 is almost certainly empty/corrupt — skip it.
    if os.path.getsize(music_file) < 1024:
        logging.warning(f"Background music file '{music_file}' is very small. Skipping BGM.")
        return video_clip

    logging.info(f"Adding background music from {music_file}")
    bgm_clip = None
    original_audio = video_clip.audio  # narration track; may be None

    try:
        # FIX: the module only imports moviepy.video.fx.all as vfx, so `afx`
        # was an unbound name and looping BGM raised NameError. audio_loop
        # lives in the audio fx package — import it here.
        import moviepy.audio.fx.all as afx

        bgm_clip = AudioFileClip(music_file)
        video_duration = video_clip.duration

        # Loop or trim the music so it covers the video exactly.
        if bgm_clip.duration < video_duration:
            logging.info(f"Looping BGM (duration {bgm_clip.duration:.2f}s) for video ({video_duration:.2f}s)")
            bgm_clip = bgm_clip.fx(afx.audio_loop, duration=video_duration)
        else:
            bgm_clip = bgm_clip.subclip(0, video_duration)

        bgm_clip = bgm_clip.volumex(volume)

        # Combine with the existing narration, if any.
        if original_audio:
            logging.info("Combining narration audio with BGM.")
            # Safeguard: clips should already match the video duration, but
            # trim the narration if it somehow runs long.
            if abs(original_audio.duration - video_duration) > 0.1:
                logging.warning("Original audio duration doesn't match video, trimming/padding original audio.")
                original_audio = original_audio.subclip(0, video_duration)
            combined_audio = CompositeAudioClip([original_audio, bgm_clip])
        else:
            logging.warning("Video clip has no primary audio. Adding BGM only.")
            combined_audio = bgm_clip

        video_clip_with_bgm = video_clip.set_audio(combined_audio)
        logging.info("✅ Background music added.")
        # bgm_clip / original_audio are NOT closed here: the composite audio
        # still references their readers until the video is written.
        return video_clip_with_bgm

    except Exception as e:
        logging.error(f"❌ Failed to add background music: {e}")
        traceback.print_exc()
        if bgm_clip and hasattr(bgm_clip, 'close'):
            bgm_clip.close()
        # Degrade gracefully: return the clip without BGM.
        return video_clip
 
810
 
811
  # --- Main Gradio Function ---
812
def generate_video_process(topic, resolution_choice, add_captions_option, add_bgm_option, progress=gr.Progress(track_tqdm=True)):
    """The main function called by Gradio to generate the video.

    Pipeline: script (OpenRouter) -> parse -> per-segment TTS + Pexels media ->
    per-segment clips -> concatenate -> optional BGM -> write mp4.

    Returns:
        (status_log_text, final_video_path_or_None)
    """
    start_time = time.time()
    status_log = ["--- Starting Video Generation ---"]
    temp_dir = None
    final_video_path = None
    clips = []  # individual segment clips, tracked for cleanup
    # FIX: final_clip must exist before the try block — the finally clause
    # reads it, and early returns/exceptions previously raised
    # UnboundLocalError during cleanup.
    final_clip = None

    try:
        temp_dir = tempfile.mkdtemp(prefix=TEMP_FOLDER_BASE + "_")
        status_log.append(f"Temporary directory created: {temp_dir}")
        logging.info(f"Using temp directory: {temp_dir}")

        # (width, height) — portrait for shorts, landscape for Full HD.
        target_size = (1920, 1080) if resolution_choice == "Full HD (16:9)" else (1080, 1920)
        pexels_orientation = "landscape" if resolution_choice == "Full HD (16:9)" else "portrait"
        status_log.append(f"⚙️ Target resolution: {target_size[0]}x{target_size[1]}")
        status_log.append(f"⚙️ Pexels orientation: {pexels_orientation}")
        status_log.append(f"⚙️ Add Captions: {add_captions_option}")
        status_log.append(f"⚙️ Add BGM: {add_bgm_option}")

        # --- 1. Generate Script ---
        progress(0.1, desc="Generating script...")
        status_log.append("\n🔄 Generating script...")
        script = generate_script(topic, OPENROUTER_API_KEY, OPENROUTER_MODEL)
        if not script:
            status_log.append("❌ Script generation failed. Check API key, model, and connection.")
            return "\n".join(status_log), None
        status_log.append("✅ Script generated.")

        # --- 2. Parse Script ---
        progress(0.2, desc="Parsing script...")
        status_log.append("\n🔄 Parsing script...")
        elements = parse_script(script)
        if not elements or len(elements) < 2:
            status_log.append("❌ Script parsing failed. Check script format from LLM.")
            return "\n".join(status_log), None
        num_segments = len(elements) // 2
        status_log.append(f"✅ Script parsed into {num_segments} segments.")

        # --- 3. Process Segments (elements alternate scene/narration) ---
        total_duration = 0
        for i in range(0, len(elements), 2):
            segment_index = i // 2
            progress_val = 0.2 + (0.6 * (segment_index / num_segments))
            progress(progress_val, desc=f"Processing segment {segment_index + 1}/{num_segments}")

            # Guard against a trailing scene with no narration partner.
            if i + 1 >= len(elements):
                logging.warning(f"⚠️ Found scene element at index {i} but no corresponding narration. Skipping.")
                continue

            scene_elem = elements[i]
            narration_elem = elements[i + 1]

            if scene_elem.get("type") != "scene" or narration_elem.get("type") != "narration":
                logging.warning(f"⚠️ Unexpected element types at index {i}/{i+1}. Skipping segment.")
                continue

            scene_prompt = scene_elem.get('prompt', '').strip()
            narration_text = narration_elem.get('text', '').strip()

            if not scene_prompt or not narration_text:
                logging.warning(f"⚠️ Segment {segment_index + 1} has empty scene prompt or narration. Skipping.")
                status_log.append(f"\n--- Segment {segment_index + 1}/{num_segments}: SKIPPED (Empty prompt/narration) ---")
                continue

            status_log.append(f"\n--- Segment {segment_index + 1}/{num_segments} ---")
            status_log.append(f"📝 Scene: {scene_prompt}")
            status_log.append(f"🗣️ Narration: {narration_text[:100]}...")

            # 3a. Generate TTS
            status_log.append("🔄 Generating narration...")
            tts_path, tts_duration = generate_tts(narration_text, 'en', temp_dir, segment_index)
            if not tts_path or tts_duration <= 0.1:
                status_log.append(f"⚠️ TTS failed. Skipping segment.")
                logging.warning(f"Skipping segment {segment_index+1} due to TTS failure.")
                continue
            status_log.append(f"✅ Narration generated ({tts_duration:.2f}s)")
            total_duration += tts_duration

            # 3b. Search & Download Media: video first, then image, then black.
            status_log.append("🔄 Finding media...")
            media_path = None
            media_type = None

            video_results = search_pexels(scene_prompt, PEXELS_API_KEY, media_type="videos", orientation=pexels_orientation)
            if video_results:
                selected_media = random.choice(video_results)
                status_log.append(f"⬇️ Downloading Pexels video...")
                media_path = download_media(selected_media['url'], temp_dir)
                if media_path:
                    media_type = 'video'
                    status_log.append(f"✅ Video downloaded.")
                else:
                    status_log.append("⚠️ Video download failed.")

            if not media_path:
                status_log.append("🔄 No suitable video. Searching images...")
                image_results = search_pexels(scene_prompt, PEXELS_API_KEY, media_type="photos", orientation=pexels_orientation)
                if image_results:
                    selected_media = random.choice(image_results)
                    status_log.append(f"⬇️ Downloading Pexels image...")
                    media_path = download_media(selected_media['url'], temp_dir)
                    if media_path:
                        media_type = 'image'
                        status_log.append(f"✅ Image downloaded.")
                    else:
                        status_log.append("⚠️ Image download failed.")

            if not media_path:
                status_log.append(f"⚠️ No media found for '{scene_prompt}'. Using black screen.")
                media_type = 'color'
                media_path = None

            # 3c. Create Clip
            status_log.append(f"🎬 Creating clip...")
            clip = create_clip(
                media_path=media_path,
                media_type=media_type,
                audio_path=tts_path,
                audio_duration=tts_duration,
                target_size=target_size,
                add_captions=add_captions_option,
                narration_text=narration_text,
                segment_index=segment_index,
            )

            if clip:
                clips.append(clip)
                status_log.append(f"✅ Clip created.")
            else:
                status_log.append(f"❌ Failed to create clip. Skipping segment.")
                logging.error(f"Failed to create clip {segment_index+1}, skipping.")

        if not clips:
            status_log.append("\n❌ No valid clips were created. Cannot generate video.")
            return "\n".join(status_log), None

        status_log.append(f"\n✅ Successfully created {len(clips)} video clips.")
        status_log.append(f"⏱️ Estimated total video duration: {total_duration:.2f} seconds.")

        # --- 4. Concatenate Clips ---
        progress(0.85, desc="Combining video clips...")
        status_log.append("\n🔄 Combining video clips...")
        try:
            # method="compose" copes better with mixed sources; the tiny
            # negative padding overlaps clips slightly to hide seams.
            final_clip = concatenate_videoclips(clips, method="compose", padding=-0.1)
            status_log.append("✅ Clips combined successfully.")
        except Exception as e:
            status_log.append(f"❌ Error concatenating clips: {e}")
            logging.error(f"Concatenation failed: {e}")
            traceback.print_exc()
            final_clip = None

        # --- 5. Add Background Music (Optional) ---
        if final_clip and add_bgm_option:
            progress(0.9, desc="Adding background music...")
            status_log.append("\n🔄 Adding background music...")
            final_clip = add_background_music(final_clip, music_file=BGM_FILE, volume=BGM_VOLUME)

        # --- 6. Write Final Video ---
        if final_clip:
            progress(0.95, desc="Writing final video file...")
            status_log.append("\n💾 Writing final video file (this may take time)...")
            output_path = os.path.join(temp_dir, OUTPUT_VIDEO_FILENAME)
            # Quiet down moviepy's writer during the (long) encode.
            writer_logger = logging.getLogger("moviepy_writer")
            writer_logger.setLevel(logging.WARNING)

            try:
                final_clip.write_videofile(
                    output_path,
                    codec='libx264',
                    audio_codec='aac',
                    temp_audiofile=os.path.join(temp_dir, 'temp_audio.aac'),
                    remove_temp=True,
                    preset='medium',  # balance of speed vs. size
                    fps=24,
                    threads=max(1, os.cpu_count() // 2),
                    logger=None,
                )
                status_log.append(f" Final video saved: {os.path.basename(output_path)}")
                final_video_path = output_path
            except Exception as e:
                status_log.append(f"❌ Error writing final video file: {e}")
                logging.error(f"Final video write failed: {e}")
                traceback.print_exc()
                final_video_path = None
        else:
            status_log.append("\n❌ Skipping final video write because clip combination failed.")
            final_video_path = None

    except Exception as e:
        status_log.append(f"\n❌ An critical error occurred during video generation: {e}")
        logging.error("An critical error occurred in generate_video_process:")
        logging.error(traceback.format_exc())
        final_video_path = None

    finally:
        # --- 7. Cleanup ---
        status_log.append("\n🧹 Cleaning up resources...")
        for i, clip in enumerate(clips):
            try:
                if clip:
                    clip.close()
                logging.debug(f"Closed clip {i+1}")
            except Exception as e_close:
                logging.warning(f"Error closing clip {i+1}: {e_close}")
        try:
            if final_clip:
                final_clip.close()
            logging.debug("Closed final clip")
        except Exception as e_final_close:
            logging.warning(f"Error closing final clip: {e_final_close}")

        # FIX: the finished mp4 lives inside temp_dir, which is deleted below —
        # previously the function returned a path to a removed file. Move the
        # video to a stable location first.
        if final_video_path and temp_dir and final_video_path.startswith(temp_dir) and os.path.exists(final_video_path):
            persistent_path = os.path.join(tempfile.gettempdir(), f"doc_{int(time.time())}_{OUTPUT_VIDEO_FILENAME}")
            try:
                shutil.move(final_video_path, persistent_path)
                final_video_path = persistent_path
                logging.info(f"Moved final video to {persistent_path}")
            except Exception as e_move:
                logging.error(f"Could not move final video out of temp dir: {e_move}")

        if temp_dir and os.path.exists(temp_dir):
            try:
                # Retry removal a few times: encoders can briefly hold handles.
                attempts = 3
                for attempt in range(attempts):
                    try:
                        shutil.rmtree(temp_dir)
                        status_log.append(f" Temporary directory removed: {os.path.basename(temp_dir)}")
                        logging.info(f"Cleaned up temp directory: {temp_dir}")
                        break
                    except OSError as e_rm:
                        if attempt < attempts - 1:
                            logging.warning(f"Attempt {attempt+1} failed to remove temp dir {temp_dir}: {e_rm}. Retrying in 1s...")
                            time.sleep(1)
                        else:
                            raise
            except Exception as e_clean:
                status_log.append(f"⚠️ Error cleaning up temporary directory {temp_dir}: {e_clean}")
                logging.error(f"Cleanup failed for {temp_dir}: {e_clean}")
        else:
            status_log.append("ℹ️ No temporary directory to remove or already removed.")

    end_time = time.time()
    total_time = end_time - start_time
    status_log.append(f"\n--- Generation Finished ---")
    status_log.append(f"⏱️ Total time: {total_time:.2f} seconds")

    progress(1.0, desc="Finished!")
    return "\n".join(status_log), final_video_path
 
1072
 
1073
# --- Gradio Interface Definition ---
with gr.Blocks(css="footer {display: none !important}") as iface:  # Hide Gradio footer
    gr.Markdown("# 🤖 AI Documentary Generator v2")
    gr.Markdown("Enter a topic, choose settings, and let AI create a short video. Uses OpenRouter for script, Pexels for media, gTTS for narration, and MoviePy for assembly.")

    with gr.Row():
        with gr.Column(scale=1):
            topic_input = gr.Textbox(
                label="Video Topic",
                placeholder="e.g., The History of Coffee, Secrets of the Deep Ocean",
                lines=2,
            )
            resolution_input = gr.Radio(
                # NOTE(review): original label text was lost in the diff — confirm wording.
                label="Resolution",
                choices=["Short (9:16)", "Full HD (16:9)"],
                value="Short (9:16)",
            )
            captions_input = gr.Checkbox(label="Add Captions (with background)", value=True)

            # Enable the BGM checkbox only when a usable music file is present.
            bgm_exists = os.path.exists(BGM_FILE) and os.path.getsize(BGM_FILE) > 1024
            bgm_label = f"Add Background Music ({os.path.basename(BGM_FILE)})" if bgm_exists else f"Add Background Music (File '{BGM_FILE}' not found or empty)"
            bgm_input = gr.Checkbox(label=bgm_label, value=bgm_exists, interactive=bgm_exists)

            generate_button = gr.Button("Generate Video", variant="primary")

        with gr.Column(scale=2):
            status_output = gr.Textbox(label="📜 Status Log", lines=20, interactive=False, autoscroll=True)
            video_output = gr.Video(label="🎬 Generated Video")

    # Wire the button to the pipeline. The input list matches the Examples
    # block below and generate_video_process's signature.
    generate_button.click(
        fn=generate_video_process,
        inputs=[topic_input, resolution_input, captions_input, bgm_input],
        outputs=[status_output, video_output],
    )

    gr.Examples(
        examples=[
            ["The lifecycle of a monarch butterfly", "Short (9:16)", True, True],
            ["The construction of the Eiffel Tower", "Full HD (16:9)", True, False],
            ["The impact of renewable energy sources", "Short (9:16)", True, True],
            ["A brief history of the internet", "Full HD (16:9)", True, True],
        ],
        inputs=[topic_input, resolution_input, captions_input, bgm_input],
        label="Example Topics",
    )
1120
 
1121
# --- Launch the App ---
if __name__ == "__main__":
    # Create a silent placeholder BGM file if needed so the BGM option
    # degrades to silence instead of failing.
    if not os.path.exists(BGM_FILE) or os.path.getsize(BGM_FILE) < 1024:
        logging.warning(f"Background music file '{BGM_FILE}' not found or empty. Creating a silent placeholder.")
        try:
            silent_segment = AudioSegment.silent(duration=1000)  # 1 second silence
            silent_segment.export(BGM_FILE, format="mp3")
            logging.info(f"Created silent placeholder BGM file: {BGM_FILE}")
        except Exception as e:
            # Export requires ffmpeg; warn but keep launching the app.
            logging.error(f"Could not create placeholder BGM file: {e}")

    # Sanity-check API keys. These are currently hardcoded at the top of the
    # file — they should be moved to Hugging Face Secrets / env vars.
    if not PEXELS_API_KEY or len(PEXELS_API_KEY) < 50:  # basic length check
        logging.warning("PEXELS_API_KEY seems invalid or missing.")
    if not OPENROUTER_API_KEY or not OPENROUTER_API_KEY.startswith("sk-or-v1-"):
        logging.warning("OPENROUTER_API_KEY seems invalid or missing.")

    # queue() serializes concurrent requests; debug/share disabled for production.
    iface.queue().launch(debug=False, share=False)