testdeep123 commited on
Commit
97ed4cf
·
verified ·
1 Parent(s): 0a38b03

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +782 -1026
app.py CHANGED
@@ -1,1141 +1,897 @@
1
- # -*- coding: utf-8 -*-
2
  import gradio as gr
 
 
 
 
3
  import os
4
- import shutil
5
- import requests
6
- import io
7
- import time
8
- import re
9
  import random
 
10
  import math
11
- import tempfile
12
- import traceback
13
- import numpy as np
14
- import soundfile as sf
15
- # import pysrt # Not strictly needed if embedding captions directly
16
- import cv2 # OpenCV for potential image processing, though PIL is often enough
17
  from moviepy.editor import (
18
- VideoFileClip, AudioFileClip, ImageClip, concatenate_videoclips,
19
- CompositeVideoClip, TextClip, CompositeAudioClip, ColorClip
20
  )
21
  import moviepy.video.fx.all as vfx
22
- import moviepy.audio.fx.all as afx # Import audio effects
23
  from pydub import AudioSegment
 
24
  from PIL import Image, ImageDraw, ImageFont
 
25
  from bs4 import BeautifulSoup
 
26
  from urllib.parse import quote
 
27
  from gtts import gTTS
28
- import logging
 
 
29
 
30
- # --- Configuration ---
31
- # WARNING: Hardcoding keys is generally discouraged due to security risks.
32
- # Anyone who can see this code can use your keys.
33
  PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
34
- OPENROUTER_API_KEY = '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'
 
35
 
36
- OPENROUTER_MODEL = "mistralai/mistral-7b-instruct:free" # Using a known free model
37
- # OPENROUTER_MODEL = "mistralai/mistral-small-latest" # Or a small paid one if needed
38
-
39
- TEMP_FOLDER_BASE = "/tmp/ai_doc_generator" # Use /tmp inside container
40
- OUTPUT_VIDEO_FILENAME = "final_documentary.mp4"
41
  USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
42
- # Try a very common font likely available in the base python image or installed via apt
43
- # If text fails, consider installing specific font packages in Dockerfile (e.g., fonts-freefont-ttf)
44
- DEFAULT_FONT = "DejaVuSans.ttf" # Or try "FreeSans.ttf" if fonts-freefont-ttf is installed
45
- BGM_FILE = "background_music.mp3" # Optional: Place a royalty-free mp3 here
46
- BGM_VOLUME = 0.1 # Background music volume multiplier (0.0 to 1.0)
47
-
48
- # --- Logging Setup ---
49
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
50
-
51
- # --- Kokoro TTS Initialization (Optional - Keep commented unless installed) ---
52
- # KOKORO_ENABLED = False
53
- # pipeline = None
54
- # try:
55
- # from kokoro import KPipeline
56
- # device = 'cpu' # Default to CPU
57
- # pipeline = KPipeline(lang_code='a', device=device)
58
- # KOKORO_ENABLED = True
59
- # logging.info("✅ Kokoro TTS Initialized.")
60
- # except ImportError:
61
- # logging.warning("Kokoro library not found. Falling back to gTTS.")
62
- # pipeline = None
63
- # except Exception as e:
64
- # logging.warning(f"⚠️ Error initializing Kokoro TTS: {e}. Using gTTS fallback.")
65
- # pipeline = None
66
-
67
- # --- Helper Functions --- (Keep all helper functions from the previous version)
68
- def generate_script(topic, api_key, model):
69
- """Generates a documentary script using OpenRouter API."""
70
- logging.info(f"Generating script for topic: {topic}")
71
- # Check if API key is placeholder or empty
72
- if not api_key or "sk-or-v1-" not in api_key:
73
- logging.error("❌ Script generation failed: OpenRouter API Key is missing or invalid.")
74
- return None
75
-
76
- prompt = f"""Create a short documentary script about '{topic}'.
77
- The script should be structured as a sequence of scenes and narrations.
78
- Each scene description should be enclosed in [SCENE: description] tags. The description should be concise and suggest visuals (e.g., 'drone shot of mountains', 'close up of a historical artifact', 'archival footage of protests').
79
- Each narration segment should follow its corresponding scene and be enclosed in [NARRATION: text] tags. The narration should be engaging and informative, broken into short sentences suitable for ~5-10 second clips.
80
- Keep the total number of scenes between 5 and 8.
81
-
82
- Example:
83
- [SCENE: Time-lapse of a bustling city street at night]
84
- [NARRATION: Cities are centers of human activity, constantly evolving.]
85
- [SCENE: Close up on intricate gears of an old clock]
86
- [NARRATION: But how do we measure the relentless march of time?]
87
-
88
- Generate the script now:
89
- """
90
  headers = {
91
- "Authorization": f"Bearer {api_key}",
92
- "Content-Type": "application/json",
93
- "HTTP-Referer": "http://localhost", # Some APIs require Referer
94
- "X-Title": "AI Documentary Generator" # Optional custom title
95
  }
96
- data = {
97
- "model": model,
98
- "messages": [{"role": "user", "content": prompt}],
99
- "max_tokens": 1000, # Adjust as needed
100
- }
101
- try:
102
- response = requests.post("https://openrouter.ai/api/v1/chat/completions", headers=headers, json=data, timeout=90) # Increased timeout
103
- logging.debug(f"OpenRouter Request: Headers={headers}, Data={data}")
104
- logging.debug(f"OpenRouter Response Status: {response.status_code}")
105
- logging.debug(f"OpenRouter Response Body: {response.text[:500]}...") # Log beginning of response
106
 
107
- response.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)
108
- result = response.json()
109
 
110
- if not result.get('choices') or not result['choices'][0].get('message') or not result['choices'][0]['message'].get('content'):
111
- logging.error("❌ Script generation failed: Unexpected API response format (missing content).")
112
- logging.debug(f"Full API response: {result}")
113
- return None
 
114
 
115
- script_content = result['choices'][0]['message']['content']
116
- logging.info("✅ Script generated successfully.")
117
- # Basic validation
118
- if "[SCENE:" not in script_content or "[NARRATION:" not in script_content:
119
- logging.error("❌ Script generation failed: Output format incorrect.")
120
- logging.debug(f"Raw script output: {script_content}")
121
- # Return the raw content anyway, maybe parsing can salvage something
122
- # return None
123
- return script_content # Let parsing try
124
- return script_content
125
- except requests.exceptions.Timeout:
126
- logging.error("❌ Script generation failed: API request timed out.")
127
- return None
128
- except requests.exceptions.HTTPError as e:
129
- logging.error(f"❌ Script generation failed: HTTP error: {e.response.status_code} - {e.response.text}")
130
- return None
131
- except requests.exceptions.RequestException as e:
132
- logging.error(f"❌ Script generation failed: API request error: {e}")
133
- return None
134
- except (KeyError, IndexError, TypeError) as e:
135
- logging.error(f"❌ Script generation failed: Error processing API response: {e}")
136
- logging.debug(f"Raw API response text: {response.text}")
137
- return None
138
- except Exception as e:
139
- logging.error(f"❌ Script generation failed: An unexpected error occurred: {e}")
140
- traceback.print_exc()
141
- return None
142
 
143
- def parse_script(script_text):
144
- """Parses the generated script into scene prompts and narration text."""
145
- logging.info("Parsing script...")
146
- if not script_text:
147
- logging.error("❌ Script parsing failed: Input script text is empty.")
148
- return None
149
 
150
- # Regex to find scene and narration blocks, more tolerant to whitespace variations
151
- pattern = re.compile(r"\[SCENE:\s*(.*?)\s*\]\s*\[NARRATION:\s*(.*?)\s*\]", re.DOTALL | re.IGNORECASE)
152
- matches = pattern.findall(script_text)
153
-
154
- if not matches:
155
- # Try a simpler split if the strict pattern fails, maybe format was slightly off
156
- logging.warning("⚠️ Strict [SCENE]/[NARRATION] parsing failed. Attempting fallback split.")
157
- elements_temp = re.split(r'\[(SCENE|NARRATION):\s*', script_text, flags=re.IGNORECASE)
158
- if len(elements_temp) > 1:
159
- elements_temp = [el.strip().rstrip(']') for el in elements_temp if el and el.strip() not in ['SCENE', 'NARRATION']]
160
- # Try to pair them up
161
- paired_elements = []
162
- for i in range(0, len(elements_temp) - 1, 2):
163
- # Basic check if first looks like scene and second like narration
164
- if len(elements_temp[i]) < 100 and len(elements_temp[i+1]) > 10: # Heuristic
165
- paired_elements.append({"type": "scene", "prompt": elements_temp[i]})
166
- paired_elements.append({"type": "narration", "text": elements_temp[i+1].replace('\n', ' ')})
167
- if paired_elements:
168
- logging.info(f"✅ Fallback parsing successful, found {len(paired_elements)//2} pairs.")
169
- return paired_elements
170
- else:
171
- logging.error("❌ Fallback script parsing also failed.")
172
- logging.debug(f"Script content for parsing: {script_text}")
173
- return None
174
- else:
175
- logging.error("❌ Script parsing failed: No [SCENE]/[NARRATION] pairs found, and fallback split failed.")
176
- logging.debug(f"Script content for parsing: {script_text}")
177
- return None
178
 
 
 
179
 
180
- elements = []
181
- for scene_desc, narration_text in matches:
182
- scene_desc = scene_desc.strip()
183
- narration_text = narration_text.strip().replace('\n', ' ').replace('"', "'") # Clean up narration, replace double quotes
184
- if scene_desc and narration_text:
185
- elements.append({"type": "scene", "prompt": scene_desc})
186
- elements.append({"type": "narration", "text": narration_text})
187
- else:
188
- logging.warning(f"⚠️ Skipping invalid pair: Scene='{scene_desc}', Narration='{narration_text}'")
189
 
190
 
191
- logging.info(f"✅ Script parsed into {len(elements)//2} scene/narration pairs.")
192
- return elements
193
 
194
- def search_pexels(query, api_key, media_type="videos", per_page=5, orientation="any"):
195
- """Searches Pexels API for videos or photos."""
196
- if not api_key or api_key == "YOUR_PEXELS_API_KEY_HERE": # Check actual key too
197
- logging.warning("⚠️ Pexels API key not configured. Skipping search.")
198
- return []
199
- logging.info(f"Searching Pexels {media_type} for: '{query}' (Orientation: {orientation})")
200
- base_url = f"https://api.pexels.com/{media_type}/search"
201
- headers = {"Authorization": api_key}
202
- params = {"query": query, "per_page": per_page}
203
- if orientation != "any":
204
- params["orientation"] = orientation # landscape or portrait
205
 
206
- try:
207
- response = requests.get(base_url, headers=headers, params=params, timeout=30) # Increased timeout
208
- response.raise_for_status()
209
- data = response.json()
210
-
211
- results = []
212
- media_key = 'videos' if media_type == 'videos' else 'photos'
213
- link_key = 'video_files' if media_type == 'videos' else 'src'
214
- items = data.get(media_key, [])
215
-
216
- if not items:
217
- logging.info(f"No Pexels {media_type} results found for '{query}'.")
218
- return []
219
-
220
- for item in items:
221
- if media_type == 'videos':
222
- video_links = sorted([vf for vf in item.get(link_key, []) if vf.get('link')], key=lambda x: x.get('width', 0), reverse=True)
223
- if video_links:
224
- # Prefer HD (1920 or 1280 width) or highest quality
225
- hd_link = next((link['link'] for link in video_links if link.get('width') in [1920, 1280]), None)
226
- if hd_link:
227
- results.append({'url': hd_link, 'type': 'video', 'width': next(link['width'] for link in video_links if link['link'] == hd_link), 'height': next(link['height'] for link in video_links if link['link'] == hd_link)})
228
- elif video_links[0].get('link'): # Fallback to highest available
229
- link_data = video_links[0]
230
- results.append({'url': link_data['link'], 'type': 'video', 'width': link_data.get('width'), 'height': link_data.get('height')})
231
- else: # photos
232
- img_links = item.get(link_key, {})
233
- # Prioritize larger sizes
234
- chosen_url = img_links.get('large2x') or img_links.get('large') or img_links.get('original') or img_links.get('medium')
235
- if chosen_url:
236
- results.append({'url': chosen_url, 'type': 'image', 'width': item.get('width'), 'height': item.get('height')})
237
-
238
- logging.info(f"✅ Found {len(results)} Pexels {media_type} results for '{query}'.")
239
- return results
240
-
241
- except requests.exceptions.Timeout:
242
- logging.error(f"❌ Pexels API request timed out for '{query}'.")
243
- return []
244
- except requests.exceptions.HTTPError as e:
245
- logging.error(f"❌ Pexels API HTTP error for '{query}': {e.response.status_code} - {e.response.text}")
246
- return []
247
- except requests.exceptions.RequestException as e:
248
- logging.error(f"❌ Pexels API request error for '{query}': {e}")
249
- return []
250
- except Exception as e:
251
- logging.error(f"❌ Error processing Pexels response for '{query}': {e}")
252
- traceback.print_exc()
253
- return []
254
 
255
- def download_media(url, save_dir):
256
- """Downloads media (video or image) from a URL."""
257
- logging.info(f"Downloading media from: {url[:100]}...") # Log truncated URL
258
- try:
259
- response = requests.get(url, stream=True, timeout=120, headers={'User-Agent': USER_AGENT}) # Increased timeout
260
- response.raise_for_status()
261
 
262
- # Try to get filename
263
- filename = None
264
- cd = response.headers.get('content-disposition')
265
- if cd:
266
- fname = re.findall('filename="?(.+)"?', cd)
267
- if fname:
268
- filename = fname[0]
269
-
270
- if not filename:
271
- # Basic filename from URL path
272
- filename = url.split('/')[-1].split('?')[0]
273
- # Clean filename and ensure extension
274
- filename = re.sub(r'[^\w\.\-]', '_', filename) # Replace invalid chars
275
- if '.' not in filename[-5:]: # Check last 5 chars for extension
276
- # Guess extension from content type
277
- content_type = response.headers.get('content-type', '').lower()
278
- ext = '.vid' # default video extension
279
- if 'jpeg' in content_type or 'jpg' in content_type: ext = '.jpg'
280
- elif 'png' in content_type: ext = '.png'
281
- elif 'mp4' in content_type: ext = '.mp4'
282
- elif 'video' in content_type: ext = '.mp4' # Guess mp4 for generic video
283
- elif 'image' in content_type: ext = '.jpg' # Guess jpg for generic image
284
- filename = f"media_{int(time.time())}{ext}"
285
-
286
- # Ensure filename is not excessively long
287
- if len(filename) > 100:
288
- name, ext = os.path.splitext(filename)
289
- filename = name[:95] + ext
290
-
291
-
292
- save_path = os.path.join(save_dir, filename)
293
- logging.info(f"Saving media to: {save_path}")
294
-
295
- with open(save_path, 'wb') as f:
296
- for chunk in response.iter_content(chunk_size=8192*4): # Larger chunk size
297
- f.write(chunk)
298
 
299
- # Verify file size (basic check)
300
- file_size = os.path.getsize(save_path)
301
- if file_size < 1024: # Less than 1KB might indicate an issue
302
- logging.warning(f"⚠️ Downloaded media file size is small ({file_size} bytes). Check file: {save_path}")
303
 
304
- logging.info(f"✅ Media downloaded successfully ({file_size / 1024:.1f} KB).")
305
- return save_path
306
- except requests.exceptions.Timeout:
307
- logging.error(f"❌ Media download timed out: {url}")
308
- return None
309
- except requests.exceptions.RequestException as e:
310
- logging.error(f"❌ Media download failed: Request error: {e}")
311
- return None
312
- except Exception as e:
313
- logging.error(f"❌ Media download failed: An unexpected error occurred: {e}")
314
- traceback.print_exc()
315
- return None
316
 
317
- def generate_tts(text, lang, save_dir, segment_index):
318
- """Generates TTS audio using gTTS."""
319
- filename = f"narration_{segment_index}.mp3"
320
- filepath = os.path.join(save_dir, filename)
321
- # Clean text for TTS - remove characters that might cause issues
322
- text = re.sub(r'[\[\]\*#]', '', text) # Remove brackets, asterisks, hash
323
- text = text.strip()
324
- if not text:
325
- logging.error(f"❌ TTS failed for segment {segment_index}: Text is empty after cleaning.")
326
- return None, 0
327
 
328
- logging.info(f"Generating TTS for segment {segment_index}: '{text[:60]}...'")
329
 
330
- audio_duration = 0
331
- success = False
332
 
333
- try:
334
- logging.info("Attempting TTS generation with gTTS...")
335
- tts = gTTS(text=text, lang=lang, slow=False) # Use slow=False for normal speed
336
- tts.save(filepath)
337
- # Get duration using soundfile as primary method
338
- try:
339
- audio_info = sf.info(filepath)
340
- audio_duration = audio_info.duration
341
- if audio_duration < 0.1: # Check for invalid duration from sf.info
342
- raise ValueError("Soundfile reported near-zero duration")
343
- except Exception as e_dur_sf:
344
- logging.warning(f"⚠️ Could not get accurate duration using soundfile ({e_dur_sf}). Trying pydub...")
345
- try:
346
- # Ensure file is written before pydub tries to read
347
- time.sleep(0.1)
348
- audio_seg = AudioSegment.from_mp3(filepath)
349
- audio_duration = len(audio_seg) / 1000.0
350
- except Exception as e_dur_pd:
351
- logging.error(f"❌ Failed to get duration with pydub as well ({e_dur_pd}). Estimating duration.")
352
- # Estimate duration based on words (rough fallback)
353
- words_per_minute = 140 # Adjusted estimate
354
- num_words = len(text.split())
355
- audio_duration = max(1.0, (num_words / words_per_minute) * 60) # Ensure at least 1 second
356
-
357
- # Final duration sanity check
358
- if audio_duration < 0.5:
359
- logging.warning(f"⚠️ Calculated audio duration is very short ({audio_duration:.2f}s). Setting minimum to 1.0s.")
360
- audio_duration = 1.0
361
-
362
-
363
- logging.info(f"✅ gTTS generated successfully ({audio_duration:.2f}s).")
364
- success = True
365
- except gTTS.gTTSError as e_gtts:
366
- logging.error(f"❌ gTTS API Error: {e_gtts}")
367
- success = False
368
- except Exception as e:
369
- logging.error(f"❌ gTTS failed with unexpected error: {e}")
370
- traceback.print_exc()
371
- success = False
372
 
373
- return filepath if success else None, audio_duration if success else 0
374
 
 
375
 
376
- def resize_media_to_fill(clip, target_size):
377
- """Resizes a MoviePy clip (video or image) to fill the target size, cropping if necessary."""
378
- target_w, target_h = target_size
379
- if target_w == 0 or target_h == 0:
380
- logging.error("Target size cannot have zero dimensions.")
381
- return clip # Return original clip
382
 
383
- target_aspect = target_w / target_h
384
 
385
- # Ensure clip has size attribute
386
- if not hasattr(clip, 'size'):
387
- logging.error("Input clip does not have 'size' attribute.")
388
- return clip
389
- clip_w, clip_h = clip.size
390
- if clip_w == 0 or clip_h == 0:
391
- logging.warning("Input clip has zero dimensions. Cannot resize.")
392
- # Return a black clip of target size instead?
393
- return ColorClip(size=target_size, color=(0,0,0), duration=clip.duration if hasattr(clip, 'duration') else 1)
394
 
395
 
396
- clip_aspect = clip_w / clip_h
397
 
398
- if abs(clip_aspect - target_aspect) < 0.01: # Aspect ratios are close enough
399
- # Just resize to fit width, height should scale correctly
400
- return clip.resize(width=target_w)
401
 
402
- if clip_aspect > target_aspect:
403
- # Clip is wider than target: Resize based on height, then crop width
404
- resized_clip = clip.resize(height=target_h)
405
- # Calculate crop coordinates
406
- crop_x_center = resized_clip.w / 2
407
- crop_x1 = max(0, int(crop_x_center - target_w / 2))
408
- crop_x2 = min(resized_clip.w, int(crop_x_center + target_w / 2))
409
- # Adjust width if rounding caused issues
410
- if crop_x2 - crop_x1 != target_w:
411
- crop_x2 = crop_x1 + target_w
412
- if crop_x2 > resized_clip.w: # Ensure it doesn't go out of bounds
413
- crop_x2 = resized_clip.w
414
- crop_x1 = max(0, crop_x2 - target_w)
415
-
416
- return resized_clip.fx(vfx.crop, x1=crop_x1, y1=0, width=target_w, height=target_h)
417
- else:
418
- # Clip is taller than target: Resize based on width, then crop height
419
- resized_clip = clip.resize(width=target_w)
420
- # Calculate crop coordinates
421
- crop_y_center = resized_clip.h / 2
422
- crop_y1 = max(0, int(crop_y_center - target_h / 2))
423
- crop_y2 = min(resized_clip.h, int(crop_y_center + target_h / 2))
424
- # Adjust height if rounding caused issues
425
- if crop_y2 - crop_y1 != target_h:
426
- crop_y2 = crop_y1 + target_h
427
- if crop_y2 > resized_clip.h: # Ensure it doesn't go out of bounds
428
- crop_y2 = resized_clip.h
429
- crop_y1 = max(0, crop_y2 - target_h)
430
-
431
- return resized_clip.fx(vfx.crop, x1=0, y1=crop_y1, width=target_w, height=target_h)
432
-
433
-
434
- def apply_ken_burns(image_clip, duration, target_size, zoom_factor=1.15, direction='zoom_out'):
435
- """Applies Ken Burns effect (zoom in/out, simple pan) to an ImageClip."""
436
- if not isinstance(image_clip, ImageClip):
437
- logging.warning("Ken Burns effect can only be applied to ImageClips.")
438
- return image_clip.set_duration(duration) # Just set duration if not image
439
-
440
- # Ensure the input clip already matches the target size (or resize it)
441
- if image_clip.size != target_size:
442
- logging.info("Applying Ken Burns: Resizing image to fill target size first.")
443
- image_clip = resize_media_to_fill(image_clip, target_size)
444
-
445
- # Make sure the base clip has the correct duration before applying effects
446
- image_clip = image_clip.set_duration(duration)
447
-
448
- img_w, img_h = image_clip.size
449
-
450
- # Define the resize function based on time `t`
451
- def resize_func(t):
452
- if direction == 'zoom_out':
453
- # Zoom out: start at zoom_factor, end at 1.0
454
- current_zoom = 1 + (zoom_factor - 1) * (1 - t / duration)
455
- elif direction == 'zoom_in':
456
- # Zoom in: start at 1.0, end at zoom_factor
457
- current_zoom = 1 + (zoom_factor - 1) * (t / duration)
458
- else: # No zoom
459
- current_zoom = 1.0
460
- return current_zoom
461
-
462
- # Apply zoom effect
463
- zoomed_clip = image_clip.fx(vfx.resize, resize_func)
464
-
465
- # Simple Pan (optional, can be randomized)
466
- # Example: Pan slightly horizontally
467
- pan_intensity = 0.05 # Fraction of width/height to pan
468
- start_x_offset = 0
469
- end_x_offset = pan_intensity * img_w * random.choice([-1, 1]) # Pan left or right
470
- start_y_offset = 0
471
- end_y_offset = pan_intensity * img_h * random.choice([-1, 1]) # Pan up or down
472
-
473
- def position_func(t):
474
- current_x = start_x_offset + (end_x_offset - start_x_offset) * (t / duration)
475
- current_y = start_y_offset + (end_y_offset - start_y_offset) * (t / duration)
476
- # Position is relative to the zoomed clip's center
477
- center_x = zoomed_clip.w / 2 - current_x
478
- center_y = zoomed_clip.h / 2 - current_y
479
- return (center_x - target_size[0]/2, center_y - target_size[1]/2) # Top-left corner for crop
480
-
481
- # Apply cropping based on the calculated position
482
- # Use a function for position to simulate pan
483
- final_clip = zoomed_clip.fx(vfx.crop, x1=lambda t: position_func(t)[0], y1=lambda t: position_func(t)[1], width=target_size[0], height=target_size[1])
484
-
485
- return final_clip.set_duration(duration)
486
-
487
-
488
- def find_font(preferred_font=DEFAULT_FONT):
489
- """Tries to find a usable font file."""
490
- # 1. Check if preferred font exists directly (e.g., uploaded)
491
- if os.path.exists(preferred_font):
492
- logging.info(f"Using specified font: {preferred_font}")
493
- return preferred_font
494
-
495
- # 2. Common system font paths (Linux)
496
- font_paths = [
497
- "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
498
- "/usr/share/fonts/truetype/freefont/FreeSans.ttf",
499
- "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
500
- "/usr/share/fonts/truetype/msttcorefonts/Arial.ttf", # If installed
501
- # Add more paths if needed
502
- ]
503
- for path in font_paths:
504
- if os.path.exists(path):
505
- logging.info(f"Found system font: {path}")
506
- return path
507
-
508
- # 3. Use MoviePy's default if nothing else is found
509
- logging.warning(f"Could not find specified font '{preferred_font}' or common system fonts. Relying on MoviePy's default.")
510
- return None # Let MoviePy use its internal default
511
-
512
-
513
- def create_caption_clip(text, duration, clip_size, font_size=None, font_path=None, color='white', stroke_color='black', stroke_width=1.5, position=('center', 'bottom'), margin_percent=5):
514
- """Creates a MoviePy TextClip for captions with wrapping and background."""
515
- width, height = clip_size
516
- max_text_width = width * 0.85 # Allow text to occupy 85% of the width
517
- margin = int(height * (margin_percent / 100)) # Margin based on percentage of height
518
-
519
- if font_size is None:
520
- font_size = max(20, int(height / 28)) # Dynamic font size based on height
521
-
522
- actual_font_path = find_font(font_path or DEFAULT_FONT)
523
-
524
- # Use Pillow for reliable text wrapping (MoviePy's can be inconsistent)
525
- try:
526
- pil_font = ImageFont.truetype(actual_font_path, font_size) if actual_font_path else ImageFont.load_default()
527
- except IOError:
528
- logging.warning(f"Failed to load font '{actual_font_path}' with Pillow. Using default.")
529
- pil_font = ImageFont.load_default()
530
- font_size = 18 # Reset font size if using default
531
-
532
- words = text.split()
533
- lines = []
534
- current_line = ""
535
-
536
- # Simple greedy word wrapping using Pillow's textlength
537
- for word in words:
538
- test_line = f"{current_line} {word}".strip()
539
- # Use textlength for more accurate width calculation
540
- line_width = pil_font.getlength(test_line)
541
- if line_width <= max_text_width:
542
- current_line = test_line
543
- else:
544
- if current_line: # Add the previous line if it wasn't empty
545
- lines.append(current_line)
546
- current_line = word # Start new line with the current word
547
- # Handle case where a single word is too long
548
- if pil_font.getlength(current_line) > max_text_width:
549
- logging.warning(f"Word '{current_line}' might be too long for caption width.")
550
- # Basic split for very long words (optional)
551
- # while pil_font.getlength(current_line) > max_text_width:
552
- # for i in range(len(current_line)-1, 0, -1):
553
- # if pil_font.getlength(current_line[:i]) <= max_text_width:
554
- # lines.append(current_line[:i] + '-')
555
- # current_line = current_line[i:]
556
- # break
557
- # else: # Cannot split further
558
- # break # Avoid infinite loop
559
-
560
- if current_line: # Add the last line
561
- lines.append(current_line)
562
-
563
- wrapped_text = "\n".join(lines)
564
-
565
- # Create the TextClip
566
- try:
567
- caption = TextClip(
568
- wrapped_text,
569
- fontsize=font_size,
570
- color=color,
571
- font=actual_font_path if actual_font_path else 'Arial', # Provide a common fallback font name
572
- stroke_color=stroke_color,
573
- stroke_width=stroke_width,
574
- method='caption', # Use caption method for potential internal wrapping
575
- size=(int(max_text_width), None), # Constrain width for wrapping
576
- align='center'
577
- )
578
 
579
- # Add a semi-transparent background for better readability
580
- bg_color = (0, 0, 0) # Black background
581
- bg_opacity = 0.5
582
- # Create a ColorClip slightly larger than the text
583
- txt_width, txt_height = caption.size
584
- bg_padding = int(font_size * 0.3) # Padding around text
585
- bg_clip = ColorClip(
586
- size=(txt_width + 2 * bg_padding, txt_height + 2 * bg_padding),
587
- color=bg_color,
588
- ismask=False,
589
- duration=duration
590
- ).set_opacity(bg_opacity)
591
-
592
- # Composite text on background
593
- caption_with_bg = CompositeVideoClip([
594
- bg_clip.set_position('center'),
595
- caption.set_position('center')
596
- ], size=bg_clip.size).set_duration(duration)
597
 
598
 
599
- except Exception as e:
600
- logging.error(f"Error creating TextClip (maybe font issue?): {e}. Using simple TextClip.")
601
- traceback.print_exc()
602
- # Fallback to simpler TextClip without stroke/bg if needed
603
- caption_with_bg = TextClip(wrapped_text, fontsize=font_size, color=color, method='caption', size=(int(max_text_width), None), align='center').set_duration(duration)
604
-
605
-
606
- # Set position with margin
607
- # MoviePy position can be tricky, calculate manually
608
- final_pos = ['center', 'center'] # Default
609
- caption_w, caption_h = caption_with_bg.size
610
-
611
- if isinstance(position, tuple) or isinstance(position, list):
612
- pos_x, pos_y = position
613
- # Horizontal positioning
614
- if 'left' in pos_x: final_pos[0] = margin
615
- elif 'right' in pos_x: final_pos[0] = width - caption_w - margin
616
- else: final_pos[0] = (width - caption_w) / 2 # Center default
617
- # Vertical positioning
618
- if 'top' in pos_y: final_pos[1] = margin
619
- elif 'bottom' in pos_y: final_pos[1] = height - caption_h - margin
620
- else: final_pos[1] = (height - caption_h) / 2 # Center default
621
-
622
- # Ensure positions are integers
623
- final_pos = (int(final_pos[0]), int(final_pos[1]))
624
-
625
- caption_with_bg = caption_with_bg.set_position(final_pos).set_duration(duration)
626
- return caption_with_bg
627
-
628
-
629
- def create_clip(media_path, media_type, audio_path, audio_duration, target_size, add_captions, narration_text, segment_index):
630
- """Creates a single video clip from media, audio, and optional captions."""
631
- logging.info(f"--- Creating Clip {segment_index + 1} ---")
632
- logging.info(f"Type: {media_type}, Audio Duration: {audio_duration:.2f}s, Target Size: {target_size}")
633
- main_clip = None
634
- audio_clip = None
 
635
 
636
  try:
637
- # --- Load Audio ---
638
- logging.info("Loading audio...")
639
- audio_clip = AudioFileClip(audio_path)
640
- # Verify audio duration and clamp if necessary
641
- if abs(audio_clip.duration - audio_duration) > 0.2: # Allow slightly larger diff
642
- logging.warning(f"Audio file duration ({audio_clip.duration:.2f}s) differs significantly from expected ({audio_duration:.2f}s). Using file duration.")
643
- audio_duration = audio_clip.duration
644
- # Ensure minimum duration
645
- if audio_duration < 0.5:
646
- logging.warning(f"Audio duration is very short ({audio_duration:.2f}s). Clamping to 0.5s.")
647
- audio_duration = 0.5
648
- # Trim audio clip precisely
649
- audio_clip = audio_clip.subclip(0, audio_duration)
650
- logging.info("Audio loaded.")
651
-
652
- # --- Load Media (Video, Image, or Color) ---
653
- if media_type == 'video':
654
- logging.info(f"Loading video: {media_path}")
655
- try:
656
- # Load with target resolution hint, disable audio from video file
657
- video_clip_raw = VideoFileClip(media_path, audio=False, target_resolution=(target_size[1], target_size[0]))
658
 
659
- # Trim or loop video to match audio duration
660
- if video_clip_raw.duration >= audio_duration:
661
- video_clip_timed = video_clip_raw.subclip(0, audio_duration)
662
- else:
663
- # Loop the video if it's shorter
664
- logging.info(f"Video duration ({video_clip_raw.duration:.2f}s) shorter than audio ({audio_duration:.2f}s). Looping video.")
665
- # Use fx.loop for cleaner looping
666
- video_clip_timed = video_clip_raw.fx(vfx.loop, duration=audio_duration)
667
- # Fallback: manual concatenate (less efficient)
668
- # num_loops = math.ceil(audio_duration / video_clip_raw.duration)
669
- # video_clip_timed = concatenate_videoclips([video_clip_raw] * num_loops).subclip(0, audio_duration)
670
-
671
- main_clip = resize_media_to_fill(video_clip_timed, target_size)
672
- logging.info("Video processed.")
673
- # Clean up raw clip explicitly? Moviepy should handle it, but just in case
674
- # video_clip_raw.close() # Might cause issues if timed clip still references it
675
-
676
- except Exception as e:
677
- logging.error(f"❌ Failed to load/process video '{media_path}': {e}. Using black clip.")
678
- traceback.print_exc()
679
- main_clip = ColorClip(size=target_size, color=(0,0,0), duration=audio_duration)
680
-
681
- elif media_type == 'image':
682
- logging.info(f"Loading image: {media_path}")
683
- try:
684
- img_clip_base = ImageClip(media_path)
685
- # Apply Ken Burns effect (includes resizing and duration setting)
686
- main_clip = apply_ken_burns(img_clip_base, audio_duration, target_size)
687
- logging.info("Image processed with Ken Burns effect.")
688
- except Exception as e:
689
- logging.error(f"❌ Failed to load/process image '{media_path}': {e}. Using black clip.")
690
- traceback.print_exc()
691
- main_clip = ColorClip(size=target_size, color=(0,0,0), duration=audio_duration)
692
-
693
- else: # Includes 'color' type or any unexpected type
694
- logging.info(f"Media type is '{media_type}'. Using black background.")
695
- main_clip = ColorClip(size=target_size, color=(0,0,0), duration=audio_duration)
696
-
697
- # --- Combine Video/Image and Audio ---
698
- if main_clip and audio_clip:
699
- # Ensure main_clip has correct duration before setting audio
700
- main_clip = main_clip.set_duration(audio_duration)
701
- main_clip = main_clip.set_audio(audio_clip)
702
- logging.info("Audio attached to visual clip.")
703
- elif main_clip:
704
- logging.warning("Audio clip was not loaded successfully. Video will be silent.")
705
- main_clip = main_clip.set_duration(audio_duration)
706
  else:
707
- logging.error(" Failed to create main visual clip. Skipping segment.")
708
- if audio_clip: audio_clip.close()
709
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
710
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
711
 
712
- # --- Add Captions (if enabled) ---
713
- final_composite_clip = main_clip # Start with the main clip
 
 
 
 
714
 
715
- if add_captions and narration_text:
716
- logging.info("Adding captions...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
717
  try:
718
- caption_clip = create_caption_clip(
719
- narration_text,
720
- audio_duration,
721
- target_size,
722
- font_path=DEFAULT_FONT # Pass the default font path
723
- )
724
- # Composite caption on top of the main clip
725
- final_composite_clip = CompositeVideoClip([main_clip, caption_clip], size=target_size)
726
- logging.info("Captions added.")
727
- except Exception as e:
728
- logging.error(f"❌ Failed to create or composite captions: {e}")
729
- traceback.print_exc()
730
- # Proceed without captions if creation failed
731
- final_composite_clip = main_clip
732
- else:
733
- logging.info("Captions disabled or no narration text.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
734
 
 
 
 
 
 
735
 
736
- logging.info(f"✅ Clip {segment_index + 1} created successfully.")
737
- # Return the final composited clip (with or without captions)
738
- return final_composite_clip
 
 
 
 
739
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
740
  except Exception as e:
741
- logging.error(f" Failed to create clip {segment_index + 1}: {e}")
742
- traceback.print_exc()
743
- # Ensure cleanup on error
744
- if main_clip and hasattr(main_clip, 'close'): main_clip.close()
745
- if audio_clip and hasattr(audio_clip, 'close'): audio_clip.close()
746
  return None
747
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
748
 
749
- def add_background_music(video_clip, music_file=BGM_FILE, volume=BGM_VOLUME):
750
- """Adds background music to the final video clip."""
751
- if not os.path.exists(music_file):
752
- logging.warning(f"Background music file '{music_file}' not found. Skipping BGM.")
753
- return video_clip
754
- # Check if file is empty or too small
755
- if os.path.getsize(music_file) < 1024:
756
- logging.warning(f"Background music file '{music_file}' is very small. Skipping BGM.")
757
- return video_clip
 
 
 
 
 
758
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
759
 
760
- logging.info(f"Adding background music from {music_file}")
761
- bgm_clip = None
762
- original_audio = video_clip.audio # Get existing audio first
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
763
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
764
  try:
765
- bgm_clip = AudioFileClip(music_file)
766
- video_duration = video_clip.duration
 
 
 
 
 
 
 
 
 
 
 
 
 
 
767
 
768
- # Loop or trim BGM
769
- if bgm_clip.duration < video_duration:
770
- logging.info(f"Looping BGM (duration {bgm_clip.duration:.2f}s) for video ({video_duration:.2f}s)")
771
- bgm_clip = bgm_clip.fx(afx.audio_loop, duration=video_duration)
772
  else:
773
- bgm_clip = bgm_clip.subclip(0, video_duration)
774
-
775
- # Adjust volume
776
- bgm_clip = bgm_clip.volumex(volume)
777
-
778
- # Combine with existing audio
779
- if original_audio:
780
- logging.info("Combining narration audio with BGM.")
781
- # Ensure original audio has same duration as video clip for composite
782
- if abs(original_audio.duration - video_duration) > 0.1:
783
- logging.warning("Original audio duration doesn't match video, trimming/padding original audio.")
784
- # This shouldn't happen if clips were created correctly, but as a safeguard:
785
- original_audio = original_audio.subclip(0, video_duration) # Trim if longer
786
- # Padding if shorter is harder, CompositeAudioClip might handle it
787
-
788
- combined_audio = CompositeAudioClip([original_audio, bgm_clip])
789
- else:
790
- logging.warning("Video clip has no primary audio. Adding BGM only.")
791
- combined_audio = bgm_clip
792
 
793
- video_clip_with_bgm = video_clip.set_audio(combined_audio)
794
- logging.info("✅ Background music added.")
 
795
 
796
- # Close intermediate clips AFTER successful composition
797
- # bgm_clip.close() # CompositeAudioClip might still need it? Test this.
798
- # if original_audio: original_audio.close() # Same potential issue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
799
 
800
- return video_clip_with_bgm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
801
 
802
  except Exception as e:
803
- logging.error(f" Failed to add background music: {e}")
804
- traceback.print_exc()
805
- # Clean up BGM clip if it was loaded
806
- if bgm_clip and hasattr(bgm_clip, 'close'): bgm_clip.close()
807
- # Return original clip without BGM on failure
808
- return video_clip
809
-
810
-
811
- # --- Main Gradio Function ---
812
- def generate_video_process(topic, resolution_choice, add_captions_option, add_bgm_option, progress=gr.Progress(track_tqdm=True)):
813
- """The main function called by Gradio to generate the video."""
814
- start_time = time.time()
815
- status_log = ["--- Starting Video Generation ---"]
816
- temp_dir = None
817
- final_video_path = None
818
- clips = [] # Keep track of created clips for cleanup
819
-
820
- # --- Setup ---
 
 
 
 
 
 
 
 
821
  try:
822
- temp_dir = tempfile.mkdtemp(prefix=TEMP_FOLDER_BASE + "_")
823
- status_log.append(f"✅ Temporary directory created: {temp_dir}")
824
- logging.info(f"Using temp directory: {temp_dir}")
825
-
826
- target_size = (1920, 1080) if resolution_choice == "Full HD (16:9)" else (1080, 1920) # W, H
827
- pexels_orientation = "landscape" if resolution_choice == "Full HD (16:9)" else "portrait"
828
- status_log.append(f"⚙️ Target resolution: {target_size[0]}x{target_size[1]}")
829
- status_log.append(f"⚙️ Pexels orientation: {pexels_orientation}")
830
- status_log.append(f"⚙️ Add Captions: {add_captions_option}")
831
- status_log.append(f"⚙️ Add BGM: {add_bgm_option}")
832
-
833
- # --- 1. Generate Script ---
834
  progress(0.1, desc="Generating script...")
835
- status_log.append("\n🔄 Generating script...")
836
- script = generate_script(topic, OPENROUTER_API_KEY, OPENROUTER_MODEL)
837
  if not script:
838
- status_log.append(" Script generation failed. Check API key, model, and connection.")
839
- # No cleanup needed yet, just return
840
- return "\n".join(status_log), None
841
- status_log.append("✅ Script generated.")
842
- # logging.debug(f"Raw Script:\n{script}") # Log full script for debugging
843
 
844
- # --- 2. Parse Script ---
845
  progress(0.2, desc="Parsing script...")
846
- status_log.append("\n🔄 Parsing script...")
847
  elements = parse_script(script)
848
- if not elements or len(elements) < 2:
849
- status_log.append(" Script parsing failed. Check script format from LLM.")
850
- return "\n".join(status_log), None
851
- num_segments = len(elements) // 2
852
- status_log.append(f"✅ Script parsed into {num_segments} segments.")
853
-
854
- # --- 3. Process Segments ---
855
- total_duration = 0
856
  for i in range(0, len(elements), 2):
857
- segment_index = i // 2
858
- progress_val = 0.2 + (0.6 * (segment_index / num_segments))
859
- progress(progress_val, desc=f"Processing segment {segment_index + 1}/{num_segments}")
860
 
861
- # Check if elements exist before accessing
862
- if i + 1 >= len(elements):
863
- logging.warning(f"⚠️ Found scene element at index {i} but no corresponding narration. Skipping.")
864
- continue
865
 
866
- scene_elem = elements[i]
867
- narration_elem = elements[i+1]
 
 
 
868
 
869
- # Validate element types (optional but good practice)
870
- if scene_elem.get("type") != "scene" or narration_elem.get("type") != "narration":
871
- logging.warning(f"⚠️ Unexpected element types at index {i}/{i+1}. Skipping segment.")
872
  continue
873
 
874
- scene_prompt = scene_elem.get('prompt', '').strip()
875
- narration_text = narration_elem.get('text', '').strip()
876
-
877
- if not scene_prompt or not narration_text:
878
- logging.warning(f"⚠️ Segment {segment_index + 1} has empty scene prompt or narration. Skipping.")
879
- status_log.append(f"\n--- Segment {segment_index + 1}/{num_segments}: SKIPPED (Empty prompt/narration) ---")
880
- continue
881
-
882
- status_log.append(f"\n--- Segment {segment_index + 1}/{num_segments} ---")
883
- status_log.append(f"📝 Scene: {scene_prompt}")
884
- status_log.append(f"🗣️ Narration: {narration_text[:100]}...")
885
-
886
- # 3a. Generate TTS
887
- status_log.append("🔄 Generating narration...")
888
- tts_path, tts_duration = generate_tts(narration_text, 'en', temp_dir, segment_index)
889
- if not tts_path or tts_duration <= 0.1:
890
- status_log.append(f"⚠️ TTS failed. Skipping segment.")
891
- logging.warning(f"Skipping segment {segment_index+1} due to TTS failure.")
892
  continue
893
- status_log.append(f"✅ Narration generated ({tts_duration:.2f}s)")
894
- total_duration += tts_duration
895
-
896
- # 3b. Search & Download Media
897
- status_log.append("🔄 Finding media...")
898
- media_path = None
899
- media_type = None
900
-
901
- # Try Pexels Video first with correct orientation
902
- video_results = search_pexels(scene_prompt, PEXELS_API_KEY, media_type="videos", orientation=pexels_orientation)
903
- if video_results:
904
- selected_media = random.choice(video_results)
905
- status_log.append(f"⬇️ Downloading Pexels video...")
906
- media_path = download_media(selected_media['url'], temp_dir)
907
- if media_path:
908
- media_type = 'video'
909
- status_log.append(f"✅ Video downloaded.")
910
- else:
911
- status_log.append("⚠️ Video download failed.")
912
-
913
- # Try Pexels Image if video fails/not found (correct orientation)
914
- if not media_path:
915
- status_log.append("🔄 No suitable video. Searching images...")
916
- image_results = search_pexels(scene_prompt, PEXELS_API_KEY, media_type="photos", orientation=pexels_orientation)
917
- if image_results:
918
- selected_media = random.choice(image_results)
919
- status_log.append(f"⬇️ Downloading Pexels image...")
920
- media_path = download_media(selected_media['url'], temp_dir)
921
- if media_path:
922
- media_type = 'image'
923
- status_log.append(f"✅ Image downloaded.")
924
- else:
925
- status_log.append("⚠️ Image download failed.")
926
 
927
- # Fallback: Black screen
928
- if not media_path:
929
- status_log.append(f"⚠️ No media found for '{scene_prompt}'. Using black screen.")
930
- media_type = 'color'
931
- media_path = None # No path needed
932
-
933
- # 3c. Create Clip
934
- status_log.append(f"🎬 Creating clip...")
935
  clip = create_clip(
936
- media_path=media_path,
937
- media_type=media_type,
938
- audio_path=tts_path,
939
- audio_duration=tts_duration,
940
- target_size=target_size,
941
- add_captions=add_captions_option,
942
- narration_text=narration_text,
943
- segment_index=segment_index
 
944
  )
945
-
946
  if clip:
947
  clips.append(clip)
948
- status_log.append(f"✅ Clip created.")
949
  else:
950
- status_log.append(f" Failed to create clip. Skipping segment.")
951
- logging.error(f"Failed to create clip {segment_index+1}, skipping.")
952
-
953
 
954
  if not clips:
955
- status_log.append("\n❌ No valid clips were created. Cannot generate video.")
956
- # No cleanup needed beyond temp dir removal in finally block
957
- return "\n".join(status_log), None
958
 
959
- status_log.append(f"\n✅ Successfully created {len(clips)} video clips.")
960
- status_log.append(f"⏱️ Estimated total video duration: {total_duration:.2f} seconds.")
961
 
962
- # --- 4. Concatenate Clips ---
963
- progress(0.85, desc="Combining video clips...")
964
- status_log.append("\n🔄 Combining video clips...")
965
- final_clip = None # Define final_clip before try block
966
- try:
967
- # Use method="compose" - might be better for clips with varying sources/codecs
968
- final_clip = concatenate_videoclips(clips, method="compose", padding = -0.1) # Small overlap?
969
- status_log.append("✅ Clips combined successfully.")
970
- except Exception as e:
971
- status_log.append(f"❌ Error concatenating clips: {e}")
972
- logging.error(f"Concatenation failed: {e}")
973
- traceback.print_exc()
974
- # Ensure final_clip is None if concatenation fails
975
- final_clip = None
976
- # Fall through to finally block for cleanup
977
-
978
- # --- 5. Add Background Music (Optional) ---
979
- if final_clip and add_bgm_option:
980
- progress(0.9, desc="Adding background music...")
981
- status_log.append("\n🔄 Adding background music...")
982
- final_clip = add_background_music(final_clip, music_file=BGM_FILE, volume=BGM_VOLUME)
983
- # Status logged within the function
984
-
985
- # --- 6. Write Final Video ---
986
- if final_clip:
987
- progress(0.95, desc="Writing final video file...")
988
- status_log.append("\n💾 Writing final video file (this may take time)...")
989
- output_path = os.path.join(temp_dir, OUTPUT_VIDEO_FILENAME)
990
- writer_logger = logging.getLogger("moviepy_writer")
991
- writer_logger.setLevel(logging.WARNING) # Reduce moviepy verbosity during write
992
 
993
- try:
994
- final_clip.write_videofile(
995
- output_path,
996
- codec='libx264',
997
- audio_codec='aac',
998
- temp_audiofile=os.path.join(temp_dir, 'temp_audio.aac'), # Explicit temp audio file
999
- remove_temp=True,
1000
- preset='medium', # 'medium' is good balance, 'fast' or 'ultrafast' for speed
1001
- fps=24,
1002
- threads=max(1, os.cpu_count() // 2), # Use half available cores
1003
- logger=None # Use None or 'bar', avoid default verbose logger
1004
- )
1005
- status_log.append(f"✅ Final video saved: {os.path.basename(output_path)}")
1006
- final_video_path = output_path # Set the path to be returned
1007
- except Exception as e:
1008
- status_log.append(f"❌ Error writing final video file: {e}")
1009
- logging.error(f"Final video write failed: {e}")
1010
- traceback.print_exc()
1011
- final_video_path = None # Ensure no path is returned on failure
1012
- else:
1013
- status_log.append("\n❌ Skipping final video write because clip combination failed.")
1014
- final_video_path = None
1015
 
 
 
1016
 
1017
  except Exception as e:
1018
- status_log.append(f"\n❌ An critical error occurred during video generation: {e}")
1019
- logging.error("An critical error occurred in generate_video_process:")
1020
- logging.error(traceback.format_exc())
1021
- final_video_path = None # Ensure failure state
1022
 
1023
  finally:
1024
- # --- 7. Cleanup ---
1025
- status_log.append("\n🧹 Cleaning up resources...")
1026
- # Close all individual clips first
1027
- for i, clip in enumerate(clips):
1028
- try:
1029
- if clip: clip.close()
1030
- logging.debug(f"Closed clip {i+1}")
1031
- except Exception as e_close:
1032
- logging.warning(f"Error closing clip {i+1}: {e_close}")
1033
- # Close the final concatenated clip if it exists
1034
- try:
1035
- if final_clip: final_clip.close()
1036
- logging.debug("Closed final clip")
1037
- except Exception as e_final_close:
1038
- logging.warning(f"Error closing final clip: {e_final_close}")
1039
-
1040
- # Remove the temporary directory
1041
- if temp_dir and os.path.exists(temp_dir):
1042
- try:
1043
- # Add retries for shutil.rmtree on potential lingering file handles
1044
- attempts = 3
1045
- for attempt in range(attempts):
1046
- try:
1047
- shutil.rmtree(temp_dir)
1048
- status_log.append(f"✅ Temporary directory removed: {os.path.basename(temp_dir)}")
1049
- logging.info(f"Cleaned up temp directory: {temp_dir}")
1050
- break # Success
1051
- except OSError as e_rm:
1052
- if attempt < attempts - 1:
1053
- logging.warning(f"Attempt {attempt+1} failed to remove temp dir {temp_dir}: {e_rm}. Retrying in 1s...")
1054
- time.sleep(1)
1055
- else:
1056
- raise # Raise the error on the last attempt
1057
- except Exception as e_clean:
1058
- status_log.append(f"⚠️ Error cleaning up temporary directory {temp_dir}: {e_clean}")
1059
- logging.error(f"Cleanup failed for {temp_dir}: {e_clean}")
1060
- else:
1061
- status_log.append("ℹ️ No temporary directory to remove or already removed.")
1062
-
1063
 
1064
- end_time = time.time()
1065
- total_time = end_time - start_time
1066
- status_log.append(f"\n--- Generation Finished ---")
1067
- status_log.append(f"⏱️ Total time: {total_time:.2f} seconds")
1068
-
1069
- progress(1.0, desc="Finished!")
1070
- return "\n".join(status_log), final_video_path
1071
-
1072
-
1073
- # --- Gradio Interface Definition ---
1074
- with gr.Blocks(css="footer {display: none !important}") as iface: # Hide Gradio footer
1075
- gr.Markdown("# 🤖 AI Documentary Generator v2")
1076
- gr.Markdown("Enter a topic, choose settings, and let AI create a short video. Uses OpenRouter for script, Pexels for media, gTTS for narration, and MoviePy for assembly.")
1077
 
1078
  with gr.Row():
1079
- with gr.Column(scale=1):
1080
- topic_input = gr.Textbox(
1081
- label="Video Topic",
1082
- placeholder="e.g., The History of Coffee, Secrets of the Deep Ocean",
1083
- lines=2
1084
- )
1085
- resolution_input = gr.Radio(
1086
- label="Video Format",
1087
- choices=["Short (9:16)", "Full HD (16:9)"],
1088
- value="Short (9:16)"
1089
- )
1090
- captions_input = gr.Checkbox(label="Add Captions (with background)", value=True)
1091
-
1092
- # Check for BGM file and enable checkbox accordingly
1093
- bgm_exists = os.path.exists(BGM_FILE) and os.path.getsize(BGM_FILE) > 1024
1094
- bgm_label = f"Add Background Music ({os.path.basename(BGM_FILE)})" if bgm_exists else f"Add Background Music (File '{BGM_FILE}' not found or empty)"
1095
- bgm_input = gr.Checkbox(label=bgm_label, value=bgm_exists, interactive=bgm_exists)
1096
 
 
 
 
 
 
 
 
 
 
 
1097
 
1098
- generate_button = gr.Button("Generate Video", variant="primary")
1099
-
1100
- with gr.Column(scale=2):
1101
- status_output = gr.Textbox(label="📜 Status Log", lines=20, interactive=False, autoscroll=True)
1102
- video_output = gr.Video(label="🎬 Generated Video")
1103
 
1104
  generate_button.click(
1105
- fn=generate_video_process,
1106
- inputs=[topic_input, resolution_input, captions_input, bgm_input],
1107
  outputs=[status_output, video_output]
1108
  )
1109
 
1110
- gr.Examples(
1111
- examples=[
1112
- ["The lifecycle of a monarch butterfly", "Short (9:16)", True, True],
1113
- ["The construction of the Eiffel Tower", "Full HD (16:9)", True, False],
1114
- ["The impact of renewable energy sources", "Short (9:16)", True, True],
1115
- ["A brief history of the internet", "Full HD (16:9)", True, True],
1116
- ],
1117
- inputs=[topic_input, resolution_input, captions_input, bgm_input],
1118
- label="Example Topics"
1119
- )
1120
-
1121
- # --- Launch the App ---
1122
  if __name__ == "__main__":
1123
- # Create a silent placeholder BGM file if needed and BGM checkbox requires it
1124
- if not os.path.exists(BGM_FILE) or os.path.getsize(BGM_FILE) < 1024:
1125
- logging.warning(f"Background music file '{BGM_FILE}' not found or empty. Creating a silent placeholder.")
1126
- try:
1127
- silent_segment = AudioSegment.silent(duration=1000) # 1 second silence
1128
- silent_segment.export(BGM_FILE, format="mp3")
1129
- logging.info(f"Created silent placeholder BGM file: {BGM_FILE}")
1130
- except Exception as e:
1131
- logging.error(f"Could not create placeholder BGM file: {e}")
1132
-
1133
-
1134
- # Check for API keys (already hardcoded, but good practice)
1135
- if not PEXELS_API_KEY or len(PEXELS_API_KEY) < 50: # Basic length check
1136
- logging.warning("PEXELS_API_KEY seems invalid or missing.")
1137
- if not OPENROUTER_API_KEY or not OPENROUTER_API_KEY.startswith("sk-or-v1-"):
1138
- logging.warning("OPENROUTER_API_KEY seems invalid or missing.")
1139
-
1140
- # Launch Gradio app
1141
- iface.queue().launch(debug=False, share=False) # Use queue for handling multiple requests, disable debug/share for production
 
1
+ # app.py
2
  import gradio as gr
3
+ import soundfile as sf
4
+ import torch
5
+ from IPython.display import display, Audio, HTML
6
+ import soundfile as sf
7
  import os
8
+ from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip
9
+ from PIL import Image
10
+ import tempfile
 
 
11
  import random
12
+ import cv2
13
  import math
14
+ import os, requests, io, time, re, random
 
 
 
 
 
15
  from moviepy.editor import (
16
+ VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
17
+ CompositeVideoClip, TextClip, CompositeAudioClip
18
  )
19
  import moviepy.video.fx.all as vfx
20
+ import moviepy.config as mpy_config
21
  from pydub import AudioSegment
22
+ from pydub.generators import Sine
23
  from PIL import Image, ImageDraw, ImageFont
24
+ import numpy as np
25
  from bs4 import BeautifulSoup
26
+ import base64
27
  from urllib.parse import quote
28
+ import pysrt
29
  from gtts import gTTS
30
+ import shutil
31
+ import webbrowser # This won't work in HF Spaces, but keep for local testing reference
32
+ import sys
33
 
34
+ # --- API Keys (Embed directly as requested for private space) ---
 
 
35
  PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
36
+ OPENROUTER_API_KEY = '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'
37
+ OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
38
 
39
+ # --- Global Configuration ---
40
+ TEMP_FOLDER = "temp_video_processing"
41
+ OUTPUT_VIDEO_FILENAME = "final_video.mp4"
 
 
42
  USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
43
+
44
+ # --- Initialize Kokoro TTS pipeline (if compatible with HF CPU) ---
45
+ # NOTE: Kokoro might be too resource-intensive for free CPU spaces.
46
+ # If it causes issues, you might need to remove it and rely solely on gTTS.
47
+ try:
48
+ from kokoro import KPipeline
49
+ pipeline = KPipeline(lang_code='a') # Use voice 'af_heart' for American English
50
+ print("Kokoro TTS pipeline initialized.")
51
+ except ImportError:
52
+ print("Kokoro library not found or failed to initialize. Will rely on gTTS.")
53
+ pipeline = None
54
+ except Exception as e:
55
+ print(f"Error initializing Kokoro: {e}. Will rely on gTTS.")
56
+ pipeline = None
57
+
58
+
59
+ # Ensure ImageMagick binary is set (might need adjustment for HF Spaces)
60
+ # This path might differ in the HF environment.
61
+ # If this causes issues, you might need to use a Dockerfile or configure the space differently.
62
+ mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
63
+
64
+ # --- Helper Functions (from original script) ---
65
+
66
+ def generate_script(user_input):
67
+ """Generate documentary script with proper OpenRouter handling."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  headers = {
69
+ 'Authorization': f'Bearer {OPENROUTER_API_KEY}',
70
+ 'HTTP-Referer': 'https://your-domain.com',
71
+ 'X-Title': 'AI Documentary Maker'
 
72
  }
 
 
 
 
 
 
 
 
 
 
73
 
74
+ prompt = f"""Short Documentary Script GeneratorInstructions:
 
75
 
76
+ If I say "use this," just output the script exactly as I gave it.
77
+ If I only give topics, generate a script based on them.
78
+ If I provide a full script, rewrite it without any changes. Make everything short simple and humarous funny and act as serious but humarous. And don't say anything off topic. Also alway say a funny statement to subscribe based on the video topic at the end. Use normal conversational text like a normal person talking and avoid AI phase make the statements humanize and normal conversational
79
+ And always talk negative views don't really wants any positive statement and the full script will be one video about one topic so keep in mind
80
+ Formatting Rules:
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
+ Title in Square Brackets:
 
 
 
 
 
84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
+ Each section starts with a one-word title inside [ ] (max two words if necessary).
87
+ This title will be used as a search term for Pexels footage.
88
 
 
 
 
 
 
 
 
 
 
89
 
90
 
91
+ Casual & Funny Narration:
 
92
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
+ Each section has 5-10 words of narration.
95
+ Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
 
 
 
 
 
 
97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
+ No Special Formatting:
 
 
 
100
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
+ No bold, italics, or special characters. You are a assistant AI your task is to create script. You aren't a chatbot. So, don't write extra text
 
 
 
 
 
 
 
 
 
103
 
 
104
 
 
 
105
 
106
+ Generalized Search Terms:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
 
108
 
109
+ If a term is too specific, make it more general for Pexels search.
110
 
 
 
 
 
 
 
111
 
 
112
 
113
+ Scene-Specific Writing:
 
 
 
 
 
 
 
 
114
 
115
 
116
+ Each section describes only what should be shown in the video.
117
 
 
 
 
118
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
+ Output Only the Script, and also make it funny and humarous and helirous and also add to subscribe with a funny statement like subscribe now or .....
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
 
123
+ No extra text, just the script.
124
+
125
+
126
+
127
+ Example Output:
128
+ [North Korea]
129
+
130
+ Top 5 unknown facts about North Korea.
131
+
132
+ [Invisibility]
133
+
134
+ North Korea’s internet speed is so fast… it doesn’t exist.
135
+
136
+ [Leadership]
137
+
138
+ Kim Jong-un once won an election with 100% votes… against himself.
139
+
140
+ [Magic]
141
+
142
+ North Korea discovered time travel. That’s why their news is always from the past.
143
+
144
+ [Warning]
145
+
146
+ Subscribe now, or Kim Jong-un will send you a free one-way ticket… to North Korea.
147
+
148
+ [Freedom]
149
+
150
+ North Korean citizens can do anything… as long as it's government-approved.
151
+ Now here is the Topic/scrip: {user_input}
152
+ """
153
+
154
+ data = {
155
+ 'model': OPENROUTER_MODEL,
156
+ 'messages': [{'role': 'user', 'content': prompt}],
157
+ 'temperature': 0.4,
158
+ 'max_tokens': 5000
159
+ }
160
 
161
  try:
162
+ response = requests.post(
163
+ 'https://openrouter.ai/api/v1/chat/completions',
164
+ headers=headers,
165
+ json=data,
166
+ timeout=30
167
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
+ print("API Response Status:", response.status_code)
170
+ if response.status_code == 200:
171
+ response_data = response.json()
172
+ if 'choices' in response_data and len(response_data['choices']) > 0:
173
+ return response_data['choices'][0]['message']['content']
174
+ else:
175
+ print("Unexpected response format:", response_data)
176
+ return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  else:
178
+ print(f"API Error {response.status_code}: {response.text}")
179
+ return None
180
+
181
+ except Exception as e:
182
+ print(f"Request failed: {str(e)}")
183
+ return None
184
+
185
+ def parse_script(script_text):
186
+ """
187
+ Parse the generated script into a list of elements.
188
+ For each section, create two elements:
189
+ - A 'media' element using the section title as the visual prompt.
190
+ - A 'tts' element with the narration text, voice info, and computed duration.
191
+ """
192
+ sections = {}
193
+ current_title = None
194
+ current_text = ""
195
 
196
+ try:
197
+ for line in script_text.splitlines():
198
+ line = line.strip()
199
+ if line.startswith("[") and "]" in line:
200
+ bracket_start = line.find("[")
201
+ bracket_end = line.find("]", bracket_start)
202
+ if bracket_start != -1 and bracket_end != -1:
203
+ if current_title is not None:
204
+ sections[current_title] = current_text.strip()
205
+ current_title = line[bracket_start+1:bracket_end]
206
+ current_text = line[bracket_end+1:].strip()
207
+ elif current_title:
208
+ current_text += line + " "
209
+
210
+ if current_title:
211
+ sections[current_title] = current_text.strip()
212
+
213
+ elements = []
214
+ for title, narration in sections.items():
215
+ if not title or not narration:
216
+ continue
217
 
218
+ media_element = {"type": "media", "prompt": title, "effects": "fade-in"}
219
+ words = narration.split()
220
+ duration = max(3, len(words) * 0.5)
221
+ tts_element = {"type": "tts", "text": narration, "voice": "en", "duration": duration}
222
+ elements.append(media_element)
223
+ elements.append(tts_element)
224
 
225
+ return elements
226
+ except Exception as e:
227
+ print(f"Error parsing script: {e}")
228
+ return []
229
+
230
def search_pexels_videos(query, pexels_api_key):
    """Search Pexels videos for *query* and return the URL of a random HD file.

    Scans up to 3 result pages (15 videos per page). Each page request is
    retried up to 3 times with exponential backoff on HTTP 429 or transient
    errors.

    Fixes over the previous version:
    - the backoff delay is reset per page instead of growing forever across
      the whole pagination loop;
    - an empty results page now stops pagination entirely (it used to break
      only the retry loop and keep requesting later, equally empty pages).

    Args:
        query: search term.
        pexels_api_key: Pexels API key for the Authorization header.

    Returns:
        str | None: link to a randomly chosen HD video file, or None if no
        suitable video was found.
    """
    headers = {'Authorization': pexels_api_key}
    base_url = "https://api.pexels.com/videos/search"
    num_pages = 3
    videos_per_page = 15
    max_retries = 3
    all_videos = []

    for page in range(1, num_pages + 1):
        retry_delay = 1  # reset backoff for each page
        no_more_results = False
        for attempt in range(max_retries):
            try:
                params = {"query": query, "per_page": videos_per_page, "page": page}
                response = requests.get(base_url, headers=headers, params=params, timeout=10)

                if response.status_code == 200:
                    videos = response.json().get("videos", [])
                    if not videos:
                        no_more_results = True  # past the last page of results
                        break
                    for video in videos:
                        # Collect the first HD rendition of each video, if any.
                        for file in video.get("video_files", []):
                            if file.get("quality") == "hd":
                                all_videos.append(file.get("link"))
                                break
                    break  # page fetched successfully; move to next page
                elif response.status_code == 429:
                    # Rate limited: back off and retry this page.
                    time.sleep(retry_delay)
                    retry_delay *= 2
                else:
                    if attempt < max_retries - 1:
                        time.sleep(retry_delay)
                        retry_delay *= 2
                    else:
                        break
            except requests.exceptions.RequestException:
                if attempt < max_retries - 1:
                    time.sleep(retry_delay)
                    retry_delay *= 2
                else:
                    break
        if no_more_results:
            break

    return random.choice(all_videos) if all_videos else None
280
 
281
def search_pexels_images(query, pexels_api_key):
    """Look up *query* on the Pexels photo API and return one image URL.

    Makes up to three attempts with exponential backoff on rate limits or
    transient errors, then picks a random photo among the first five
    landscape results.

    Returns:
        str | None: the original-size photo URL, or None when nothing was
        found or all attempts failed.
    """
    endpoint = "https://api.pexels.com/v1/search"
    auth_headers = {'Authorization': pexels_api_key}
    query_params = {"query": query, "per_page": 5, "orientation": "landscape"}
    attempts_allowed = 3
    backoff = 1

    for attempt_no in range(attempts_allowed):
        try:
            resp = requests.get(endpoint, headers=auth_headers, params=query_params, timeout=10)
            status = resp.status_code
            if status == 200:
                found = resp.json().get("photos", [])
                if not found:
                    return None
                chosen = random.choice(found[:min(5, len(found))])
                return chosen.get("src", {}).get("original")
            if status == 429:
                # Rate limited: always wait before the next try.
                time.sleep(backoff)
                backoff *= 2
            elif attempt_no < attempts_allowed - 1:
                time.sleep(backoff)
                backoff *= 2
        except requests.exceptions.RequestException:
            if attempt_no < attempts_allowed - 1:
                time.sleep(backoff)
                backoff *= 2

    return None
314
+
315
def search_google_images(query):
    """Scrape Google Images for *query* and return one thumbnail URL.

    Used for news-related prompts where stock photography is a poor fit.
    gstatic-hosted placeholders are filtered out.

    Returns:
        str | None: an image URL, or None on any failure.
    """
    try:
        search_url = f"https://www.google.com/search?q={quote(query)}&tbm=isch"
        headers = {"User-Agent": USER_AGENT}
        response = requests.get(search_url, headers=headers, timeout=10)
        # BUGFIX: previously a non-200 response (captcha page, block page)
        # was parsed silently; fail fast into the except handler instead.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        image_urls = [
            img.get("src", "")
            for img in soup.find_all("img")
            if img.get("src", "").startswith("http") and "gstatic" not in img.get("src", "")
        ]
        if not image_urls:
            return None
        # Random pick among the first five hits for variety; fewer than five
        # hits falls back to the first one.
        return random.choice(image_urls[:5]) if len(image_urls) >= 5 else image_urls[0]
    except Exception as e:
        print(f"Error in Google Images search: {e}")
        return None
335
 
336
def download_image(image_url, filename):
    """Download *image_url* to *filename* and validate it as an image.

    The file is verified with PIL and normalised to RGB so later JPEG saves
    and video composition cannot fail on palette/alpha modes. On any
    failure the partial file is removed.

    Fix over the previous version: the PIL image handles are now closed via
    context managers instead of being leaked.

    Args:
        image_url: URL to fetch.
        filename: local destination path.

    Returns:
        str | None: *filename* on success, None otherwise.
    """
    try:
        headers = {"User-Agent": USER_AGENT}
        response = requests.get(image_url, headers=headers, stream=True, timeout=15)
        response.raise_for_status()
        with open(filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        try:
            # verify() requires a fresh handle and leaves it unusable, so
            # the file is opened twice on purpose.
            with Image.open(filename) as img:
                img.verify()
            with Image.open(filename) as img:
                if img.mode != 'RGB':
                    img.convert('RGB').save(filename)
            return filename
        except Exception as e_validate:
            print(f"Downloaded file is not a valid image: {e_validate}")
            if os.path.exists(filename):
                os.remove(filename)
            return None
    except requests.exceptions.RequestException as e_download:
        print(f"Image download error: {e_download}")
        if os.path.exists(filename):
            os.remove(filename)
        return None
    except Exception as e_general:
        print(f"General error during image processing: {e_general}")
        if os.path.exists(filename):
            os.remove(filename)
        return None
368
 
369
def download_video(video_url, filename):
    """Stream *video_url* into *filename*.

    Returns:
        str | None: *filename* on success; None on any error (the partial
        file, if created, is removed).
    """
    try:
        resp = requests.get(video_url, stream=True, timeout=30)
        resp.raise_for_status()
        with open(filename, 'wb') as out:
            for piece in resp.iter_content(chunk_size=8192):
                out.write(piece)
    except Exception as err:
        print(f"Video download error: {err}")
        if os.path.exists(filename):
            os.remove(filename)
        return None
    return filename
383
 
384
def generate_media(prompt, user_image=None, current_index=0, total_segments=1):
    """
    Pick a visual asset for *prompt*.

    Strategy: news-style prompts try Google Images first; otherwise there
    is a 25% chance of fetching a Pexels video, then a Pexels photo, then
    a series of generic fallback photos.

    Returns:
        dict | None: {'path': <file_path>, 'asset_type': 'video' or 'image'},
        or None when every source failed.
    """
    slug = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')

    # News-related prompts: current imagery from Google Images.
    if "news" in prompt.lower():
        news_target = os.path.join(TEMP_FOLDER, f"{slug}_news.jpg")
        news_url = search_google_images(prompt)
        if news_url:
            saved = download_image(news_url, news_target)
            if saved:
                return {"path": saved, "asset_type": "image"}
        else:
            print(f"Google Images search failed for prompt: {prompt}")

    # Occasionally mix in stock footage instead of a still.
    if random.random() < 0.25:
        clip_target = os.path.join(TEMP_FOLDER, f"{slug}_video.mp4")
        clip_url = search_pexels_videos(prompt, PEXELS_API_KEY)
        if clip_url:
            saved = download_video(clip_url, clip_target)
            if saved:
                return {"path": saved, "asset_type": "video"}
        else:
            print(f"Pexels video search failed for prompt: {prompt}")

    # Primary still-image source.
    photo_target = os.path.join(TEMP_FOLDER, f"{slug}.jpg")
    photo_url = search_pexels_images(prompt, PEXELS_API_KEY)
    if photo_url:
        saved = download_image(photo_url, photo_target)
        if saved:
            return {"path": saved, "asset_type": "image"}
    else:
        print(f"Pexels image download failed for prompt: {prompt}")

    # Last resort: generic stock terms so the segment is never empty.
    for term in ["nature", "people", "landscape", "technology", "business"]:
        fallback_target = os.path.join(TEMP_FOLDER, f"fallback_{term}.jpg")
        fallback_url = search_pexels_images(term, PEXELS_API_KEY)
        if fallback_url:
            saved = download_image(fallback_url, fallback_target)
            if saved:
                return {"path": saved, "asset_type": "image"}
            else:
                print(f"Fallback image download failed for term: {term}")
        else:
            print(f"Fallback image search failed for term: {term}")

    print(f"Failed to generate visual asset for prompt: {prompt}")
    return None
436
+
437
def generate_silent_audio(duration, sample_rate=24000):
    """
    Write *duration* seconds of silence to a WAV file and return its path.

    Used as a last-resort narration track when every TTS backend fails.
    """
    sample_count = int(duration * sample_rate)
    out_path = os.path.join(TEMP_FOLDER, f"silent_{int(time.time())}.wav")
    sf.write(out_path, np.zeros(sample_count, dtype=np.float32), sample_rate)
    print(f"Silent audio generated: {out_path}")
    return out_path
447
+
448
def generate_tts(text, voice):
    """
    Generate narration audio for *text* as a WAV file.

    Tries Kokoro first, falls back to gTTS, and finally to silence so the
    pipeline never stalls on a TTS failure. Results are cached in
    TEMP_FOLDER keyed on a hash of the FULL text — the previous cache key
    used only the first 10 characters, so different narrations sharing a
    prefix collided and reused the wrong audio.

    Args:
        text: narration text.
        voice: language/voice code ('en' maps to Kokoro's 'af_heart').

    Returns:
        str: path to a WAV file (TTS output or silence).
    """
    import hashlib  # local import; the module-level import block is elsewhere in the file

    text_key = hashlib.md5(text.encode('utf-8')).hexdigest()[:16]
    file_path = os.path.join(TEMP_FOLDER, f"tts_{text_key}.wav")

    if os.path.exists(file_path):
        print(f"Using cached TTS for text '{text[:10]}...'")
        return file_path

    # Try Kokoro first
    if pipeline:
        try:
            kokoro_voice = 'af_heart' if voice == 'en' else voice
            generator = pipeline(text, voice=kokoro_voice, speed=0.9, split_pattern=r'\n+')
            audio_segments = [audio for _, _, audio in generator]
            full_audio = np.concatenate(audio_segments) if len(audio_segments) > 1 else audio_segments[0]
            sf.write(file_path, full_audio, 24000)
            print(f"TTS audio saved to {file_path} (Kokoro)")
            return file_path
        except Exception as e:
            print(f"Error with Kokoro TTS: {e}")

    # Fallback to gTTS
    try:
        print("Falling back to gTTS...")
        tts = gTTS(text=text, lang='en')
        mp3_path = os.path.join(TEMP_FOLDER, f"tts_{text_key}.mp3")
        tts.save(mp3_path)
        audio = AudioSegment.from_mp3(mp3_path)
        audio.export(file_path, format="wav")
        if os.path.exists(mp3_path):
            os.remove(mp3_path)
        print(f"Fallback TTS saved to {file_path} (gTTS)")
        return file_path
    except Exception as fallback_error:
        print(f"Both TTS methods failed: {fallback_error}")
        # Last resort: silence sized roughly to the narration length.
        return generate_silent_audio(duration=max(3, len(text.split()) * 0.5))
491
+
492
def apply_kenburns_effect(clip, target_resolution, effect_type=None):
    """
    Apply a smooth Ken Burns effect with a single, clean movement pattern.

    The clip is scaled to cover the target resolution, then enlarged a
    further 15% so pans and zooms never reveal the frame edge. Each output
    frame is produced by cropping a window around an interpolated centre
    point (with cosine easing) and resizing it back to the target size.

    Args:
        clip: source moviepy clip (image or video).
        target_resolution: (width, height) of the output frames.
        effect_type: "zoom-in", "zoom-out", "pan-left", "pan-right",
            "up-left", or None/"random" for a random choice.

    Returns:
        The clip with the per-frame transform applied via clip.fl().

    Raises:
        ValueError: for an unknown effect_type.
    """
    target_w, target_h = target_resolution
    clip_aspect = clip.w / clip.h
    target_aspect = target_w / target_h

    # Scale so the clip fully covers the target frame (no letterboxing).
    if clip_aspect > target_aspect:
        new_height = target_h
        new_width = int(new_height * clip_aspect)
    else:
        new_width = target_w
        new_height = int(new_width / clip_aspect)

    clip = clip.resize(newsize=(new_width, new_height))

    # Over-scale by 15% to leave headroom for the pan/zoom motion.
    base_scale = 1.15
    new_width = int(new_width * base_scale)
    new_height = int(new_height * base_scale)
    clip = clip.resize(newsize=(new_width, new_height))

    # Maximum distance the crop window can travel in each axis.
    max_offset_x = new_width - target_w
    max_offset_y = new_height - target_h

    available_effects = ["zoom-in", "zoom-out", "pan-left", "pan-right", "up-left"]
    if effect_type is None or effect_type == "random":
        effect_type = random.choice(available_effects)

    # Each effect defines a start/end zoom factor and a start/end crop centre.
    if effect_type == "zoom-in":
        start_zoom = 0.9
        end_zoom = 1.1
        start_center = (new_width / 2, new_height / 2)
        end_center = start_center
    elif effect_type == "zoom-out":
        start_zoom = 1.1
        end_zoom = 0.9
        start_center = (new_width / 2, new_height / 2)
        end_center = start_center
    elif effect_type == "pan-left":
        start_zoom = 1.0
        end_zoom = 1.0
        start_center = (max_offset_x + target_w / 2, (max_offset_y // 2) + target_h / 2)
        end_center = (target_w / 2, (max_offset_y // 2) + target_h / 2)
    elif effect_type == "pan-right":
        start_zoom = 1.0
        end_zoom = 1.0
        start_center = (target_w / 2, (max_offset_y // 2) + target_h / 2)
        end_center = (max_offset_x + target_w / 2, (max_offset_y // 2) + target_h / 2)
    elif effect_type == "up-left":
        start_zoom = 1.0
        end_zoom = 1.0
        start_center = (max_offset_x + target_w / 2, max_offset_y + target_h / 2)
        end_center = (target_w / 2, target_h / 2)
    else:
        raise ValueError(f"Unsupported effect_type: {effect_type}")

    def transform_frame(get_frame, t):
        # Per-frame transform: crop an eased window and resize to target.
        frame = get_frame(t)
        # Cosine easing: smooth acceleration/deceleration over the clip.
        ratio = t / clip.duration if clip.duration > 0 else 0
        ratio = 0.5 - 0.5 * math.cos(math.pi * ratio)

        current_zoom = start_zoom + (end_zoom - start_zoom) * ratio
        crop_w = int(target_w / current_zoom)
        crop_h = int(target_h / current_zoom)

        current_center_x = start_center[0] + (end_center[0] - start_center[0]) * ratio
        current_center_y = start_center[1] + (end_center[1] - start_center[1]) * ratio

        # Clamp so the crop window never leaves the over-scaled frame.
        min_center_x = crop_w / 2
        max_center_x = new_width - crop_w / 2
        min_center_y = crop_h / 2
        max_center_y = new_height - crop_h / 2
        current_center_x = max(min_center_x, min(current_center_x, max_center_x))
        current_center_y = max(min_center_y, min(current_center_y, max_center_y))

        # Ensure frame is numpy array and correct type for cv2
        if isinstance(frame, Image.Image):
            frame = np.array(frame)
        if frame.dtype != np.uint8:
            frame = frame.astype(np.uint8)

        # Ensure frame has 3 channels for color images
        if len(frame.shape) == 2:  # Grayscale
            frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR)
        elif frame.shape[2] == 4:  # RGBA
            frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2BGR)

        # Ensure crop dimensions are valid
        crop_w = max(1, crop_w)
        crop_h = max(1, crop_h)

        # Ensure center point is valid for cv2.getRectSubPix
        current_center_x = max(0, min(current_center_x, frame.shape[1] - 1))
        current_center_y = max(0, min(current_center_y, frame.shape[0] - 1))

        # getRectSubPix crops with sub-pixel accuracy; Lanczos keeps detail
        # when resizing back to the target resolution.
        cropped_frame = cv2.getRectSubPix(frame, (crop_w, crop_h), (current_center_x, current_center_y))
        resized_frame = cv2.resize(cropped_frame, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)

        return resized_frame

    return clip.fl(transform_frame)
594
+
595
def resize_to_fill(clip, target_resolution):
    """
    Scale *clip* up to cover target_resolution, then centre-crop the excess.

    Aspect ratio is preserved; overflow in the longer dimension is trimmed
    equally from both sides.
    """
    target_w, target_h = target_resolution
    wider_than_target = (clip.w / clip.h) > (target_w / target_h)

    if wider_than_target:
        # Match heights, then trim the horizontal overflow.
        clip = clip.resize(height=target_h)
        excess = (clip.w - target_w) / 2
        clip = clip.crop(x1=excess, x2=clip.w - excess, y1=0, y2=clip.h)
    else:
        # Match widths, then trim the vertical overflow.
        clip = clip.resize(width=target_w)
        excess = (clip.h - target_h) / 2
        clip = clip.crop(x1=0, x2=clip.w, y1=excess, y2=clip.h - excess)

    return clip
613
+
614
def find_mp3_files():
    """
    Locate a background-music MP3 in the current directory.

    Prefers 'background_music.mp3'; otherwise returns the first '.mp3' in
    sorted directory order. Sorting makes the pick deterministic —
    os.listdir() order is arbitrary, so the previous version could return
    a different file on every run.

    Returns:
        str | None: path of the MP3 found, or None if none exists.
    """
    bg_music_path = "background_music.mp3"
    if os.path.exists(bg_music_path):
        print(f"Found background music file: {bg_music_path}")
        return bg_music_path

    for file in sorted(os.listdir('.')):
        if file.endswith('.mp3'):
            print(f"Found background music file: {file}")
            return file

    print("No background music file found in the current directory.")
    return None
633
+
634
def add_background_music(final_video, bg_music_volume=0.08):
    """Mix looping background music under *final_video*'s narration.

    Any failure is logged and the original video is returned unchanged, so
    music problems never abort rendering.

    BUGFIX: `concatenate_audioclips` is not in the file's module-level
    imports, so looping short music tracks raised a NameError that the
    blanket `except` silently swallowed — background music never worked
    for tracks shorter than the video. It is now imported locally.

    Args:
        final_video: the assembled moviepy video clip.
        bg_music_volume: gain applied to the music track (narration stays at 1.0).

    Returns:
        The video with mixed audio, or the unchanged video on failure.
    """
    try:
        bg_music_path = find_mp3_files()
        if bg_music_path and os.path.exists(bg_music_path):
            print(f"Adding background music from: {bg_music_path}")
            from moviepy.editor import concatenate_audioclips  # see BUGFIX in docstring

            bg_music = AudioFileClip(bg_music_path)
            if bg_music.duration < final_video.duration:
                # Loop the track enough times to cover the video, then trim.
                loops_needed = math.ceil(final_video.duration / bg_music.duration)
                bg_music = concatenate_audioclips([bg_music] * loops_needed)
            bg_music = bg_music.subclip(0, final_video.duration)
            bg_music = bg_music.volumex(bg_music_volume)

            video_audio = final_video.audio
            if video_audio:
                mixed_audio = CompositeAudioClip([video_audio, bg_music])
            else:
                mixed_audio = bg_music

            final_video = final_video.set_audio(mixed_audio)
            print("Background music added successfully")
        else:
            print("No suitable background music file found, skipping background music")

        return final_video

    except Exception as e:
        print(f"Error adding background music: {e}")
        print("Continuing without background music")
        return final_video
665
+
666
def create_clip(media_path, asset_type, tts_path, duration, effects, narration_text, segment_index, target_resolution, caption_color):
    """Create a video clip with synchronized subtitles and properly timed narration.

    The clip's length follows the narration audio (plus a short tail), not
    the *duration* argument. Images get a Ken Burns effect; videos are
    looped or trimmed to fit. Captions are rendered as ~5-word chunks
    spread evenly over the narration.

    Args:
        media_path: path to the image/video asset.
        asset_type: "video" or "image".
        tts_path: path to the narration WAV.
        duration: nominal segment duration (currently unused; audio length wins).
        effects: requested effect name (currently informational only).
        narration_text: caption text; empty or "transparent" colour disables captions.
        segment_index: index of this segment, used for logging.
        target_resolution: (width, height) of the output.
        caption_color: subtitle colour name, or "transparent" for none.

    Returns:
        A moviepy clip with audio attached, or None on failure.
    """
    try:
        print(f"Creating clip #{segment_index} with asset_type: {asset_type}, media_path: {media_path}")

        if not os.path.exists(media_path) or not os.path.exists(tts_path):
            print("Missing media or TTS file")
            return None

        # Clip length is driven by the narration plus a short fade tail.
        audio_clip = AudioFileClip(tts_path).audio_fadeout(0.2)
        audio_duration = audio_clip.duration
        target_duration = audio_duration + 0.2

        if asset_type == "video":
            clip = VideoFileClip(media_path)
            clip = resize_to_fill(clip, target_resolution)
            # Loop short footage; trim long footage to the narration length.
            if clip.duration < target_duration:
                clip = clip.loop(duration=target_duration)
            else:
                clip = clip.subclip(0, target_duration)
        elif asset_type == "image":
            img = Image.open(media_path)
            if img.mode != 'RGB':
                # Re-save palette/alpha images as RGB so downstream JPEG/video
                # handling cannot fail on the mode.
                # NOTE(review): this NamedTemporaryFile is never deleted —
                # it leaks one temp file per non-RGB image; confirm intended.
                with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as temp:
                    img.convert('RGB').save(temp.name)
                    media_path = temp.name
                    img.close()

            clip = ImageClip(media_path).set_duration(target_duration)
            clip = apply_kenburns_effect(clip, target_resolution)
            clip = clip.fadein(0.3).fadeout(0.3)
        else:
            return None

        # Add subtitles
        if narration_text and caption_color != "transparent":
            try:
                # Break the narration into ~5-word chunks shown sequentially.
                words = narration_text.split()
                chunks = []
                current_chunk = []
                for word in words:
                    current_chunk.append(word)
                    if len(current_chunk) >= 5:
                        chunks.append(' '.join(current_chunk))
                        current_chunk = []
                if current_chunk:
                    chunks.append(' '.join(current_chunk))

                # Spread chunks evenly across the narration audio.
                chunk_duration = audio_duration / len(chunks) if len(chunks) > 0 else audio_duration
                subtitle_clips = []

                # Captions sit at 70% of the frame height.
                subtitle_y_position = int(target_resolution[1] * 0.70)

                for i, chunk_text in enumerate(chunks):
                    start_time = i * chunk_duration
                    end_time = (i + 1) * chunk_duration

                    txt_clip = TextClip(
                        chunk_text,
                        fontsize=45,
                        font='Arial-Bold',
                        color=caption_color,
                        bg_color='rgba(0, 0, 0, 0.25)',
                        method='caption',
                        align='center',
                        stroke_width=2,
                        stroke_color=caption_color,
                        size=(target_resolution[0] * 0.8, None)
                    ).set_start(start_time).set_end(end_time)

                    txt_clip = txt_clip.set_position(('center', subtitle_y_position))
                    subtitle_clips.append(txt_clip)

                clip = CompositeVideoClip([clip] + subtitle_clips)

            except Exception as sub_error:
                print(f"Subtitle error: {sub_error}")
                # Fallback to simpler subtitle if needed
                txt_clip = TextClip(
                    narration_text,
                    fontsize=28,
                    color=caption_color,
                    align='center',
                    size=(target_resolution[0] * 0.7, None)
                ).set_position(('center', int(target_resolution[1] / 3))).set_duration(clip.duration)
                clip = CompositeVideoClip([clip, txt_clip])

        clip = clip.set_audio(audio_clip)
        print(f"Clip created: {clip.duration:.1f}s")
        return clip

    except Exception as e:
        print(f"Error in create_clip: {str(e)}")
        return None
760
+
761
# --- Main Video Generation Function ---
def generate_video(user_input, resolution_choice, caption_option, caption_color_input, progress=gr.Progress()):
    """
    Orchestrate the full pipeline: script -> media -> TTS -> clips -> final video.

    BUGFIX: the exported video is now written OUTSIDE TEMP_FOLDER. It used
    to be written inside it, and the `finally` block deleted TEMP_FOLDER —
    including the result — before Gradio could serve the file.

    Args:
        user_input: the documentary concept entered by the user.
        resolution_choice: "Full" (1920x1080) or "Short" (1080x1920).
        caption_option: "Yes"/"No" — whether to burn in captions.
        caption_color_input: caption colour name when captions are enabled.
        progress: Gradio progress reporter.

    Returns:
        tuple[str, str | None]: (status message, path to the video or None).
    """
    progress(0, desc="Starting video generation...")

    # Set target resolution
    if resolution_choice == "Full":
        target_resolution = (1920, 1080)
    elif resolution_choice == "Short":
        target_resolution = (1080, 1920)
    else:
        return "Invalid resolution choice.", None

    # "transparent" disables subtitle rendering downstream.
    caption_color = caption_color_input if caption_option == "Yes" else "transparent"

    # Fresh scratch directory for this run.
    # NOTE(review): a single shared TEMP_FOLDER means concurrent
    # generations would clobber each other — confirm single-user usage.
    if os.path.exists(TEMP_FOLDER):
        shutil.rmtree(TEMP_FOLDER)
    os.makedirs(TEMP_FOLDER)

    try:
        progress(0.1, desc="Generating script...")
        script = generate_script(user_input)
        if not script:
            return "Failed to generate script.", None
        print("Generated Script:\n", script)

        progress(0.2, desc="Parsing script...")
        elements = parse_script(script)
        if not elements:
            return "Failed to parse script into elements.", None
        print(f"Parsed {len(elements)//2} script segments.")

        # Elements alternate (media, tts); pair them up per segment.
        paired_elements = []
        for i in range(0, len(elements), 2):
            if i + 1 < len(elements):
                paired_elements.append((elements[i], elements[i + 1]))

        if not paired_elements:
            return "No valid script segments found.", None

        clips = []
        total_segments = len(paired_elements)
        for idx, (media_elem, tts_elem) in enumerate(paired_elements):
            progress(0.3 + (idx * 0.5 / total_segments), desc=f"Processing segment {idx+1}/{total_segments}...")
            print(f"\nProcessing segment {idx+1}/{total_segments} with prompt: '{media_elem['prompt']}'")

            media_asset = generate_media(media_elem['prompt'], current_index=idx, total_segments=total_segments)
            if not media_asset:
                print(f"Skipping segment {idx+1} due to missing media asset.")
                continue

            tts_path = generate_tts(tts_elem['text'], tts_elem['voice'])
            if not tts_path:
                print(f"Skipping segment {idx+1} due to TTS generation failure.")
                continue

            clip = create_clip(
                media_path=media_asset['path'],
                asset_type=media_asset['asset_type'],
                tts_path=tts_path,
                duration=tts_elem['duration'],
                effects=media_elem.get('effects', 'fade-in'),
                narration_text=tts_elem['text'],
                segment_index=idx,
                target_resolution=target_resolution,
                caption_color=caption_color
            )
            if clip:
                clips.append(clip)
            else:
                print(f"Clip creation failed for segment {idx+1}.")

        if not clips:
            return "No clips were successfully created.", None

        progress(0.8, desc="Concatenating clips...")
        final_video = concatenate_videoclips(clips, method="compose")

        progress(0.9, desc="Adding background music...")
        final_video = add_background_music(final_video, bg_music_volume=0.08)

        progress(0.95, desc="Exporting final video...")
        # Write the result outside TEMP_FOLDER so cleanup cannot delete it
        # before Gradio streams it back (see BUGFIX in docstring).
        output_path = os.path.abspath(OUTPUT_VIDEO_FILENAME)
        final_video.write_videofile(output_path, codec='libx264', fps=24, preset='veryfast')

        progress(1.0, desc="Video generation complete.")
        return "Video generated successfully!", output_path

    except Exception as e:
        print(f"An error occurred: {e}")
        return f"An error occurred: {e}", None

    finally:
        # Clean up intermediate downloads/TTS files; the exported video
        # lives outside this folder and survives.
        if os.path.exists(TEMP_FOLDER):
            shutil.rmtree(TEMP_FOLDER)
            print("Temporary files removed.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
862
 
863
# --- Gradio Interface ---
# Top-level UI definition: two rows of inputs, a generate button, and the
# status/video outputs wired to generate_video().
with gr.Blocks() as demo:
    gr.Markdown("# AI Documentary Video Generator")
    gr.Markdown("Enter a concept, choose settings, and generate a short documentary video.")

    with gr.Row():
        # Concept text + output orientation ("Full" landscape / "Short" portrait).
        user_input = gr.Textbox(label="Video Concept", placeholder="e.g., The secret life of squirrels")
        resolution_choice = gr.Radio(["Full", "Short"], label="Target Resolution", value="Short")

    with gr.Row():
        caption_option = gr.Radio(["Yes", "No"], label="Add Captions?", value="Yes")
        caption_color_input = gr.Textbox(label="Caption Color (e.g., white, yellow)", value="white", visible=True)

    # Update caption color visibility based on caption option
    caption_option.change(
        lambda x: gr.update(visible=x == "Yes"),
        inputs=caption_option,
        outputs=caption_color_input
    )

    generate_button = gr.Button("Generate Video")
    status_output = gr.Textbox(label="Status", interactive=False)
    video_output = gr.Video(label="Generated Video")

    # generate_video returns (status message, video path or None).
    generate_button.click(
        fn=generate_video,
        inputs=[user_input, resolution_choice, caption_option, caption_color_input],
        outputs=[status_output, video_output]
    )
892
 
 
 
 
 
 
 
 
 
 
 
 
 
893
if __name__ == "__main__":
    # Ensure the scratch directory exists before launching the UI.
    # exist_ok avoids the check-then-create race of the previous
    # `if not os.path.exists(...)` pattern.
    os.makedirs(TEMP_FOLDER, exist_ok=True)
    demo.launch()