testdeep123 commited on
Commit
0a38b03
·
verified ·
1 Parent(s): e7589a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +669 -362
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import gradio as gr
2
  import os
3
  import shutil
@@ -18,6 +19,7 @@ from moviepy.editor import (
18
  CompositeVideoClip, TextClip, CompositeAudioClip, ColorClip
19
  )
20
  import moviepy.video.fx.all as vfx
 
21
  from pydub import AudioSegment
22
  from PIL import Image, ImageDraw, ImageFont
23
  from bs4 import BeautifulSoup
@@ -26,30 +28,33 @@ from gtts import gTTS
26
  import logging
27
 
28
  # --- Configuration ---
29
- # IMPORTANT: Use Hugging Face Secrets for API keys in a real Space
30
- PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna' # Replace with your Pexels API Key
31
- OPENROUTER_API_KEY = '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' # Replace with your OpenRouter API Key
 
 
32
  OPENROUTER_MODEL = "mistralai/mistral-7b-instruct:free" # Using a known free model
33
  # OPENROUTER_MODEL = "mistralai/mistral-small-latest" # Or a small paid one if needed
34
 
35
- TEMP_FOLDER_BASE = "/tmp/ai_doc_generator"
36
  OUTPUT_VIDEO_FILENAME = "final_documentary.mp4"
37
  USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
38
- DEFAULT_FONT = "DejaVuSans.ttf" # A common font available in many Linux distros, adjust if needed
 
 
39
  BGM_FILE = "background_music.mp3" # Optional: Place a royalty-free mp3 here
40
  BGM_VOLUME = 0.1 # Background music volume multiplier (0.0 to 1.0)
41
 
42
  # --- Logging Setup ---
43
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
44
 
45
- # --- Kokoro TTS Initialization (Optional) ---
46
- KOKORO_ENABLED = False
47
- pipeline = None
48
  # try:
49
  # from kokoro import KPipeline
50
- # # Check for GPU availability if desired, default to CPU
51
- # device = 'cuda' if torch.cuda.is_available() else 'cpu'
52
- # pipeline = KPipeline(lang_code='a', device=device) # 'a' for multilingual? Check Kokoro docs
53
  # KOKORO_ENABLED = True
54
  # logging.info("✅ Kokoro TTS Initialized.")
55
  # except ImportError:
@@ -59,11 +64,15 @@ pipeline = None
59
  # logging.warning(f"⚠️ Error initializing Kokoro TTS: {e}. Using gTTS fallback.")
60
  # pipeline = None
61
 
62
- # --- Helper Functions ---
63
-
64
  def generate_script(topic, api_key, model):
65
  """Generates a documentary script using OpenRouter API."""
66
  logging.info(f"Generating script for topic: {topic}")
 
 
 
 
 
67
  prompt = f"""Create a short documentary script about '{topic}'.
68
  The script should be structured as a sequence of scenes and narrations.
69
  Each scene description should be enclosed in [SCENE: description] tags. The description should be concise and suggest visuals (e.g., 'drone shot of mountains', 'close up of a historical artifact', 'archival footage of protests').
@@ -80,7 +89,9 @@ Generate the script now:
80
  """
81
  headers = {
82
  "Authorization": f"Bearer {api_key}",
83
- "Content-Type": "application/json"
 
 
84
  }
85
  data = {
86
  "model": model,
@@ -88,23 +99,41 @@ Generate the script now:
88
  "max_tokens": 1000, # Adjust as needed
89
  }
90
  try:
91
- response = requests.post("https://openrouter.ai/api/v1/chat/completions", headers=headers, json=data, timeout=60)
 
 
 
 
92
  response.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)
93
  result = response.json()
 
 
 
 
 
 
94
  script_content = result['choices'][0]['message']['content']
95
  logging.info("✅ Script generated successfully.")
96
  # Basic validation
97
  if "[SCENE:" not in script_content or "[NARRATION:" not in script_content:
98
  logging.error("❌ Script generation failed: Output format incorrect.")
99
  logging.debug(f"Raw script output: {script_content}")
100
- return None
 
 
101
  return script_content
 
 
 
 
 
 
102
  except requests.exceptions.RequestException as e:
103
  logging.error(f"❌ Script generation failed: API request error: {e}")
104
  return None
105
- except (KeyError, IndexError) as e:
106
- logging.error(f"❌ Script generation failed: Unexpected API response format: {e}")
107
- logging.debug(f"Raw API response: {response.text}")
108
  return None
109
  except Exception as e:
110
  logging.error(f"❌ Script generation failed: An unexpected error occurred: {e}")
@@ -115,21 +144,43 @@ def parse_script(script_text):
115
  """Parses the generated script into scene prompts and narration text."""
116
  logging.info("Parsing script...")
117
  if not script_text:
 
118
  return None
119
 
120
- # Regex to find scene and narration blocks
121
  pattern = re.compile(r"\[SCENE:\s*(.*?)\s*\]\s*\[NARRATION:\s*(.*?)\s*\]", re.DOTALL | re.IGNORECASE)
122
  matches = pattern.findall(script_text)
123
 
124
  if not matches:
125
- logging.error("❌ Script parsing failed: No valid [SCENE]/[NARRATION] pairs found.")
126
- logging.debug(f"Script content for parsing: {script_text}")
127
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
 
129
  elements = []
130
  for scene_desc, narration_text in matches:
131
  scene_desc = scene_desc.strip()
132
- narration_text = narration_text.strip().replace('\n', ' ') # Clean up narration
133
  if scene_desc and narration_text:
134
  elements.append({"type": "scene", "prompt": scene_desc})
135
  elements.append({"type": "narration", "text": narration_text})
@@ -140,94 +191,121 @@ def parse_script(script_text):
140
  logging.info(f"✅ Script parsed into {len(elements)//2} scene/narration pairs.")
141
  return elements
142
 
143
- def search_pexels(query, api_key, media_type="videos", per_page=5):
144
  """Searches Pexels API for videos or photos."""
145
- if not api_key or api_key == "YOUR_PEXELS_API_KEY_HERE":
146
  logging.warning("⚠️ Pexels API key not configured. Skipping search.")
147
  return []
148
- logging.info(f"Searching Pexels {media_type} for: {query}")
149
  base_url = f"https://api.pexels.com/{media_type}/search"
150
  headers = {"Authorization": api_key}
151
- params = {"query": query, "per_page": per_page, "orientation": "landscape"} # Default landscape
 
 
152
 
153
  try:
154
- response = requests.get(base_url, headers=headers, params=params, timeout=20)
155
  response.raise_for_status()
156
  data = response.json()
157
 
158
  results = []
159
  media_key = 'videos' if media_type == 'videos' else 'photos'
160
  link_key = 'video_files' if media_type == 'videos' else 'src'
 
 
 
 
 
161
 
162
- for item in data.get(media_key, []):
163
  if media_type == 'videos':
164
- # Find HD or highest quality video link
165
- video_links = sorted(item.get(link_key, []), key=lambda x: x.get('width', 0), reverse=True)
166
  if video_links:
167
- # Prefer HD (1920x1080) or similar if available
168
- hd_link = next((link['link'] for link in video_links if link.get('quality') == 'hd' and link.get('width') == 1920), None)
169
  if hd_link:
170
- results.append({'url': hd_link, 'type': 'video'})
171
  elif video_links[0].get('link'): # Fallback to highest available
172
- results.append({'url': video_links[0]['link'], 'type': 'video'})
 
173
  else: # photos
174
- # Get large or original image link
175
  img_links = item.get(link_key, {})
176
- if img_links.get('large2x'):
177
- results.append({'url': img_links['large2x'], 'type': 'image'})
178
- elif img_links.get('large'):
179
- results.append({'url': img_links['large'], 'type': 'image'})
180
- elif img_links.get('original'):
181
- results.append({'url': img_links['original'], 'type': 'image'})
182
-
183
- logging.info(f"✅ Found {len(results)} Pexels {media_type} results.")
184
  return results
185
 
 
 
 
 
 
 
186
  except requests.exceptions.RequestException as e:
187
- logging.error(f"❌ Pexels API request error: {e}")
188
  return []
189
  except Exception as e:
190
- logging.error(f"❌ Error processing Pexels response: {e}")
191
  traceback.print_exc()
192
  return []
193
 
194
  def download_media(url, save_dir):
195
  """Downloads media (video or image) from a URL."""
196
- logging.info(f"Downloading media from: {url}")
197
  try:
198
- response = requests.get(url, stream=True, timeout=60, headers={'User-Agent': USER_AGENT})
199
  response.raise_for_status()
200
 
201
- # Try to get filename from URL or Content-Disposition
202
- filename = url.split('/')[-1].split('?')[0]
203
- if not filename or '.' not in filename: # Basic check for extension
204
- # Look for content-disposition header
205
- cd = response.headers.get('content-disposition')
206
- if cd:
207
- fname = re.findall('filename="?(.+)"?', cd)
208
- if fname:
209
- filename = fname[0]
210
- # If still no good filename, generate one based on type
211
- if not filename or '.' not in filename:
212
- content_type = response.headers.get('content-type', '').lower()
213
- ext = '.jpg' # default
214
- if 'video' in content_type:
215
- ext = '.mp4'
216
- elif 'jpeg' in content_type or 'jpg' in content_type:
217
- ext = '.jpg'
218
- elif 'png' in content_type:
219
- ext = '.png'
220
- filename = f"media_{int(time.time())}{ext}"
 
 
 
 
 
 
 
 
221
 
222
 
223
  save_path = os.path.join(save_dir, filename)
 
224
 
225
  with open(save_path, 'wb') as f:
226
- for chunk in response.iter_content(chunk_size=8192):
227
  f.write(chunk)
228
 
229
- logging.info(f"✅ Media downloaded successfully to: {save_path}")
 
 
 
 
 
230
  return save_path
 
 
 
231
  except requests.exceptions.RequestException as e:
232
  logging.error(f"❌ Media download failed: Request error: {e}")
233
  return None
@@ -237,159 +315,247 @@ def download_media(url, save_dir):
237
  return None
238
 
239
  def generate_tts(text, lang, save_dir, segment_index):
240
- """Generates TTS audio using Kokoro (if enabled) or gTTS."""
241
  filename = f"narration_{segment_index}.mp3"
242
  filepath = os.path.join(save_dir, filename)
243
- logging.info(f"Generating TTS for segment {segment_index}: '{text[:50]}...'")
 
 
 
 
 
 
 
244
 
245
  audio_duration = 0
246
  success = False
247
 
248
- # Try Kokoro first if enabled and initialized
249
- # if KOKORO_ENABLED and pipeline:
250
- # try:
251
- # logging.info("Attempting TTS generation with Kokoro...")
252
- # # Assuming Kokoro outputs a numpy array and sample rate
253
- # wav, sr = pipeline.tts(text=text)
254
- # sf.write(filepath, wav, sr)
255
- # audio_duration = len(wav) / sr
256
- # logging.info(f"✅ Kokoro TTS generated successfully ({audio_duration:.2f}s).")
257
- # success = True
258
- # except Exception as e:
259
- # logging.warning(f"⚠️ Kokoro TTS failed: {e}. Falling back to gTTS.")
260
-
261
- # Fallback to gTTS
262
- if not success:
263
  try:
264
- logging.info("Attempting TTS generation with gTTS...")
265
- tts = gTTS(text=text, lang=lang)
266
- tts.save(filepath)
267
- # Get duration using soundfile
268
- try:
269
- audio_info = sf.info(filepath)
270
- audio_duration = audio_info.duration
271
- except Exception as e_dur:
272
- logging.warning(f"⚠️ Could not get duration using soundfile ({e_dur}), trying pydub...")
273
- try:
274
- audio_seg = AudioSegment.from_mp3(filepath)
275
- audio_duration = len(audio_seg) / 1000.0
276
- except Exception as e_dur_pd:
277
- logging.error(f"❌ Failed to get duration with pydub as well ({e_dur_pd}). Setting duration to estimated.")
278
- # Estimate duration based on words (very rough)
279
- words_per_minute = 150
280
- num_words = len(text.split())
281
- audio_duration = (num_words / words_per_minute) * 60
282
- if audio_duration < 2: audio_duration = 2 # Minimum duration
283
-
284
- logging.info(f" gTTS generated successfully ({audio_duration:.2f}s).")
285
- success = True
286
- except Exception as e:
287
- logging.error(f"❌ gTTS failed: {e}")
288
- traceback.print_exc()
289
- success = False
 
 
 
 
 
 
 
290
 
291
  return filepath if success else None, audio_duration if success else 0
292
 
 
293
  def resize_media_to_fill(clip, target_size):
294
  """Resizes a MoviePy clip (video or image) to fill the target size, cropping if necessary."""
295
- # target_size = (width, height)
296
  target_w, target_h = target_size
 
 
 
 
297
  target_aspect = target_w / target_h
298
 
 
 
 
 
299
  clip_w, clip_h = clip.size
 
 
 
 
 
 
300
  clip_aspect = clip_w / clip_h
301
 
302
  if abs(clip_aspect - target_aspect) < 0.01: # Aspect ratios are close enough
303
- return clip.resize(width=target_w) # Or height=target_h
 
304
 
305
  if clip_aspect > target_aspect:
306
- # Clip is wider than target, resize to target height and crop width
307
  resized_clip = clip.resize(height=target_h)
308
- crop_width = resized_clip.w
309
- crop_x_center = crop_width / 2
310
- crop_x1 = int(crop_x_center - target_w / 2)
311
- crop_x2 = int(crop_x_center + target_w / 2)
312
- # Ensure crop coordinates are within bounds
313
- crop_x1 = max(0, crop_x1)
314
- crop_x2 = min(resized_clip.w, crop_x2)
315
- # Adjust if calculated width is slightly off due to rounding
316
  if crop_x2 - crop_x1 != target_w:
317
- crop_x2 = crop_x1 + target_w # Prioritize target width
 
 
 
318
 
319
- return resized_clip.fx(vfx.crop, x1=crop_x1, y1=0, x2=crop_x2, y2=target_h)
320
  else:
321
- # Clip is taller than target, resize to target width and crop height
322
  resized_clip = clip.resize(width=target_w)
323
- crop_height = resized_clip.h
324
- crop_y_center = crop_height / 2
325
- crop_y1 = int(crop_y_center - target_h / 2)
326
- crop_y2 = int(crop_y_center + target_h / 2)
327
- # Ensure crop coordinates are within bounds
328
- crop_y1 = max(0, crop_y1)
329
- crop_y2 = min(resized_clip.h, crop_y2)
330
- # Adjust if calculated height is slightly off
331
  if crop_y2 - crop_y1 != target_h:
332
  crop_y2 = crop_y1 + target_h
 
 
 
 
 
333
 
334
- return resized_clip.fx(vfx.crop, x1=0, y1=crop_y1, x2=target_w, y2=crop_y2)
335
 
 
 
 
 
 
336
 
337
- def apply_ken_burns(image_clip, duration, target_size, zoom_factor=1.1):
338
- """Applies a subtle zoom-out Ken Burns effect to an ImageClip."""
339
- # Ensure the input clip already matches the target size
340
  if image_clip.size != target_size:
341
- logging.warning("Applying Ken Burns to an image not matching target size, resizing first.")
342
  image_clip = resize_media_to_fill(image_clip, target_size)
343
 
 
 
 
 
 
344
  # Define the resize function based on time `t`
345
  def resize_func(t):
346
- # Zoom out: start at zoom_factor, end at 1.0
347
- current_zoom = 1 + (zoom_factor - 1) * (1 - t / duration)
 
 
 
 
 
 
348
  return current_zoom
349
 
350
- # Apply the resize effect over time
351
- # Need to center the zoom effect
352
  zoomed_clip = image_clip.fx(vfx.resize, resize_func)
353
- # Crop back to target size, centered
354
- final_clip = zoomed_clip.fx(vfx.crop, x_center=zoomed_clip.w/2, y_center=zoomed_clip.h/2, width=target_size[0], height=target_size[1])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
355
 
356
  return final_clip.set_duration(duration)
357
 
358
 
359
- def create_caption_clip(text, duration, clip_size, font_size=None, font_path=DEFAULT_FONT, color='white', stroke_color='black', stroke_width=1.5, position=('center', 'bottom'), margin=20):
360
- """Creates a MoviePy TextClip for captions with basic wrapping."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
  width, height = clip_size
362
- max_text_width = width * 0.8 # Allow text to occupy 80% of the width
 
363
 
364
  if font_size is None:
365
- font_size = max(20, int(height / 25)) # Dynamic font size based on height
 
 
366
 
367
- # Basic word wrapping
368
  try:
369
- # Attempt to load the font to estimate size
370
- pil_font = ImageFont.truetype(font_path, font_size)
371
  except IOError:
372
- logging.warning(f"Font '{font_path}' not found. Using MoviePy default.")
373
- pil_font = None # Use MoviePy default if specified font fails
 
374
 
375
  words = text.split()
376
  lines = []
377
  current_line = ""
378
- line_width_func = lambda txt: pil_font.getbbox(txt)[2] if pil_font else len(txt) * font_size * 0.6 # Estimate width
379
 
 
380
  for word in words:
381
  test_line = f"{current_line} {word}".strip()
382
- # Estimate width (this is approximate)
383
- if line_width_func(test_line) <= max_text_width:
 
384
  current_line = test_line
385
  else:
386
  if current_line: # Add the previous line if it wasn't empty
387
  lines.append(current_line)
388
  current_line = word # Start new line with the current word
389
  # Handle case where a single word is too long
390
- if line_width_func(current_line) > max_text_width:
391
- logging.warning(f"Word '{current_line}' is too long for caption width.")
392
- # Could implement character-level wrapping here if needed
 
 
 
 
 
 
 
 
393
 
394
  if current_line: # Add the last line
395
  lines.append(current_line)
@@ -402,126 +568,207 @@ def create_caption_clip(text, duration, clip_size, font_size=None, font_path=DEF
402
  wrapped_text,
403
  fontsize=font_size,
404
  color=color,
405
- font=font_path, # MoviePy might handle font lookup differently
406
  stroke_color=stroke_color,
407
  stroke_width=stroke_width,
408
- method='caption', # Use caption method for better wrapping if available
409
  size=(int(max_text_width), None), # Constrain width for wrapping
410
  align='center'
411
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
412
  except Exception as e:
413
- logging.error(f"Error creating TextClip (maybe font issue?): {e}. Using simpler TextClip.")
414
- # Fallback to simpler TextClip without stroke/specific font if needed
415
- caption = TextClip(wrapped_text, fontsize=font_size, color=color, method='caption', size=(int(max_text_width), None), align='center')
 
416
 
417
 
418
  # Set position with margin
419
- pos_x, pos_y = position
420
- final_pos = list(caption.pos(pos_x, pos_y)) # Get numeric position
 
421
 
422
- if 'bottom' in pos_y:
423
- final_pos[1] -= margin
424
- elif 'top' in pos_y:
425
- final_pos[1] += margin
426
- if 'right' in pos_x:
427
- final_pos[0] -= margin
428
- elif 'left' in pos_x:
429
- final_pos[0] += margin
 
 
430
 
431
- caption = caption.set_position(tuple(final_pos)).set_duration(duration)
432
- return caption
 
 
 
433
 
434
 
435
  def create_clip(media_path, media_type, audio_path, audio_duration, target_size, add_captions, narration_text, segment_index):
436
  """Creates a single video clip from media, audio, and optional captions."""
437
- logging.info(f"Creating clip {segment_index} - Type: {media_type}, Duration: {audio_duration:.2f}s")
 
 
 
438
 
439
  try:
440
- # Load Audio
 
441
  audio_clip = AudioFileClip(audio_path)
442
- # Verify audio duration (sometimes file reading is slightly off)
443
- if abs(audio_clip.duration - audio_duration) > 0.1:
444
- logging.warning(f"Audio file duration ({audio_clip.duration:.2f}s) differs from expected ({audio_duration:.2f}s). Using file duration.")
445
  audio_duration = audio_clip.duration
446
- # Ensure minimum duration to avoid issues
447
- if audio_duration < 0.1:
448
- logging.warning(f"Audio duration is very short ({audio_duration:.2f}s). Setting minimum 0.5s.")
449
  audio_duration = 0.5
 
450
  audio_clip = audio_clip.subclip(0, audio_duration)
 
451
 
452
-
453
- # Load Media (Video or Image)
454
  if media_type == 'video':
 
455
  try:
456
- video_clip = VideoFileClip(media_path, target_resolution=(target_size[1], target_size[0])) # height, width
 
 
457
  # Trim or loop video to match audio duration
458
- if video_clip.duration >= audio_duration:
459
- video_clip = video_clip.subclip(0, audio_duration)
460
  else:
461
- # Loop the video if it's shorter than the audio
462
- logging.warning(f"Video duration ({video_clip.duration:.2f}s) shorter than audio ({audio_duration:.2f}s). Looping video.")
463
- # video_clip = video_clip.fx(vfx.loop, duration=audio_duration) # Loop is simpler
464
- # Alternatively freeze last frame:
465
- num_loops = math.ceil(audio_duration / video_clip.duration)
466
- video_clip = concatenate_videoclips([video_clip] * num_loops).subclip(0, audio_duration)
467
-
468
-
469
- main_clip = resize_media_to_fill(video_clip, target_size)
 
 
 
470
 
471
  except Exception as e:
472
- logging.error(f"❌ Failed to load or process video file '{media_path}': {e}. Creating black clip.")
 
473
  main_clip = ColorClip(size=target_size, color=(0,0,0), duration=audio_duration)
474
 
475
  elif media_type == 'image':
 
476
  try:
477
- # Load image, resize to fill target, apply Ken Burns
478
  img_clip_base = ImageClip(media_path)
479
- img_clip_resized = resize_media_to_fill(img_clip_base, target_size)
480
- main_clip = apply_ken_burns(img_clip_resized, audio_duration, target_size)
481
-
482
  except Exception as e:
483
- logging.error(f"❌ Failed to load or process image file '{media_path}': {e}. Creating black clip.")
 
484
  main_clip = ColorClip(size=target_size, color=(0,0,0), duration=audio_duration)
485
- else:
486
- logging.error(f"❌ Unknown media type: {media_type}. Creating black clip.")
 
487
  main_clip = ColorClip(size=target_size, color=(0,0,0), duration=audio_duration)
488
 
489
- # Set duration definitively and add audio
490
- main_clip = main_clip.set_duration(audio_duration).set_audio(audio_clip)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
491
 
492
- # Add Captions if enabled
493
  if add_captions and narration_text:
494
- caption_clip = create_caption_clip(narration_text, audio_duration, target_size)
495
- final_clip = CompositeVideoClip([main_clip, caption_clip], size=target_size)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
496
  else:
497
- final_clip = main_clip
498
 
499
- logging.info(f"✅ Clip {segment_index} created successfully.")
500
- return final_clip
 
 
501
 
502
  except Exception as e:
503
- logging.error(f"❌ Failed to create clip {segment_index}: {e}")
504
  traceback.print_exc()
 
 
 
505
  return None
506
 
 
507
  def add_background_music(video_clip, music_file=BGM_FILE, volume=BGM_VOLUME):
508
  """Adds background music to the final video clip."""
509
  if not os.path.exists(music_file):
510
  logging.warning(f"Background music file '{music_file}' not found. Skipping BGM.")
511
  return video_clip
 
 
 
 
 
512
 
513
  logging.info(f"Adding background music from {music_file}")
 
 
 
514
  try:
515
  bgm_clip = AudioFileClip(music_file)
516
  video_duration = video_clip.duration
517
 
518
- # Loop or trim BGM to match video duration
519
  if bgm_clip.duration < video_duration:
520
- # Loop BGM - Use audio_loop fx
521
- bgm_clip = bgm_clip.fx(afx.audio_loop, duration=video_duration)
522
- # Alternative manual loop:
523
- # num_loops = math.ceil(video_duration / bgm_clip.duration)
524
- # bgm_clip = concatenate_audioclips([bgm_clip] * num_loops).subclip(0, video_duration)
525
  else:
526
  bgm_clip = bgm_clip.subclip(0, video_duration)
527
 
@@ -529,125 +776,164 @@ def add_background_music(video_clip, music_file=BGM_FILE, volume=BGM_VOLUME):
529
  bgm_clip = bgm_clip.volumex(volume)
530
 
531
  # Combine with existing audio
532
- original_audio = video_clip.audio
533
  if original_audio:
 
 
 
 
 
 
 
 
534
  combined_audio = CompositeAudioClip([original_audio, bgm_clip])
535
  else:
536
- # Handle case where video might not have narration audio (e.g., if all TTS failed)
537
  logging.warning("Video clip has no primary audio. Adding BGM only.")
538
  combined_audio = bgm_clip
539
 
540
- video_clip = video_clip.set_audio(combined_audio)
541
  logging.info("✅ Background music added.")
542
- return video_clip
 
 
 
 
 
543
 
544
  except Exception as e:
545
  logging.error(f"❌ Failed to add background music: {e}")
546
  traceback.print_exc()
547
- return video_clip # Return original clip on failure
 
 
 
548
 
549
 
550
  # --- Main Gradio Function ---
551
- def generate_video_process(topic, resolution_choice, add_captions_option, add_bgm_option, progress=gr.Progress()):
552
  """The main function called by Gradio to generate the video."""
553
  start_time = time.time()
554
- status_log = []
555
  temp_dir = None
556
  final_video_path = None
 
557
 
558
- # Create a unique temporary directory for this run
559
  try:
560
  temp_dir = tempfile.mkdtemp(prefix=TEMP_FOLDER_BASE + "_")
561
- status_log.append(f"Temporary directory created: {temp_dir}")
562
  logging.info(f"Using temp directory: {temp_dir}")
563
 
 
 
 
 
 
 
 
564
  # --- 1. Generate Script ---
565
  progress(0.1, desc="Generating script...")
566
- status_log.append("🔄 Generating script...")
567
  script = generate_script(topic, OPENROUTER_API_KEY, OPENROUTER_MODEL)
568
  if not script:
569
- status_log.append("❌ Script generation failed. Check API key and model.")
 
570
  return "\n".join(status_log), None
571
  status_log.append("✅ Script generated.")
572
- # status_log.append(f"Raw Script:\n{script[:500]}...") # Optional: Log snippet
573
 
574
  # --- 2. Parse Script ---
575
  progress(0.2, desc="Parsing script...")
576
- status_log.append("🔄 Parsing script...")
577
  elements = parse_script(script)
578
- if not elements:
579
- status_log.append("❌ Script parsing failed. Check script format.")
580
  return "\n".join(status_log), None
581
  num_segments = len(elements) // 2
582
  status_log.append(f"✅ Script parsed into {num_segments} segments.")
583
 
584
- # --- 3. Process Segments (Media Search, Download, TTS, Clip Creation) ---
585
- clips = []
586
- target_size = (1920, 1080) if resolution_choice == "Full HD (16:9)" else (1080, 1920) # W, H
587
- status_log.append(f"Target resolution: {target_size[0]}x{target_size[1]}")
588
-
589
  for i in range(0, len(elements), 2):
590
  segment_index = i // 2
591
- current_progress = 0.2 + (0.6 * (segment_index / num_segments))
592
- progress(current_progress, desc=f"Processing segment {segment_index + 1}/{num_segments}")
 
 
 
 
 
593
 
594
  scene_elem = elements[i]
595
  narration_elem = elements[i+1]
596
- scene_prompt = scene_elem['prompt']
597
- narration_text = narration_elem['text']
 
 
 
 
 
 
 
 
 
 
 
598
 
599
  status_log.append(f"\n--- Segment {segment_index + 1}/{num_segments} ---")
600
- status_log.append(f"Scene Prompt: {scene_prompt}")
601
- status_log.append(f"Narration: {narration_text[:100]}...")
602
 
603
  # 3a. Generate TTS
604
- status_log.append("🔄 Generating narration audio...")
605
  tts_path, tts_duration = generate_tts(narration_text, 'en', temp_dir, segment_index)
606
- if not tts_path or tts_duration <= 0.1: # Check for valid duration
607
- status_log.append(f"⚠️ TTS generation failed for segment {segment_index + 1}. Skipping segment.")
608
  logging.warning(f"Skipping segment {segment_index+1} due to TTS failure.")
609
  continue
610
- status_log.append(f"✅ Narration audio generated ({tts_duration:.2f}s): {os.path.basename(tts_path)}")
 
611
 
612
- # 3b. Search for Media
613
- status_log.append("🔄 Searching for media...")
614
  media_path = None
615
  media_type = None
616
 
617
- # Try Pexels Video first
618
- video_results = search_pexels(scene_prompt, PEXELS_API_KEY, media_type="videos")
619
  if video_results:
620
  selected_media = random.choice(video_results)
621
- status_log.append(f"📥 Downloading Pexels video: {selected_media['url']}")
622
  media_path = download_media(selected_media['url'], temp_dir)
623
  if media_path:
624
  media_type = 'video'
 
625
  else:
626
  status_log.append("⚠️ Video download failed.")
627
 
628
- # Try Pexels Image if video fails or not found
629
  if not media_path:
630
- status_log.append("🔄 No suitable video found/downloaded. Searching Pexels images...")
631
- image_results = search_pexels(scene_prompt, PEXELS_API_KEY, media_type="photos")
632
  if image_results:
633
  selected_media = random.choice(image_results)
634
- status_log.append(f"📥 Downloading Pexels image: {selected_media['url']}")
635
  media_path = download_media(selected_media['url'], temp_dir)
636
  if media_path:
637
  media_type = 'image'
 
638
  else:
639
  status_log.append("⚠️ Image download failed.")
640
 
641
- # Fallback: If no media found after searches
642
  if not media_path:
643
- status_log.append(f"⚠️ No suitable media found for '{scene_prompt}'. Using black screen.")
644
- media_type = 'color' # Special type for ColorClip
645
- media_path = None # No path needed for color clip
646
 
647
  # 3c. Create Clip
648
- status_log.append(f"🔄 Creating video clip for segment {segment_index + 1}...")
649
  clip = create_clip(
650
- media_path=media_path if media_type != 'color' else None, # Pass None if color
651
  media_type=media_type,
652
  audio_path=tts_path,
653
  audio_duration=tts_duration,
@@ -659,110 +945,141 @@ def generate_video_process(topic, resolution_choice, add_captions_option, add_bg
659
 
660
  if clip:
661
  clips.append(clip)
662
- status_log.append(f"✅ Clip {segment_index + 1} created.")
663
  else:
664
- status_log.append(f"❌ Failed to create clip for segment {segment_index + 1}. Skipping.")
665
  logging.error(f"Failed to create clip {segment_index+1}, skipping.")
666
 
667
 
668
  if not clips:
669
  status_log.append("\n❌ No valid clips were created. Cannot generate video.")
 
670
  return "\n".join(status_log), None
671
 
 
 
 
672
  # --- 4. Concatenate Clips ---
673
  progress(0.85, desc="Combining video clips...")
674
  status_log.append("\n🔄 Combining video clips...")
 
675
  try:
676
- final_clip = concatenate_videoclips(clips, method="compose")
 
677
  status_log.append("✅ Clips combined successfully.")
678
  except Exception as e:
679
  status_log.append(f"❌ Error concatenating clips: {e}")
680
  logging.error(f"Concatenation failed: {e}")
681
  traceback.print_exc()
682
- # Attempt cleanup even on error
683
- for clip in clips:
684
- clip.close()
685
- return "\n".join(status_log), None
686
-
687
 
688
  # --- 5. Add Background Music (Optional) ---
689
- if add_bgm_option:
690
  progress(0.9, desc="Adding background music...")
691
- status_log.append("🔄 Adding background music...")
692
  final_clip = add_background_music(final_clip, music_file=BGM_FILE, volume=BGM_VOLUME)
693
-
694
 
695
  # --- 6. Write Final Video ---
696
- progress(0.95, desc="Writing final video file...")
697
- status_log.append("🔄 Writing final video file (this may take time)...")
698
- output_path = os.path.join(temp_dir, OUTPUT_VIDEO_FILENAME)
699
- try:
700
- # Use 'medium' preset for better quality/size balance than 'ultrafast'
701
- # Use 'libx264' for wide compatibility. Adjust audio_codec if needed.
702
- # threads=4 can help speed up encoding on multi-core systems
703
- final_clip.write_videofile(
704
- output_path,
705
- codec='libx264',
706
- audio_codec='aac',
707
- fps=24,
708
- preset='medium',
709
- threads=4,
710
- logger='bar' # Use None for less verbose output, or 'bar' for progress
711
- )
712
- status_log.append(f"✅ Final video saved to: {output_path}")
713
- final_video_path = output_path # Set the path to be returned
714
- except Exception as e:
715
- status_log.append(f" Error writing final video file: {e}")
716
- logging.error(f"Final video write failed: {e}")
717
- traceback.print_exc()
718
- final_video_path = None # Ensure no path is returned on failure
719
- finally:
720
- # Ensure MoviePy resources are released
721
- final_clip.close()
722
- for clip in clips:
723
- try:
724
- clip.close()
725
- if clip.audio: clip.audio.close()
726
- except:
727
- pass # Ignore errors during cleanup
728
 
729
 
730
  except Exception as e:
731
- status_log.append(f"\n❌ An unexpected error occurred during video generation: {e}")
732
- logging.error("An unexpected error occurred in generate_video_process:")
733
  logging.error(traceback.format_exc())
734
  final_video_path = None # Ensure failure state
735
 
736
  finally:
737
  # --- 7. Cleanup ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
738
  if temp_dir and os.path.exists(temp_dir):
739
  try:
740
- shutil.rmtree(temp_dir)
741
- status_log.append(f"🧹 Temporary directory cleaned up: {temp_dir}")
742
- logging.info(f"Cleaned up temp directory: {temp_dir}")
743
- except Exception as e:
744
- status_log.append(f"⚠️ Error cleaning up temporary directory {temp_dir}: {e}")
745
- logging.warning(f"Cleanup failed for {temp_dir}: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
746
 
747
  end_time = time.time()
748
  total_time = end_time - start_time
749
- status_log.append(f"\n--- Generation Complete ---")
750
- status_log.append(f"Total time: {total_time:.2f} seconds")
751
 
752
  progress(1.0, desc="Finished!")
753
  return "\n".join(status_log), final_video_path
754
 
755
 
756
  # --- Gradio Interface Definition ---
757
- with gr.Blocks() as iface:
758
- gr.Markdown("# 🤖 AI Documentary Generator")
759
- gr.Markdown("Enter a topic, choose your settings, and let the AI create a short video documentary!")
760
 
761
  with gr.Row():
762
  with gr.Column(scale=1):
763
  topic_input = gr.Textbox(
764
  label="Video Topic",
765
- placeholder="e.g., The History of Coffee, The Secrets of the Deep Ocean, The Rise of Quantum Computing",
766
  lines=2
767
  )
768
  resolution_input = gr.Radio(
@@ -770,14 +1087,19 @@ with gr.Blocks() as iface:
770
  choices=["Short (9:16)", "Full HD (16:9)"],
771
  value="Short (9:16)"
772
  )
773
- captions_input = gr.Checkbox(label="Add Captions", value=True)
774
- bgm_input = gr.Checkbox(label=f"Add Background Music ({os.path.basename(BGM_FILE) if os.path.exists(BGM_FILE) else 'No BGM file found'})", value=True, interactive=os.path.exists(BGM_FILE))
 
 
 
 
 
775
 
776
- generate_button = gr.Button("Generate Video", variant="primary")
777
 
778
  with gr.Column(scale=2):
779
- status_output = gr.Textbox(label="Status Log", lines=15, interactive=False)
780
- video_output = gr.Video(label="Generated Video")
781
 
782
  generate_button.click(
783
  fn=generate_video_process,
@@ -787,28 +1109,21 @@ with gr.Blocks() as iface:
787
 
788
  gr.Examples(
789
  examples=[
790
- ["The lifecycle of a butterfly", "Short (9:16)", True, True],
791
- ["Ancient Roman Engineering", "Full HD (16:9)", True, False],
792
- ["The impact of social media", "Short (9:16)", False, True],
 
793
  ],
794
- inputs=[topic_input, resolution_input, captions_input, bgm_input]
 
795
  )
796
 
797
  # --- Launch the App ---
798
  if __name__ == "__main__":
799
- # Optional: Check for API keys on startup
800
- if not PEXELS_API_KEY or PEXELS_API_KEY == "YOUR_PEXELS_API_KEY_HERE":
801
- logging.warning("PEXELS_API_KEY is not set. Media search will be limited.")
802
- print("WARNING: PEXELS_API_KEY is not set. Media search will be limited.")
803
- if not OPENROUTER_API_KEY or OPENROUTER_API_KEY == "YOUR_OPENROUTER_API_KEY_HERE":
804
- logging.warning("OPENROUTER_API_KEY is not set. Script generation will fail.")
805
- print("WARNING: OPENROUTER_API_KEY is not set. Script generation will fail.")
806
-
807
- # Optional: Add a placeholder BGM file if it doesn't exist
808
- if not os.path.exists(BGM_FILE):
809
- logging.warning(f"Background music file '{BGM_FILE}' not found. Creating a silent placeholder.")
810
  try:
811
- # Create a short silent mp3 using pydub
812
  silent_segment = AudioSegment.silent(duration=1000) # 1 second silence
813
  silent_segment.export(BGM_FILE, format="mp3")
814
  logging.info(f"Created silent placeholder BGM file: {BGM_FILE}")
@@ -816,19 +1131,11 @@ if __name__ == "__main__":
816
  logging.error(f"Could not create placeholder BGM file: {e}")
817
 
818
 
819
- # Fix ImageMagick policy (attempt) - May need sudo/root privileges not available in all environments
820
- # def fix_imagemagick_policy():
821
- # policy_path = "/etc/ImageMagick-6/policy.xml" # Adjust path if needed
822
- # if os.path.exists(policy_path):
823
- # try:
824
- # # Use sed to modify the policy file (requires sed command)
825
- # os.system(f"sed -i 's/rights=\"none\" pattern=\"PS\"/rights=\"read|write\" pattern=\"PS\"/' {policy_path}")
826
- # os.system(f"sed -i 's/rights=\"none\" pattern=\"LABEL\"/rights=\"read|write\" pattern=\"LABEL\"/' {policy_path}")
827
- # os.system(f"sed -i 's/rights=\"none\" pattern=\"TEXT\"/rights=\"read|write\" pattern=\"TEXT\"/' {policy_path}") # Add TEXT pattern
828
- # logging.info(f"Attempted to update ImageMagick policy at {policy_path}")
829
- # except Exception as e:
830
- # logging.warning(f"Failed to automatically update ImageMagick policy: {e}. Manual adjustment might be needed if text rendering fails.")
831
- # fix_imagemagick_policy()
832
-
833
 
834
- iface.launch(debug=True, share=True) # Set share=True for public link if needed
 
 
1
+ # -*- coding: utf-8 -*-
2
  import gradio as gr
3
  import os
4
  import shutil
 
19
  CompositeVideoClip, TextClip, CompositeAudioClip, ColorClip
20
  )
21
  import moviepy.video.fx.all as vfx
22
+ import moviepy.audio.fx.all as afx # Import audio effects
23
  from pydub import AudioSegment
24
  from PIL import Image, ImageDraw, ImageFont
25
  from bs4 import BeautifulSoup
 
28
  import logging
29
 
30
  # --- Configuration ---
31
+ # WARNING: Hardcoding keys is generally discouraged due to security risks.
32
+ # Anyone who can see this code can use your keys.
33
+ PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
34
+ OPENROUTER_API_KEY = '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'
35
+
36
  OPENROUTER_MODEL = "mistralai/mistral-7b-instruct:free" # Using a known free model
37
  # OPENROUTER_MODEL = "mistralai/mistral-small-latest" # Or a small paid one if needed
38
 
39
+ TEMP_FOLDER_BASE = "/tmp/ai_doc_generator" # Use /tmp inside container
40
  OUTPUT_VIDEO_FILENAME = "final_documentary.mp4"
41
  USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
42
+ # Try a very common font likely available in the base python image or installed via apt
43
+ # If text fails, consider installing specific font packages in Dockerfile (e.g., fonts-freefont-ttf)
44
+ DEFAULT_FONT = "DejaVuSans.ttf" # Or try "FreeSans.ttf" if fonts-freefont-ttf is installed
45
  BGM_FILE = "background_music.mp3" # Optional: Place a royalty-free mp3 here
46
  BGM_VOLUME = 0.1 # Background music volume multiplier (0.0 to 1.0)
47
 
48
  # --- Logging Setup ---
49
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
50
 
51
+ # --- Kokoro TTS Initialization (Optional - Keep commented unless installed) ---
52
+ # KOKORO_ENABLED = False
53
+ # pipeline = None
54
  # try:
55
  # from kokoro import KPipeline
56
+ # device = 'cpu' # Default to CPU
57
+ # pipeline = KPipeline(lang_code='a', device=device)
 
58
  # KOKORO_ENABLED = True
59
  # logging.info("✅ Kokoro TTS Initialized.")
60
  # except ImportError:
 
64
  # logging.warning(f"⚠️ Error initializing Kokoro TTS: {e}. Using gTTS fallback.")
65
  # pipeline = None
66
 
67
+ # --- Helper Functions --- (Keep all helper functions from the previous version)
 
68
  def generate_script(topic, api_key, model):
69
  """Generates a documentary script using OpenRouter API."""
70
  logging.info(f"Generating script for topic: {topic}")
71
+ # Check if API key is placeholder or empty
72
+ if not api_key or "sk-or-v1-" not in api_key:
73
+ logging.error("❌ Script generation failed: OpenRouter API Key is missing or invalid.")
74
+ return None
75
+
76
  prompt = f"""Create a short documentary script about '{topic}'.
77
  The script should be structured as a sequence of scenes and narrations.
78
  Each scene description should be enclosed in [SCENE: description] tags. The description should be concise and suggest visuals (e.g., 'drone shot of mountains', 'close up of a historical artifact', 'archival footage of protests').
 
89
  """
90
  headers = {
91
  "Authorization": f"Bearer {api_key}",
92
+ "Content-Type": "application/json",
93
+ "HTTP-Referer": "http://localhost", # Some APIs require Referer
94
+ "X-Title": "AI Documentary Generator" # Optional custom title
95
  }
96
  data = {
97
  "model": model,
 
99
  "max_tokens": 1000, # Adjust as needed
100
  }
101
  try:
102
+ response = requests.post("https://openrouter.ai/api/v1/chat/completions", headers=headers, json=data, timeout=90) # Increased timeout
103
+ logging.debug(f"OpenRouter Request: Headers={headers}, Data={data}")
104
+ logging.debug(f"OpenRouter Response Status: {response.status_code}")
105
+ logging.debug(f"OpenRouter Response Body: {response.text[:500]}...") # Log beginning of response
106
+
107
  response.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)
108
  result = response.json()
109
+
110
+ if not result.get('choices') or not result['choices'][0].get('message') or not result['choices'][0]['message'].get('content'):
111
+ logging.error("❌ Script generation failed: Unexpected API response format (missing content).")
112
+ logging.debug(f"Full API response: {result}")
113
+ return None
114
+
115
  script_content = result['choices'][0]['message']['content']
116
  logging.info("✅ Script generated successfully.")
117
  # Basic validation
118
  if "[SCENE:" not in script_content or "[NARRATION:" not in script_content:
119
  logging.error("❌ Script generation failed: Output format incorrect.")
120
  logging.debug(f"Raw script output: {script_content}")
121
+ # Return the raw content anyway, maybe parsing can salvage something
122
+ # return None
123
+ return script_content # Let parsing try
124
  return script_content
125
+ except requests.exceptions.Timeout:
126
+ logging.error("❌ Script generation failed: API request timed out.")
127
+ return None
128
+ except requests.exceptions.HTTPError as e:
129
+ logging.error(f"❌ Script generation failed: HTTP error: {e.response.status_code} - {e.response.text}")
130
+ return None
131
  except requests.exceptions.RequestException as e:
132
  logging.error(f"❌ Script generation failed: API request error: {e}")
133
  return None
134
+ except (KeyError, IndexError, TypeError) as e:
135
+ logging.error(f"❌ Script generation failed: Error processing API response: {e}")
136
+ logging.debug(f"Raw API response text: {response.text}")
137
  return None
138
  except Exception as e:
139
  logging.error(f"❌ Script generation failed: An unexpected error occurred: {e}")
 
144
  """Parses the generated script into scene prompts and narration text."""
145
  logging.info("Parsing script...")
146
  if not script_text:
147
+ logging.error("❌ Script parsing failed: Input script text is empty.")
148
  return None
149
 
150
+ # Regex to find scene and narration blocks, more tolerant to whitespace variations
151
  pattern = re.compile(r"\[SCENE:\s*(.*?)\s*\]\s*\[NARRATION:\s*(.*?)\s*\]", re.DOTALL | re.IGNORECASE)
152
  matches = pattern.findall(script_text)
153
 
154
  if not matches:
155
+ # Try a simpler split if the strict pattern fails, maybe format was slightly off
156
+ logging.warning("⚠️ Strict [SCENE]/[NARRATION] parsing failed. Attempting fallback split.")
157
+ elements_temp = re.split(r'\[(SCENE|NARRATION):\s*', script_text, flags=re.IGNORECASE)
158
+ if len(elements_temp) > 1:
159
+ elements_temp = [el.strip().rstrip(']') for el in elements_temp if el and el.strip() not in ['SCENE', 'NARRATION']]
160
+ # Try to pair them up
161
+ paired_elements = []
162
+ for i in range(0, len(elements_temp) - 1, 2):
163
+ # Basic check if first looks like scene and second like narration
164
+ if len(elements_temp[i]) < 100 and len(elements_temp[i+1]) > 10: # Heuristic
165
+ paired_elements.append({"type": "scene", "prompt": elements_temp[i]})
166
+ paired_elements.append({"type": "narration", "text": elements_temp[i+1].replace('\n', ' ')})
167
+ if paired_elements:
168
+ logging.info(f"✅ Fallback parsing successful, found {len(paired_elements)//2} pairs.")
169
+ return paired_elements
170
+ else:
171
+ logging.error("❌ Fallback script parsing also failed.")
172
+ logging.debug(f"Script content for parsing: {script_text}")
173
+ return None
174
+ else:
175
+ logging.error("❌ Script parsing failed: No [SCENE]/[NARRATION] pairs found, and fallback split failed.")
176
+ logging.debug(f"Script content for parsing: {script_text}")
177
+ return None
178
+
179
 
180
  elements = []
181
  for scene_desc, narration_text in matches:
182
  scene_desc = scene_desc.strip()
183
+ narration_text = narration_text.strip().replace('\n', ' ').replace('"', "'") # Clean up narration, replace double quotes
184
  if scene_desc and narration_text:
185
  elements.append({"type": "scene", "prompt": scene_desc})
186
  elements.append({"type": "narration", "text": narration_text})
 
191
  logging.info(f"✅ Script parsed into {len(elements)//2} scene/narration pairs.")
192
  return elements
193
 
194
+ def search_pexels(query, api_key, media_type="videos", per_page=5, orientation="any"):
195
  """Searches Pexels API for videos or photos."""
196
+ if not api_key or api_key == "YOUR_PEXELS_API_KEY_HERE": # Check actual key too
197
  logging.warning("⚠️ Pexels API key not configured. Skipping search.")
198
  return []
199
+ logging.info(f"Searching Pexels {media_type} for: '{query}' (Orientation: {orientation})")
200
  base_url = f"https://api.pexels.com/{media_type}/search"
201
  headers = {"Authorization": api_key}
202
+ params = {"query": query, "per_page": per_page}
203
+ if orientation != "any":
204
+ params["orientation"] = orientation # landscape or portrait
205
 
206
  try:
207
+ response = requests.get(base_url, headers=headers, params=params, timeout=30) # Increased timeout
208
  response.raise_for_status()
209
  data = response.json()
210
 
211
  results = []
212
  media_key = 'videos' if media_type == 'videos' else 'photos'
213
  link_key = 'video_files' if media_type == 'videos' else 'src'
214
+ items = data.get(media_key, [])
215
+
216
+ if not items:
217
+ logging.info(f"No Pexels {media_type} results found for '{query}'.")
218
+ return []
219
 
220
+ for item in items:
221
  if media_type == 'videos':
222
+ video_links = sorted([vf for vf in item.get(link_key, []) if vf.get('link')], key=lambda x: x.get('width', 0), reverse=True)
 
223
  if video_links:
224
+ # Prefer HD (1920 or 1280 width) or highest quality
225
+ hd_link = next((link['link'] for link in video_links if link.get('width') in [1920, 1280]), None)
226
  if hd_link:
227
+ results.append({'url': hd_link, 'type': 'video', 'width': next(link['width'] for link in video_links if link['link'] == hd_link), 'height': next(link['height'] for link in video_links if link['link'] == hd_link)})
228
  elif video_links[0].get('link'): # Fallback to highest available
229
+ link_data = video_links[0]
230
+ results.append({'url': link_data['link'], 'type': 'video', 'width': link_data.get('width'), 'height': link_data.get('height')})
231
  else: # photos
 
232
  img_links = item.get(link_key, {})
233
+ # Prioritize larger sizes
234
+ chosen_url = img_links.get('large2x') or img_links.get('large') or img_links.get('original') or img_links.get('medium')
235
+ if chosen_url:
236
+ results.append({'url': chosen_url, 'type': 'image', 'width': item.get('width'), 'height': item.get('height')})
237
+
238
+ logging.info(f"✅ Found {len(results)} Pexels {media_type} results for '{query}'.")
 
 
239
  return results
240
 
241
+ except requests.exceptions.Timeout:
242
+ logging.error(f"❌ Pexels API request timed out for '{query}'.")
243
+ return []
244
+ except requests.exceptions.HTTPError as e:
245
+ logging.error(f"❌ Pexels API HTTP error for '{query}': {e.response.status_code} - {e.response.text}")
246
+ return []
247
  except requests.exceptions.RequestException as e:
248
+ logging.error(f"❌ Pexels API request error for '{query}': {e}")
249
  return []
250
  except Exception as e:
251
+ logging.error(f"❌ Error processing Pexels response for '{query}': {e}")
252
  traceback.print_exc()
253
  return []
254
 
255
  def download_media(url, save_dir):
256
  """Downloads media (video or image) from a URL."""
257
+ logging.info(f"Downloading media from: {url[:100]}...") # Log truncated URL
258
  try:
259
+ response = requests.get(url, stream=True, timeout=120, headers={'User-Agent': USER_AGENT}) # Increased timeout
260
  response.raise_for_status()
261
 
262
+ # Try to get filename
263
+ filename = None
264
+ cd = response.headers.get('content-disposition')
265
+ if cd:
266
+ fname = re.findall('filename="?(.+)"?', cd)
267
+ if fname:
268
+ filename = fname[0]
269
+
270
+ if not filename:
271
+ # Basic filename from URL path
272
+ filename = url.split('/')[-1].split('?')[0]
273
+ # Clean filename and ensure extension
274
+ filename = re.sub(r'[^\w\.\-]', '_', filename) # Replace invalid chars
275
+ if '.' not in filename[-5:]: # Check last 5 chars for extension
276
+ # Guess extension from content type
277
+ content_type = response.headers.get('content-type', '').lower()
278
+ ext = '.vid' # default video extension
279
+ if 'jpeg' in content_type or 'jpg' in content_type: ext = '.jpg'
280
+ elif 'png' in content_type: ext = '.png'
281
+ elif 'mp4' in content_type: ext = '.mp4'
282
+ elif 'video' in content_type: ext = '.mp4' # Guess mp4 for generic video
283
+ elif 'image' in content_type: ext = '.jpg' # Guess jpg for generic image
284
+ filename = f"media_{int(time.time())}{ext}"
285
+
286
+ # Ensure filename is not excessively long
287
+ if len(filename) > 100:
288
+ name, ext = os.path.splitext(filename)
289
+ filename = name[:95] + ext
290
 
291
 
292
  save_path = os.path.join(save_dir, filename)
293
+ logging.info(f"Saving media to: {save_path}")
294
 
295
  with open(save_path, 'wb') as f:
296
+ for chunk in response.iter_content(chunk_size=8192*4): # Larger chunk size
297
  f.write(chunk)
298
 
299
+ # Verify file size (basic check)
300
+ file_size = os.path.getsize(save_path)
301
+ if file_size < 1024: # Less than 1KB might indicate an issue
302
+ logging.warning(f"⚠️ Downloaded media file size is small ({file_size} bytes). Check file: {save_path}")
303
+
304
+ logging.info(f"✅ Media downloaded successfully ({file_size / 1024:.1f} KB).")
305
  return save_path
306
+ except requests.exceptions.Timeout:
307
+ logging.error(f"❌ Media download timed out: {url}")
308
+ return None
309
  except requests.exceptions.RequestException as e:
310
  logging.error(f"❌ Media download failed: Request error: {e}")
311
  return None
 
315
  return None
316
 
317
  def generate_tts(text, lang, save_dir, segment_index):
318
+ """Generates TTS audio using gTTS."""
319
  filename = f"narration_{segment_index}.mp3"
320
  filepath = os.path.join(save_dir, filename)
321
+ # Clean text for TTS - remove characters that might cause issues
322
+ text = re.sub(r'[\[\]\*#]', '', text) # Remove brackets, asterisks, hash
323
+ text = text.strip()
324
+ if not text:
325
+ logging.error(f"❌ TTS failed for segment {segment_index}: Text is empty after cleaning.")
326
+ return None, 0
327
+
328
+ logging.info(f"Generating TTS for segment {segment_index}: '{text[:60]}...'")
329
 
330
  audio_duration = 0
331
  success = False
332
 
333
+ try:
334
+ logging.info("Attempting TTS generation with gTTS...")
335
+ tts = gTTS(text=text, lang=lang, slow=False) # Use slow=False for normal speed
336
+ tts.save(filepath)
337
+ # Get duration using soundfile as primary method
 
 
 
 
 
 
 
 
 
 
338
  try:
339
+ audio_info = sf.info(filepath)
340
+ audio_duration = audio_info.duration
341
+ if audio_duration < 0.1: # Check for invalid duration from sf.info
342
+ raise ValueError("Soundfile reported near-zero duration")
343
+ except Exception as e_dur_sf:
344
+ logging.warning(f"⚠️ Could not get accurate duration using soundfile ({e_dur_sf}). Trying pydub...")
345
+ try:
346
+ # Ensure file is written before pydub tries to read
347
+ time.sleep(0.1)
348
+ audio_seg = AudioSegment.from_mp3(filepath)
349
+ audio_duration = len(audio_seg) / 1000.0
350
+ except Exception as e_dur_pd:
351
+ logging.error(f"❌ Failed to get duration with pydub as well ({e_dur_pd}). Estimating duration.")
352
+ # Estimate duration based on words (rough fallback)
353
+ words_per_minute = 140 # Adjusted estimate
354
+ num_words = len(text.split())
355
+ audio_duration = max(1.0, (num_words / words_per_minute) * 60) # Ensure at least 1 second
356
+
357
+ # Final duration sanity check
358
+ if audio_duration < 0.5:
359
+ logging.warning(f"⚠️ Calculated audio duration is very short ({audio_duration:.2f}s). Setting minimum to 1.0s.")
360
+ audio_duration = 1.0
361
+
362
+
363
+ logging.info(f"✅ gTTS generated successfully ({audio_duration:.2f}s).")
364
+ success = True
365
+ except gTTS.gTTSError as e_gtts:
366
+ logging.error(f"❌ gTTS API Error: {e_gtts}")
367
+ success = False
368
+ except Exception as e:
369
+ logging.error(f"❌ gTTS failed with unexpected error: {e}")
370
+ traceback.print_exc()
371
+ success = False
372
 
373
  return filepath if success else None, audio_duration if success else 0
374
 
375
+
376
  def resize_media_to_fill(clip, target_size):
377
  """Resizes a MoviePy clip (video or image) to fill the target size, cropping if necessary."""
 
378
  target_w, target_h = target_size
379
+ if target_w == 0 or target_h == 0:
380
+ logging.error("Target size cannot have zero dimensions.")
381
+ return clip # Return original clip
382
+
383
  target_aspect = target_w / target_h
384
 
385
+ # Ensure clip has size attribute
386
+ if not hasattr(clip, 'size'):
387
+ logging.error("Input clip does not have 'size' attribute.")
388
+ return clip
389
  clip_w, clip_h = clip.size
390
+ if clip_w == 0 or clip_h == 0:
391
+ logging.warning("Input clip has zero dimensions. Cannot resize.")
392
+ # Return a black clip of target size instead?
393
+ return ColorClip(size=target_size, color=(0,0,0), duration=clip.duration if hasattr(clip, 'duration') else 1)
394
+
395
+
396
  clip_aspect = clip_w / clip_h
397
 
398
  if abs(clip_aspect - target_aspect) < 0.01: # Aspect ratios are close enough
399
+ # Just resize to fit width, height should scale correctly
400
+ return clip.resize(width=target_w)
401
 
402
  if clip_aspect > target_aspect:
403
+ # Clip is wider than target: Resize based on height, then crop width
404
  resized_clip = clip.resize(height=target_h)
405
+ # Calculate crop coordinates
406
+ crop_x_center = resized_clip.w / 2
407
+ crop_x1 = max(0, int(crop_x_center - target_w / 2))
408
+ crop_x2 = min(resized_clip.w, int(crop_x_center + target_w / 2))
409
+ # Adjust width if rounding caused issues
 
 
 
410
  if crop_x2 - crop_x1 != target_w:
411
+ crop_x2 = crop_x1 + target_w
412
+ if crop_x2 > resized_clip.w: # Ensure it doesn't go out of bounds
413
+ crop_x2 = resized_clip.w
414
+ crop_x1 = max(0, crop_x2 - target_w)
415
 
416
+ return resized_clip.fx(vfx.crop, x1=crop_x1, y1=0, width=target_w, height=target_h)
417
  else:
418
+ # Clip is taller than target: Resize based on width, then crop height
419
  resized_clip = clip.resize(width=target_w)
420
+ # Calculate crop coordinates
421
+ crop_y_center = resized_clip.h / 2
422
+ crop_y1 = max(0, int(crop_y_center - target_h / 2))
423
+ crop_y2 = min(resized_clip.h, int(crop_y_center + target_h / 2))
424
+ # Adjust height if rounding caused issues
 
 
 
425
  if crop_y2 - crop_y1 != target_h:
426
  crop_y2 = crop_y1 + target_h
427
+ if crop_y2 > resized_clip.h: # Ensure it doesn't go out of bounds
428
+ crop_y2 = resized_clip.h
429
+ crop_y1 = max(0, crop_y2 - target_h)
430
+
431
+ return resized_clip.fx(vfx.crop, x1=0, y1=crop_y1, width=target_w, height=target_h)
432
 
 
433
 
434
+ def apply_ken_burns(image_clip, duration, target_size, zoom_factor=1.15, direction='zoom_out'):
435
+ """Applies Ken Burns effect (zoom in/out, simple pan) to an ImageClip."""
436
+ if not isinstance(image_clip, ImageClip):
437
+ logging.warning("Ken Burns effect can only be applied to ImageClips.")
438
+ return image_clip.set_duration(duration) # Just set duration if not image
439
 
440
+ # Ensure the input clip already matches the target size (or resize it)
 
 
441
  if image_clip.size != target_size:
442
+ logging.info("Applying Ken Burns: Resizing image to fill target size first.")
443
  image_clip = resize_media_to_fill(image_clip, target_size)
444
 
445
+ # Make sure the base clip has the correct duration before applying effects
446
+ image_clip = image_clip.set_duration(duration)
447
+
448
+ img_w, img_h = image_clip.size
449
+
450
  # Define the resize function based on time `t`
451
  def resize_func(t):
452
+ if direction == 'zoom_out':
453
+ # Zoom out: start at zoom_factor, end at 1.0
454
+ current_zoom = 1 + (zoom_factor - 1) * (1 - t / duration)
455
+ elif direction == 'zoom_in':
456
+ # Zoom in: start at 1.0, end at zoom_factor
457
+ current_zoom = 1 + (zoom_factor - 1) * (t / duration)
458
+ else: # No zoom
459
+ current_zoom = 1.0
460
  return current_zoom
461
 
462
+ # Apply zoom effect
 
463
  zoomed_clip = image_clip.fx(vfx.resize, resize_func)
464
+
465
+ # Simple Pan (optional, can be randomized)
466
+ # Example: Pan slightly horizontally
467
+ pan_intensity = 0.05 # Fraction of width/height to pan
468
+ start_x_offset = 0
469
+ end_x_offset = pan_intensity * img_w * random.choice([-1, 1]) # Pan left or right
470
+ start_y_offset = 0
471
+ end_y_offset = pan_intensity * img_h * random.choice([-1, 1]) # Pan up or down
472
+
473
+ def position_func(t):
474
+ current_x = start_x_offset + (end_x_offset - start_x_offset) * (t / duration)
475
+ current_y = start_y_offset + (end_y_offset - start_y_offset) * (t / duration)
476
+ # Position is relative to the zoomed clip's center
477
+ center_x = zoomed_clip.w / 2 - current_x
478
+ center_y = zoomed_clip.h / 2 - current_y
479
+ return (center_x - target_size[0]/2, center_y - target_size[1]/2) # Top-left corner for crop
480
+
481
+ # Apply cropping based on the calculated position
482
+ # Use a function for position to simulate pan
483
+ final_clip = zoomed_clip.fx(vfx.crop, x1=lambda t: position_func(t)[0], y1=lambda t: position_func(t)[1], width=target_size[0], height=target_size[1])
484
 
485
  return final_clip.set_duration(duration)
486
 
487
 
488
+ def find_font(preferred_font=DEFAULT_FONT):
489
+ """Tries to find a usable font file."""
490
+ # 1. Check if preferred font exists directly (e.g., uploaded)
491
+ if os.path.exists(preferred_font):
492
+ logging.info(f"Using specified font: {preferred_font}")
493
+ return preferred_font
494
+
495
+ # 2. Common system font paths (Linux)
496
+ font_paths = [
497
+ "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
498
+ "/usr/share/fonts/truetype/freefont/FreeSans.ttf",
499
+ "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
500
+ "/usr/share/fonts/truetype/msttcorefonts/Arial.ttf", # If installed
501
+ # Add more paths if needed
502
+ ]
503
+ for path in font_paths:
504
+ if os.path.exists(path):
505
+ logging.info(f"Found system font: {path}")
506
+ return path
507
+
508
+ # 3. Use MoviePy's default if nothing else is found
509
+ logging.warning(f"Could not find specified font '{preferred_font}' or common system fonts. Relying on MoviePy's default.")
510
+ return None # Let MoviePy use its internal default
511
+
512
+
513
+ def create_caption_clip(text, duration, clip_size, font_size=None, font_path=None, color='white', stroke_color='black', stroke_width=1.5, position=('center', 'bottom'), margin_percent=5):
514
+ """Creates a MoviePy TextClip for captions with wrapping and background."""
515
  width, height = clip_size
516
+ max_text_width = width * 0.85 # Allow text to occupy 85% of the width
517
+ margin = int(height * (margin_percent / 100)) # Margin based on percentage of height
518
 
519
  if font_size is None:
520
+ font_size = max(20, int(height / 28)) # Dynamic font size based on height
521
+
522
+ actual_font_path = find_font(font_path or DEFAULT_FONT)
523
 
524
+ # Use Pillow for reliable text wrapping (MoviePy's can be inconsistent)
525
  try:
526
+ pil_font = ImageFont.truetype(actual_font_path, font_size) if actual_font_path else ImageFont.load_default()
 
527
  except IOError:
528
+ logging.warning(f"Failed to load font '{actual_font_path}' with Pillow. Using default.")
529
+ pil_font = ImageFont.load_default()
530
+ font_size = 18 # Reset font size if using default
531
 
532
  words = text.split()
533
  lines = []
534
  current_line = ""
 
535
 
536
+ # Simple greedy word wrapping using Pillow's textlength
537
  for word in words:
538
  test_line = f"{current_line} {word}".strip()
539
+ # Use textlength for more accurate width calculation
540
+ line_width = pil_font.getlength(test_line)
541
+ if line_width <= max_text_width:
542
  current_line = test_line
543
  else:
544
  if current_line: # Add the previous line if it wasn't empty
545
  lines.append(current_line)
546
  current_line = word # Start new line with the current word
547
  # Handle case where a single word is too long
548
+ if pil_font.getlength(current_line) > max_text_width:
549
+ logging.warning(f"Word '{current_line}' might be too long for caption width.")
550
+ # Basic split for very long words (optional)
551
+ # while pil_font.getlength(current_line) > max_text_width:
552
+ # for i in range(len(current_line)-1, 0, -1):
553
+ # if pil_font.getlength(current_line[:i]) <= max_text_width:
554
+ # lines.append(current_line[:i] + '-')
555
+ # current_line = current_line[i:]
556
+ # break
557
+ # else: # Cannot split further
558
+ # break # Avoid infinite loop
559
 
560
  if current_line: # Add the last line
561
  lines.append(current_line)
 
568
  wrapped_text,
569
  fontsize=font_size,
570
  color=color,
571
+ font=actual_font_path if actual_font_path else 'Arial', # Provide a common fallback font name
572
  stroke_color=stroke_color,
573
  stroke_width=stroke_width,
574
+ method='caption', # Use caption method for potential internal wrapping
575
  size=(int(max_text_width), None), # Constrain width for wrapping
576
  align='center'
577
  )
578
+
579
+ # Add a semi-transparent background for better readability
580
+ bg_color = (0, 0, 0) # Black background
581
+ bg_opacity = 0.5
582
+ # Create a ColorClip slightly larger than the text
583
+ txt_width, txt_height = caption.size
584
+ bg_padding = int(font_size * 0.3) # Padding around text
585
+ bg_clip = ColorClip(
586
+ size=(txt_width + 2 * bg_padding, txt_height + 2 * bg_padding),
587
+ color=bg_color,
588
+ ismask=False,
589
+ duration=duration
590
+ ).set_opacity(bg_opacity)
591
+
592
+ # Composite text on background
593
+ caption_with_bg = CompositeVideoClip([
594
+ bg_clip.set_position('center'),
595
+ caption.set_position('center')
596
+ ], size=bg_clip.size).set_duration(duration)
597
+
598
+
599
  except Exception as e:
600
+ logging.error(f"Error creating TextClip (maybe font issue?): {e}. Using simple TextClip.")
601
+ traceback.print_exc()
602
+ # Fallback to simpler TextClip without stroke/bg if needed
603
+ caption_with_bg = TextClip(wrapped_text, fontsize=font_size, color=color, method='caption', size=(int(max_text_width), None), align='center').set_duration(duration)
604
 
605
 
606
  # Set position with margin
607
+ # MoviePy position can be tricky, calculate manually
608
+ final_pos = ['center', 'center'] # Default
609
+ caption_w, caption_h = caption_with_bg.size
610
 
611
+ if isinstance(position, tuple) or isinstance(position, list):
612
+ pos_x, pos_y = position
613
+ # Horizontal positioning
614
+ if 'left' in pos_x: final_pos[0] = margin
615
+ elif 'right' in pos_x: final_pos[0] = width - caption_w - margin
616
+ else: final_pos[0] = (width - caption_w) / 2 # Center default
617
+ # Vertical positioning
618
+ if 'top' in pos_y: final_pos[1] = margin
619
+ elif 'bottom' in pos_y: final_pos[1] = height - caption_h - margin
620
+ else: final_pos[1] = (height - caption_h) / 2 # Center default
621
 
622
+ # Ensure positions are integers
623
+ final_pos = (int(final_pos[0]), int(final_pos[1]))
624
+
625
+ caption_with_bg = caption_with_bg.set_position(final_pos).set_duration(duration)
626
+ return caption_with_bg
627
 
628
 
629
def create_clip(media_path, media_type, audio_path, audio_duration, target_size, add_captions, narration_text, segment_index):
    """Build one timed segment: visual media + narration audio + optional captions.

    Args:
        media_path: Path to the downloaded video/image, or None for 'color'.
        media_type: 'video', 'image', or anything else (treated as black screen).
        audio_path: Path to the narration audio file for this segment.
        audio_duration: Expected narration length in seconds (re-measured from file).
        target_size: (width, height) of the output frame.
        add_captions: Whether to overlay the narration text as captions.
        narration_text: Text used for the caption overlay.
        segment_index: Zero-based segment number (for logging only).

    Returns:
        The composited MoviePy clip, or None if the segment could not be built.
    """
    logging.info(f"--- Creating Clip {segment_index + 1} ---")
    logging.info(f"Type: {media_type}, Audio Duration: {audio_duration:.2f}s, Target Size: {target_size}")
    visual = None
    narration = None

    try:
        # --- Narration audio ---
        logging.info("Loading audio...")
        narration = AudioFileClip(audio_path)
        # Trust the file's measured duration when it disagrees noticeably
        # with the caller's estimate (tolerance 0.2s).
        if abs(narration.duration - audio_duration) > 0.2:
            logging.warning(f"Audio file duration ({narration.duration:.2f}s) differs significantly from expected ({audio_duration:.2f}s). Using file duration.")
            audio_duration = narration.duration
        # Enforce a floor so downstream clips never get a near-zero duration.
        if audio_duration < 0.5:
            logging.warning(f"Audio duration is very short ({audio_duration:.2f}s). Clamping to 0.5s.")
            audio_duration = 0.5
        narration = narration.subclip(0, audio_duration)
        logging.info("Audio loaded.")

        # --- Visual track (video, image, or plain color) ---
        if media_type == 'video':
            logging.info(f"Loading video: {media_path}")
            try:
                # target_resolution is (height, width); audio from the file is discarded.
                raw_video = VideoFileClip(media_path, audio=False, target_resolution=(target_size[1], target_size[0]))
                if raw_video.duration >= audio_duration:
                    timed_video = raw_video.subclip(0, audio_duration)
                else:
                    # Shorter footage than narration: loop it to cover the audio.
                    logging.info(f"Video duration ({raw_video.duration:.2f}s) shorter than audio ({audio_duration:.2f}s). Looping video.")
                    timed_video = raw_video.fx(vfx.loop, duration=audio_duration)
                visual = resize_media_to_fill(timed_video, target_size)
                logging.info("Video processed.")
                # NOTE: raw_video is intentionally not closed here; the timed
                # clip still shares its reader.
            except Exception as e:
                logging.error(f"❌ Failed to load/process video '{media_path}': {e}. Using black clip.")
                traceback.print_exc()
                visual = ColorClip(size=target_size, color=(0, 0, 0), duration=audio_duration)

        elif media_type == 'image':
            logging.info(f"Loading image: {media_path}")
            try:
                still = ImageClip(media_path)
                # Ken Burns handles resizing and sets the duration.
                visual = apply_ken_burns(still, audio_duration, target_size)
                logging.info("Image processed with Ken Burns effect.")
            except Exception as e:
                logging.error(f"❌ Failed to load/process image '{media_path}': {e}. Using black clip.")
                traceback.print_exc()
                visual = ColorClip(size=target_size, color=(0, 0, 0), duration=audio_duration)

        else:
            # 'color' or any unexpected media type falls back to black.
            logging.info(f"Media type is '{media_type}'. Using black background.")
            visual = ColorClip(size=target_size, color=(0, 0, 0), duration=audio_duration)

        # --- Attach narration to the visual track ---
        if visual is not None and narration is not None:
            visual = visual.set_duration(audio_duration).set_audio(narration)
            logging.info("Audio attached to visual clip.")
        elif visual is not None:
            logging.warning("Audio clip was not loaded successfully. Video will be silent.")
            visual = visual.set_duration(audio_duration)
        else:
            logging.error("❌ Failed to create main visual clip. Skipping segment.")
            if narration:
                narration.close()
            return None

        # --- Optional caption overlay ---
        composed = visual
        if add_captions and narration_text:
            logging.info("Adding captions...")
            try:
                caption_layer = create_caption_clip(
                    narration_text,
                    audio_duration,
                    target_size,
                    font_path=DEFAULT_FONT,
                )
                composed = CompositeVideoClip([visual, caption_layer], size=target_size)
                logging.info("Captions added.")
            except Exception as e:
                # Captions are best-effort: fall back to the bare visual clip.
                logging.error(f"❌ Failed to create or composite captions: {e}")
                traceback.print_exc()
                composed = visual
        else:
            logging.info("Captions disabled or no narration text.")

        logging.info(f"✅ Clip {segment_index + 1} created successfully.")
        return composed

    except Exception as e:
        logging.error(f"❌ Failed to create clip {segment_index + 1}: {e}")
        traceback.print_exc()
        # Release whatever was opened before the failure.
        if visual and hasattr(visual, 'close'):
            visual.close()
        if narration and hasattr(narration, 'close'):
            narration.close()
        return None
748
+
749
def add_background_music(video_clip, music_file=BGM_FILE, volume=BGM_VOLUME):
    """Mix looping background music under the clip's existing narration audio.

    Args:
        video_clip: Final concatenated video clip (may or may not carry audio).
        music_file: Path to an mp3 to use as BGM; skipped if missing/tiny.
        volume: Gain multiplier applied to the BGM track (0.0-1.0).

    Returns:
        A new clip with the combined audio, or the original clip unchanged
        when the music file is unusable or mixing fails.
    """
    if not os.path.exists(music_file):
        logging.warning(f"Background music file '{music_file}' not found. Skipping BGM.")
        return video_clip
    # A sub-1KB mp3 is almost certainly empty/corrupt — skip it.
    if os.path.getsize(music_file) < 1024:
        logging.warning(f"Background music file '{music_file}' is very small. Skipping BGM.")
        return video_clip

    logging.info(f"Adding background music from {music_file}")
    bgm_clip = None
    original_audio = video_clip.audio  # narration track; may be None

    try:
        # FIX: the module only imports moviepy.video.fx.all as vfx, so `afx`
        # was an unbound name and looping BGM raised NameError. audio_loop
        # lives in the audio fx package — import it here.
        import moviepy.audio.fx.all as afx

        bgm_clip = AudioFileClip(music_file)
        video_duration = video_clip.duration

        # Loop or trim the music so it covers the video exactly.
        if bgm_clip.duration < video_duration:
            logging.info(f"Looping BGM (duration {bgm_clip.duration:.2f}s) for video ({video_duration:.2f}s)")
            bgm_clip = bgm_clip.fx(afx.audio_loop, duration=video_duration)
        else:
            bgm_clip = bgm_clip.subclip(0, video_duration)

        bgm_clip = bgm_clip.volumex(volume)

        # Combine with the existing narration, if any.
        if original_audio:
            logging.info("Combining narration audio with BGM.")
            # Safeguard: clips should already match the video duration, but
            # trim the narration if it somehow runs long.
            if abs(original_audio.duration - video_duration) > 0.1:
                logging.warning("Original audio duration doesn't match video, trimming/padding original audio.")
                original_audio = original_audio.subclip(0, video_duration)
            combined_audio = CompositeAudioClip([original_audio, bgm_clip])
        else:
            logging.warning("Video clip has no primary audio. Adding BGM only.")
            combined_audio = bgm_clip

        video_clip_with_bgm = video_clip.set_audio(combined_audio)
        logging.info("✅ Background music added.")
        # bgm_clip / original_audio are NOT closed here: the composite audio
        # still references their readers until the video is written.
        return video_clip_with_bgm

    except Exception as e:
        logging.error(f"❌ Failed to add background music: {e}")
        traceback.print_exc()
        if bgm_clip and hasattr(bgm_clip, 'close'):
            bgm_clip.close()
        # Degrade gracefully: return the clip without BGM.
        return video_clip
 
810
 
811
  # --- Main Gradio Function ---
812
def generate_video_process(topic, resolution_choice, add_captions_option, add_bgm_option, progress=gr.Progress(track_tqdm=True)):
    """The main function called by Gradio to generate the video.

    Pipeline: script (OpenRouter) -> parse -> per-segment TTS + Pexels media ->
    per-segment clips -> concatenate -> optional BGM -> write mp4.

    Returns:
        (status_log_text, final_video_path_or_None)
    """
    start_time = time.time()
    status_log = ["--- Starting Video Generation ---"]
    temp_dir = None
    final_video_path = None
    clips = []  # individual segment clips, tracked for cleanup
    # FIX: final_clip must exist before the try block — the finally clause
    # reads it, and early returns/exceptions previously raised
    # UnboundLocalError during cleanup.
    final_clip = None

    try:
        temp_dir = tempfile.mkdtemp(prefix=TEMP_FOLDER_BASE + "_")
        status_log.append(f"Temporary directory created: {temp_dir}")
        logging.info(f"Using temp directory: {temp_dir}")

        # (width, height) — portrait for shorts, landscape for Full HD.
        target_size = (1920, 1080) if resolution_choice == "Full HD (16:9)" else (1080, 1920)
        pexels_orientation = "landscape" if resolution_choice == "Full HD (16:9)" else "portrait"
        status_log.append(f"⚙️ Target resolution: {target_size[0]}x{target_size[1]}")
        status_log.append(f"⚙️ Pexels orientation: {pexels_orientation}")
        status_log.append(f"⚙️ Add Captions: {add_captions_option}")
        status_log.append(f"⚙️ Add BGM: {add_bgm_option}")

        # --- 1. Generate Script ---
        progress(0.1, desc="Generating script...")
        status_log.append("\n🔄 Generating script...")
        script = generate_script(topic, OPENROUTER_API_KEY, OPENROUTER_MODEL)
        if not script:
            status_log.append("❌ Script generation failed. Check API key, model, and connection.")
            return "\n".join(status_log), None
        status_log.append("✅ Script generated.")

        # --- 2. Parse Script ---
        progress(0.2, desc="Parsing script...")
        status_log.append("\n🔄 Parsing script...")
        elements = parse_script(script)
        if not elements or len(elements) < 2:
            status_log.append("❌ Script parsing failed. Check script format from LLM.")
            return "\n".join(status_log), None
        num_segments = len(elements) // 2
        status_log.append(f"✅ Script parsed into {num_segments} segments.")

        # --- 3. Process Segments (elements alternate scene/narration) ---
        total_duration = 0
        for i in range(0, len(elements), 2):
            segment_index = i // 2
            progress_val = 0.2 + (0.6 * (segment_index / num_segments))
            progress(progress_val, desc=f"Processing segment {segment_index + 1}/{num_segments}")

            # Guard against a trailing scene with no narration partner.
            if i + 1 >= len(elements):
                logging.warning(f"⚠️ Found scene element at index {i} but no corresponding narration. Skipping.")
                continue

            scene_elem = elements[i]
            narration_elem = elements[i + 1]

            if scene_elem.get("type") != "scene" or narration_elem.get("type") != "narration":
                logging.warning(f"⚠️ Unexpected element types at index {i}/{i+1}. Skipping segment.")
                continue

            scene_prompt = scene_elem.get('prompt', '').strip()
            narration_text = narration_elem.get('text', '').strip()

            if not scene_prompt or not narration_text:
                logging.warning(f"⚠️ Segment {segment_index + 1} has empty scene prompt or narration. Skipping.")
                status_log.append(f"\n--- Segment {segment_index + 1}/{num_segments}: SKIPPED (Empty prompt/narration) ---")
                continue

            status_log.append(f"\n--- Segment {segment_index + 1}/{num_segments} ---")
            status_log.append(f"📝 Scene: {scene_prompt}")
            status_log.append(f"🗣️ Narration: {narration_text[:100]}...")

            # 3a. Generate TTS
            status_log.append("🔄 Generating narration...")
            tts_path, tts_duration = generate_tts(narration_text, 'en', temp_dir, segment_index)
            if not tts_path or tts_duration <= 0.1:
                status_log.append(f"⚠️ TTS failed. Skipping segment.")
                logging.warning(f"Skipping segment {segment_index+1} due to TTS failure.")
                continue
            status_log.append(f"✅ Narration generated ({tts_duration:.2f}s)")
            total_duration += tts_duration

            # 3b. Search & Download Media: video first, then image, then black.
            status_log.append("🔄 Finding media...")
            media_path = None
            media_type = None

            video_results = search_pexels(scene_prompt, PEXELS_API_KEY, media_type="videos", orientation=pexels_orientation)
            if video_results:
                selected_media = random.choice(video_results)
                status_log.append(f"⬇️ Downloading Pexels video...")
                media_path = download_media(selected_media['url'], temp_dir)
                if media_path:
                    media_type = 'video'
                    status_log.append(f"✅ Video downloaded.")
                else:
                    status_log.append("⚠️ Video download failed.")

            if not media_path:
                status_log.append("🔄 No suitable video. Searching images...")
                image_results = search_pexels(scene_prompt, PEXELS_API_KEY, media_type="photos", orientation=pexels_orientation)
                if image_results:
                    selected_media = random.choice(image_results)
                    status_log.append(f"⬇️ Downloading Pexels image...")
                    media_path = download_media(selected_media['url'], temp_dir)
                    if media_path:
                        media_type = 'image'
                        status_log.append(f"✅ Image downloaded.")
                    else:
                        status_log.append("⚠️ Image download failed.")

            if not media_path:
                status_log.append(f"⚠️ No media found for '{scene_prompt}'. Using black screen.")
                media_type = 'color'
                media_path = None

            # 3c. Create Clip
            status_log.append(f"🎬 Creating clip...")
            clip = create_clip(
                media_path=media_path,
                media_type=media_type,
                audio_path=tts_path,
                audio_duration=tts_duration,
                target_size=target_size,
                add_captions=add_captions_option,
                narration_text=narration_text,
                segment_index=segment_index,
            )

            if clip:
                clips.append(clip)
                status_log.append(f"✅ Clip created.")
            else:
                status_log.append(f"❌ Failed to create clip. Skipping segment.")
                logging.error(f"Failed to create clip {segment_index+1}, skipping.")

        if not clips:
            status_log.append("\n❌ No valid clips were created. Cannot generate video.")
            return "\n".join(status_log), None

        status_log.append(f"\n✅ Successfully created {len(clips)} video clips.")
        status_log.append(f"⏱️ Estimated total video duration: {total_duration:.2f} seconds.")

        # --- 4. Concatenate Clips ---
        progress(0.85, desc="Combining video clips...")
        status_log.append("\n🔄 Combining video clips...")
        try:
            # method="compose" copes better with mixed sources; the tiny
            # negative padding overlaps clips slightly to hide seams.
            final_clip = concatenate_videoclips(clips, method="compose", padding=-0.1)
            status_log.append("✅ Clips combined successfully.")
        except Exception as e:
            status_log.append(f"❌ Error concatenating clips: {e}")
            logging.error(f"Concatenation failed: {e}")
            traceback.print_exc()
            final_clip = None

        # --- 5. Add Background Music (Optional) ---
        if final_clip and add_bgm_option:
            progress(0.9, desc="Adding background music...")
            status_log.append("\n🔄 Adding background music...")
            final_clip = add_background_music(final_clip, music_file=BGM_FILE, volume=BGM_VOLUME)

        # --- 6. Write Final Video ---
        if final_clip:
            progress(0.95, desc="Writing final video file...")
            status_log.append("\n💾 Writing final video file (this may take time)...")
            output_path = os.path.join(temp_dir, OUTPUT_VIDEO_FILENAME)
            # Quiet down moviepy's writer during the (long) encode.
            writer_logger = logging.getLogger("moviepy_writer")
            writer_logger.setLevel(logging.WARNING)

            try:
                final_clip.write_videofile(
                    output_path,
                    codec='libx264',
                    audio_codec='aac',
                    temp_audiofile=os.path.join(temp_dir, 'temp_audio.aac'),
                    remove_temp=True,
                    preset='medium',  # balance of speed vs. size
                    fps=24,
                    threads=max(1, os.cpu_count() // 2),
                    logger=None,
                )
                status_log.append(f" Final video saved: {os.path.basename(output_path)}")
                final_video_path = output_path
            except Exception as e:
                status_log.append(f"❌ Error writing final video file: {e}")
                logging.error(f"Final video write failed: {e}")
                traceback.print_exc()
                final_video_path = None
        else:
            status_log.append("\n❌ Skipping final video write because clip combination failed.")
            final_video_path = None

    except Exception as e:
        status_log.append(f"\n❌ An critical error occurred during video generation: {e}")
        logging.error("An critical error occurred in generate_video_process:")
        logging.error(traceback.format_exc())
        final_video_path = None

    finally:
        # --- 7. Cleanup ---
        status_log.append("\n🧹 Cleaning up resources...")
        for i, clip in enumerate(clips):
            try:
                if clip:
                    clip.close()
                logging.debug(f"Closed clip {i+1}")
            except Exception as e_close:
                logging.warning(f"Error closing clip {i+1}: {e_close}")
        try:
            if final_clip:
                final_clip.close()
            logging.debug("Closed final clip")
        except Exception as e_final_close:
            logging.warning(f"Error closing final clip: {e_final_close}")

        # FIX: the finished mp4 lives inside temp_dir, which is deleted below —
        # previously the function returned a path to a removed file. Move the
        # video to a stable location first.
        if final_video_path and temp_dir and final_video_path.startswith(temp_dir) and os.path.exists(final_video_path):
            persistent_path = os.path.join(tempfile.gettempdir(), f"doc_{int(time.time())}_{OUTPUT_VIDEO_FILENAME}")
            try:
                shutil.move(final_video_path, persistent_path)
                final_video_path = persistent_path
                logging.info(f"Moved final video to {persistent_path}")
            except Exception as e_move:
                logging.error(f"Could not move final video out of temp dir: {e_move}")

        if temp_dir and os.path.exists(temp_dir):
            try:
                # Retry removal a few times: encoders can briefly hold handles.
                attempts = 3
                for attempt in range(attempts):
                    try:
                        shutil.rmtree(temp_dir)
                        status_log.append(f" Temporary directory removed: {os.path.basename(temp_dir)}")
                        logging.info(f"Cleaned up temp directory: {temp_dir}")
                        break
                    except OSError as e_rm:
                        if attempt < attempts - 1:
                            logging.warning(f"Attempt {attempt+1} failed to remove temp dir {temp_dir}: {e_rm}. Retrying in 1s...")
                            time.sleep(1)
                        else:
                            raise
            except Exception as e_clean:
                status_log.append(f"⚠️ Error cleaning up temporary directory {temp_dir}: {e_clean}")
                logging.error(f"Cleanup failed for {temp_dir}: {e_clean}")
        else:
            status_log.append("ℹ️ No temporary directory to remove or already removed.")

    end_time = time.time()
    total_time = end_time - start_time
    status_log.append(f"\n--- Generation Finished ---")
    status_log.append(f"⏱️ Total time: {total_time:.2f} seconds")

    progress(1.0, desc="Finished!")
    return "\n".join(status_log), final_video_path
 
1072
 
1073
# --- Gradio Interface Definition ---
with gr.Blocks(css="footer {display: none !important}") as iface:  # Hide Gradio footer
    gr.Markdown("# 🤖 AI Documentary Generator v2")
    gr.Markdown("Enter a topic, choose settings, and let AI create a short video. Uses OpenRouter for script, Pexels for media, gTTS for narration, and MoviePy for assembly.")

    with gr.Row():
        with gr.Column(scale=1):
            topic_input = gr.Textbox(
                label="Video Topic",
                placeholder="e.g., The History of Coffee, Secrets of the Deep Ocean",
                lines=2,
            )
            resolution_input = gr.Radio(
                # NOTE(review): original label text was lost in the diff — confirm wording.
                label="Resolution",
                choices=["Short (9:16)", "Full HD (16:9)"],
                value="Short (9:16)",
            )
            captions_input = gr.Checkbox(label="Add Captions (with background)", value=True)

            # Enable the BGM checkbox only when a usable music file is present.
            bgm_exists = os.path.exists(BGM_FILE) and os.path.getsize(BGM_FILE) > 1024
            bgm_label = f"Add Background Music ({os.path.basename(BGM_FILE)})" if bgm_exists else f"Add Background Music (File '{BGM_FILE}' not found or empty)"
            bgm_input = gr.Checkbox(label=bgm_label, value=bgm_exists, interactive=bgm_exists)

            generate_button = gr.Button("Generate Video", variant="primary")

        with gr.Column(scale=2):
            status_output = gr.Textbox(label="📜 Status Log", lines=20, interactive=False, autoscroll=True)
            video_output = gr.Video(label="🎬 Generated Video")

    # Wire the button to the pipeline. The input list matches the Examples
    # block below and generate_video_process's signature.
    generate_button.click(
        fn=generate_video_process,
        inputs=[topic_input, resolution_input, captions_input, bgm_input],
        outputs=[status_output, video_output],
    )

    gr.Examples(
        examples=[
            ["The lifecycle of a monarch butterfly", "Short (9:16)", True, True],
            ["The construction of the Eiffel Tower", "Full HD (16:9)", True, False],
            ["The impact of renewable energy sources", "Short (9:16)", True, True],
            ["A brief history of the internet", "Full HD (16:9)", True, True],
        ],
        inputs=[topic_input, resolution_input, captions_input, bgm_input],
        label="Example Topics",
    )
1120
 
1121
# --- Launch the App ---
if __name__ == "__main__":
    # Create a silent placeholder BGM file if needed so the BGM option
    # degrades to silence instead of failing.
    if not os.path.exists(BGM_FILE) or os.path.getsize(BGM_FILE) < 1024:
        logging.warning(f"Background music file '{BGM_FILE}' not found or empty. Creating a silent placeholder.")
        try:
            silent_segment = AudioSegment.silent(duration=1000)  # 1 second silence
            silent_segment.export(BGM_FILE, format="mp3")
            logging.info(f"Created silent placeholder BGM file: {BGM_FILE}")
        except Exception as e:
            # Export requires ffmpeg; warn but keep launching the app.
            logging.error(f"Could not create placeholder BGM file: {e}")

    # Sanity-check API keys. These are currently hardcoded at the top of the
    # file — they should be moved to Hugging Face Secrets / env vars.
    if not PEXELS_API_KEY or len(PEXELS_API_KEY) < 50:  # basic length check
        logging.warning("PEXELS_API_KEY seems invalid or missing.")
    if not OPENROUTER_API_KEY or not OPENROUTER_API_KEY.startswith("sk-or-v1-"):
        logging.warning("OPENROUTER_API_KEY seems invalid or missing.")

    # queue() serializes concurrent requests; debug/share disabled for production.
    iface.queue().launch(debug=False, share=False)