testdeep123 committed on
Commit
40088c2
·
verified ·
1 Parent(s): e7c93cc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +885 -785
app.py CHANGED
@@ -1,173 +1,118 @@
1
-
2
-
3
  # Import necessary libraries
4
- from kokoro import KPipeline
5
 
6
  import soundfile as sf
7
  import torch
8
-
9
- import soundfile as sf
10
  import os
11
- from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip
12
- from PIL import Image
13
  import tempfile
14
  import random
15
- import cv2
16
  import math
17
- import os, requests, io, time, re, random
18
- from moviepy.editor import (
19
- VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
20
- CompositeVideoClip, TextClip, CompositeAudioClip
21
- )
22
- import gradio as gr
23
  import shutil
24
- import os
25
- import moviepy.video.fx.all as vfx
26
- import moviepy.config as mpy_config
27
- from pydub import AudioSegment
28
- from pydub.generators import Sine
29
-
30
- from PIL import Image, ImageDraw, ImageFont
31
  import numpy as np
32
  from bs4 import BeautifulSoup
33
  import base64
34
- from urllib.parse import quote
35
- import pysrt
36
  from gtts import gTTS
37
- import gradio as gr # Import Gradio
 
 
38
 
39
# Initialize Kokoro TTS pipeline (using American English)
pipeline = KPipeline(lang_code='a')
# Ensure ImageMagick binary is set
mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})

# ---------------- Global Configuration ---------------- #
# NOTE(security): these API keys are committed in source. An environment
# variable of the same name now takes precedence; the literals are kept only as
# a backward-compatible fallback and should be rotated and removed.
PEXELS_API_KEY = os.environ.get(
    'PEXELS_API_KEY',
    'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna',
)
OPENROUTER_API_KEY = os.environ.get(
    'OPENROUTER_API_KEY',
    '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',
)
OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
OUTPUT_VIDEO_FILENAME = "final_video.mp4"
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"

# Additional global variables needed for the Gradio interface
selected_voice = 'af_heart'  # Default voice
voice_speed = 0.9  # Default voice speed
font_size = 45  # Default font size
video_clip_probability = 0.25  # Default probability for video clips
bg_music_volume = 0.08  # Default background music volume
fps = 30  # Default FPS
preset = "veryfast"  # Default preset

# Defined globally (once) with placeholder values; they are set per run.
TARGET_RESOLUTION = None
CAPTION_COLOR = None
TEMP_FOLDER = None

# ---------------- Helper Functions ---------------- #
77
 
78
  def generate_script(user_input):
79
- """Generate documentary script with proper OpenRouter handling."""
80
  headers = {
81
  'Authorization': f'Bearer {OPENROUTER_API_KEY}',
82
- 'HTTP-Referer': 'https://your-domain.com',
83
- 'X-Title': 'AI Documentary Maker'
 
 
84
  }
85
 
86
- prompt = f"""Short Documentary Script GeneratorInstructions:
87
 
88
- If I say "use this," just output the script exactly as I gave it.
89
- If I only give topics, generate a script based on them.
90
- If I provide a full script, rewrite it without any changes. Make everything short simple and humarous funny and act as serious but humarous. And don't say anything off topic. Also alway say a funny statement to subscribe based on the video topic at the end. Use normal conversational text like a normal person talking and avoid AI phase make the statements humanize and normal conversational
91
- And always talk negative views don't really wants any positive statement and the full script will be one video about one topic so keep in mind
92
  Formatting Rules:
93
-
94
-
95
- Title in Square Brackets:
96
-
97
-
98
- Each section starts with a one-word title inside [ ] (max two words if necessary).
99
- This title will be used as a search term for Pexels footage.
100
-
101
-
102
-
103
- Casual & Funny Narration:
104
-
105
-
106
- Each section has 5-10 words of narration.
107
- Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).
108
-
109
-
110
-
111
- No Special Formatting:
112
-
113
-
114
- No bold, italics, or special characters. You are a assistant AI your task is to create script. You aren't a chatbot. So, don't write extra text
115
-
116
-
117
-
118
- Generalized Search Terms:
119
-
120
-
121
- If a term is too specific, make it more general for Pexels search.
122
-
123
-
124
-
125
- Scene-Specific Writing:
126
-
127
-
128
- Each section describes only what should be shown in the video.
129
-
130
-
131
-
132
- Output Only the Script, and also make it funny and humarous and helirous and also add to subscribe with a funny statement like subscribe now or .....
133
-
134
-
135
- No extra text, just the script.
136
-
137
-
138
-
139
- Example Output:
140
- [North Korea]
141
-
142
- Top 5 unknown facts about North Korea.
143
-
144
- [Invisibility]
145
-
146
- North Korea’s internet speed is so fast… it doesn’t exist.
147
-
148
- [Leadership]
149
-
150
- Kim Jong-un once won an election with 100% votes… against himself.
151
-
152
- [Magic]
153
-
154
- North Korea discovered time travel. That’s why their news is always from the past.
155
-
156
- [Warning]
157
-
158
- Subscribe now, or Kim Jong-un will send you a free one-way ticket… to North Korea.
159
-
160
- [Freedom]
161
-
162
- North Korean citizens can do anything… as long as it's government-approved.
163
- Now here is the Topic/scrip: {user_input}
164
  """
165
 
166
  data = {
167
  'model': OPENROUTER_MODEL,
168
  'messages': [{'role': 'user', 'content': prompt}],
169
- 'temperature': 0.4,
170
- 'max_tokens': 5000
171
  }
172
 
173
  try:
@@ -175,806 +120,961 @@ Now here is the Topic/scrip: {user_input}
175
  'https://openrouter.ai/api/v1/chat/completions',
176
  headers=headers,
177
  json=data,
178
- timeout=30
179
  )
180
 
181
- if response.status_code == 200:
182
- response_data = response.json()
183
- if 'choices' in response_data and len(response_data['choices']) > 0:
184
- return response_data['choices'][0]['message']['content']
185
- else:
186
- print("Unexpected response format:", response_data)
187
- return None
 
 
 
188
  else:
189
- print(f"API Error {response.status_code}: {response.text}")
190
  return None
191
 
 
 
 
 
 
 
 
 
 
192
  except Exception as e:
193
- print(f"Request failed: {str(e)}")
194
  return None
195
 
196
def parse_script(script_text):
    """
    Parse the generated script into a flat list of media/tts elements.

    A line that starts with ``[Title]`` opens a new section; any text after the
    closing bracket and all following non-title lines form that section's
    narration. For each section with a non-empty title and narration, two
    elements are emitted, in script order:

    - a 'media' element using the section title as the visual prompt;
    - a 'tts' element with the narration text, voice info, and a duration
      estimate of ``max(3, word_count * 0.5)`` seconds.

    Sections are kept in a list rather than a dict keyed by title, so two
    sections that share a title are both preserved. Narration fragments are
    joined with single spaces.

    Args:
        script_text: The raw script string.

    Returns:
        A list of element dicts; an empty list when the input is not a string
        or contains no usable sections.
    """
    if not isinstance(script_text, str):
        print(f"Error parsing script: expected str, got {type(script_text).__name__}")
        return []

    # Each entry is (title, [narration fragments]) in the order encountered.
    sections = []
    for raw_line in script_text.splitlines():
        line = raw_line.strip()
        if line.startswith("[") and "]" in line:
            closing = line.find("]")
            fragments = []
            inline_text = line[closing + 1:].strip()
            if inline_text:
                fragments.append(inline_text)
            sections.append((line[1:closing], fragments))
        elif sections and line:
            # Lines before the first title are ignored; lines under an
            # empty-title section are collected but dropped below.
            sections[-1][1].append(line)

    elements = []
    for title, fragments in sections:
        narration = " ".join(fragments)
        if not title or not narration:
            continue
        word_count = len(narration.split())
        elements.append({"type": "media", "prompt": title, "effects": "fade-in"})
        elements.append({
            "type": "tts",
            "text": narration,
            "voice": "en",
            "duration": max(3, word_count * 0.5),
        })
    return elements
240
 
241
def search_pexels_videos(query, pexels_api_key):
    """Search Pexels for videos matching ``query`` and return a random HD link.

    Up to three result pages of 15 videos are requested. For each video the
    first file whose quality is "hd" contributes its link to the candidate
    pool, and one candidate is chosen at random.

    Each page is attempted up to three times with exponential backoff. The
    backoff restarts at one second for every page, and no sleep is performed
    after the final attempt.

    Args:
        query: Search term sent to the Pexels video search endpoint.
        pexels_api_key: Value sent in the ``Authorization`` header.

    Returns:
        The URL of a randomly selected HD video file, or None if none was found.
    """
    headers = {'Authorization': pexels_api_key}
    base_url = "https://api.pexels.com/videos/search"
    num_pages = 3
    videos_per_page = 15
    max_retries = 3

    all_videos = []
    for page in range(1, num_pages + 1):
        params = {"query": query, "per_page": videos_per_page, "page": page}
        retry_delay = 1  # backoff must not carry over from the previous page
        videos = None    # stays None when the page could not be fetched

        for attempt in range(max_retries):
            try:
                response = requests.get(base_url, headers=headers, params=params, timeout=10)
                if response.status_code == 200:
                    videos = response.json().get("videos", [])
                    break
                if response.status_code == 429:
                    print(f"Rate limit hit (attempt {attempt+1}/{max_retries}).")
                else:
                    print(f"Error fetching videos: {response.status_code} {response.text}")
            except requests.exceptions.RequestException as e:
                print(f"Request exception: {e}")

            if attempt < max_retries - 1:
                print(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
                retry_delay *= 2

        if videos is None:
            continue
        if not videos:
            print(f"No videos found on page {page}.")
            # With page-based pagination, pages after an empty one are empty too.
            break

        for video in videos:
            for video_file in video.get("video_files", []):
                if video_file.get("quality") == "hd":
                    link = video_file.get("link")
                    if link:
                        all_videos.append(link)
                    break

    if not all_videos:
        print("No suitable videos found after searching all pages.")
        return None

    random_video = random.choice(all_videos)
    print(f"Selected random video from {len(all_videos)} HD videos")
    return random_video
306
 
307
def search_pexels_images(query, pexels_api_key, orientation="landscape"):
    """Search Pexels for an image matching ``query``.

    Requests up to five photos and returns the ``src.original`` URL of one
    chosen at random. The request is attempted up to three times with
    exponential backoff; no sleep is performed after the final attempt.

    Args:
        query: Search term sent to the Pexels photo search endpoint.
        pexels_api_key: Value sent in the ``Authorization`` header.
        orientation: Value of the ``orientation`` query parameter. Defaults to
            "landscape", the value that was previously hard-coded.

    Returns:
        The image URL, or None when no photo was found or every attempt failed.
    """
    headers = {'Authorization': pexels_api_key}
    url = "https://api.pexels.com/v1/search"
    params = {"query": query, "per_page": 5, "orientation": orientation}

    max_retries = 3
    retry_delay = 1

    for attempt in range(max_retries):
        try:
            response = requests.get(url, headers=headers, params=params, timeout=10)
            if response.status_code == 200:
                photos = response.json().get("photos", [])
                if not photos:
                    print(f"No images found for query: {query}")
                    return None
                photo = random.choice(photos[:5])
                return photo.get("src", {}).get("original")
            if response.status_code == 429:
                print(f"Rate limit hit (attempt {attempt+1}/{max_retries}).")
            else:
                print(f"Error fetching images: {response.status_code} {response.text}")
        except requests.exceptions.RequestException as e:
            print(f"Request exception: {e}")

        # Only wait when another attempt will actually follow.
        if attempt < max_retries - 1:
            print(f"Retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)
            retry_delay *= 2

    print(f"No Pexels images found for query: {query} after all attempts")
    return None
351
 
352
def search_google_images(query):
    """Scrape a Google Images result page and return one image URL.

    Collects the ``src`` of every ``<img>`` tag that starts with "http" and
    does not contain "gstatic". With five or more candidates, one of the first
    five is picked at random; with fewer, the first one is returned.

    Returns:
        An image URL, or None when nothing matched or any error occurred.
    """
    try:
        response = requests.get(
            f"https://www.google.com/search?q={quote(query)}&tbm=isch",
            headers={"User-Agent": USER_AGENT},
            timeout=10,
        )
        page = BeautifulSoup(response.text, "html.parser")

        candidates = [
            src
            for src in (tag.get("src", "") for tag in page.find_all("img"))
            if src.startswith("http") and "gstatic" not in src
        ]

        if not candidates:
            print(f"No Google Images found for query: {query}")
            return None
        if len(candidates) < 5:
            return candidates[0]
        return random.choice(candidates[:5])
    except Exception as e:
        print(f"Error in Google Images search: {e}")
        return None
375
 
376
def download_image(image_url, filename):
    """Download an image to ``filename``, validate it, and normalize it to RGB.

    The response is streamed to disk in 8 KiB chunks. The file is then opened
    with PIL and verified; if its mode is not RGB it is converted and saved
    back to the same path. The HTTP response and image handles are closed via
    context managers.

    Args:
        image_url: URL to fetch.
        filename: Destination path on disk.

    Returns:
        ``filename`` on success, or None on any failure. On failure a partially
        written or invalid file is removed.
    """
    def _discard_file():
        # Shared cleanup for every failure path.
        if os.path.exists(filename):
            os.remove(filename)

    try:
        headers = {"User-Agent": USER_AGENT}
        print(f"Downloading image from: {image_url} to {filename}")
        with requests.get(image_url, headers=headers, stream=True, timeout=15) as response:
            response.raise_for_status()
            with open(filename, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        print(f"Image downloaded successfully to: {filename}")

        try:
            with Image.open(filename) as probe:
                probe.verify()
            # The file is reopened after verify(), as the original code did.
            converted = None
            with Image.open(filename) as img:
                if img.mode != 'RGB':
                    converted = img.convert('RGB')
            # Save only after the source handle is closed, since the
            # destination is the same path.
            if converted is not None:
                converted.save(filename)
            print(f"Image validated and processed: {filename}")
            return filename
        except Exception as e_validate:
            print(f"Downloaded file is not a valid image: {e_validate}")
            _discard_file()
            return None

    except requests.exceptions.RequestException as e_download:
        print(f"Image download error: {e_download}")
        _discard_file()
        return None
    except Exception as e_general:
        print(f"General error during image processing: {e_general}")
        _discard_file()
        return None
415
-
416
def download_video(video_url, filename):
    """Download a video from ``video_url`` to the local path ``filename``.

    The response is streamed to disk in 8 KiB chunks and is closed via a
    context manager once the transfer ends, on success or failure.

    Returns:
        ``filename`` on success; None on any error, in which case a partially
        written file is removed.
    """
    try:
        with requests.get(video_url, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(filename, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        print(f"Video downloaded successfully to: {filename}")
        return filename
    except Exception as e:
        print(f"Video download error: {e}")
        if os.path.exists(filename):
            os.remove(filename)
        return None
431
 
432
def generate_media(prompt, user_image=None, current_index=0, total_segments=1):
    """
    Find and download a visual asset for ``prompt`` into TEMP_FOLDER.

    Sources are tried in this order:
    1. Google Images, only when the prompt contains "news".
    2. A Pexels video, with probability ``video_clip_probability``.
    3. A Pexels image for the prompt.
    4. Pexels images for a fixed list of generic fallback terms.

    ``user_image``, ``current_index`` and ``total_segments`` are accepted but
    not used by this function.

    Returns a dict ``{'path': <file_path>, 'asset_type': 'video' or 'image'}``,
    or None when every source failed.
    """
    slug = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')

    # 1. News prompts go to Google Images first.
    if "news" in prompt.lower():
        print(f"News-related query detected: {prompt}. Using Google Images...")
        news_target = os.path.join(TEMP_FOLDER, f"{slug}_news.jpg")
        news_url = search_google_images(prompt)
        if not news_url:
            print(f"Google Images search failed for prompt: {prompt}")
        else:
            news_path = download_image(news_url, news_target)
            if news_path:
                print(f"News image saved to {news_path}")
                return {"path": news_path, "asset_type": "image"}

    # 2. Occasionally prefer a video clip.
    if random.random() < video_clip_probability:
        video_target = os.path.join(TEMP_FOLDER, f"{slug}_video.mp4")
        video_link = search_pexels_videos(prompt, PEXELS_API_KEY)
        if not video_link:
            print(f"Pexels video search failed for prompt: {prompt}")
        else:
            video_path = download_video(video_link, video_target)
            if video_path:
                print(f"Video asset saved to {video_path}")
                return {"path": video_path, "asset_type": "video"}

    # 3. Still image for the prompt itself.
    image_target = os.path.join(TEMP_FOLDER, f"{slug}.jpg")
    image_link = search_pexels_images(prompt, PEXELS_API_KEY)
    if image_link:
        image_path = download_image(image_link, image_target)
        if not image_path:
            print(f"Pexels image download failed for prompt: {prompt}")
        else:
            print(f"Image asset saved to {image_path}")
            return {"path": image_path, "asset_type": "image"}

    # 4. Generic fallbacks.
    for term in ["nature", "people", "landscape", "technology", "business"]:
        print(f"Trying fallback image search with term: {term}")
        fallback_target = os.path.join(TEMP_FOLDER, f"fallback_{term}.jpg")
        fallback_link = search_pexels_images(term, PEXELS_API_KEY)
        if not fallback_link:
            print(f"Fallback image search failed for term: {term}")
            continue
        fallback_path = download_image(fallback_link, fallback_target)
        if not fallback_path:
            print(f"Fallback image download failed for term: {term}")
            continue
        print(f"Fallback image saved to {fallback_path}")
        return {"path": fallback_path, "asset_type": "image"}

    print(f"Failed to generate visual asset for prompt: {prompt}")
    return None
 
 
 
 
 
490
 
491
def generate_silent_audio(duration, sample_rate=24000):
    """Write a silent WAV file lasting ``duration`` seconds into TEMP_FOLDER.

    The file name is allocated with ``tempfile.mkstemp`` so that two calls
    within the same second cannot overwrite each other's file. A negative
    duration is treated as zero.

    Args:
        duration: Length of the silence in seconds.
        sample_rate: Samples per second written to the file.

    Returns:
        Path of the generated WAV file.
    """
    num_samples = max(0, int(duration * sample_rate))
    silence = np.zeros(num_samples, dtype=np.float32)
    fd, silent_path = tempfile.mkstemp(prefix="silent_", suffix=".wav", dir=TEMP_FOLDER)
    os.close(fd)  # only the unique path is needed; sf.write opens it itself
    sf.write(silent_path, silence, sample_rate)
    print(f"Silent audio generated: {silent_path}")
    return silent_path
499
 
500
def generate_tts(text, voice):
    """
    Generate TTS audio using Kokoro, falling back to gTTS or silent audio if needed.

    The output is cached in TEMP_FOLDER. The cache file name combines a readable
    prefix (first 10 characters of the text) with a digest of the voice and the
    full text, so narrations that merely share a prefix do not reuse each
    other's audio.

    Args:
        text: Narration to synthesize.
        voice: Voice identifier; 'en' is mapped to the global ``selected_voice``.

    Returns:
        Path to a WAV file: Kokoro output, gTTS output converted to WAV, or
        generated silence when both engines fail.
    """
    import hashlib

    safe_text = re.sub(r'[^\w\s-]', '', text[:10]).strip().replace(' ', '_')
    digest = hashlib.sha1(f"{voice}\x00{text}".encode("utf-8")).hexdigest()[:12]
    file_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}_{digest}.wav")

    if os.path.exists(file_path):
        print(f"Using cached TTS for text '{text[:10]}...'")
        return file_path

    try:
        kokoro_voice = selected_voice if voice == 'en' else voice
        generator = pipeline(text, voice=kokoro_voice, speed=voice_speed, split_pattern=r'\n+')
        # Each item is a 3-tuple; only the audio component is used.
        audio_segments = [audio for _, _, audio in generator]
        full_audio = np.concatenate(audio_segments) if len(audio_segments) > 1 else audio_segments[0]
        sf.write(file_path, full_audio, 24000)
        print(f"TTS audio saved to {file_path} (Kokoro)")
        return file_path
    except Exception as e:
        print(f"Error with Kokoro TTS: {e}")
        try:
            print("Falling back to gTTS...")
            tts = gTTS(text=text, lang='en')
            mp3_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}_{digest}.mp3")
            tts.save(mp3_path)
            audio = AudioSegment.from_mp3(mp3_path)
            audio.export(file_path, format="wav")
            os.remove(mp3_path)
            print(f"Fallback TTS saved to {file_path} (gTTS)")
            return file_path
        except Exception as fallback_error:
            print(f"Both TTS methods failed: {fallback_error}")
            return generate_silent_audio(duration=max(3, len(text.split()) * 0.5))
536
-
537
def apply_kenburns_effect(clip, target_resolution, effect_type=None):
    """Apply a Ken Burns style zoom or pan to ``clip``.

    The clip is resized to cover the target resolution, enlarged by a further
    15%, and then each frame is cropped around a moving centre and scaled to
    the target size. Motion follows a cosine ease between the start and end
    state of the chosen effect.

    Args:
        clip: Clip exposing ``w``, ``h``, ``duration``, ``resize`` and ``fl``.
        target_resolution: ``(width, height)`` of the output frames.
        effect_type: One of "zoom-in", "zoom-out", "pan-left", "pan-right",
            "up-left"; None or "random" picks one at random.

    Returns:
        The clip returned by ``clip.fl`` with the per-frame transform applied.

    Raises:
        ValueError: If ``effect_type`` is not a supported effect name.
    """
    target_w, target_h = target_resolution
    clip_aspect = clip.w / clip.h
    target_aspect = target_w / target_h

    # First pass: scale so the clip fully covers the target frame.
    if clip_aspect > target_aspect:
        new_height = target_h
        new_width = int(new_height * clip_aspect)
    else:
        new_width = target_w
        new_height = int(new_width / clip_aspect)
    clip = clip.resize(newsize=(new_width, new_height))

    # Second pass: extra margin so the crop window has room to move.
    base_scale = 1.15
    new_width, new_height = int(new_width * base_scale), int(new_height * base_scale)
    clip = clip.resize(newsize=(new_width, new_height))

    max_offset_x = new_width - target_w
    max_offset_y = new_height - target_h

    frame_center = (new_width / 2, new_height / 2)
    left_x = target_w / 2
    right_x = max_offset_x + target_w / 2
    mid_y = (max_offset_y // 2) + target_h / 2

    # name -> (start_zoom, end_zoom, start_center, end_center)
    presets = (
        ("zoom-in", (0.9, 1.1, frame_center, frame_center)),
        ("zoom-out", (1.1, 0.9, frame_center, frame_center)),
        ("pan-left", (1.0, 1.0, (right_x, mid_y), (left_x, mid_y))),
        ("pan-right", (1.0, 1.0, (left_x, mid_y), (right_x, mid_y))),
        ("up-left", (1.0, 1.0, (right_x, max_offset_y + target_h / 2), (left_x, target_h / 2))),
    )

    if effect_type is None or effect_type == "random":
        effect_type = random.choice([name for name, _ in presets])

    for name, motion in presets:
        if effect_type == name:
            start_zoom, end_zoom, start_center, end_center = motion
            break
    else:
        raise ValueError(f"Unsupported effect_type: {effect_type}")

    def transform_frame(get_frame, t):
        frame = get_frame(t)
        progress = t / clip.duration if clip.duration > 0 else 0
        progress = 0.5 - 0.5 * math.cos(math.pi * progress)  # cosine easing
        zoom = start_zoom + (end_zoom - start_zoom) * progress
        crop_w = int(target_w / zoom)
        crop_h = int(target_h / zoom)
        center_x = start_center[0] + (end_center[0] - start_center[0]) * progress
        center_y = start_center[1] + (end_center[1] - start_center[1]) * progress
        # Keep the crop window inside the enlarged frame.
        center_x = max(crop_w / 2, min(center_x, new_width - crop_w / 2))
        center_y = max(crop_h / 2, min(center_y, new_height - crop_h / 2))
        window = cv2.getRectSubPix(frame, (crop_w, crop_h), (center_x, center_y))
        return cv2.resize(window, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)

    return clip.fl(transform_frame)
611
 
612
def resize_to_fill(clip, target_resolution):
    """Scale ``clip`` to cover ``target_resolution`` and centre-crop the excess.

    A clip wider than the target aspect ratio is matched on height and cropped
    equally on the left and right; otherwise it is matched on width and cropped
    equally on the top and bottom.
    """
    target_w, target_h = target_resolution
    source_aspect = clip.w / clip.h
    wanted_aspect = target_w / target_h

    if source_aspect > wanted_aspect:
        scaled = clip.resize(height=target_h)
        margin = (scaled.w - target_w) / 2
        return scaled.crop(x1=margin, x2=scaled.w - margin, y1=0, y2=scaled.h)

    scaled = clip.resize(width=target_w)
    margin = (scaled.h - target_h) / 2
    return scaled.crop(x1=0, x2=scaled.w, y1=margin, y2=scaled.h - margin)
628
-
629
def find_mp3_files():
    """Walk the current directory tree and report every ``.mp3`` file found.

    Each match is printed as it is discovered. Only the first match (in
    ``os.walk`` order) is returned; None is returned when there are no matches.
    """
    matches = []
    for folder, _subdirs, names in os.walk('.'):
        for name in names:
            if not name.endswith('.mp3'):
                continue
            found = os.path.join(folder, name)
            matches.append(found)
            print(f"Found MP3 file: {found}")
    if not matches:
        return None
    return matches[0]
639
-
640
def add_background_music(final_video, bg_music_volume=0.10):
    """Mix the track ``music.mp3`` (if present in the working directory) under the video.

    The music is looped when shorter than the video, trimmed to the video's
    duration, scaled by ``bg_music_volume`` and composited with the video's
    existing audio. If the video has no audio track, the music alone is used.

    Args:
        final_video: The assembled video clip.
        bg_music_volume: Volume multiplier applied to the music.

    Returns:
        The video with mixed audio, or the unchanged video when ``music.mp3``
        is missing or any error occurs.
    """
    try:
        # concatenate_audioclips is not among the names imported at file level,
        # so the looping branch previously failed with a NameError that the
        # except below swallowed.
        from moviepy.editor import concatenate_audioclips

        bg_music_path = "music.mp3"
        if not os.path.exists(bg_music_path):
            print("No MP3 files found, skipping background music")
            return final_video

        print(f"Adding background music from: {bg_music_path}")
        bg_music = AudioFileClip(bg_music_path)
        if bg_music.duration < final_video.duration:
            loops_needed = math.ceil(final_video.duration / bg_music.duration)
            bg_music = concatenate_audioclips([bg_music] * loops_needed)
        bg_music = bg_music.subclip(0, final_video.duration)
        bg_music = bg_music.volumex(bg_music_volume)

        video_audio = final_video.audio
        if video_audio is None:
            mixed_audio = bg_music
        else:
            mixed_audio = CompositeAudioClip([video_audio, bg_music])
        final_video = final_video.set_audio(mixed_audio)
        print("Background music added successfully")
        return final_video
    except Exception as e:
        print(f"Error adding background music: {e}")
        print("Continuing without background music")
        return final_video
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
664
 
 
 
 
 
665
 
 
 
 
 
 
 
 
 
666
 
 
 
 
667
 
 
 
668
 
 
 
 
 
 
669
 
670
def fix_imagemagick_policy():
    """Relax the ImageMagick security policy file.

    Looks for ``policy.xml`` in a fixed list of locations, backs it up to
    ``<path>.bak`` and rewrites it in place with ``sudo sed -i``. Commands are
    run as argument lists (no shell), and their exit status is checked: the
    function reports success only if every command succeeded.

    Returns:
        True if the policy file was found and every command exited with 0;
        False if no policy file exists or any command failed or could not be
        started.
    """
    import subprocess

    print("Attempting to fix ImageMagick security policies...")
    policy_paths = [
        "/etc/ImageMagick-6/policy.xml",
        "/etc/ImageMagick-7/policy.xml",
        "/etc/ImageMagick/policy.xml",
        "/usr/local/etc/ImageMagick-7/policy.xml"
    ]
    found_policy = next((path for path in policy_paths if os.path.exists(path)), None)
    if not found_policy:
        print("No policy.xml found. Using alternative subtitle method.")
        return False

    print(f"Modifying policy file at {found_policy}")

    # Raw strings: the sed scripts contain a literal backslash-star.
    # NOTE(review): the first script already rewrites every rights="none", so
    # the third script's pattern (which contains rights="none") cannot match
    # afterwards. The second script's replacement also omits the closing ">"
    # that its pattern consumes. Both are kept as-is; confirm the intent.
    sed_scripts = [
        r's/rights="none"/rights="read|write"/g',
        r's/<policy domain="path" pattern="@\*"[^>]*>/<policy domain="path" pattern="@*" rights="read|write"/g',
        r's/<policy domain="coder" rights="none" pattern="PDF"[^>]*>/<!-- <policy domain="coder" rights="none" pattern="PDF"> -->/g',
    ]
    commands = [["sudo", "cp", found_policy, f"{found_policy}.bak"]]
    commands.extend(["sudo", "sed", "-i", script, found_policy] for script in sed_scripts)

    try:
        for command in commands:
            subprocess.run(command, check=True)
    except (OSError, subprocess.CalledProcessError) as e:
        print(f"Error fixing policies: {e}")
        return False

    print("ImageMagick policies updated successfully.")
    return True
694
-
695
-
696
-
697
-
698
 
 
 
 
699
 
700
 
701
def _caption_chunks(narration_text, words_per_chunk=5):
    """Split narration into consecutive caption strings of at most ``words_per_chunk`` words."""
    words = narration_text.split()
    return [
        ' '.join(words[start:start + words_per_chunk])
        for start in range(0, len(words), words_per_chunk)
    ]


def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, narration_text=None, segment_index=0):
    """Create a video clip with synchronized subtitles and narration.

    The clip lasts as long as the narration audio plus 0.2 s. Videos are
    resized to fill TARGET_RESOLUTION and looped or trimmed to that length;
    images get a Ken Burns effect plus fade in/out. Unless CAPTION_COLOR is
    "transparent", the narration is shown as captions of up to five words,
    each displayed for an equal share of the audio duration.

    Caption rendering is best-effort: if the chunked captions fail, a single
    caption with the whole narration is tried, and if that fails too the clip
    is returned without captions instead of being discarded.

    Args:
        media_path: Path of the video or image file.
        asset_type: "video" or "image".
        tts_path: Path of the narration audio file.
        duration: Accepted for compatibility; not used.
        effects: Accepted for compatibility; not used.
        narration_text: Text used for captions; falsy disables captions.
        segment_index: Index used in log output only.

    Returns:
        The composed clip with audio attached, or None when an input file is
        missing, ``asset_type`` is unsupported, or an error occurs.
    """
    try:
        print(f"Creating clip #{segment_index} with asset_type: {asset_type}, media_path: {media_path}")
        if not os.path.exists(media_path) or not os.path.exists(tts_path):
            print("Missing media or TTS file")
            return None

        audio_clip = AudioFileClip(tts_path).audio_fadeout(0.2)
        audio_duration = audio_clip.duration
        target_duration = audio_duration + 0.2

        # Handle media (video or image)
        if asset_type == "video":
            clip = VideoFileClip(media_path)
            clip = resize_to_fill(clip, TARGET_RESOLUTION)
            if clip.duration < target_duration:
                clip = clip.loop(duration=target_duration)
            else:
                clip = clip.subclip(0, target_duration)
        elif asset_type == "image":
            # The context manager closes the image handle on every path.
            with Image.open(media_path) as img:
                if img.mode != 'RGB':
                    # Write the RGB copy inside TEMP_FOLDER so that removing
                    # that folder also removes this file.
                    with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False, dir=TEMP_FOLDER) as temp:
                        img.convert('RGB').save(temp.name)
                        media_path = temp.name
            clip = ImageClip(media_path).set_duration(target_duration)
            clip = apply_kenburns_effect(clip, TARGET_RESOLUTION)
            clip = clip.fadein(0.3).fadeout(0.3)
        else:
            print(f"Unsupported asset_type: {asset_type}")
            return None

        # Add subtitles if enabled
        chunks = []
        if narration_text and CAPTION_COLOR != "transparent":
            chunks = _caption_chunks(narration_text)

        if chunks:  # whitespace-only narration yields no chunks and no captions
            caption_style = dict(
                fontsize=font_size,
                font='DejaVu-Sans',
                color=CAPTION_COLOR,
                stroke_width=2,
                stroke_color='black',
            )
            try:
                chunk_duration = audio_duration / len(chunks)
                subtitle_y_position = int(TARGET_RESOLUTION[1] * 0.70)
                subtitle_clips = [
                    TextClip(chunk_text, **caption_style)
                    .set_start(i * chunk_duration)
                    .set_end((i + 1) * chunk_duration)
                    .set_position(('center', subtitle_y_position))
                    for i, chunk_text in enumerate(chunks)
                ]
                clip = CompositeVideoClip([clip] + subtitle_clips)
            except Exception as sub_error:
                print(f"Subtitle error: {sub_error}")
                try:
                    txt_clip = TextClip(narration_text, **caption_style).set_position(
                        ('center', int(TARGET_RESOLUTION[1] / 3))
                    ).set_duration(clip.duration)
                    clip = CompositeVideoClip([clip, txt_clip])
                except Exception as fallback_error:
                    # Captions are optional; keep the clip rather than lose the segment.
                    print(f"Subtitle fallback failed, continuing without captions: {fallback_error}")

        # Set audio
        clip = clip.set_audio(audio_clip)
        print(f"Clip created: {clip.duration:.1f}s")
        return clip

    except Exception as e:
        print(f"Error in create_clip: {str(e)}")
        return None
788
-
789
 
 
 
790
 
 
 
 
 
 
791
 
 
 
792
 
 
 
793
 
 
 
 
 
 
794
 
 
795
 
 
 
 
 
 
 
796
 
 
 
797
 
 
 
 
 
 
 
 
798
 
 
 
799
 
 
 
 
800
 
 
 
 
 
 
 
 
 
 
 
 
 
801
 
802
 
 
 
803
 
804
 
 
 
 
 
 
 
 
 
805
 
806
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
807
 
808
 
809
- # ---------------- Main Video Generation Function ---------------- #
810
def generate_video(user_input, resolution, caption_option):
    """Generate a video based on user input via Gradio.

    Args:
        user_input: Topic or instructions passed to ``generate_script``.
        resolution: ``"Full"`` for 1920x1080 or ``"Short"`` for 1080x1920;
            any other value falls back to 1920x1080.
        caption_option: ``"Yes"`` sets the caption colour to white, anything
            else sets it to ``"transparent"``.

    Returns:
        ``OUTPUT_VIDEO_FILENAME`` on success, or ``None`` when the script could
        not be generated/parsed or no clip could be built.
    """
    global TARGET_RESOLUTION, CAPTION_COLOR, TEMP_FOLDER

    # "Short" selects portrait output; "Full" and unknown values use landscape.
    TARGET_RESOLUTION = (1080, 1920) if resolution == "Short" else (1920, 1080)

    # Set caption color
    CAPTION_COLOR = "white" if caption_option == "Yes" else "transparent"

    # Create a unique temporary folder. Keep a local reference so the cleanup
    # below removes the folder created by *this* call even if the global is
    # reassigned in the meantime.
    TEMP_FOLDER = tempfile.mkdtemp()
    work_dir = TEMP_FOLDER

    try:
        # Fix ImageMagick policy
        if not fix_imagemagick_policy():
            print("Will use alternative methods if needed")

        print("Generating script from API...")
        script = generate_script(user_input)
        if not script:
            print("Failed to generate script.")
            return None
        print("Generated Script:\n", script)

        elements = parse_script(script)
        if not elements:
            print("Failed to parse script into elements.")
            return None
        print(f"Parsed {len(elements)//2} script segments.")

        # Elements alternate (media, tts); a trailing unpaired element is dropped.
        paired_elements = list(zip(elements[::2], elements[1::2]))
        if not paired_elements:
            print("No valid script segments found.")
            return None

        clips = []
        total = len(paired_elements)
        for idx, (media_elem, tts_elem) in enumerate(paired_elements):
            print(f"\nProcessing segment {idx+1}/{total} with prompt: '{media_elem['prompt']}'")
            media_asset = generate_media(media_elem['prompt'], current_index=idx, total_segments=total)
            if not media_asset:
                print(f"Skipping segment {idx+1} due to missing media asset.")
                continue
            tts_path = generate_tts(tts_elem['text'], tts_elem['voice'])
            if not tts_path:
                print(f"Skipping segment {idx+1} due to TTS generation failure.")
                continue
            clip = create_clip(
                media_path=media_asset['path'],
                asset_type=media_asset['asset_type'],
                tts_path=tts_path,
                duration=tts_elem['duration'],
                effects=media_elem.get('effects', 'fade-in'),
                narration_text=tts_elem['text'],
                segment_index=idx
            )
            if clip:
                clips.append(clip)
            else:
                print(f"Clip creation failed for segment {idx+1}.")

        if not clips:
            print("No clips were successfully created.")
            return None

        print("\nConcatenating clips...")
        final_video = concatenate_videoclips(clips, method="compose")
        final_video = add_background_music(final_video, bg_music_volume=bg_music_volume)

        print(f"Exporting final video to {OUTPUT_VIDEO_FILENAME}...")
        final_video.write_videofile(OUTPUT_VIDEO_FILENAME, codec='libx264', fps=fps, preset=preset)
        print(f"Final video saved as {OUTPUT_VIDEO_FILENAME}")

        return OUTPUT_VIDEO_FILENAME
    finally:
        # Runs on every exit path (early return, success, or exception) so the
        # temporary folder is never leaked.
        print("Cleaning up temporary files...")
        shutil.rmtree(work_dir, ignore_errors=True)
        print("Temporary files removed.")
902
 
903
- # ---------------- Gradio Interface ---------------- #
904
# Maps the label shown in the voice dropdown to the voice identifier handed to
# the TTS pipeline. Insertion order is the order the dropdown lists them in.
# NOTE(review): the "af_/am_/bf_/bm_" prefixes appear to encode accent and
# gender, matching the labels - confirm against the TTS voice catalogue.
VOICE_CHOICES = {
    # Female voices without a flag in the label
    "Emma (Female)": "af_heart",
    "Bella (Female)": "af_bella",
    "Nicole (Female)": "af_nicole",
    "Aoede (Female)": "af_aoede",
    "Kore (Female)": "af_kore",
    "Sarah (Female)": "af_sarah",
    "Nova (Female)": "af_nova",
    "Sky (Female)": "af_sky",
    "Alloy (Female)": "af_alloy",
    "Jessica (Female)": "af_jessica",
    "River (Female)": "af_river",
    # Male voices without a flag in the label
    "Michael (Male)": "am_michael",
    "Fenrir (Male)": "am_fenrir",
    "Puck (Male)": "am_puck",
    "Echo (Male)": "am_echo",
    "Eric (Male)": "am_eric",
    "Liam (Male)": "am_liam",
    "Onyx (Male)": "am_onyx",
    "Santa (Male)": "am_santa",
    "Adam (Male)": "am_adam",
    # Voices labelled with the UK flag
    "Emma 🇬🇧 (Female)": "bf_emma",
    "Isabella 🇬🇧 (Female)": "bf_isabella",
    "Alice 🇬🇧 (Female)": "bf_alice",
    "Lily 🇬🇧 (Female)": "bf_lily",
    "George 🇬🇧 (Male)": "bm_george",
    "Fable 🇬🇧 (Male)": "bm_fable",
    "Lewis 🇬🇧 (Male)": "bm_lewis",
    "Daniel 🇬🇧 (Male)": "bm_daniel",
}
934
 
935
def generate_video_with_options(user_input, resolution, caption_option, music_file, voice, vclip_prob, bg_vol, video_fps, video_preset, v_speed, caption_size):
    """Gradio callback: store the UI selections in module globals, then render.

    Args:
        user_input: Video concept text.
        resolution: Resolution choice forwarded to ``generate_video``.
        caption_option: Caption choice forwarded to ``generate_video``.
        music_file: Uploaded background music, or ``None``. May be either an
            object exposing the temp-file path as ``.name`` or a plain path
            string, depending on how the File component delivers uploads.
        voice: A key of ``VOICE_CHOICES``.
        vclip_prob: Video clip usage probability as a percentage (0-100).
        bg_vol: Background music volume.
        video_fps: Export frames per second.
        video_preset: Encoder preset name.
        v_speed: Voice speed multiplier.
        caption_size: Caption font size.

    Returns:
        Whatever ``generate_video`` returns (output path or ``None``).

    Raises:
        KeyError: If ``voice`` is not a key of ``VOICE_CHOICES``.
    """
    global selected_voice, voice_speed, font_size, video_clip_probability, bg_music_volume, fps, preset

    # Update global variables with user selections
    selected_voice = VOICE_CHOICES[voice]
    voice_speed = v_speed
    font_size = caption_size
    video_clip_probability = vclip_prob / 100  # Convert from percentage to decimal
    bg_music_volume = bg_vol
    fps = video_fps
    preset = video_preset

    # Handle music upload
    if music_file is not None:
        target_path = "music.mp3"
        # Accept both a file wrapper (path in `.name`) and a bare path string.
        source_path = getattr(music_file, "name", music_file)
        shutil.copy(source_path, target_path)
        print(f"Uploaded music saved as: {target_path}")

    # Generate the video
    return generate_video(user_input, resolution, caption_option)
955
-
956
- # Create the Gradio interface
957
def _build_interface():
    """Assemble the Gradio form whose inputs line up, in order, with the
    parameters of ``generate_video_with_options``."""
    export_presets = ["ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow"]

    controls = [
        gr.Textbox(label="Video Concept", placeholder="Enter your video concept here..."),
        gr.Radio(["Full", "Short"], label="Resolution", value="Full"),
        gr.Radio(["Yes", "No"], label="Captions", value="Yes"),
        gr.File(label="Upload Background Music (MP3)", file_types=[".mp3"]),
        gr.Dropdown(choices=list(VOICE_CHOICES.keys()), label="Choose Voice", value="Emma (Female)"),
        gr.Slider(0, 100, value=25, step=1, label="Video Clip Usage Probability (%)"),
        gr.Slider(0.0, 1.0, value=0.08, step=0.01, label="Background Music Volume"),
        gr.Slider(10, 60, value=30, step=1, label="Video FPS"),
        gr.Dropdown(choices=export_presets, value="veryfast", label="Export Preset"),
        gr.Slider(0.5, 1.5, value=0.9, step=0.05, label="Voice Speed"),
        gr.Slider(20, 100, value=45, step=1, label="Caption Font Size"),
    ]

    return gr.Interface(
        fn=generate_video_with_options,
        inputs=controls,
        outputs=gr.Video(label="Generated Video"),
        title="AI Documentary Video Generator",
        description="Create short documentary videos with AI. Upload music, choose voice, and customize settings.",
    )


# Module-level interface object, built once at import time.
iface = _build_interface()

# Launch the interface
if __name__ == "__main__":
    iface.launch(share=True)
 
 
 
 
 
 
 
 
 
 
 
 
1
  # Import necessary libraries
2
+ from kokoro import KPipeline # Assuming kokoro is installed and working
3
 
4
  import soundfile as sf
5
  import torch
 
 
6
  import os
 
 
7
  import tempfile
8
  import random
 
9
  import math
10
+ import time
11
+ import re
12
+ import requests
13
+ import io
 
 
14
  import shutil
15
+ from urllib.parse import quote
 
 
 
 
 
 
16
  import numpy as np
17
  from bs4 import BeautifulSoup
18
  import base64
 
 
19
  from gtts import gTTS
20
+ import gradio as gr
21
+ from PIL import Image, ImageDraw, ImageFont
22
+ import cv2 # OpenCV for image processing in Ken Burns
23
 
24
+ # MoviePy imports
25
+ from moviepy.editor import (
26
+ VideoFileClip, AudioFileClip, ImageClip, CompositeVideoClip, TextClip,
27
+ concatenate_videoclips, CompositeAudioClip
28
+ )
29
+ import moviepy.video.fx.all as vfx
30
+ # No longer importing moviepy.config or calling change_settings for ImageMagick
31
+
32
+ # Pydub imports (for potential gTTS fallback format conversion)
33
+ from pydub import AudioSegment
34
+ from pydub.generators import Sine # Might not be needed if TTS works
35
 
36
  # ---------------- Global Configuration ---------------- #
37
# --- API Keys ---
# NOTE(review): the literals below are credentials committed to source control.
# They are kept only as fallbacks so existing deployments keep working; set the
# PEXELS_API_KEY / OPENROUTER_API_KEY environment variables to override them,
# and rotate/remove the embedded values.
PEXELS_API_KEY = os.environ.get('PEXELS_API_KEY', 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna')
OPENROUTER_API_KEY = os.environ.get('OPENROUTER_API_KEY', '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')
OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
OUTPUT_VIDEO_FILENAME = "final_video.mp4"
# --- Web Request Settings ---
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"

# --- Gradio Controlled Variables (with defaults) ---
selected_voice = 'af_heart'  # Default voice (Kokoro American Female)
voice_speed = 0.9  # Default voice speed
font_size = 45  # Default caption font size
video_clip_probability = 0.25  # Default probability for using video clips (25%)
bg_music_volume = 0.08  # Default background music volume
fps = 30  # Default video frames per second
preset = "veryfast"  # Default video export preset (faster, lower quality)
caption_style_bg_color = 'rgba(0, 0, 0, 0.6)'  # Background for captions
caption_style_text_color = 'yellow'  # Text color for captions
caption_font = 'Arial-Bold'  # Font for captions (ensure it's available)

# --- Runtime Variables (set per execution) ---
TARGET_RESOLUTION = None  # Will be set based on Gradio input (e.g., (1920, 1080))
TEMP_FOLDER = None  # Will be created temporarily for each run
USE_CAPTIONS = True  # Will be set based on Gradio input
61
+
62
+ # ---------------- Kokoro TTS Initialization ---------------- #
63
# Build the shared Kokoro TTS pipeline once at import time (lang_code 'a').
# If construction fails for any reason, `pipeline` is left as None so that
# callers can detect the missing engine and use gTTS instead.
try:
    pipeline = KPipeline(lang_code='a')
except Exception as init_error:
    pipeline = None
    print(f"Warning: Failed to initialize Kokoro TTS Pipeline: {init_error}")
    print("TTS generation will rely on gTTS.")
else:
    print("Kokoro TTS Pipeline initialized.")
71
 
72
  # ---------------- Helper Functions ---------------- #
 
 
 
 
 
 
 
 
 
 
73
 
74
  def generate_script(user_input):
75
+ """Generate documentary script using OpenRouter API."""
76
  headers = {
77
  'Authorization': f'Bearer {OPENROUTER_API_KEY}',
78
+ 'Content-Type': 'application/json',
79
+ # Optional but recommended:
80
+ 'HTTP-Referer': 'http://localhost:7860', # Or your app's URL
81
+ 'X-Title': 'AI Documentary Maker Gradio'
82
  }
83
 
84
+ prompt = f"""Create a short, humorous, slightly negative, and conversational documentary-style script based on the following topic or instructions: '{user_input}'.
85
 
 
 
 
 
86
  Formatting Rules:
87
+ 1. Start each distinct visual scene/idea with a title in square brackets `[Like This]`. This title will be used for searching visuals. Keep titles concise (1-3 words).
88
+ 2. After the bracketed title, write 1-2 short sentences (5-15 words total) of narration for that scene.
89
+ 3. Keep the narration casual, funny, maybe a bit sarcastic or critical, and human-like. Avoid sounding like a robotic AI.
90
+ 4. Do NOT use any other formatting like bold, italics, or bullet points.
91
+ 5. Ensure search terms in brackets are general enough for stock footage searches (e.g., use "[Technology]" instead of "[Quantum Supercomputer]").
92
+ 6. End the *entire* script with a funny, topic-related call to subscribe, also enclosed in brackets like `[Subscribe CTA]`.
93
+ 7. Focus on one core topic for the entire script.
94
+ 8. Output *only* the formatted script, nothing else.
95
+
96
+ Example:
97
+ [Cats]
98
+ So, you think cats are cute? Let's investigate.
99
+ [Sleeping]
100
+ They spend 90% of their lives asleep. Lazy, much?
101
+ [Judgment]
102
+ The other 10%? Judging your life choices. Harsh.
103
+ [Boxes]
104
+ Their obsession with boxes remains unexplained. Weirdos.
105
+ [Subscribe CTA]
106
+ Subscribe now, or a cat will knock your coffee over.
107
+
108
+ Now generate the script based on: {user_input}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  """
110
 
111
  data = {
112
  'model': OPENROUTER_MODEL,
113
  'messages': [{'role': 'user', 'content': prompt}],
114
+ 'temperature': 0.6, # Slightly more creative for humor
115
+ 'max_tokens': 600 # Adjust as needed
116
  }
117
 
118
  try:
 
120
  'https://openrouter.ai/api/v1/chat/completions',
121
  headers=headers,
122
  json=data,
123
+ timeout=45 # Increased timeout
124
  )
125
 
126
+ response.raise_for_status() # Raises HTTPError for bad responses (4xx or 5xx)
127
+
128
+ response_data = response.json()
129
+ if 'choices' in response_data and len(response_data['choices']) > 0:
130
+ script_content = response_data['choices'][0]['message']['content']
131
+ # Basic cleaning: remove potential preamble/postamble if AI didn't follow instructions perfectly
132
+ script_content = re.sub(r"^.*?\n?\[", "[", script_content, flags=re.DOTALL) # Remove anything before the first bracket
133
+ script_content = script_content.strip()
134
+ print("Script generated successfully.")
135
+ return script_content
136
  else:
137
+ print(f"API Error: Unexpected response format: {response_data}")
138
  return None
139
 
140
+ except requests.exceptions.Timeout:
141
+ print("API Error: Request timed out.")
142
+ return None
143
+ except requests.exceptions.RequestException as e:
144
+ print(f"API Error: Request failed: {e}")
145
+ if hasattr(e, 'response') and e.response is not None:
146
+ print(f"API Response Status Code: {e.response.status_code}")
147
+ print(f"API Response Text: {e.response.text}")
148
+ return None
149
  except Exception as e:
150
+ print(f"Error during script generation: {str(e)}")
151
  return None
152
 
153
def parse_script(script_text):
    """
    Split a generated script into segment dictionaries.

    A line beginning with ``[Title]`` opens a segment; any text after the
    closing bracket and every following non-title line is joined (space
    separated) into that segment's narration. Titles with no narration are
    dropped. If no bracketed segment can be built, every non-blank line becomes
    its own segment titled ``Scene <line number>``.

    Returns a list of ``{"prompt": ..., "narration": ...}`` dicts, or ``[]``
    when the script is empty.
    """
    if not script_text:
        print("Error: Script text is empty.")
        return []

    raw_lines = script_text.strip().splitlines()
    title_pattern = re.compile(r'^\[(.*?)\](.*)')

    parsed = []
    heading = None
    pieces = []

    def flush():
        # Emit the segment collected so far, provided it has a title and text.
        narration = " ".join(pieces).strip()
        if heading is not None and narration:
            parsed.append({"prompt": heading, "narration": narration})

    for raw in raw_lines:
        stripped = raw.strip()
        if not stripped:
            continue

        hit = title_pattern.match(stripped)
        if hit is not None:
            flush()
            heading = hit.group(1).strip()
            pieces = [hit.group(2).strip()]
        elif heading is not None:
            # Continuation of the current segment's narration.
            pieces.append(stripped)

    flush()

    if parsed:
        print(f"Parsed {len(parsed)} segments from script.")
        return parsed

    print("Error: Could not parse any segments from the script.")
    # Lenient fallback: treat each non-blank line as a standalone scene.
    fallback = [
        {"prompt": f"Scene {position+1}", "narration": entry.strip()}
        for position, entry in enumerate(raw_lines)
        if entry.strip()
    ]
    if not fallback:
        return []
    print("Warning: Using simplified script parsing.")
    return fallback
213
+
214
def _pick_pexels_video_link(video):
    """Return the preferred download link from one Pexels video result, or None."""
    files = video.get('video_files') or []

    def first_link(quality, min_width=0):
        for entry in files:
            # `width` may be missing or null; treat that as 0 instead of raising.
            if entry.get('quality') == quality and (entry.get('width') or 0) >= min_width:
                return entry.get('link')
        return None

    # Prefer HD or Large (at least 1080 wide), then Medium. "sd" is accepted as
    # a last resort so a result offering only standard-definition files is
    # still usable.
    return (
        first_link('hd', 1080)
        or first_link('large', 1080)
        or first_link('medium')
        or first_link('sd')
    )


def search_pexels(query, api_key, search_type="videos"):
    """Search Pexels for videos or images.

    Args:
        query: Search terms.
        api_key: Pexels API key; when falsy the search is skipped.
        search_type: ``"videos"`` or ``"photos"``; used both as the URL path
            segment and as the key of the result list in the JSON response.

    Returns:
        The URL of one randomly chosen matching asset, or ``None`` when the key
        is missing, nothing suitable was found, or every attempt failed.
    """
    if not api_key:
        print(f"Pexels API key not provided. Skipping Pexels {search_type} search.")
        return None

    base_url = f"https://api.pexels.com/{search_type}/search"
    headers = {'Authorization': api_key}
    params = {"query": query, "per_page": 15, "orientation": "landscape"}
    if search_type == "videos":
        params["size"] = "medium"  # Request medium or large, HD might not always be available

    max_retries = 3
    retry_delay = 1

    for attempt in range(max_retries):
        try:
            response = requests.get(base_url, headers=headers, params=params, timeout=15)
            response.raise_for_status()  # Check for HTTP errors

            items = response.json().get(search_type, [])
            if not items:
                print(f"No Pexels {search_type} found for query: {query}")
                return None

            if search_type == "videos":
                candidates = [_pick_pexels_video_link(video) for video in items]
            else:  # images
                candidates = [
                    photo.get("src", {}).get("large2x") or photo.get("src", {}).get("original")
                    for photo in items
                ]
            valid_items = [link for link in candidates if link]

            if valid_items:
                print(f"Found {len(valid_items)} Pexels {search_type} for '{query}'. Selecting one.")
                return random.choice(valid_items)

            print(f"No suitable quality Pexels {search_type} found for query: {query}")
            return None

        except requests.exceptions.Timeout:
            print(f"Pexels API timeout (attempt {attempt+1}/{max_retries})...")
            wait = retry_delay
        except requests.exceptions.RequestException as e:
            print(f"Pexels API error (attempt {attempt+1}/{max_retries}): {e}")
            # Use the response attached to the exception: the local `response`
            # is unbound when the request itself failed (e.g. connection
            # error) and stale when an earlier attempt succeeded in connecting.
            failed_response = getattr(e, 'response', None)
            if failed_response is not None and failed_response.status_code == 429:  # Rate limit
                print("Rate limit hit, waiting longer...")
                wait = retry_delay * 2
            else:
                wait = retry_delay
        except Exception as e:
            print(f"Unexpected error during Pexels search: {e}")
            break  # Don't retry on unexpected errors

        # Back off before the next attempt; no point sleeping after the last one.
        if attempt < max_retries - 1:
            time.sleep(wait)
        retry_delay *= 2

    print(f"Pexels {search_type} search failed for '{query}' after {max_retries} attempts.")
    return None
283
 
284
def search_google_images(query):
    """Scrape a Google Images results page and return one candidate image URL.

    Only absolute (``http...``) sources that do not contain ``gstatic`` and
    that mention a common image extension are considered; one of the first ten
    is chosen at random. Returns ``None`` if nothing qualifies or the request
    or parsing fails.
    """
    print(f"Attempting Google Image search for: {query} (Use with caution)")
    wanted_extensions = ['.jpg', '.jpeg', '.png', '.webp']

    def looks_like_image(address):
        # Absolute URL, not served from gstatic, with an image-like extension.
        if not address or not address.startswith("http") or "gstatic" in address:
            return False
        lowered = address.lower()
        return any(ext in lowered for ext in wanted_extensions)

    try:
        # Using a simple search URL, might be fragile
        page = requests.get(
            f"https://www.google.com/search?q={quote(query)}&tbm=isch&safe=active",
            headers={"User-Agent": USER_AGENT},
            timeout=10,
        )
        page.raise_for_status()
        document = BeautifulSoup(page.text, "html.parser")

        # Prefer the lazy-load attribute, falling back to the plain src.
        sources = (tag.get("data-src") or tag.get("src") for tag in document.find_all("img"))
        image_urls = [address for address in sources if looks_like_image(address)]

        if not image_urls:
            print(f"No suitable Google Images found for query: {query}")
            return None

        # Return a random one from the first few results
        print(f"Found {len(image_urls)} potential Google Images for '{query}'.")
        return random.choice(image_urls[:10])
    except requests.exceptions.RequestException as e:
        print(f"Error during Google Images search request: {e}")
        return None
    except Exception as e:
        print(f"Error parsing Google Images search results: {e}")
        return None
319
 
320
def _guess_media_extension(content_type, media_url):
    """Choose a file extension from the Content-Type header, else from the URL."""
    if 'video' in content_type:
        return ".mp4"
    for mime, extension in (('image/jpeg', ".jpg"), ('image/png', ".png"), ('image/webp', ".webp")):
        if mime in content_type:
            return extension

    # Header missing or unrecognised: guess from the URL text.
    if '.mp4' in media_url:
        return ".mp4"
    if '.mov' in media_url:
        return ".mov"  # May need conversion later
    if '.jpg' in media_url or '.jpeg' in media_url:
        return ".jpg"
    if '.png' in media_url:
        return ".png"
    if '.webp' in media_url:
        return ".webp"
    return ".jpg"  # Default


def _remove_quietly(path):
    """Delete *path* if it exists, ignoring filesystem errors."""
    if path and os.path.exists(path):
        try:
            os.remove(path)
        except OSError:
            pass


def download_media(media_url, filename_prefix, target_folder):
    """Download media (image or video) from a URL.

    Images are validated with Pillow and, when not already in RGB mode,
    re-saved as ``<filename_prefix>.jpg`` in RGB.

    Args:
        media_url: Address of the asset.
        filename_prefix: File name without extension.
        target_folder: Directory the file is written to.

    Returns:
        Path of the saved file, or ``None`` when the download, validation or
        conversion fails (any partially written file is removed).
    """
    filename = None
    try:
        headers = {"User-Agent": USER_AGENT}
        # The context manager releases the streamed connection on every path.
        with requests.get(media_url, headers=headers, stream=True, timeout=30) as response:
            response.raise_for_status()

            # The header may be absent; normalise to '' so the substring checks
            # cannot raise and the URL-based guess still gets a chance.
            content_type = (response.headers.get('content-type') or '').lower()
            file_extension = _guess_media_extension(content_type, media_url)

            filename = os.path.join(target_folder, f"{filename_prefix}{file_extension}")
            with open(filename, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        print(f"Media downloaded successfully to: {filename}")

        # Basic validation for images
        if file_extension in (".jpg", ".png", ".webp"):
            try:
                with Image.open(filename) as probe:
                    probe.verify()  # Check if it's a valid image file

                # verify() leaves the image object unusable, so re-open it.
                rgb_img = None
                with Image.open(filename) as img:
                    if img.mode != 'RGB':
                        print(f"Converting image {filename} to RGB.")
                        rgb_img = img.convert('RGB')

                if rgb_img is not None:
                    # Overwrite if JPG, save as JPG if PNG/WEBP for consistency
                    jpg_filename = os.path.join(target_folder, f"{filename_prefix}.jpg")
                    try:
                        rgb_img.save(jpg_filename, "JPEG")
                    finally:
                        rgb_img.close()
                    if filename != jpg_filename:  # Remove original if format changed
                        os.remove(filename)
                    return jpg_filename  # Return path to the JPG

            except Exception as e_validate:
                print(f"Downloaded file {filename} is not a valid image or conversion failed: {e_validate}")
                _remove_quietly(filename)
                return None

        return filename  # Return original path for videos or already RGB images

    except requests.exceptions.RequestException as e_download:
        print(f"Media download error from {media_url}: {e_download}")
        _remove_quietly(filename)
        return None
    except Exception as e_general:
        print(f"General error during media download/processing: {e_general}")
        _remove_quietly(filename)
        return None
389
 
390
def generate_media(prompt):
    """
    Locate and download one visual asset for *prompt*.

    Sources are tried in order: Pexels video (only when a random draw falls
    below ``video_clip_probability``), Pexels photo, Google Images, and finally
    a generic Pexels "technology" photo. Returns
    ``{"path": ..., "asset_type": "video" | "image"}`` for the first successful
    download, or ``None`` if every source fails.
    """
    cleaned = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')
    stem = f"{cleaned}_{int(time.time())}"  # Add timestamp for uniqueness

    def fetch(url, suffix, kind):
        # Download into the run's temp folder; None when the download fails.
        saved = download_media(url, stem + suffix, TEMP_FOLDER)
        return {"path": saved, "asset_type": kind} if saved else None

    # 1. Try Pexels Video (if probability met)
    if random.random() < video_clip_probability:
        print(f"Searching Pexels Video for: {prompt}")
        video_url = search_pexels(prompt, PEXELS_API_KEY, search_type="videos")
        if not video_url:
            print("Pexels video search failed or no suitable video found.")
        else:
            asset = fetch(video_url, "_vid", "video")
            if asset:
                return asset

    # 2. Try Pexels Image
    print(f"Searching Pexels Image for: {prompt}")
    image_url = search_pexels(prompt, PEXELS_API_KEY, search_type="photos")
    if not image_url:
        print("Pexels image search failed.")
    else:
        asset = fetch(image_url, "_img", "image")
        if asset:
            return asset

    # 3. Fallback: Try Google Images (especially for specific/newsy terms)
    print(f"Falling back to Google Image search for: {prompt}")
    google_image_url = search_google_images(prompt)
    if not google_image_url:
        print("Google image search failed.")
    else:
        asset = fetch(google_image_url, "_gimg", "image")
        if asset:
            return asset

    # 4. Absolute Fallback: Generic Image
    print("All searches failed. Using absolute fallback: 'technology'")
    fallback_url = search_pexels("technology", PEXELS_API_KEY, search_type="photos")
    if fallback_url:
        asset = fetch(fallback_url, "_fallback", "image")
        if asset:
            return asset

    print(f"FATAL: Could not retrieve any media for prompt: {prompt}")
    return None  # Indicate failure
 
 
 
 
 
 
439
 
440
def _extract_audio_chunk(item):
    """Return the audio carried by one pipeline result as a NumPy array, or None.

    Handles a bare array, a tuple whose last element is the audio, and an
    object exposing the audio as ``.audio``. Tensor-like values (anything with
    ``detach``/``cpu``) are converted to NumPy.
    NOTE(review): which of these shapes Kokoro yields depends on its version -
    confirm against the installed release.
    """
    candidate = getattr(item, "audio", None)
    if candidate is None:
        if isinstance(item, tuple) and len(item) > 0:
            candidate = item[-1]  # Assume audio is the last element
        else:
            candidate = item  # Assume it yields audio directly

    if isinstance(candidate, np.ndarray):
        return candidate
    if hasattr(candidate, "detach") and hasattr(candidate, "cpu"):
        return candidate.detach().cpu().numpy()
    return None


def generate_tts(text, voice_id, speed):
    """
    Generate TTS audio using Kokoro, falling back to gTTS.

    Args:
        text: Narration to synthesise.
        voice_id: Voice identifier passed to the Kokoro pipeline.
        speed: Speed multiplier for Kokoro; gTTS only distinguishes slow
            (``speed < 0.9``) from normal.

    Returns:
        Path of the WAV file written inside ``TEMP_FOLDER``, or ``None`` if
        both engines fail.
    """
    # Sanitize text for filename (simple approach)
    safe_text_prefix = re.sub(r'[^\w\s-]', '', text[:20]).strip().replace(' ', '_')
    output_filename = os.path.join(TEMP_FOLDER, f"tts_{safe_text_prefix}_{voice_id}.wav")

    # --- Try Kokoro TTS First ---
    if pipeline:  # Check if Kokoro was initialized
        try:
            print(f"Generating TTS with Kokoro (Voice: {voice_id}, Speed: {speed}) for: '{text[:30]}...'")
            generator = pipeline(text, voice=voice_id, speed=speed)

            audio_segments = []
            for item in generator:
                chunk = _extract_audio_chunk(item)
                if chunk is not None:
                    audio_segments.append(chunk)

            if not audio_segments:
                raise ValueError("Kokoro TTS returned no audio segments.")

            # Concatenate the segments (a single segment is returned unchanged in content)
            full_audio = np.concatenate(audio_segments)

            # Ensure audio is float32 for soundfile if needed
            if full_audio.dtype != np.float32:
                full_audio = full_audio.astype(np.float32)
                # Normalize if necessary after type conversion
                max_val = np.max(np.abs(full_audio))
                if max_val > 1.0:
                    full_audio /= max_val

            sf.write(output_filename, full_audio, 24000)  # Kokoro default sample rate
            print(f"Kokoro TTS audio saved to {output_filename}")
            return output_filename
        except Exception as e:
            print(f"Error with Kokoro TTS: {e}. Falling back to gTTS.")
            # Fall through to gTTS block

    # --- Fallback to gTTS ---
    mp3_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text_prefix}_gtts.mp3")
    try:
        print(f"Generating TTS with gTTS for: '{text[:30]}...'")
        tts = gTTS(text=text, lang='en', slow=(speed < 0.9))  # gTTS speed is boolean (slow/normal)
        wav_path = output_filename  # Reuse the intended output filename
        tts.save(mp3_path)

        # Convert MP3 to WAV using pydub
        audio = AudioSegment.from_mp3(mp3_path)
        audio.export(wav_path, format="wav")

        print(f"gTTS audio saved and converted to {wav_path}")
        return wav_path
    except ImportError:
        print("Error: gTTS or pydub might not be installed. Cannot use gTTS fallback.")
        return None
    except Exception as fallback_error:
        print(f"Error with gTTS fallback: {fallback_error}")
        return None
    finally:
        # Clean up temporary mp3, including when the conversion failed.
        if os.path.exists(mp3_path):
            try:
                os.remove(mp3_path)
            except OSError:
                pass
506
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
507
 
508
def apply_kenburns_effect(clip, target_resolution, duration):
    """Apply a randomized Ken Burns effect (zoom/pan) to an image clip.

    The first frame of *clip* is enlarged so that it covers the target size
    with a 20% margin; each output frame is then a crop of that enlarged image,
    scaled to the target size. One of six motions is chosen at random (zoom in,
    zoom out, or a pan in one of four directions).

    Args:
        clip: Image clip providing ``size``, ``get_frame`` and ``resize``.
        target_resolution: ``(width, height)`` of the output frames.
        duration: Length of the resulting clip in seconds.

    Returns:
        An animated clip of length *duration* whose frames are
        ``target_resolution`` in size.
    """
    target_w, target_h = target_resolution
    img_w, img_h = clip.size

    scale_factor = 1.2  # Zoom margin

    # Determine resize dimensions to cover target + margin, keeping aspect ratio
    if img_w / img_h > target_w / target_h:
        # Wider than target aspect ratio -> fit height
        final_h = target_h * scale_factor
        final_w = final_h * (img_w / img_h)
    else:
        # Taller than target aspect ratio -> fit width
        final_w = target_w * scale_factor
        final_h = final_w * (img_h / img_w)
    final_w, final_h = int(final_w), int(final_h)

    # Create a resized version for the animation base
    try:
        pil_img = Image.fromarray(clip.get_frame(0))  # Get frame as numpy array, convert to PIL
        resized_pil = pil_img.resize((final_w, final_h), Image.Resampling.LANCZOS)
        resized_clip = ImageClip(np.array(resized_pil)).set_duration(duration)
    except Exception as e:
        print(f"Warning: Error during high-quality resize for Ken Burns, using MoviePy default: {e}")
        # Fallback to moviepy resize (might be lower quality for stills)
        resized_clip = clip.resize(newsize=(final_w, final_h)).set_duration(duration)

    # Room available for moving a target-sized window inside the enlarged image
    max_move_x = final_w - target_w
    max_move_y = final_h - target_h
    mid_x, mid_y = max_move_x / 2, max_move_y / 2

    # effect -> (zoom_start, zoom_end, x_start, x_end, y_start, y_end), where
    # x/y are top-left offsets of a target-sized window.
    motions = {
        'zoom_in': (1.0, scale_factor, mid_x, mid_x, mid_y, mid_y),
        'zoom_out': (scale_factor, 1.0, mid_x, mid_x, mid_y, mid_y),
        'pan_lr': (scale_factor, scale_factor, 0, max_move_x, mid_y, mid_y),
        'pan_rl': (scale_factor, scale_factor, max_move_x, 0, mid_y, mid_y),
        'pan_td': (scale_factor, scale_factor, mid_x, mid_x, 0, max_move_y),
        'pan_dt': (scale_factor, scale_factor, mid_x, mid_x, max_move_y, 0),
    }
    effect = random.choice(['zoom_in', 'zoom_out', 'pan_lr', 'pan_rl', 'pan_td', 'pan_dt'])
    zoom_start, zoom_end, x_start, x_end, y_start, y_end = motions[effect]

    def transform(get_frame, t):
        # `fl` invokes its filter as fun(get_frame, t); get_frame yields the
        # frame of whichever clip (image or mask) is being transformed.
        interp = min(max(t / duration, 0.0), 1.0) if duration else 0

        # Interpolate zoom and position
        current_zoom = zoom_start + (zoom_end - zoom_start) * interp
        current_x = x_start + (x_end - x_start) * interp
        current_y = y_start + (y_end - y_start) * interp

        frame = get_frame(t)
        frame_h, frame_w = frame.shape[:2]

        # Size of the box to crop from the enlarged image: target-sized at full
        # zoom, proportionally larger when zoomed out.
        crop_w = min(frame_w, max(1, int(target_w * scale_factor / current_zoom)))
        crop_h = min(frame_h, max(1, int(target_h * scale_factor / current_zoom)))

        # Keep the crop centred on the centre of the target-sized window so
        # zooming happens around a fixed point (for pans this equals the
        # window's own top-left offset).
        x1 = current_x + target_w / 2 - crop_w / 2
        y1 = current_y + target_h / 2 - crop_h / 2

        # Clamp coordinates to stay within bounds
        x1 = int(max(0, min(x1, frame_w - crop_w)))
        y1 = int(max(0, min(y1, frame_h - crop_h)))

        cropped_frame = frame[y1:y1 + crop_h, x1:x1 + crop_w]

        # Resize the cropped frame to the final target resolution
        return cv2.resize(cropped_frame, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)

    # Apply the transformation (and to the mask, if one exists)
    return resized_clip.fl(transform, apply_to=['mask'])
619
 
620
def resize_to_fill(clip, target_resolution):
    """Scale a clip so it covers ``target_resolution`` and center-crop the overflow.

    Args:
        clip: Clip-like object exposing ``w``, ``h``, ``resize`` and ``crop``.
        target_resolution: ``(width, height)`` of the desired output frame.

    Returns:
        The clip returned by ``crop`` after resizing.
    """
    out_w, out_h = target_resolution

    # A source that is proportionally wider than the target is matched on
    # height (the overflow is horizontal); otherwise it is matched on width.
    source_is_wider = clip.w / clip.h > out_w / out_h
    if source_is_wider:
        scaled = clip.resize(height=out_h)
    else:
        scaled = clip.resize(width=out_w)

    # Top-left corner of the centered crop window, never negative.
    left = max(0, (scaled.w - out_w) / 2)
    top = max(0, (scaled.h - out_h) / 2)

    return scaled.crop(x1=left, y1=top, width=out_w, height=out_h)
640
 
641
def add_background_music(video_clip, music_file_path, volume):
    """Mix a background music track under a video clip's existing audio.

    The track is trimmed when it is longer than the video and looped (then
    trimmed) when it is shorter, scaled by ``volume`` and composited with
    whatever audio the clip already carries. Music is strictly best-effort:
    any failure is logged and the clip is returned unchanged.

    Args:
        video_clip: Clip exposing ``duration``, ``audio`` and ``set_audio``.
        music_file_path: Path to the music file; a falsy or missing path
            skips the music step.
        volume: Multiplier passed to ``volumex``.

    Returns:
        The clip with the mixed audio attached, or the original clip when
        music was skipped or could not be added.
    """
    if not music_file_path or not os.path.exists(music_file_path):
        print("No background music file found or provided. Skipping.")
        return video_clip

    try:
        print(f"Adding background music from: {music_file_path}")
        bg_music = AudioFileClip(music_file_path)
        video_duration = video_clip.duration

        # A zero-length track cannot be looped (the loop count divides by
        # its duration), so bail out with a clear message instead.
        if not bg_music.duration:
            print("Background music track is empty. Skipping.")
            return video_clip

        if bg_music.duration < video_duration:
            # Repeat the track often enough to cover the whole video.
            loops_needed = math.ceil(video_duration / bg_music.duration)
            bg_music = concatenate_audioclips([bg_music] * loops_needed)
        if bg_music.duration > video_duration:
            # Trim precisely to the video length.
            bg_music = bg_music.subclip(0, video_duration)

        bg_music = bg_music.volumex(volume)

        # The video may have no audio track at all; compositing a None entry
        # would raise, so only real tracks are mixed.
        tracks = [track for track in (video_clip.audio, bg_music) if track is not None]
        video_clip = video_clip.set_audio(CompositeAudioClip(tracks))
        print("Background music added successfully.")
        return video_clip

    except Exception as e:
        print(f"Error adding background music: {e}. Skipping.")
        return video_clip  # Return original clip
672
 
673
 
674
def _chunk_words(text, max_words=5):
    """Split ``text`` on whitespace into strings of at most ``max_words`` words each."""
    words = text.split()
    return [" ".join(words[i:i + max_words]) for i in range(0, len(words), max_words)]


def create_segment_clip(media_info, tts_path, narration_text):
    """Create a single video segment (clip) with visuals, audio, and subtitles.

    Args:
        media_info: Mapping with a 'path' entry and an 'asset_type' entry
            that is either "video" or "image".
        tts_path: Path of the narration audio file for this segment.
        narration_text: Narration used for the captions; captions are only
            rendered when the module-level ``USE_CAPTIONS`` flag is truthy.

    Returns:
        The composed clip, or None when an input is missing, the asset type
        is unknown, or any step raises (the error is printed).
    """
    audio_clip = None
    try:
        media_path = media_info['path']
        asset_type = media_info['asset_type']
        print(f"Creating clip segment: Type={asset_type}, Media={os.path.basename(media_path)}")

        # --- 1. Validate inputs ---
        # All cheap checks run before any media is opened so that a rejected
        # segment never leaves an audio reader open.
        if not os.path.exists(tts_path):
            print(f"Error: TTS file not found: {tts_path}")
            return None
        if asset_type not in ("video", "image"):
            print(f"Error: Unknown asset type: {asset_type}")
            return None
        if not os.path.exists(media_path):
            kind = "Video" if asset_type == "video" else "Image"
            print(f"Error: {kind} file not found: {media_path}")
            return None

        # --- 2. Load Audio ---
        audio_clip = AudioFileClip(tts_path)
        # Small buffer on top of the narration length for the transitions.
        segment_duration = audio_clip.duration + 0.3

        # --- 3. Create Visual Clip ---
        if asset_type == "video":
            video_clip = VideoFileClip(media_path)
            # Loop footage that is shorter than the narration.
            if video_clip.duration < segment_duration:
                loops = math.ceil(segment_duration / video_clip.duration)
                video_clip = concatenate_videoclips([video_clip] * loops)
            # Trim to the exact segment duration, then fit the target frame.
            video_clip = video_clip.subclip(0, segment_duration)
            visual_clip = resize_to_fill(video_clip, TARGET_RESOLUTION)
        else:
            img_clip = ImageClip(media_path).set_duration(segment_duration)
            visual_clip = apply_kenburns_effect(img_clip, TARGET_RESOLUTION, segment_duration)
            # Ensure the final size is correct after the effect.
            visual_clip = visual_clip.resize(newsize=TARGET_RESOLUTION)

        # Fade-in/out for smoother transitions between segments.
        visual_clip = visual_clip.fadein(0.15).fadeout(0.15)

        # --- 4. Add Subtitles (chunk-based) ---
        subtitle_clips = []
        if USE_CAPTIONS and narration_text:
            chunks = _chunk_words(narration_text, max_words=5)

            if not chunks:  # Whitespace-only narration yields no words.
                print("Warning: Narration text is empty, skipping subtitles for this segment.")
            else:
                # Every chunk gets an equal share of the narration time.
                chunk_duration = audio_clip.duration / len(chunks)
                start_time = 0.1  # Small delay before first subtitle

                for chunk_text in chunks:
                    # Never let a caption run past the end of the visuals.
                    end_time = min(start_time + chunk_duration, segment_duration - 0.1)

                    # NOTE(review): the original comments state that
                    # method='label' avoids an ImageMagick dependency;
                    # confirm against the installed MoviePy version.
                    txt_clip = TextClip(
                        txt=chunk_text,
                        fontsize=font_size,
                        font=caption_font,
                        color=caption_style_text_color,
                        bg_color=caption_style_bg_color,
                        method='label',
                        align='center',
                        size=(TARGET_RESOLUTION[0] * 0.8, None)  # Width constraint
                    )
                    # Centered horizontally, in the lower part of the frame.
                    txt_clip = txt_clip.set_position(('center', TARGET_RESOLUTION[1] * 0.80))
                    txt_clip = txt_clip.set_start(start_time).set_duration(end_time - start_time)

                    subtitle_clips.append(txt_clip)
                    start_time = end_time  # Next chunk starts where this one ended

        # --- 5. Combine Visuals, Audio, and Subtitles ---
        if subtitle_clips:
            final_clip = CompositeVideoClip([visual_clip] + subtitle_clips)
        else:
            final_clip = visual_clip  # No subtitles

        # Start the narration slightly after the visual fade-in.
        final_clip = final_clip.set_audio(audio_clip.set_start(0.15))

        print(f"Clip segment created successfully. Duration: {final_clip.duration:.2f}s")
        return final_clip

    except Exception as e:
        print(f"Error creating clip segment: {e}")
        import traceback
        traceback.print_exc()  # Print detailed traceback for debugging
        # The segment is abandoned, so release the narration reader if it was opened.
        if audio_clip is not None:
            try:
                audio_clip.close()
            except Exception as close_error:
                print(f"Warning: Failed to close narration audio: {close_error}")
        return None
787
 
788
+ # ---------------- Main Video Generation Function ---------------- #
789
 
790
def _remove_quietly(*paths):
    """Delete each existing file in ``paths``; filesystem errors are ignored (best-effort cleanup)."""
    for path in paths:
        if path and os.path.exists(path):
            try:
                os.remove(path)
            except OSError:
                # The whole temp folder is removed at the end anyway.
                pass


def _close_clips(clips):
    """Call ``close()`` on every clip, logging instead of raising on failure."""
    for clip in clips:
        try:
            clip.close()
        except Exception as e:
            print(f"Warning: Failed to release clip resources: {e}")


def generate_full_video(user_input, resolution_choice, caption_choice, music_file_info):
    """
    Main function orchestrating the video generation process.

    Pipeline: generate script -> parse into segments -> fetch media and TTS
    per segment -> build segment clips -> concatenate -> add background
    music -> export -> clean up. Settings that are not parameters
    (``selected_voice``, ``voice_speed``, ``bg_music_volume``, ``fps``,
    ``preset``) are read from module-level globals.

    Args:
        user_input: Topic or instructions handed to ``generate_script``.
        resolution_choice: "Short (9:16)" selects 1080x1920; anything else
            selects 1920x1080.
        caption_choice: Captions are enabled only for the value "Yes".
        music_file_info: Uploaded music file, either a path string or an
            object exposing ``.name``; None disables background music.

    Returns:
        ``(video_path, status_message)``; ``video_path`` is None on failure.
    """
    global TARGET_RESOLUTION, TEMP_FOLDER, USE_CAPTIONS

    print("\n--- Starting Video Generation ---")
    start_time = time.time()

    # 1. Setup Environment
    if resolution_choice == "Short (9:16)":
        TARGET_RESOLUTION = (1080, 1920)
        print("Resolution set to: Short (1080x1920)")
    else:  # Default to Full HD
        TARGET_RESOLUTION = (1920, 1080)
        print("Resolution set to: Full HD (1920x1080)")

    USE_CAPTIONS = (caption_choice == "Yes")
    print(f"Captions Enabled: {USE_CAPTIONS}")

    # Create unique temporary folder for this run
    TEMP_FOLDER = tempfile.mkdtemp(prefix="aivideo_")
    print(f"Temporary folder created: {TEMP_FOLDER}")

    # Handle uploaded music file
    music_file_path = None
    if music_file_info is not None:
        try:
            # Depending on the Gradio version the upload arrives as a plain
            # path or as a temp-file wrapper exposing the path via .name.
            if isinstance(music_file_info, (str, os.PathLike)):
                uploaded_path = music_file_info
            else:
                uploaded_path = music_file_info.name
            music_file_path = os.path.join(TEMP_FOLDER, "background_music.mp3")
            shutil.copy(uploaded_path, music_file_path)
            print(f"Background music copied to: {music_file_path}")
        except Exception as e:
            print(f"Error handling uploaded music file: {e}")
            music_file_path = None  # Ensure it's None if copy failed

    # 2. Generate Script
    print("\nStep 1: Generating script...")
    script_text = generate_script(user_input)
    if not script_text:
        print("ERROR: Failed to generate script. Aborting.")
        # ignore_errors keeps a cleanup hiccup from masking the real error.
        shutil.rmtree(TEMP_FOLDER, ignore_errors=True)
        return None, "Error: Script generation failed. Check API keys and OpenRouter status."
    print("Script Generated:\n", script_text)

    # 3. Parse Script
    print("\nStep 2: Parsing script...")
    segments = parse_script(script_text)
    if not segments:
        print("ERROR: Failed to parse script into segments. Aborting.")
        shutil.rmtree(TEMP_FOLDER, ignore_errors=True)
        return None, "Error: Script parsing failed. Check script format."
    print(f"Successfully parsed {len(segments)} segments.")

    # 4. Generate Media and TTS for each segment
    print("\nStep 3: Generating media and TTS for each segment...")
    segment_clips = []
    total_segments = len(segments)
    for number, segment in enumerate(segments, start=1):
        print(f"\n--- Processing Segment {number}/{total_segments} ---")
        print(f"  Prompt: {segment['prompt']}")
        print(f"  Narration: {segment['narration']}")

        # Generate Media
        media_info = generate_media(segment['prompt'])
        if not media_info:
            print(f"Warning: Failed to get media for segment {number}. Skipping segment.")
            continue

        # Generate TTS
        tts_path = generate_tts(segment['narration'], selected_voice, voice_speed)
        if not tts_path:
            print(f"Warning: Failed to generate TTS for segment {number}. Skipping segment.")
            _remove_quietly(media_info['path'])  # Media is useless without narration
            continue

        # Create the actual MoviePy clip for this segment
        clip = create_segment_clip(media_info, tts_path, segment['narration'])
        if clip:
            segment_clips.append(clip)
        else:
            print(f"Warning: Failed to create video clip for segment {number}. Skipping.")
            _remove_quietly(media_info['path'], tts_path)

    if not segment_clips:
        print("ERROR: No video clips were successfully created. Aborting.")
        shutil.rmtree(TEMP_FOLDER, ignore_errors=True)
        return None, "Error: Failed to create any video segments. Check logs."

    # 5. Concatenate Clips
    print("\nStep 4: Concatenating video segments...")
    try:
        final_video = concatenate_videoclips(segment_clips, method="compose")
        print("Segments concatenated successfully.")
    except Exception as e:
        print(f"ERROR: Failed to concatenate video clips: {e}")
        _close_clips(segment_clips)
        shutil.rmtree(TEMP_FOLDER, ignore_errors=True)
        return None, f"Error: Concatenation failed: {e}"

    # 6. Add Background Music
    print("\nStep 5: Adding background music...")
    final_video = add_background_music(final_video, music_file_path, bg_music_volume)

    # 7. Export Final Video
    print(f"\nStep 6: Exporting final video to '{OUTPUT_VIDEO_FILENAME}'...")
    try:
        final_video.write_videofile(
            OUTPUT_VIDEO_FILENAME,
            codec='libx264',    # Good balance of quality/compatibility
            audio_codec='aac',  # Standard audio codec
            fps=fps,
            preset=preset,      # Controls encoding speed vs compression
            threads=4,          # Use multiple threads for faster encoding
            logger='bar'        # Show progress bar
        )
        print(f"Final video saved successfully as {OUTPUT_VIDEO_FILENAME}")
        export_success = True
    except Exception as e:
        print(f"ERROR: Failed to write final video file: {e}")
        import traceback
        traceback.print_exc()
        export_success = False

    # Release clip resources before the files behind them are deleted.
    _close_clips([final_video, *segment_clips])

    # 8. Cleanup
    print("\nStep 7: Cleaning up temporary files...")
    try:
        shutil.rmtree(TEMP_FOLDER)
        print(f"Temporary folder {TEMP_FOLDER} removed.")
    except Exception as e:
        print(f"Warning: Failed to remove temporary folder {TEMP_FOLDER}: {e}")

    end_time = time.time()
    total_time = end_time - start_time
    print(f"\n--- Video Generation Finished ---")
    print(f"Total time: {total_time:.2f} seconds")

    if export_success:
        return OUTPUT_VIDEO_FILENAME, f"Video generation complete! Time: {total_time:.2f}s"
    return None, f"Error: Video export failed. Check logs. Time: {total_time:.2f}s"
954
 
 
 
 
 
955
 
956
+ # ---------------- Gradio Interface Definition ---------------- #
957
 
958
# Display label -> Kokoro voice ID (example subset, add more as needed).
# The labels are what the UI lists; the IDs are what gets passed to TTS.
VOICE_CHOICES = {
    # --- American English ---
    "Emma (US Female)": "af_heart",
    "Bella (US Female)": "af_bella",
    "Nicole (US Female)": "af_nicole",
    "Sarah (US Female)": "af_sarah",
    "Michael (US Male)": "am_michael",
    "Eric (US Male)": "am_eric",
    "Adam (US Male)": "am_adam",
    # --- British English ---
    "Emma (UK Female)": "bf_emma",
    "Alice (UK Female)": "bf_alice",
    "George (UK Male)": "bm_george",
    "Daniel (UK Male)": "bm_daniel",
}
974
 
975
def gradio_interface_handler(
        user_prompt, resolution, captions, bg_music,
        voice_name, video_prob, music_vol, video_fps, export_preset,
        tts_speed, caption_size):
    """Wrapper function to connect Gradio inputs to the main generation logic.

    Logs the request, rejects an empty prompt, copies the UI settings into
    the module-level globals and then delegates to ``generate_full_video``.

    Args:
        user_prompt: Topic, instructions or script text entered by the user.
        resolution: Resolution label forwarded to ``generate_full_video``.
        captions: Caption choice label forwarded to ``generate_full_video``.
        bg_music: Uploaded music file (or None), forwarded unchanged.
        voice_name: Key into ``VOICE_CHOICES``; unknown names fall back to
            'af_heart'.
        video_prob: Share of video clips in percent (converted to 0..1).
        music_vol: Background music volume.
        video_fps: Frames per second for the export.
        export_preset: Encoder preset name for the export.
        tts_speed: Narration speed.
        caption_size: Caption font size.

    Returns:
        ``(video_path, status_message)``; ``video_path`` is None on failure.
    """
    global selected_voice, voice_speed, font_size, video_clip_probability, bg_music_volume, fps, preset

    # The textbox value may be None or blank; normalise once for logging
    # and validation.
    prompt_text = user_prompt or ""

    print("\n--- Received Request from Gradio ---")
    print(f"Prompt: {prompt_text[:50]}...")
    print(f"Resolution: {resolution}")
    print(f"Captions: {captions}")
    print(f"Music File: {'Provided' if bg_music else 'None'}")
    print(f"Voice: {voice_name}")
    print(f"Video Probability: {video_prob}%")
    print(f"Music Volume: {music_vol}")
    print(f"FPS: {video_fps}")
    print(f"Preset: {export_preset}")
    print(f"TTS Speed: {tts_speed}")
    print(f"Caption Size: {caption_size}")

    # Fail fast instead of starting the whole pipeline without a prompt.
    if not prompt_text.strip():
        status_message = "Error: Please enter a video concept, topic, or script."
        print(f"Gradio Handler Status: {status_message}")
        return None, status_message

    # Update global settings based on Gradio inputs
    selected_voice = VOICE_CHOICES.get(voice_name, 'af_heart')  # Get voice ID from name
    voice_speed = tts_speed
    font_size = caption_size
    video_clip_probability = video_prob / 100.0  # Convert percentage to probability
    bg_music_volume = music_vol
    fps = video_fps
    preset = export_preset

    # Call the main video generation function
    video_path, status_message = generate_full_video(user_prompt, resolution, captions, bg_music)

    print(f"Gradio Handler Status: {status_message}")

    # Return the video path (or None if failed) and the status message
    return video_path, status_message
1011
+
1012
+
1013
+ # Create Gradio Interface
1014
with gr.Blocks(theme=gr.themes.Soft()) as iface:
    # Page header.
    gr.Markdown("# 🎬 AI Documentary Video Generator")
    gr.Markdown("Enter a topic or detailed instructions, customize settings, and generate a short documentary-style video. Uses AI for script, TTS, and finds relevant stock footage.")

    # Main area: prompt and status on the left, resulting video on the right.
    with gr.Row():
        with gr.Column(scale=2):
            prompt_input = gr.Textbox(
                label="Video Concept / Topic / Script",
                placeholder="e.g., 'The history of coffee', 'Top 5 strangest animals', or paste a full script following the format.",
                lines=4,
            )
            submit_button = gr.Button("Generate Video", variant="primary")
            status_output = gr.Textbox(label="Status", interactive=False)

        with gr.Column(scale=1):
            video_output = gr.Video(label="Generated Video")

    # Optional tuning knobs, collapsed by default.
    with gr.Accordion("⚙️ Advanced Settings", open=False):
        gr.Markdown("### Video & Audio Settings")
        with gr.Row():
            resolution_dd = gr.Dropdown(
                choices=["Full HD (16:9)", "Short (9:16)"],
                label="Resolution",
                value="Full HD (16:9)",
            )
            caption_dd = gr.Radio(
                choices=["Yes", "No"],
                label="Generate Captions",
                value="Yes",
            )
            music_upload = gr.File(
                label="Upload Background Music (MP3)",
                file_types=[".mp3"],
            )

        gr.Markdown("### Voice & Narration")
        with gr.Row():
            voice_dd = gr.Dropdown(
                choices=list(VOICE_CHOICES.keys()),
                label="Narration Voice",
                value="Emma (US Female)",
            )
            speed_slider = gr.Slider(
                minimum=0.5, maximum=1.5, value=0.9, step=0.05,
                label="Voice Speed",
            )

        gr.Markdown("### Visuals & Style")
        with gr.Row():
            video_prob_slider = gr.Slider(
                minimum=0, maximum=100, value=35, step=5,
                label="Video Clip % (vs. Images)",
            )
            caption_size_slider = gr.Slider(
                minimum=20, maximum=80, value=45, step=1,
                label="Caption Font Size",
            )

        gr.Markdown("### Export Settings")
        with gr.Row():
            music_vol_slider = gr.Slider(
                minimum=0.0, maximum=1.0, value=0.08, step=0.01,
                label="Background Music Volume",
            )
            fps_slider = gr.Slider(
                minimum=15, maximum=60, value=30, step=1,
                label="Video FPS",
            )
            preset_dd = gr.Dropdown(
                choices=["ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow", "slower", "veryslow"],
                value="veryfast",
                label="Export Quality/Speed Preset",
            )

    # Wire the button to the handler; the input order must match the
    # positional parameters of gradio_interface_handler.
    submit_button.click(
        fn=gradio_interface_handler,
        inputs=[
            prompt_input, resolution_dd, caption_dd, music_upload,
            voice_dd, video_prob_slider, music_vol_slider, fps_slider, preset_dd,
            speed_slider, caption_size_slider,
        ],
        outputs=[video_output, status_output],
    )
1068
 
1069
  # Launch the interface
1070
if __name__ == "__main__":
    print("Launching Gradio Interface...")

    # Warn loudly when either API key still holds its template placeholder.
    keys_are_placeholders = (
        PEXELS_API_KEY == 'YOUR_PEXELS_API_KEY'
        or OPENROUTER_API_KEY == 'YOUR_OPENROUTER_API_KEY'
    )
    if keys_are_placeholders:
        warning_banner = (
            "\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!",
            "!!! WARNING: API Keys not set in the script. !!!",
            "!!! Please replace 'YOUR_PEXELS_API_KEY' and !!!",
            "!!! 'YOUR_OPENROUTER_API_KEY' with your actual keys. !!!",
            "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n",
        )
        for banner_line in warning_banner:
            print(banner_line)

    # share=True requests a public link, debug=True enables more logs.
    iface.launch(share=True, debug=True)