testdeep123 committed on
Commit
479d5b6
·
verified ·
1 Parent(s): aaf5d9d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +911 -327
app.py CHANGED
@@ -1,501 +1,1085 @@
1
- from kokoro import KPipeline
2
- import IPython
3
- from IPython.display import display, Audio
4
- import soundfile as sf
5
- import torch
6
- from IPython.display import display, Audio, HTML
7
- import soundfile as sf
8
  import os
9
- from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip
10
- from PIL import Image
11
- import tempfile
 
 
12
  import random
13
- import cv2
14
  import math
15
- import os, requests, io, time, re, random
16
- from moviepy.editor import (
17
- VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
18
- CompositeVideoClip, TextClip
19
- )
20
- import moviepy.video.fx.all as vfx
21
- import moviepy.config as mpy_config
22
- from pydub import AudioSegment
23
- from pydub.generators import Sine
24
- from google.colab import files
25
- from PIL import Image, ImageDraw, ImageFont
26
  import numpy as np
27
- from bs4 import BeautifulSoup
28
- import base64
29
- from urllib.parse import quote
30
  import pysrt
31
- from gtts import gTTS
32
- from gtts import gTTS
33
- from PIL import Image, ImageDraw, ImageFont
34
- import numpy as np
35
  from bs4 import BeautifulSoup
36
  from urllib.parse import quote
37
- import pysrt
38
- import soundfile as sf
39
- from kokoro import KPipeline
40
- import cv2
41
- import gradio as gr
42
- import tempfile # Added for use in create_clip
43
-
44
-
45
-
46
-
47
-
48
-
49
-
50
-
51
-
52
-
53
-
54
import subprocess
import sys

# Startup diagnostics: dump the installed packages so dependency problems are
# visible in the logs. This is best-effort only -- a missing or failing pip
# must not prevent the application from starting, so failures are reported
# instead of propagating. Using "sys.executable -m pip" inspects the
# interpreter that is actually running this script rather than whatever "pip"
# happens to be first on PATH.
try:
    print(subprocess.check_output([sys.executable, '-m', 'pip', 'list']).decode())
except (OSError, subprocess.CalledProcessError) as e:
    print("Could not list installed packages:", e)

try:
    import moviepy.editor
    print("moviepy imported successfully")
except ImportError as e:
    print("Error importing moviepy:", e)
63
-
64
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
 
68
# Initialize Kokoro TTS pipeline
pipeline = KPipeline(lang_code='a')  # 'a' is from original code; adjust if needed

# Set ImageMagick binary (needed by MoviePy's TextClip for captions).
# Prefer whatever "convert" is on PATH and fall back to the common Linux
# location, so the app also works where ImageMagick lives elsewhere.
import shutil  # local to this setup step; not part of the file's import block

mpy_config.change_settings(
    {"IMAGEMAGICK_BINARY": shutil.which("convert") or "/usr/bin/convert"}
)
73
 
74
# Global Configuration
# Credentials are read from the environment. API keys must never be committed
# to source control; any key that has been committed should be revoked.
# A missing variable yields an empty string, which the remote APIs reject.
PEXELS_API_KEY = os.environ.get('PEXELS_API_KEY', '')
OPENROUTER_API_KEY = os.environ.get('OPENROUTER_API_KEY', '')
OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
TEMP_FOLDER = "temp_video_processing"
OUTPUT_VIDEO_FILENAME = "final_video.mp4"
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
 
 
 
 
 
81
 
82
  # Helper Functions
 
83
def generate_script(user_input):
    """Ask the OpenRouter chat-completions API for a short documentary script.

    Args:
        user_input: Topic or full script supplied by the user.

    Returns:
        The script text from the first completion choice, or None when the
        input is empty, the request fails, the API answers with a non-200
        status, or the response carries no usable choice.
    """
    # Nothing to write about: skip the API round-trip entirely.
    if not user_input or not str(user_input).strip():
        return None

    headers = {
        'Authorization': f'Bearer {OPENROUTER_API_KEY}',
        'HTTP-Referer': 'https://your-domain.com',
        'X-Title': 'AI Documentary Maker'
    }
    prompt = f"""Short Documentary Script GeneratorInstructions:
If I say "use this," just output the script exactly as I gave it.
If I only give topics, generate a script based on them.
If I provide a full script, rewrite it without any changes. Make everything short simple and humarous funny and act as serious but humarous. And don't say anything off topic. Also alway say a funny statement to subscribe based on the video topic at the end. Use normal conversational text like a normal person talking and avoid AI phase make the statements humanize and normal conversational
And always talk negative views don't really wants any positive statement and the full script will be one video about one topic so keep in mind
Formatting Rules:
Title in Square Brackets:
Each section starts with a one-word title inside [ ] (max two words if necessary).
This title will be used as a search term for Pexels footage.
Casual & Funny Narration:
Each section has 5-10 words of narration.
Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).
No Special Formatting:
No bold, italics, or special characters. You are a assistant AI your task is to create script. You aren't a chatbot. So, don't write extra text
Generalized Search Terms:
If a term is too specific, make it more general for Pexels search.
Scene-Specific Writing:
Each section describes only what should be shown in the video.
Output Only the Script, and also make it funny and humarous and helirous and also add to subscribe with a funny statement like subscribe now or .....
No extra text, just the script.
Example Output:
[North Korea]
Top 5 unknown facts about North Korea.
[Invisibility]
North Korea’s internet speed is so fast… it doesnt exist.
[Leadership]
Kim Jong-un once won an election with 100% votes… against himself.
[Magic]
North Korea discovered time travel. That’s why their news is always from the past.
[Warning]
Subscribe now, or Kim Jong-un will send you a free one-way ticket… to North Korea.
[Freedom]
North Korean citizens can do anything… as long as it's government-approved.
Now here is the Topic/scrip: {user_input}
"""
    data = {
        'model': OPENROUTER_MODEL,
        'messages': [{'role': 'user', 'content': prompt}],
        'temperature': 0.4,
        'max_tokens': 5000
    }

    try:
        response = requests.post(
            'https://openrouter.ai/api/v1/chat/completions',
            headers=headers,
            json=data,
            timeout=30
        )
    except requests.RequestException as e:
        print(f"OpenRouter request failed: {e}")
        return None

    if response.status_code != 200:
        # Report the status instead of failing silently (bad key, rate limit, ...).
        print(f"OpenRouter returned HTTP {response.status_code}")
        return None

    try:
        choices = response.json().get('choices') or []
        return choices[0]['message']['content'] if choices else None
    except (ValueError, AttributeError, KeyError, IndexError, TypeError) as e:
        print(f"Unexpected OpenRouter response format: {e}")
        return None
144
 
145
def parse_script(script_text):
    """Split a bracket-titled script into alternating media / TTS elements.

    A line beginning with "[Title]" opens a section; text following the
    closing bracket and all subsequent non-title lines form its narration.

    Args:
        script_text: Script text as returned by the language model.

    Returns:
        A flat list [media, tts, media, tts, ...] of dicts, one pair per
        section that has both a title and narration. Returns [] for empty or
        non-string input.
    """
    if not isinstance(script_text, str) or not script_text:
        return []

    # Sections are kept in an ordered list rather than a dict keyed by title,
    # so a script that repeats a title keeps every section.
    sections = []
    title = None
    parts = []
    for raw_line in script_text.splitlines():
        line = raw_line.strip()
        if line.startswith("[") and "]" in line:
            closing = line.find("]")
            if title is not None:
                sections.append((title, " ".join(parts)))
            title = line[1:closing].strip()
            inline_text = line[closing + 1:].strip()
            parts = [inline_text] if inline_text else []
        elif title and line:
            # Joining with spaces keeps inline text from being glued to the
            # first word of the following line.
            parts.append(line)
    if title is not None:
        sections.append((title, " ".join(parts)))

    elements = []
    for section_title, narration in sections:
        if not section_title or not narration:
            continue
        words = narration.split()
        # Rough estimate: half a second per word, never shorter than 3 seconds.
        duration = max(3, len(words) * 0.5)
        elements.append({"type": "media", "prompt": section_title, "effects": "fade-in"})
        elements.append({"type": "tts", "text": narration, "voice": "en", "duration": duration})
    return elements
177
 
178
def search_pexels_videos(query, pexels_api_key):
    """Return the URL of a random HD video from Pexels matching *query*.

    Up to three result pages of 15 videos are scanned; for each video the
    first file whose quality is "hd" contributes its link.

    Args:
        query: Search term.
        pexels_api_key: Pexels API key sent in the Authorization header.

    Returns:
        One randomly chosen HD video link, or None when nothing was found or
        the query / key is empty.
    """
    if not query or not pexels_api_key:
        return None

    headers = {'Authorization': pexels_api_key}
    base_url = "https://api.pexels.com/videos/search"
    num_pages = 3
    videos_per_page = 15
    hd_links = []
    for page in range(1, num_pages + 1):
        params = {"query": query, "per_page": videos_per_page, "page": page}
        try:
            response = requests.get(base_url, headers=headers, params=params, timeout=10)
            if response.status_code != 200:
                continue
            videos = response.json().get("videos", [])
        except (requests.RequestException, ValueError, AttributeError) as e:
            print(f"Pexels video search failed on page {page}: {e}")
            continue
        if not videos:
            break  # no more results; further pages would be empty too
        for video in videos:
            link = next(
                (f.get("link") for f in video.get("video_files", []) if f.get("quality") == "hd"),
                None,
            )
            if link:  # skip entries without a usable link
                hd_links.append(link)
    return random.choice(hd_links) if hd_links else None
200
 
201
def search_pexels_images(query, pexels_api_key, orientation="landscape"):
    """Return the original-size URL of a random Pexels photo for *query*.

    Args:
        query: Search term.
        pexels_api_key: Pexels API key sent in the Authorization header.
        orientation: Value for the API's "orientation" filter. Defaults to
            "landscape", the value this function always used.

    Returns:
        The "original" source URL of one of the first five photos, or None
        when the search fails, finds nothing, or query / key is empty.
    """
    if not query or not pexels_api_key:
        return None

    headers = {'Authorization': pexels_api_key}
    url = "https://api.pexels.com/v1/search"
    params = {"query": query, "per_page": 5, "orientation": orientation}
    try:
        response = requests.get(url, headers=headers, params=params, timeout=10)
        if response.status_code != 200:
            return None
        photos = response.json().get("photos", [])
    except (requests.RequestException, ValueError, AttributeError) as e:
        print(f"Pexels image search failed for '{query}': {e}")
        return None

    if not photos:
        return None
    photo = random.choice(photos[:5])
    return photo.get("src", {}).get("original")
216
 
217
def search_google_images(query):
    """Scrape a Google image-search result page for a usable image URL.

    Args:
        query: Search term.

    Returns:
        One of the first five absolute, non-"gstatic" <img> sources on the
        result page, chosen at random, or None when the query is empty, the
        request fails, or no such image exists.
    """
    if not query:
        return None
    try:
        search_url = f"https://www.google.com/search?q={quote(query)}&tbm=isch"
        headers = {"User-Agent": USER_AGENT}
        response = requests.get(search_url, headers=headers, timeout=10)
        # Do not parse error or block pages as if they were search results.
        response.raise_for_status()
    except (requests.RequestException, TypeError) as e:
        print(f"Google image search failed for '{query}': {e}")
        return None

    soup = BeautifulSoup(response.text, "html.parser")
    sources = (img.get("src", "") for img in soup.find_all("img"))
    image_urls = [
        src for src in sources
        if src and src.startswith("http") and "gstatic" not in src
    ]
    return random.choice(image_urls[:5]) if image_urls else None
228
 
229
def download_image(image_url, filename):
    """Download an image to *filename* and make sure it is a valid RGB image.

    Args:
        image_url: URL of the image to fetch.
        filename: Destination path; overwritten if it exists.

    Returns:
        *filename* on success. None on any failure, in which case a partially
        written file is removed.
    """
    if not image_url:
        return None
    try:
        headers = {"User-Agent": USER_AGENT}
        # Context managers release the streamed connection and the PIL file
        # handles even when validation fails part-way through.
        with requests.get(image_url, headers=headers, stream=True, timeout=15) as response:
            response.raise_for_status()
            with open(filename, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        with Image.open(filename) as img:
            img.verify()

        # verify() leaves the image object unusable, so reopen before converting.
        converted = None
        with Image.open(filename) as img:
            if img.mode != 'RGB':
                converted = img.convert('RGB')
        if converted is not None:
            converted.save(filename)
        return filename
    except Exception as e:
        # Best-effort download: report the reason, clean up, signal failure.
        print(f"Failed to download image {image_url}: {e}")
        if os.path.exists(filename):
            os.remove(filename)
        return None
248
-
249
def download_video(video_url, filename):
    """Stream a video from *video_url* into *filename*.

    Args:
        video_url: URL of the video to fetch.
        filename: Destination path; overwritten if it exists.

    Returns:
        *filename* on success. None on failure, in which case a partially
        written file is removed.
    """
    if not video_url:
        return None
    try:
        # The context manager releases the streamed connection on every path.
        with requests.get(video_url, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(filename, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        return filename
    except Exception as e:
        # Best-effort download: report the reason, clean up, signal failure.
        print(f"Failed to download video {video_url}: {e}")
        if os.path.exists(filename):
            os.remove(filename)
        return None
261
 
262
def generate_media(prompt, current_index=0, total_segments=1, video_probability=0.25):
    """Find and download a visual asset (image or video) for a script section.

    Lookup order:
      1. Prompts containing "news" try a Google image first.
      2. With probability *video_probability*, a Pexels video.
      3. A Pexels image for the prompt.
      4. A Pexels image for one of several generic fallback terms.

    Args:
        prompt: Section title used as the search term.
        current_index: Index of the segment (currently unused).
        total_segments: Number of segments (currently unused).
        video_probability: Chance in [0, 1] of trying a video before an
            image. Defaults to 0.25, the value previously hard-coded.

    Returns:
        {"path": <file>, "asset_type": "image" | "video"}, or None when no
        asset could be downloaded.
    """
    # A prompt made only of punctuation sanitizes to "", which would produce
    # nameless files such as ".jpg"; fall back to a neutral stem.
    safe_prompt = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_') or "media"
    # Downloads fail if the folder is missing, so do not rely on the caller.
    os.makedirs(TEMP_FOLDER, exist_ok=True)

    if "news" in prompt.lower():
        image_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}_news.jpg")
        image_url = search_google_images(prompt)
        if image_url and download_image(image_url, image_file):
            return {"path": image_file, "asset_type": "image"}

    if random.random() < video_probability:
        video_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}_video.mp4")
        video_url = search_pexels_videos(prompt, PEXELS_API_KEY)
        if video_url and download_video(video_url, video_file):
            return {"path": video_file, "asset_type": "video"}

    image_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}.jpg")
    image_url = search_pexels_images(prompt, PEXELS_API_KEY)
    if image_url and download_image(image_url, image_file):
        return {"path": image_file, "asset_type": "image"}

    fallback_terms = ["nature", "people", "landscape", "technology", "business"]
    for term in fallback_terms:
        fallback_file = os.path.join(TEMP_FOLDER, f"fallback_{term}.jpg")
        fallback_url = search_pexels_images(term, PEXELS_API_KEY)
        if fallback_url and download_image(fallback_url, fallback_file):
            return {"path": fallback_file, "asset_type": "image"}
    return None
285
-
286
def generate_tts(text, voice):
    """Synthesize *text* to a WAV file, trying Kokoro, then gTTS, then silence.

    Args:
        text: Narration to speak.
        voice: Voice identifier; "en" is mapped to the Kokoro voice "af_heart".

    Returns:
        Path of the WAV file inside TEMP_FOLDER. An existing file for the same
        text and voice is reused.
    """
    import hashlib  # local import: hashlib is not among this file's top-level imports

    safe_text = re.sub(r'[^\w\s-]', '', text[:10]).strip().replace(' ', '')
    # The first ten characters alone are not unique ("North Korea's ..." vs
    # "North Korean ..."), which made different narrations reuse each other's
    # audio. A digest of the full text and voice makes the cache key unique.
    digest = hashlib.sha1(f"{voice}\n{text}".encode("utf-8")).hexdigest()[:12]
    file_path = os.path.join(TEMP_FOLDER, f"tts{safe_text}_{digest}.wav")
    if os.path.exists(file_path):
        return file_path

    try:
        kokoro_voice = 'af_heart' if voice == 'en' else voice
        generator = pipeline(text, voice=kokoro_voice, speed=0.9, split_pattern=r'\n+')
        audio_segments = [audio for _, _, audio in generator]
        full_audio = np.concatenate(audio_segments) if len(audio_segments) > 1 else audio_segments[0]
        sf.write(file_path, full_audio, 24000)
        return file_path
    except Exception as e:
        print(f"Kokoro TTS failed, falling back to gTTS: {e}")

    mp3_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}_{digest}.mp3")
    try:
        gTTS(text=text, lang='en').save(mp3_path)
        AudioSegment.from_mp3(mp3_path).export(file_path, format="wav")
        return file_path
    except Exception as e:
        print(f"gTTS failed, writing silence instead: {e}")
    finally:
        # Remove the intermediate MP3 whether or not the conversion succeeded.
        if os.path.exists(mp3_path):
            os.remove(mp3_path)

    # Last resort: silence of the estimated narration length, so the segment
    # keeps its slot in the video.
    num_samples = int(max(3, len(text.split()) * 0.5) * 24000)
    silence = np.zeros(num_samples, dtype=np.float32)
    sf.write(file_path, silence, 24000)
    return file_path
 
 
 
312
 
313
def apply_kenburns_effect(clip, target_resolution, effect_type=None):
    """Animate a clip with a slow zoom or pan (Ken Burns effect).

    The clip is scaled to cover the target frame, enlarged by a further 15%
    to leave room for movement, and every frame is then cropped around a
    moving centre and resized to the target resolution.

    Args:
        clip: Clip exposing w, h, duration, resize() and fl().
        target_resolution: (width, height) of the output frames.
        effect_type: "zoom-in", "zoom-out", "pan-left", "pan-right" or
            "up-left". None, empty or "random" picks one at random.

    Returns:
        The clip with the per-frame transform attached via clip.fl().

    Raises:
        ValueError: If effect_type names an unsupported effect.
    """
    target_w, target_h = target_resolution

    # Scale so the clip covers the whole target frame.
    source_ratio = clip.w / clip.h
    wanted_ratio = target_w / target_h
    if source_ratio > wanted_ratio:
        scaled_h = target_h
        scaled_w = int(scaled_h * source_ratio)
    else:
        scaled_w = target_w
        scaled_h = int(scaled_w / source_ratio)
    clip = clip.resize(newsize=(scaled_w, scaled_h))

    # Enlarge once more so pans and zooms have spare pixels to move through.
    overscan = 1.15
    scaled_w = int(scaled_w * overscan)
    scaled_h = int(scaled_h * overscan)
    clip = clip.resize(newsize=(scaled_w, scaled_h))

    slack_x = scaled_w - target_w
    slack_y = scaled_h - target_h

    if not effect_type or effect_type == "random":
        effect_type = random.choice(["zoom-in", "zoom-out", "pan-left", "pan-right", "up-left"])

    middle = (scaled_w / 2, scaled_h / 2)
    mid_row = (slack_y // 2) + target_h / 2
    left_x = target_w / 2
    right_x = slack_x + target_w / 2
    # effect name -> (start zoom, end zoom, start centre, end centre)
    presets = {
        "zoom-in": (0.9, 1.1, middle, middle),
        "zoom-out": (1.1, 0.9, middle, middle),
        "pan-left": (1.0, 1.0, (right_x, mid_row), (left_x, mid_row)),
        "pan-right": (1.0, 1.0, (left_x, mid_row), (right_x, mid_row)),
        "up-left": (1.0, 1.0, (right_x, slack_y + target_h / 2), (left_x, target_h / 2)),
    }
    if effect_type not in presets:
        raise ValueError(f"Unsupported effect_type: {effect_type}")
    start_zoom, end_zoom, start_center, end_center = presets[effect_type]

    def transform_frame(get_frame, t):
        frame = get_frame(t)
        # Cosine easing: progress runs 0 -> 1 with a gentle start and stop.
        if clip.duration > 0:
            ratio = 0.5 - 0.5 * math.cos(math.pi * t / clip.duration)
        else:
            ratio = 0
        current_zoom = start_zoom + (end_zoom - start_zoom) * ratio
        crop_w = int(target_w / current_zoom)
        crop_h = int(target_h / current_zoom)
        center_x = start_center[0] + (end_center[0] - start_center[0]) * ratio
        center_y = start_center[1] + (end_center[1] - start_center[1]) * ratio
        # Keep the crop window inside the enlarged frame.
        center_x = max(crop_w / 2, min(center_x, scaled_w - crop_w / 2))
        center_y = max(crop_h / 2, min(center_y, scaled_h - crop_h / 2))
        cropped_frame = cv2.getRectSubPix(frame, (crop_w, crop_h), (center_x, center_y))
        return cv2.resize(cropped_frame, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)

    return clip.fl(transform_frame)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
 
367
def resize_to_fill(clip, target_resolution):
    """Scale and centre-crop a clip so it exactly fills the target frame.

    Args:
        clip: Clip exposing w, h, resize() and crop().
        target_resolution: (width, height) to fill.

    Returns:
        The resized and cropped clip.
    """
    target_w, target_h = target_resolution

    if clip.w / clip.h > target_w / target_h:
        # Relatively wider than the target: match heights, trim the sides.
        fitted = clip.resize(height=target_h)
        margin = (fitted.w - target_w) / 2
        return fitted.crop(x1=margin, x2=fitted.w - margin, y1=0, y2=fitted.h)

    # Relatively taller (or equal): match widths, trim top and bottom.
    fitted = clip.resize(width=target_w)
    margin = (fitted.h - target_h) / 2
    return fitted.crop(x1=0, x2=fitted.w, y1=margin, y2=fitted.h - margin)
 
 
380
 
381
def add_background_music(final_video, bg_music_volume=0.08):
    """Mix "background_music.mp3" under the video's audio if the file exists.

    The music is looped when shorter than the video, trimmed to the video's
    duration, and scaled by *bg_music_volume*.

    Args:
        final_video: Video clip to add music to.
        bg_music_volume: Volume multiplier applied to the music track.

    Returns:
        The video with the mixed audio track, or *final_video* unchanged when
        the music file is missing or has no duration.
    """
    bg_music_path = "background_music.mp3"
    if not os.path.exists(bg_music_path):
        return final_video

    # These two names are used below but are not among the names this file
    # imports from moviepy.editor, so reaching this branch raised NameError.
    from moviepy.editor import CompositeAudioClip, concatenate_audioclips

    bg_music = AudioFileClip(bg_music_path)
    if not bg_music.duration:
        return final_video  # nothing to loop; also avoids division by zero

    if bg_music.duration < final_video.duration:
        loops_needed = math.ceil(final_video.duration / bg_music.duration)
        bg_music = concatenate_audioclips([bg_music] * loops_needed)
    bg_music = bg_music.subclip(0, final_video.duration)
    bg_music = bg_music.volumex(bg_music_volume)

    # A video without an audio track has audio None, which cannot be mixed.
    tracks = [track for track in (final_video.audio, bg_music) if track is not None]
    return final_video.set_audio(CompositeAudioClip(tracks))
 
 
 
 
 
 
 
395
 
396
def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, narration_text=None, segment_index=0):
    """Build one video segment: visual asset, narration audio, optional captions.

    The segment lasts as long as the narration plus 0.2 s. Videos are
    resized/cropped and looped or trimmed; images get a Ken Burns animation
    with fades. Captions are drawn in chunks of five words unless
    CAPTION_COLOR is "transparent".

    Args:
        media_path: Path of the image or video file.
        asset_type: "video" or "image".
        tts_path: Path of the narration audio file.
        duration: Unused; the duration comes from the audio file.
        effects: Unused.
        narration_text: Text for the captions, if any.
        segment_index: Unused.

    Returns:
        The composed clip, or None if an input is missing, the asset type is
        unknown, or building the clip fails.
    """
    try:
        # Validate before opening anything so no reader is left open.
        if asset_type not in ("video", "image"):
            return None
        if not os.path.exists(media_path) or not os.path.exists(tts_path):
            return None

        audio_clip = AudioFileClip(tts_path).audio_fadeout(0.2)
        target_duration = audio_clip.duration + 0.2

        if asset_type == "video":
            clip = VideoFileClip(media_path)
            clip = resize_to_fill(clip, TARGET_RESOLUTION)
            if clip.duration < target_duration:
                clip = clip.loop(duration=target_duration)
            else:
                clip = clip.subclip(0, target_duration)
        else:
            with Image.open(media_path) as img:
                if img.mode != 'RGB':
                    # Close the temp file handle before PIL writes to the path.
                    temp = tempfile.NamedTemporaryFile(suffix='.jpg', delete=False)
                    temp.close()
                    img.convert('RGB').save(temp.name)
                    media_path = temp.name
            clip = ImageClip(media_path).set_duration(target_duration)
            clip = apply_kenburns_effect(clip, TARGET_RESOLUTION)
            clip = clip.fadein(0.3).fadeout(0.3)

        if narration_text and CAPTION_COLOR != "transparent":
            words = narration_text.split()
            chunks = [' '.join(words[i:i + 5]) for i in range(0, len(words), 5)]
            # Whitespace-only narration yields no chunks; skipping avoids a
            # division by zero that used to discard the whole segment.
            if chunks:
                chunk_duration = audio_clip.duration / len(chunks)
                subtitle_y_position = int(TARGET_RESOLUTION[1] * 0.70)
                # NOTE(review): an integer pixel width is assumed to be the
                # safe form for TextClip's size argument -- confirm.
                caption_width = int(TARGET_RESOLUTION[0] * 0.8)
                subtitle_clips = []
                for i, chunk_text in enumerate(chunks):
                    txt_clip = TextClip(
                        chunk_text,
                        fontsize=45,
                        font='Arial-Bold',
                        color=CAPTION_COLOR,
                        bg_color='rgba(0, 0, 0, 0.25)',
                        method='caption',
                        align='center',
                        stroke_width=2,
                        stroke_color=CAPTION_COLOR,
                        size=(caption_width, None)
                    ).set_start(i * chunk_duration).set_end((i + 1) * chunk_duration)
                    subtitle_clips.append(txt_clip.set_position(('center', subtitle_y_position)))
                clip = CompositeVideoClip([clip] + subtitle_clips)

        return clip.set_audio(audio_clip)
    except Exception as e:
        # One bad segment must not abort the whole video; report and skip it.
        print(f"Failed to create clip for segment {segment_index}: {e}")
        return None
 
 
 
 
 
 
445
 
446
  # Main Gradio Function
447
def generate_video(video_concept, resolution, caption_option):
    """Generate the complete video for a concept (Gradio click handler).

    Pipeline: script generation -> parsing -> per-section media download,
    narration and clip assembly -> concatenation -> background music ->
    encoding to OUTPUT_VIDEO_FILENAME.

    Args:
        video_concept: Topic or script entered by the user.
        resolution: "Full" for 1920x1080; anything else gives 1080x1920.
        caption_option: "Yes" for white captions; anything else disables them.

    Returns:
        OUTPUT_VIDEO_FILENAME on success, otherwise a short error message.
        NOTE(review): the error strings go to a gr.Video output, which
        presumably expects a file path -- confirm how the UI shows them.
    """
    global TARGET_RESOLUTION, CAPTION_COLOR
    # shutil is used below but is not among this file's top-level imports.
    import shutil

    TARGET_RESOLUTION = (1920, 1080) if resolution == "Full" else (1080, 1920)
    CAPTION_COLOR = "white" if caption_option == "Yes" else "transparent"

    # Start from an empty working directory.
    if os.path.exists(TEMP_FOLDER):
        shutil.rmtree(TEMP_FOLDER)
    os.makedirs(TEMP_FOLDER)

    try:
        script = generate_script(video_concept)
        if not script:
            return "Failed to generate script."

        elements = parse_script(script)
        if not elements:
            return "Failed to parse script."

        # Elements alternate media, tts, media, tts, ...; pair them up.
        paired_elements = list(zip(elements[::2], elements[1::2]))
        if not paired_elements:
            return "No valid script segments found."

        clips = []
        for idx, (media_elem, tts_elem) in enumerate(paired_elements):
            media_asset = generate_media(
                media_elem['prompt'], current_index=idx, total_segments=len(paired_elements)
            )
            if not media_asset:
                continue
            tts_path = generate_tts(tts_elem['text'], tts_elem['voice'])
            if not tts_path:
                continue
            clip = create_clip(
                media_path=media_asset['path'],
                asset_type=media_asset['asset_type'],
                tts_path=tts_path,
                duration=tts_elem['duration'],
                effects=media_elem.get('effects', 'fade-in'),
                narration_text=tts_elem['text'],
                segment_index=idx
            )
            if clip:
                clips.append(clip)

        if not clips:
            return "No clips were successfully created."

        final_video = concatenate_videoclips(clips, method="compose")
        final_video = add_background_music(final_video, bg_music_volume=0.08)
        final_video.write_videofile(OUTPUT_VIDEO_FILENAME, codec='libx264', fps=24, preset='veryfast')
        return OUTPUT_VIDEO_FILENAME
    finally:
        # Remove the working directory on every exit path, not only success.
        shutil.rmtree(TEMP_FOLDER, ignore_errors=True)
489
 
490
# Gradio Interface
# NOTE(review): the scraped source lost its indentation; the three inputs are
# assumed to sit inside the Row -- confirm against the real file.
with gr.Blocks() as demo:
    gr.Markdown("# AI Documentary Video Generator")
    with gr.Row():
        video_concept = gr.Textbox(label="Video Concept", placeholder="Enter your video concept here...")
        resolution = gr.Dropdown(["Full", "Short"], label="Resolution", value="Full")
        caption_option = gr.Dropdown(["Yes", "No"], label="Caption", value="Yes")
    generate_btn = gr.Button("Generate Video")
    output_video = gr.Video(label="Generated Video")
    generate_btn.click(generate_video, inputs=[video_concept, resolution, caption_option], outputs=output_video)

# Launch only when executed as a script, so importing this module (tests,
# tooling) does not start a web server.
if __name__ == "__main__":
    demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ import shutil # Added for directory cleanup
3
+ import requests
4
+ import io
5
+ import time
6
+ import re
7
  import random
8
+ import tempfile # Added for use in create_clip
9
  import math
10
+ import cv2
 
 
 
 
 
 
 
 
 
 
11
  import numpy as np
12
+ import soundfile as sf
13
+ import torch
14
+ import gradio as gr
15
  import pysrt
 
 
 
 
16
  from bs4 import BeautifulSoup
17
  from urllib.parse import quote
18
+ from PIL import Image, ImageDraw, ImageFont
19
+ from gtts import gTTS
20
+ from pydub import AudioSegment
21
+ from pydub.generators import Sine
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
# Import moviepy components correctly
try:
    from moviepy.editor import (
        VideoFileClip, AudioFileClip, ImageClip, concatenate_videoclips,
        CompositeVideoClip, TextClip, CompositeAudioClip
    )
    import moviepy.video.fx.all as vfx
    import moviepy.config as mpy_config
except ImportError as e:
    # moviepy is mandatory. SystemExit with a message terminates with a
    # non-zero status and does not rely on the interactive-only exit() helper
    # installed by the site module.
    raise SystemExit(
        "Error: moviepy library not found. Please install it using 'pip install moviepy'."
    ) from e

# Set ImageMagick binary (adjust path if necessary for your environment).
# TextClip needs ImageMagick: when running locally, make sure it is installed;
# on Hugging Face Spaces, add 'imagemagick' to a packages.txt file.
try:
    mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})  # Common Linux path
    print("ImageMagick path set.")
except Exception as e:
    # Captions may fail later, but the rest of the app can still run.
    print(f"Warning: Could not configure ImageMagick path. TextClip might fail. Error: {e}")
47
 
48
# Import Kokoro (ensure it's installed)
try:
    from kokoro import KPipeline
    # Initialize Kokoro TTS pipeline.
    # Kokoro language codes are single letters; 'a' selects American English,
    # which matches the 'af_heart' voice used for narration. 'en' is not an
    # accepted code, so using it makes initialization fail and silently
    # disables Kokoro via the handler below.
    pipeline = KPipeline(lang_code='a')
    print("Kokoro Pipeline Initialized.")
except ImportError:
    print("Warning: Kokoro library not found. TTS generation will rely solely on gTTS.")
    pipeline = None
except Exception as e:
    print(f"Warning: Failed to initialize Kokoro Pipeline. TTS generation will rely solely on gTTS. Error: {e}")
    pipeline = None
61
 
62
 
63
# Global Configuration
# --- IMPORTANT: provide the keys through environment variables ---
# "or" (rather than a getenv default) also maps a variable that is set but
# empty to the placeholder, so the warning below fires in that case too.
PEXELS_API_KEY = os.getenv('PEXELS_API_KEY') or 'YOUR_PEXELS_API_KEY_HERE'
OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY') or 'YOUR_OPENROUTER_API_KEY_HERE'
# --- ---

if PEXELS_API_KEY == 'YOUR_PEXELS_API_KEY_HERE' or OPENROUTER_API_KEY == 'YOUR_OPENROUTER_API_KEY_HERE':
    print("\n*** WARNING: API keys are not set. Please set PEXELS_API_KEY and OPENROUTER_API_KEY environment variables or replace the placeholders in the script. ***\n")

OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"  # Or choose another model
TEMP_FOLDER = "temp_video_processing"
OUTPUT_VIDEO_FILENAME = "final_video.mp4"
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
BACKGROUND_MUSIC_PATH = "background_music.mp3"  # Ensure this file exists

# --- Global variables to be set by Gradio input ---
TARGET_RESOLUTION = (1920, 1080)  # Default, will be updated
CAPTION_COLOR = "white"  # Default, will be updated
81
 
82
  # Helper Functions
83
+
84
def generate_script(user_input):
    """Generates a script using the OpenRouter API based on user input.

    Args:
        user_input: Topic or full script supplied by the user.

    Returns:
        The script text with anything before the first "[" removed, or None
        when the input is empty, the API key is not configured, the request
        fails, or the response contains no usable text.
    """
    # Nothing to write about: skip the API round-trip entirely.
    if not user_input or not str(user_input).strip():
        print("Error: No topic or script provided.")
        return None

    if not OPENROUTER_API_KEY or OPENROUTER_API_KEY == 'YOUR_OPENROUTER_API_KEY_HERE':
        print("Error: OpenRouter API Key not configured.")
        return None

    headers = {
        'Authorization': f'Bearer {OPENROUTER_API_KEY}',
        'HTTP-Referer': 'https://your-app-name.hf.space',  # Optional: Replace with your app's URL if deployed
        'X-Title': 'AI Documentary Maker'  # Optional
    }
    prompt = f"""Short Documentary Script Generator Instructions:
If I say "use this," just output the script exactly as I gave it.
If I only give topics, generate a script based on them.
If I provide a full script, rewrite it without any changes.
Make everything short, simple, humorous, funny, and act serious but humorous.
Don't say anything off-topic.
Always include a funny statement to subscribe based on the video topic at the end.
Use normal conversational text like a normal person talking, avoid AI phrases, make statements humanized and normal conversational.
Always talk negative views, don't really want any positive statements.
The full script will be one video about one topic, so keep that in mind.

Formatting Rules:
Title in Square Brackets: Each section starts with a one-word title inside [ ] (max two words if necessary). This title will be used as a search term for Pexels footage.
Casual & Funny Narration: Each section has 5-15 words of narration. Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).
No Special Formatting: No bold, italics, or special characters. You are an assistant AI; your task is to create the script. You aren't a chatbot. So, don't write extra text.
Generalized Search Terms: If a term is too specific, make it more general for Pexels search.
Scene-Specific Writing: Each section describes only what should be shown in the video.
Output Only the Script: No extra text, just the script.

Example Output:
[North Korea]
Top 5 unknown facts about North Korea, maybe.
[Invisibility]
North Korea’s internet speed is so fast… it’s basically dial-up from 1998.
[Leadership]
Kim Jong-un once won an election with 100% votes… because who would vote against him?
[Magic]
North Korea discovered unicorns. They're delicious, apparently.
[Warning]
Subscribe now, or Kim Jong-un might send you a strongly worded letter.
[Freedom]
North Korean citizens enjoy unparalleled freedom... to agree with the government.

Now here is the Topic/script: {user_input}
"""
    data = {
        'model': OPENROUTER_MODEL,
        'messages': [{'role': 'user', 'content': prompt}],
        'temperature': 0.5,  # Slightly increased for more variety in humor
        'max_tokens': 1000  # Reduced slightly, adjust if scripts get cut off
    }
    try:
        response = requests.post(
            'https://openrouter.ai/api/v1/chat/completions',
            headers=headers,
            json=data,
            timeout=45  # Increased timeout
        )
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        response_data = response.json()
        choices = response_data.get('choices') if isinstance(response_data, dict) else None
        if not choices:
            print(f"Error: No choices found in OpenRouter response. Response: {response_data}")
            return None

        script_content = choices[0]['message']['content']
        if not isinstance(script_content, str):
            # The API can return a choice without text (e.g. content null).
            print(f"Error: OpenRouter response contained no text. Response: {response_data}")
            return None

        # Basic cleaning: drop any preamble the model put before the first
        # bracketed title.
        first_bracket = script_content.find('[')
        if first_bracket > 0:
            script_content = script_content[first_bracket:]
        script_content = script_content.strip()
        print(f"Generated Script:\n{script_content}")  # Log the script
        return script_content
    except requests.exceptions.RequestException as e:
        print(f"Error calling OpenRouter API: {e}")
        return None
    except Exception as e:
        print(f"An unexpected error occurred during script generation: {e}")
        return None
162
 
163
def parse_script(script_text):
    """Parses the generated script text into structured elements.

    A line of the form "[Title] optional text" opens a section; following
    non-empty lines are appended to its narration. If no section is found
    and there are at least two non-empty lines, the first line becomes the
    title and the rest the narration.

    Args:
        script_text: Script text as returned by generate_script().

    Returns:
        A flat list [media, tts, media, tts, ...] of dicts, one pair per
        section with a title and narration; [] for empty input or on error.
    """
    if not script_text:
        return []
    try:
        # Ordered (title, narration) pairs. A list rather than a dict keyed
        # by title, so a script that repeats a title keeps every section.
        sections = []
        current_title = None
        current_text = ""
        for line in script_text.splitlines():
            line = line.strip()
            if not line:  # Skip empty lines
                continue
            match = re.match(r'^\[([^\]]+)\](.*)', line)
            if match:
                # If a title was being processed, save it
                if current_title is not None and current_text:
                    sections.append((current_title, current_text.strip()))
                current_title = match.group(1).strip()
                current_text = match.group(2).strip()
            elif current_title:  # Append to the text of the current title
                current_text += " " + line  # Add space between lines

        # Save the last section
        if current_title is not None and current_text:
            sections.append((current_title, current_text.strip()))

        if not sections:
            print("Warning: Script parsing resulted in no sections.")
            lines = [l.strip() for l in script_text.splitlines() if l.strip()]
            if len(lines) >= 2:  # Basic fallback: assume first line is title, the rest is text
                print("Attempting basic fallback parsing.")
                title = lines[0].replace('[', '').replace(']', '')
                sections.append((title, ' '.join(lines[1:])))

        print(f"Parsed Sections: {sections}")  # Log parsed sections

        elements = []
        for title, narration in sections:
            if not title or not narration:
                print(f"Skipping empty section: Title='{title}', Narration='{narration}'")
                continue
            # The title doubles as the media search prompt; "random" picks a
            # random Ken Burns effect.
            media_element = {"type": "media", "prompt": title, "effects": "random"}
            # Approximate duration: 2 s base + 0.4 s per word, clamped to 3-10 s.
            words = narration.split()
            duration = min(10.0, max(3.0, 2.0 + len(words) * 0.4))
            tts_element = {"type": "tts", "text": narration, "voice": "en", "duration": duration}
            elements.append(media_element)
            elements.append(tts_element)

        if not elements:
            print("Error: No elements created after parsing.")
        return elements
    except Exception as e:
        print(f"Error parsing script: {e}\nScript content was:\n{script_text}")
        return []
223
 
224
def search_pexels(query, api_key, media_type="videos"):
    """Search Pexels and return the URL of one randomly chosen result.

    Args:
        query: Search phrase.
        api_key: Pexels API key, sent in the ``Authorization`` header.
        media_type: ``"videos"`` searches videos; any other value (the callers
            in this file pass ``"photos"``) searches photos.

    Returns:
        A direct media URL (video file link or the photo's ``original``
        source), or ``None`` when the key is missing or nothing was found.
    """
    if not api_key or api_key == 'YOUR_PEXELS_API_KEY_HERE':
        print("Error: Pexels API Key not configured.")
        return None

    is_video = media_type == "videos"
    # Video search lives at /videos/search, but photo search is served from the
    # versioned /v1/search endpoint; "/photos/search" does not exist.
    if is_video:
        base_url = "https://api.pexels.com/videos/search"
    else:
        base_url = "https://api.pexels.com/v1/search"

    headers = {'Authorization': api_key}
    orientation = "landscape" if TARGET_RESOLUTION[0] > TARGET_RESOLUTION[1] else "portrait"
    # Prefer "fhd" for 1920-class targets; plain "hd" is always accepted too.
    wanted_quality = "fhd" if max(TARGET_RESOLUTION[0], TARGET_RESOLUTION[1]) >= 1920 else "hd"

    results = []
    for page in range(1, 4):  # Check the first 3 pages
        try:
            params = {"query": query, "per_page": 15, "page": page, "orientation": orientation}
            response = requests.get(base_url, headers=headers, params=params, timeout=15)
            response.raise_for_status()
            data = response.json()

            media_items = data.get("videos" if is_video else "photos", [])
            if not media_items:
                # An empty page means later pages are empty as well.
                break

            for item in media_items:
                if is_video:
                    video_files = item.get("video_files", [])
                    link = None
                    for candidate in video_files:
                        if candidate.get("quality") in (wanted_quality, "hd"):
                            link = candidate.get("link")
                            break
                    if not link and video_files:
                        # Fall back to the first rendition when no HD one exists.
                        link = video_files[0].get("link")
                else:
                    # Take the original size; resizing happens later.
                    link = item.get("src", {}).get("original")
                if link:
                    results.append(link)

        except requests.exceptions.RequestException as e:
            print(f"Warning: Pexels API request failed for '{query}' (page {page}, {media_type}): {e}")
            # Don't stop searching on a single page failure
            continue
        except Exception as e:
            print(f"Warning: Unexpected error during Pexels search for '{query}': {e}")
            continue

    if results:
        print(f"Found {len(results)} Pexels {media_type} for '{query}'. Choosing one randomly.")
        return random.choice(results)

    print(f"Warning: No Pexels {media_type} found for query: '{query}'")
    return None
289
 
290
def search_google_images(query):
    """Scrape a Google Images result page and return one candidate image URL.

    This relies on the current HTML layout of the results page and is
    therefore fragile; callers should treat it as a best-effort fallback.

    Args:
        query: Search phrase (URL-quoted before being sent).

    Returns:
        One randomly chosen URL among the first ten ``<img>`` sources that look
        like downloadable image files, or ``None`` if none were found or the
        request failed.
    """
    print(f"Attempting Google Image search for: '{query}' (Use with caution)")
    try:
        search_url = f"https://www.google.com/search?q={quote(query)}&tbm=isch&safe=active"  # Safe search on
        headers = {"User-Agent": USER_AGENT}
        response = requests.get(search_url, headers=headers, timeout=15)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        image_urls = []
        for img in soup.find_all("img"):
            src = img.get("src") or img.get("data-src")
            # Only absolute http(s) URLs can be downloaded later; inline
            # data: URIs are deliberately not collected.
            if not src or not src.startswith("http"):
                continue
            if "gstatic" in src or "googlelogo" in src:
                continue
            # Accept a known image extension even when it is followed by a
            # query string or fragment (e.g. ".jpg?w=800").
            if re.search(r'\.(jpg|jpeg|png|webp)(?:[?#]|$)', src, re.IGNORECASE):
                image_urls.append(src)
            if len(image_urls) >= 10:  # Consider only the first 10 candidates
                break

        if image_urls:
            print(f"Found {len(image_urls)} potential Google Images for '{query}'. Choosing one.")
            return random.choice(image_urls)

        print(f"Warning: No suitable Google Images found for query: '{query}'")
        return None
    except requests.exceptions.RequestException as e:
        print(f"Warning: Google Image search failed for '{query}': {e}")
        return None
    except Exception as e:
        print(f"Warning: Error parsing Google Image search results for '{query}': {e}")
        return None
327
 
328
def download_media(media_url, filename):
    """Download a media file to ``filename`` and sanity-check the result.

    Image files (by extension) are verified with PIL and re-saved as RGB JPEG
    data when their mode is not RGB. Video files (by extension) are rejected
    when smaller than 1 KiB. Invalid or partial files are removed.

    Args:
        media_url: URL to fetch.
        filename: Destination path.

    Returns:
        ``filename`` on success, ``None`` on any failure.
    """
    try:
        headers = {"User-Agent": USER_AGENT}  # Use User-Agent for downloads too
        # The context manager releases the streamed connection even if writing
        # the file fails part-way through.
        with requests.get(media_url, headers=headers, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(filename, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        print(f"Successfully downloaded media to {filename}")

        lowered = filename.lower()
        if lowered.endswith(('.png', '.jpg', '.jpeg', '.webp')):
            try:
                # verify() leaves the image object unusable, so it gets its own
                # handle and the file is opened again for the mode check.
                with Image.open(filename) as probe:
                    probe.verify()

                converted = None
                with Image.open(filename) as img:
                    if img.mode != 'RGB':
                        print(f"Converting image {filename} to RGB.")
                        converted = img.convert('RGB')
                if converted is not None:
                    # Written after the source handle is closed so the file is
                    # not open for reading while being overwritten.
                    converted.save(filename, "JPEG")  # Save as JPEG for compatibility
                    converted.close()
            except (IOError, SyntaxError, Image.UnidentifiedImageError) as img_e:
                print(f"Warning: Downloaded file {filename} is not a valid image or is corrupted: {img_e}. Removing.")
                os.remove(filename)
                return None
        elif lowered.endswith(('.mp4', '.mov', '.avi')):
            # Basic video check: a tiny file is almost certainly an error page.
            if os.path.getsize(filename) < 1024:
                print(f"Warning: Downloaded video file {filename} is suspiciously small. Removing.")
                os.remove(filename)
                return None

        return filename
    except requests.exceptions.RequestException as e:
        print(f"Error downloading media from {media_url}: {e}")
        if os.path.exists(filename):
            os.remove(filename)
        return None
    except Exception as e:
        print(f"An unexpected error occurred during media download: {e}")
        if os.path.exists(filename):
            os.remove(filename)
        return None
373
 
374
def generate_media(prompt, current_index=0, total_segments=1):
    """Find and download a visual asset for ``prompt``.

    Strategy, in order:
      1. Pexels video
      2. Pexels photo
      3. Google Images, only for "newsy" or long (more than 4 words) prompts
      4. A generic Pexels photo for a random fallback term

    Args:
        prompt: Search phrase describing the desired visual.
        current_index: Segment index; used to keep downloaded filenames unique
            per segment.
        total_segments: Accepted for interface compatibility; not used.

    Returns:
        ``{"path": <file>, "asset_type": "video" | "image"}`` or ``None`` when
        every strategy failed.
    """
    safe_prompt = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')
    # Bound the length (long prompts would otherwise produce overlong
    # filenames) and add the segment index so two segments with the same
    # prompt never overwrite each other's file.
    file_stem = f"{safe_prompt[:60] or 'media'}_{current_index}"
    print(f"\n--- Generating Media for Prompt: '{prompt}' ---")

    # 1. Try Pexels Video
    video_url = search_pexels(prompt, PEXELS_API_KEY, media_type="videos")
    if video_url:
        video_file = os.path.join(TEMP_FOLDER, f"{file_stem}_video.mp4")
        if download_media(video_url, video_file):
            print(f"Using Pexels video for '{prompt}'")
            return {"path": video_file, "asset_type": "video"}
        print(f"Failed to download Pexels video for '{prompt}'.")

    # 2. Try Pexels Image
    image_url = search_pexels(prompt, PEXELS_API_KEY, media_type="photos")
    if image_url:
        image_file = os.path.join(TEMP_FOLDER, f"{file_stem}_image.jpg")
        if download_media(image_url, image_file):
            print(f"Using Pexels image for '{prompt}'")
            return {"path": image_file, "asset_type": "image"}
        print(f"Failed to download Pexels image for '{prompt}'.")

    # 3. Try Google Image (heuristic: newsy or very specific prompts)
    lowered_prompt = prompt.lower()
    if "news" in lowered_prompt or "breaking" in lowered_prompt or len(prompt.split()) > 4:
        google_image_url = search_google_images(prompt)
        if google_image_url:
            google_image_file = os.path.join(TEMP_FOLDER, f"{file_stem}_google_image.jpg")
            if download_media(google_image_url, google_image_file):
                print(f"Using Google image for '{prompt}' as fallback.")
                return {"path": google_image_file, "asset_type": "image"}
            print(f"Failed to download Google image for '{prompt}'.")

    # 4. Fallback to generic Pexels images
    print(f"Could not find specific media for '{prompt}'. Using generic fallback.")
    fallback_terms = ["abstract", "technology", "texture", "nature", "cityscape"]
    random.shuffle(fallback_terms)  # Try the fallbacks in a different order each time
    for term in fallback_terms:
        fallback_url = search_pexels(term, PEXELS_API_KEY, media_type="photos")
        if not fallback_url:
            continue
        fallback_file = os.path.join(TEMP_FOLDER, f"fallback_{term}_{current_index}.jpg")
        if download_media(fallback_url, fallback_file):
            print(f"Using fallback Pexels image ('{term}')")
            return {"path": fallback_file, "asset_type": "image"}
        print(f"Failed to download fallback Pexels image ('{term}')")

    print(f"Error: Failed to generate any media for prompt: '{prompt}'")
    return None  # Failed to get any media
433
+
434
def generate_tts(text, voice="en"):
    """Synthesize ``text`` to a WAV file, trying Kokoro, then gTTS, then silence.

    Args:
        text: Narration to speak.
        voice: Voice/language code. ``"en"`` is mapped to the Kokoro voice
            ``af_heart``; for gTTS the value is used as the language code.

    Returns:
        Path of the generated WAV file inside ``TEMP_FOLDER``, or ``None`` if
        even the silent fallback could not be written.
    """
    safe_text = re.sub(r'[^\w\s-]', '', text[:15]).strip().replace(' ', '_') or "tts"
    # The readable prefix alone is not unique: two narrations that share their
    # first 15 characters would overwrite each other's audio. The suffix is
    # derived from the full text; it only needs to be stable within one
    # process because the temp folder is recreated for every run.
    unique_suffix = format(hash((voice, text)) & 0xFFFFFFFF, '08x')
    file_stem = f"{safe_text}_{unique_suffix}"
    file_path = os.path.join(TEMP_FOLDER, f"{file_stem}.wav")

    # Attempt Kokoro first if available
    if pipeline:
        try:
            print(f"Generating TTS with Kokoro for: '{text[:30]}...'")
            kokoro_voice = 'af_heart' if voice == 'en' else voice
            generator = pipeline(text, voice=kokoro_voice, speed=0.95, split_pattern=r'\n+|[.!?]+')

            segments = []
            for _, _, audio in generator:
                if audio is None:
                    continue
                # NOTE(review): newer Kokoro releases appear to yield torch
                # tensors rather than numpy arrays - convert instead of dropping.
                if hasattr(audio, "detach"):
                    audio = audio.detach().cpu().numpy()
                audio = np.asarray(audio)
                if audio.size > 0:
                    segments.append(audio)

            if not segments:
                raise ValueError("Kokoro returned empty or invalid audio segments.")

            # soundfile is given float32 samples in [-1, 1].
            full_audio = np.concatenate(segments).astype(np.float32, copy=False)
            peak = float(np.max(np.abs(full_audio)))
            if peak > 1.0:
                # Integer-scaled output: normalize to the peak.
                full_audio = full_audio / peak

            sf.write(file_path, full_audio, 24000)  # Kokoro typically outputs at 24kHz
            print(f"Kokoro TTS generated successfully: {file_path}")
            return file_path
        except Exception as e:
            print(f"Warning: Kokoro TTS failed: {e}. Falling back to gTTS.")
            # Fall through to gTTS

    # Fallback to gTTS
    mp3_path = os.path.join(TEMP_FOLDER, f"tts_{file_stem}.mp3")
    try:
        print(f"Generating TTS with gTTS for: '{text[:30]}...'")
        tts = gTTS(text=text, lang=voice, slow=False)  # voice doubles as the gTTS language code
        tts.save(mp3_path)
        # Export as WAV for consistency with the Kokoro path.
        AudioSegment.from_mp3(mp3_path).export(file_path, format="wav")
        print(f"gTTS TTS generated successfully: {file_path}")
        return file_path
    except Exception as e:
        print(f"Error: gTTS also failed: {e}. Generating silence.")
    finally:
        # Clean up the temporary mp3 whether or not the conversion worked.
        if os.path.exists(mp3_path):
            try:
                os.remove(mp3_path)
            except OSError as cleanup_e:
                print(f"Warning: Could not remove temporary file {mp3_path}: {cleanup_e}")

    # Final fallback: generate silence
    try:
        # Estimate duration from the word count (same formula as the parser).
        words = text.split()
        duration_seconds = min(10.0, max(3.0, 2.0 + len(words) * 0.4))
        samplerate = 24000  # Match Kokoro's typical rate
        silence = np.zeros(int(duration_seconds * samplerate), dtype=np.float32)
        sf.write(file_path, silence, samplerate)
        print(f"Generated silence fallback: {file_path} ({duration_seconds:.1f}s)")
        return file_path
    except Exception as silence_e:
        print(f"Error: Failed even to generate silence: {silence_e}")
        return None  # Complete failure
505
 
506
def apply_kenburns_effect(clip, target_resolution, effect_type="random"):
    """Apply a Ken Burns style zoom/pan to an image clip.

    The image is first scaled so it covers the target area with 20% headroom.
    For every frame a window is then cropped from that enlarged image and
    resized to the target resolution; moving or resizing the window over time
    produces the pan or zoom.

    Args:
        clip: Image clip to animate (its first frame is used as the source).
        target_resolution: ``(width, height)`` of the output frames.
        effect_type: One of ``"zoom-in"``, ``"zoom-out"``, ``"slow-zoom"``,
            ``"pan-left"``, ``"pan-right"``, ``"pan-up"``, ``"pan-down"`` or
            ``"random"``. Any other value yields a static centered crop.

    Returns:
        A clip whose frames have the target resolution.
    """
    target_w, target_h = target_resolution

    # Keep the source size in locals instead of assigning to clip.w/clip.h:
    # in moviepy 1.x those are derived from clip.size and cannot be assigned.
    try:
        src_w, src_h = clip.w, clip.h
    except (AttributeError, TypeError):
        src_w = src_h = 0
    if not src_w or not src_h:
        print("Warning: Clip dimensions not found for Ken Burns effect. Using target resolution.")
        try:
            probe = clip.get_frame(0)
            src_w, src_h = probe.shape[1], probe.shape[0]
        except Exception:
            src_w, src_h = target_w, target_h  # Fallback

    # Size that covers the target area while keeping the aspect ratio.
    clip_aspect = src_w / src_h
    target_aspect = target_w / target_h
    if clip_aspect > target_aspect:  # Image is wider than target
        new_height = target_h
        new_width = int(new_height * clip_aspect)
    else:  # Image is taller than (or matches) target
        new_width = target_w
        new_height = int(new_width / clip_aspect)

    # Enlarge beyond the cover size so there is room to zoom and pan.
    base_scale = 1.20
    zoom_width = int(new_width * base_scale)
    zoom_height = int(new_height * base_scale)
    duration = clip.duration

    # Use PIL for the initial resize; fall back to moviepy's resize on failure.
    try:
        # Pillow >= 9.1 exposes filters under Image.Resampling; older releases
        # only have them on the Image module itself.
        lanczos = getattr(Image, "Resampling", Image).LANCZOS
        pil_img = Image.fromarray(clip.get_frame(0))
        resized_pil = pil_img.resize((zoom_width, zoom_height), lanczos)
        clip = ImageClip(np.array(resized_pil)).set_duration(duration)
    except Exception as pil_e:
        print(f"Warning: PIL resize failed ({pil_e}). Using moviepy resize.")
        clip = clip.resize(newsize=(zoom_width, zoom_height))

    available_effects = ["zoom-in", "zoom-out", "pan-left", "pan-right", "pan-up", "pan-down", "slow-zoom"]
    if effect_type == "random":
        effect_type = random.choice(available_effects)
    print(f"Applying Ken Burns effect: {effect_type}")

    # The crop window is target_size / zoom, so a *larger* zoom value means a
    # smaller window and therefore a tighter (more zoomed-in) picture.
    start_zoom, end_zoom = 1.0, 1.0
    start_center_x, start_center_y = zoom_width / 2, zoom_height / 2
    end_center_x, end_center_y = zoom_width / 2, zoom_height / 2

    if effect_type == "zoom-in":
        # Wide window -> tight window.
        start_zoom = 1 / base_scale
        end_zoom = 1.0
    elif effect_type == "zoom-out":
        # Tight window -> wide window.
        start_zoom = 1.0
        end_zoom = 1 / base_scale
    elif effect_type == "slow-zoom":
        # Very subtle zoom in.
        start_zoom = 1 / 1.05
        end_zoom = 1.0
    elif effect_type == "pan-left":
        start_center_x = target_w / 2
        end_center_x = zoom_width - target_w / 2
    elif effect_type == "pan-right":
        start_center_x = zoom_width - target_w / 2
        end_center_x = target_w / 2
    elif effect_type == "pan-up":
        start_center_y = target_h / 2
        end_center_y = zoom_height - target_h / 2
    elif effect_type == "pan-down":
        start_center_y = zoom_height - target_h / 2
        end_center_y = target_h / 2

    def transform_frame(get_frame, t):
        frame = get_frame(t)
        frame_h, frame_w = frame.shape[:2]

        # Smooth interpolation (ease-in, ease-out); static when duration is unknown.
        if duration and duration > 0:
            ratio = 0.5 - 0.5 * math.cos(math.pi * t / duration)
        else:
            ratio = 0

        current_zoom = start_zoom + (end_zoom - start_zoom) * ratio
        # Never request a window larger than the frame itself.
        crop_w = max(1, min(int(target_w / current_zoom), frame_w))
        crop_h = max(1, min(int(target_h / current_zoom), frame_h))

        current_center_x = start_center_x + (end_center_x - start_center_x) * ratio
        current_center_y = start_center_y + (end_center_y - start_center_y) * ratio

        # Clamp the center so the window stays inside the image.
        current_center_x = max(crop_w / 2, min(current_center_x, frame_w - crop_w / 2))
        current_center_y = max(crop_h / 2, min(current_center_y, frame_h - crop_h / 2))

        # cv2 needs a contiguous array.
        if not frame.flags['C_CONTIGUOUS']:
            frame = np.ascontiguousarray(frame)

        try:
            # getRectSubPix crops with sub-pixel accuracy, which keeps slow
            # pans from visibly stepping.
            cropped_frame = cv2.getRectSubPix(frame, (crop_w, crop_h), (current_center_x, current_center_y))
            return cv2.resize(cropped_frame, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)
        except cv2.error as cv2_err:
            print(f"Error during cv2 operation in Ken Burns: {cv2_err}")
            print(f"Frame shape: {frame.shape}, Crop W/H: {crop_w}/{crop_h}, Center X/Y: {current_center_x}/{current_center_y}")
            # Fallback: plain resize of the uncropped frame.
            return cv2.resize(frame, (target_w, target_h), interpolation=cv2.INTER_LINEAR)

    # Apply the transformation to the clip (and to its mask, if it has one).
    return clip.fl(transform_frame, apply_to=['mask'])
634
+
635
 
636
def resize_to_fill(clip, target_resolution):
    """Resize a clip so it fills ``target_resolution``, center-cropping overflow.

    Args:
        clip: Clip exposing ``w``, ``h``, ``duration``, ``resize`` and ``crop``.
        target_resolution: ``(width, height)`` to fill.

    Returns:
        A clip of the target size. If the input has a zero dimension, a black
        ``ColorClip`` of the target size and the same duration is returned.
    """
    target_w, target_h = target_resolution
    clip_w, clip_h = clip.w, clip.h

    if clip_w == 0 or clip_h == 0:
        print("Warning: Clip has zero dimensions before resize_to_fill. Cannot resize.")
        return ColorClip(size=target_resolution, color=(0, 0, 0), duration=clip.duration)

    # Only file-backed clips carry a filename; it is used for logging only, so
    # other clip types must not fail here.
    label = getattr(clip, "filename", "<in-memory clip>")

    clip_aspect = clip_w / clip_h
    target_aspect = target_w / target_h

    if math.isclose(clip_aspect, target_aspect, rel_tol=1e-3):
        # Aspect ratios are close enough, just resize
        print(f"Resizing video clip {label} to {target_resolution} (aspect match).")
        return clip.resize(newsize=target_resolution)

    if clip_aspect > target_aspect:
        # Wider than the target: match the height, then trim both sides.
        print(f"Resizing video clip {label} to height {target_h} and cropping width.")
        clip = clip.resize(height=target_h)
        crop_amount = (clip.w - target_w) / 2
        if crop_amount < 0:  # Avoid negative crop
            print("Warning: Negative crop amount calculated in resize_to_fill (width). Resizing only.")
            return clip.resize(newsize=target_resolution)
        return clip.crop(x1=crop_amount, width=target_w)

    # Taller than the target: match the width, then trim top and bottom.
    print(f"Resizing video clip {label} to width {target_w} and cropping height.")
    clip = clip.resize(width=target_w)
    crop_amount = (clip.h - target_h) / 2
    if crop_amount < 0:  # Avoid negative crop
        print("Warning: Negative crop amount calculated in resize_to_fill (height). Resizing only.")
        return clip.resize(newsize=target_resolution)
    return clip.crop(y1=crop_amount, height=target_h)
674
 
675
+
676
def add_background_music(final_video, bg_music_path=BACKGROUND_MUSIC_PATH, bg_music_volume=0.08):
    """Mix a looping background track under the video's existing audio.

    The track is looped or trimmed to the video's duration and scaled by
    ``bg_music_volume`` before being mixed. Any failure leaves the video
    unchanged.

    Args:
        final_video: Video clip to add music to.
        bg_music_path: Path to the music file.
        bg_music_volume: Volume multiplier applied to the music.

    Returns:
        The video with the mixed audio track, or the original ``final_video``
        when the music file is missing, a duration is invalid, or an error
        occurs.
    """
    if not bg_music_path or not os.path.exists(bg_music_path):
        print(f"Warning: Background music file not found at {bg_music_path}. Skipping.")
        return final_video

    bg_music = None
    try:
        print("Adding background music...")

        # Validate the video before opening the audio file.
        if final_video.duration is None or final_video.duration <= 0:
            print("Warning: Final video has no duration. Cannot add background music.")
            return final_video

        bg_music = AudioFileClip(bg_music_path)
        if bg_music.duration is None or bg_music.duration <= 0:
            print("Warning: Background music has no duration. Skipping.")
            bg_music.close()
            return final_video

        # Loop the track if it is shorter than the video.
        track = bg_music
        if track.duration < final_video.duration:
            loops_needed = math.ceil(final_video.duration / track.duration)
            print(f"Looping background music {loops_needed} times.")
            track = concatenate_audioclips([bg_music] * loops_needed)

        # Trim to the exact duration, then adjust the volume.
        track = track.subclip(0, final_video.duration).volumex(bg_music_volume)

        # Combine with existing audio (if any)
        video_audio = final_video.audio
        if video_audio:
            print("Mixing existing audio with background music.")
            mixed_audio = CompositeAudioClip([video_audio, track])
        else:
            print("No existing audio found. Using only background music.")
            mixed_audio = track

        # bg_music stays open on success: the returned clip still reads from it.
        final_video = final_video.set_audio(mixed_audio)
        print("Background music added successfully.")
        return final_video

    except Exception as e:
        print(f"Error adding background music: {e}")
        # Release the audio file that will no longer be used.
        if bg_music is not None:
            try:
                bg_music.close()
            except Exception as close_e:
                print(f"Warning: Could not close background music clip: {close_e}")
        # Return the original video without crashing
        return final_video
726
+
727
 
728
def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, narration_text=None, segment_index=0):
    """Build one video segment from a media file, a narration track and captions.

    The segment length is taken from the narration audio. Videos are resized
    to fill the target resolution and looped or trimmed; images get a Ken
    Burns effect. Captions are overlaid when ``narration_text`` is given and
    ``CAPTION_COLOR`` is not ``"transparent"``.

    Args:
        media_path: Path to the downloaded video or image.
        asset_type: ``"video"`` or ``"image"``.
        tts_path: Path to the narration audio file.
        duration: Accepted for interface compatibility; the audio length is
            used instead.
        effects: Ken Burns effect name for images (``None`` means random).
        narration_text: Text for the captions; may be ``None``.
        segment_index: Index used in log messages.

    Returns:
        The finished clip; ``None`` if an input file is missing or the asset
        type is unknown; a one-second silent black clip if an unexpected error
        occurs.
    """
    # narration_text defaults to None, so it must not be sliced directly.
    narration_preview = (narration_text or "")[:50]
    print(f"\n--- Creating Clip Segment {segment_index} ---")
    print(f"Media: {media_path} ({asset_type})")
    print(f"TTS: {tts_path}")
    print(f"Narration: '{narration_preview}...'")

    try:
        # Validate inputs
        if not media_path or not os.path.exists(media_path):
            print(f"Error: Media path not found or invalid: {media_path}")
            return None
        if not tts_path or not os.path.exists(tts_path):
            print(f"Error: TTS path not found or invalid: {tts_path}")
            return None

        # Load audio first: it determines the segment duration.
        audio_clip = AudioFileClip(tts_path)
        # Slight fade out to avoid abrupt cuts
        audio_clip = audio_clip.audio_fadeout(0.2)
        target_duration = audio_clip.duration
        if target_duration is None or target_duration <= 0.1:
            print(f"Warning: Audio clip {tts_path} has invalid duration ({target_duration}). Estimating 3 seconds.")
            target_duration = 3.0  # Fallback duration
            audio_clip = audio_clip.set_duration(target_duration)

        print(f"Audio Duration: {target_duration:.2f}s")

        # --- Create Video/Image Clip ---
        clip = None
        if asset_type == "video":
            try:
                # No target_resolution hint here: passing both dimensions makes
                # ffmpeg stretch every frame to that exact size, which distorts
                # footage with a different aspect ratio and bypasses the
                # crop-to-fill logic below.
                clip = VideoFileClip(media_path)
                # The narration replaces any audio the footage came with.
                clip = clip.without_audio()

                # Resize/Crop to fill target resolution
                clip = resize_to_fill(clip, TARGET_RESOLUTION)

                # Loop or cut video to match audio duration
                if clip.duration < target_duration:
                    print(f"Looping video (duration {clip.duration:.2f}s) to match audio.")
                    clip = clip.loop(duration=target_duration)
                else:
                    start_time = 0
                    print(f"Subclipping video from {start_time:.2f}s to {start_time + target_duration:.2f}s.")
                    clip = clip.subclip(start_time, start_time + target_duration)

                # Fade in/out for smoother transitions
                clip = clip.fadein(0.3).fadeout(0.3)

            except Exception as video_e:
                print(f"Error processing video file {media_path}: {video_e}")
                # Fall back to a black screen so the narration is still used.
                clip = ColorClip(size=TARGET_RESOLUTION, color=(0, 0, 0), duration=target_duration)

        elif asset_type == "image":
            try:
                clip = ImageClip(media_path).set_duration(target_duration)
                clip = apply_kenburns_effect(clip, TARGET_RESOLUTION, effect_type=effects or "random")
                clip = clip.fadein(0.3).fadeout(0.3)

            except Exception as img_e:
                print(f"Error processing image file {media_path}: {img_e}")
                # Fall back to a grey screen so the narration is still used.
                clip = ColorClip(size=TARGET_RESOLUTION, color=(50, 50, 50), duration=target_duration)
        else:
            print(f"Error: Unknown asset type '{asset_type}'")
            return None  # Unknown type

        # Ensure clip has the correct duration after processing
        clip = clip.set_duration(target_duration)

        # --- Add Captions ---
        subtitle_clips = []
        if narration_text and CAPTION_COLOR != "transparent":
            print("Adding captions...")
            try:
                # Evenly timed chunks of a few words each; no real word timing
                # information is available here.
                words = narration_text.split()
                words_per_chunk = 5
                chunks = [' '.join(words[i:i + words_per_chunk]) for i in range(0, len(words), words_per_chunk)]
                if not chunks:
                    chunks = [narration_text]  # Handle empty or short text

                chunk_duration = target_duration / len(chunks)

                # Font size and vertical position scale with the frame height.
                font_size = int(TARGET_RESOLUTION[1] / 25)
                subtitle_y_position = int(TARGET_RESOLUTION[1] * 0.85)
                # Whole pixels: the caption width is handed to the text renderer.
                caption_width = int(TARGET_RESOLUTION[0] * 0.85)

                for i, chunk_text in enumerate(chunks):
                    start_time = i * chunk_duration
                    # Ensure end time doesn't exceed clip duration
                    end_time = min((i + 1) * chunk_duration, target_duration)
                    # Avoid zero-duration captions
                    if end_time <= start_time:
                        end_time = start_time + 0.1

                    txt_clip = TextClip(
                        chunk_text,
                        fontsize=font_size,
                        font='Arial-Bold',  # Must be available in the environment
                        color=CAPTION_COLOR,
                        bg_color='rgba(0, 0, 0, 0.5)',
                        method='caption',  # Wraps text
                        align='center',
                        stroke_color='black',
                        stroke_width=max(1, font_size // 20),
                        size=(caption_width, None),  # Limit width
                    ).set_start(start_time).set_duration(end_time - start_time).set_position(('center', subtitle_y_position))

                    subtitle_clips.append(txt_clip)

                # Composite the main clip with subtitles
                if subtitle_clips:
                    clip = CompositeVideoClip([clip] + subtitle_clips, size=TARGET_RESOLUTION)
                    print(f"Added {len(subtitle_clips)} caption segments.")

            except Exception as caption_e:
                # This often happens if ImageMagick or fonts are missing/misconfigured
                print(f"ERROR: Failed to create captions: {caption_e}")
                print("Check if ImageMagick is installed and configured, and if the font (e.g., Arial-Bold) is available.")
                # Continue without captions if they fail

        # Set the audio track
        clip = clip.set_audio(audio_clip)

        print(f"Clip Segment {segment_index} created successfully.")
        return clip

    except Exception as e:
        print(f"FATAL ERROR creating clip segment {segment_index}: {e}")
        import traceback
        traceback.print_exc()  # Print detailed traceback for debugging
        # Return a short, silent black clip to avoid crashing the concatenation
        return ColorClip(size=TARGET_RESOLUTION, color=(0, 0, 0), duration=1.0).set_audio(None)
881
+
882
 
883
  # Main Gradio Function
884
def _close_clips_quietly(clips):
    """Close every clip in *clips*, logging instead of raising on failure.

    Cleanup must run to completion even if one clip cannot be closed, so a
    failing ``close()`` is reported and the loop continues.
    """
    for clip in clips:
        try:
            clip.close()
        except Exception as close_e:
            print(f"Warning: Failed to close a clip during cleanup: {close_e}")


def generate_video(video_concept, resolution_choice, caption_option):
    """Generate the final video for a concept; this is the Gradio entry point.

    Args:
        video_concept: Text passed to ``generate_script``.
        resolution_choice: ``"Short (9:16)"`` selects 1080x1920; any other
            value selects 1920x1080.
        caption_option: ``"Yes"`` sets the caption color to white; any other
            value sets it to ``"transparent"``.

    Returns:
        ``OUTPUT_VIDEO_FILENAME`` on success, otherwise a human-readable
        string starting with ``"Error:"`` describing the failed stage.

    Side effects:
        Rebinds the module globals ``TARGET_RESOLUTION`` and
        ``CAPTION_COLOR``, and recreates then removes ``TEMP_FOLDER``.
    """
    print("\n\n--- Starting Video Generation ---")
    print(f"Concept: {video_concept}")
    print(f"Resolution: {resolution_choice}")
    print(f"Captions: {caption_option}")

    global TARGET_RESOLUTION, CAPTION_COLOR
    # Set global config based on input
    if resolution_choice == "Short (9:16)":
        TARGET_RESOLUTION = (1080, 1920)
    else:  # Default to Full HD
        TARGET_RESOLUTION = (1920, 1080)
    CAPTION_COLOR = "white" if caption_option == "Yes" else "transparent"  # Use "transparent" to disable

    # --- Cleanup and Setup ---
    if os.path.exists(TEMP_FOLDER):
        print(f"Removing existing temp folder: {TEMP_FOLDER}")
        shutil.rmtree(TEMP_FOLDER)
    try:
        os.makedirs(TEMP_FOLDER)
        print(f"Created temp folder: {TEMP_FOLDER}")
    except OSError as e:
        print(f"Error creating temp folder {TEMP_FOLDER}: {e}")
        return f"Error: Could not create temporary directory. Check permissions. {e}"  # Return error message to Gradio

    # --- Script Generation ---
    print("Generating script...")
    script = generate_script(video_concept)
    if not script:
        print("Error: Failed to generate script.")
        shutil.rmtree(TEMP_FOLDER, ignore_errors=True)  # Clean up
        return "Error: Failed to generate script from AI. Please try a different concept or check API keys."

    # --- Script Parsing ---
    print("Parsing script...")
    elements = parse_script(script)
    if not elements:
        print("Error: Failed to parse script into elements.")
        shutil.rmtree(TEMP_FOLDER, ignore_errors=True)  # Clean up
        return "Error: Failed to parse the generated script. The script might be malformed."

    # Pair media prompts with TTS elements: elements are consumed two at a
    # time and kept only when the pair is (media, tts) in that order.
    paired_elements = []
    if len(elements) >= 2:
        for i in range(0, len(elements), 2):
            if i + 1 < len(elements) and elements[i]['type'] == 'media' and elements[i+1]['type'] == 'tts':
                paired_elements.append((elements[i], elements[i+1]))
            else:
                print(f"Warning: Skipping mismatched elements at index {i}")

    if not paired_elements:
        print("Error: No valid media/TTS pairs found after parsing.")
        shutil.rmtree(TEMP_FOLDER, ignore_errors=True)  # Clean up
        return "Error: Could not find valid [Title]/Narration pairs in the script."

    print(f"Found {len(paired_elements)} pairs of media prompts and narrations.")

    # --- Clip Generation Loop ---
    clips = []
    total_segments = len(paired_elements)
    for idx, (media_elem, tts_elem) in enumerate(paired_elements):
        print(f"\nProcessing Segment {idx+1}/{total_segments}: Prompt='{media_elem['prompt']}'")

        # 1. Generate Media (Video/Image)
        media_asset = generate_media(media_elem['prompt'], current_index=idx, total_segments=total_segments)
        if not media_asset or not media_asset.get('path'):
            print(f"Warning: Failed to generate media for '{media_elem['prompt']}'. Skipping segment.")
            continue  # Skip this segment

        # 2. Generate TTS
        tts_path = generate_tts(tts_elem['text'], tts_elem['voice'])
        if not tts_path:
            print(f"Warning: Failed to generate TTS for segment {idx}. Skipping segment.")
            continue  # Skip this segment

        # 3. Create MoviePy Clip Segment
        clip = create_clip(
            media_path=media_asset['path'],
            asset_type=media_asset['asset_type'],
            tts_path=tts_path,
            duration=tts_elem['duration'],  # Duration hint passed through to create_clip
            effects=media_elem.get('effects', 'random'),
            narration_text=tts_elem['text'],
            segment_index=idx
        )

        if clip:
            clips.append(clip)
        else:
            print(f"Warning: Failed to create clip for segment {idx}. Skipping.")

    # --- Final Video Assembly ---
    if not clips:
        print("Error: No clips were successfully created.")
        shutil.rmtree(TEMP_FOLDER, ignore_errors=True)  # Clean up
        return "Error: Failed to create any video segments. Check logs for media/TTS/clip creation errors."

    print(f"\nConcatenating {len(clips)} video clips...")
    try:
        # Concatenate all the generated clips
        final_video = concatenate_videoclips(clips, method="compose")
    except Exception as concat_e:
        print(f"Error during video concatenation: {concat_e}")
        # Release the per-segment clips before deleting the files they read.
        _close_clips_quietly(clips)
        shutil.rmtree(TEMP_FOLDER, ignore_errors=True)
        return f"Error: Failed to combine video segments: {concat_e}"

    # --- Add Background Music ---
    final_video = add_background_music(final_video, bg_music_volume=0.08)  # Adjust volume as needed

    # --- Write Output File ---
    print(f"Writing final video to {OUTPUT_VIDEO_FILENAME}...")
    try:
        final_video.write_videofile(
            OUTPUT_VIDEO_FILENAME,
            codec='libx264',
            audio_codec='aac',
            fps=24,
            preset='medium',  # 'veryfast', 'fast', 'medium', 'slow', 'veryslow'
            threads=4,
            logger='bar'
        )
        print("Final video written successfully.")
    except Exception as write_e:
        print(f"Error writing final video file: {write_e}")
        # The temp folder is removed exactly once, in ``finally``. Removing
        # it here as well made the second removal raise FileNotFoundError,
        # which replaced this error message with an unrelated exception.
        return f"Error: Failed to write the final video file: {write_e}"
    finally:
        # --- Cleanup ---
        # Close clips to release file handles before deleting temp files.
        _close_clips_quietly(clips)
        if final_video:
            _close_clips_quietly([final_video])

        print(f"Cleaning up temporary folder: {TEMP_FOLDER}")
        # ignore_errors: a cleanup failure must not mask the real outcome.
        shutil.rmtree(TEMP_FOLDER, ignore_errors=True)

    print("--- Video Generation Complete ---")
    # Return the path to the generated video for Gradio
    return OUTPUT_VIDEO_FILENAME
1038
 
1039
+ # --- Gradio Interface Definition ---
1040
def _generate_video_with_status(video_concept, resolution_choice, caption_option):
    """Adapt ``generate_video`` to the (video, status) output pair of the UI.

    ``generate_video`` returns either the output file path or an error
    message string. Only a path to an existing file is sent to the video
    component; any other text is shown in the status box, so an error message
    is never handed to ``gr.Video`` as if it were a file.

    Returns:
        ``(video_path, status_text)``; ``video_path`` is ``None`` on failure.
    """
    result = generate_video(video_concept, resolution_choice, caption_option)
    if isinstance(result, str) and os.path.isfile(result):
        return result, "Video generated successfully."
    return None, result or "Error: Video generation failed."


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 🎬 AI Documentary Video Generator 🎥
        Enter a concept or topic, and the AI will generate a short, humorous documentary-style video.
        Configure API keys (Pexels, OpenRouter) and ensure `background_music.mp3` exists before running.
        """
    )
    with gr.Row():
        # Left column: user inputs and the trigger button.
        with gr.Column(scale=2):
            video_concept = gr.Textbox(
                label="Video Concept / Topic / Script",
                placeholder="e.g., 'The secret life of squirrels', 'Why cats secretly judge us', or paste a full script starting with [Title]...",
                lines=4
            )
            with gr.Row():
                resolution = gr.Dropdown(
                    ["Full HD (16:9)", "Short (9:16)"],
                    label="Resolution",
                    value="Full HD (16:9)"
                )
                caption_option = gr.Dropdown(
                    ["Yes", "No"],
                    label="Add Captions",
                    value="Yes"
                )
            generate_btn = gr.Button("✨ Generate Video ✨", variant="primary")

        # Right column: the rendered video and a status/error readout.
        with gr.Column(scale=3):
            output_video = gr.Video(label="Generated Video")
            status_message = gr.Textbox(label="Status", interactive=False)  # To show errors or progress

    # Connect button click to the adapter so failures land in the status box
    # rather than being passed to the video component.
    generate_btn.click(
        fn=_generate_video_with_status,
        inputs=[video_concept, resolution, caption_option],
        outputs=[output_video, status_message]
    )
1079
+
1080
+ # Launch the Gradio app
1081
if __name__ == "__main__":
    # Warn at startup when the background music file is absent.
    music_available = os.path.exists(BACKGROUND_MUSIC_PATH)
    if not music_available:
        print(f"\n*** WARNING: Background music file '{BACKGROUND_MUSIC_PATH}' not found. Background music will be skipped. ***\n")

    # Start the Gradio app with debug mode enabled.
    demo.launch(debug=True)