testdeep123 committed on
Commit
b082bff
·
verified ·
1 Parent(s): 2bb6ef9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +153 -926
app.py CHANGED
@@ -1,216 +1,90 @@
1
  import os
2
- import gradio as gr
3
- from kokoro import KPipeline
4
- from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip, CompositeVideoClip, TextClip, concatenate_videoclips
5
- from PIL import Image
6
- import tempfile
7
- import random
8
- import cv2
9
- import math
10
  import requests
11
  import re
12
- import time
13
- import pydub
14
- import pysrt
15
- from gtts import gTTS
16
- import numpy as np
17
- import soundfile as sf
18
-
19
- # Initialize Kokoro TTS pipeline
20
- pipeline = KPipeline(lang_code='a')
21
-
22
- # API Constants
23
# API configuration.
# Credentials are read from the environment so that secrets are never stored
# in source control. Set PEXELS_API_KEY and OPENROUTER_API_KEY before running;
# an empty string means "not configured" and the remote APIs will reject calls.
PEXELS_API_KEY = os.environ.get("PEXELS_API_KEY", "")
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"

# Working directory for downloaded media and generated audio.
TEMP_FOLDER = "temp_video_processing"
os.makedirs(TEMP_FOLDER, exist_ok=True)
28
-
29
- # --------------- ORIGINAL FUNCTIONS (UNMODIFIED) --------------- #
30
-
31
-
32
# Environment setup.
# These commands must be run in a shell (or a notebook cell), not from this
# module: IPython "!" magics are a SyntaxError in a plain .py file.
#
#   pip install transformers==4.49.0
#   pip install moviepy gTTS requests pydub pillow
#   pip cache purge
#   apt-get install imagemagick -y
#   pip install "kokoro>=0.3.4" soundfile      # quoted so ">=" is not a shell redirect
#   apt-get -qq -y install espeak-ng > /dev/null 2>&1
#   pip install pysrt
39
-
40
-
41
- from kokoro import KPipeline
42
- from IPython.display import display, Audio
43
- import soundfile as sf
44
- import torch
45
- from IPython.display import display, Audio, HTML
46
- import soundfile as sf
47
- import os
48
- from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip
49
- from PIL import Image
50
- import tempfile
51
  import random
52
- import cv2
53
  import math
54
- import os, requests, io, time, re, random
55
  from moviepy.editor import (
56
  VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
57
- CompositeVideoClip, TextClip
58
  )
59
  import moviepy.video.fx.all as vfx
60
  import moviepy.config as mpy_config
61
  from pydub import AudioSegment
62
- from pydub.generators import Sine
63
- from google.colab import files
64
  from PIL import Image, ImageDraw, ImageFont
65
  import numpy as np
66
  from bs4 import BeautifulSoup
67
- import base64
68
  from urllib.parse import quote
69
  import pysrt
70
- from gtts import gTTS
71
-
72
- # Initialize Kokoro TTS pipeline (using American English, adjust lang_code as needed)
73
- pipeline = KPipeline(lang_code='a') # Use voice 'af_heart' for American English
74
- # Ensure ImageMagick binary is set (to avoid "unset" errors)
75
- mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
76
-
77
- # ---------------- Global Configuration ---------------- #
78
-
79
# Interactive run configuration, asked once at start-up.
TARGET_RESOLUTION_INPUT = input("RESOLUTION:")

CLIPS_AMMOUNT = int(input("Clips:"))

CAPTION_OPTION = input("Caption Yes/No:")

# Captions are drawn in white when enabled; "transparent" is the sentinel the
# rest of the script checks to skip subtitle rendering.
if CAPTION_OPTION.strip().lower() == "yes":
    CAPTION_COLOR = "white"
else:
    CAPTION_COLOR = "transparent"

# "Full" is landscape 1080p, "Short" is portrait 1080p.
_RESOLUTION_PRESETS = {"full": (1920, 1080), "short": (1080, 1920)}
_resolution_key = TARGET_RESOLUTION_INPUT.strip().lower()
if _resolution_key not in _RESOLUTION_PRESETS:
    # Previously TARGET_RESOLUTION stayed undefined here and the script failed
    # later with a NameError; fall back to the landscape preset instead.
    print(f"Unknown resolution '{TARGET_RESOLUTION_INPUT}', defaulting to Full (1920x1080)")
    _resolution_key = "full"
TARGET_RESOLUTION = _RESOLUTION_PRESETS[_resolution_key]
96
-
97
-
98
-
99
-
100
-
101
-
102
-
103
 
 
 
104
 
 
 
105
 
 
106
# API configuration.
# Credentials are read from the environment so that secrets are never stored
# in source control. Set PEXELS_API_KEY and OPENROUTER_API_KEY before running;
# an empty string means "not configured" and the remote APIs will reject calls.
PEXELS_API_KEY = os.environ.get("PEXELS_API_KEY", "")
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
TEMP_FOLDER = "temp_video_processing"
OUTPUT_VIDEO_FILENAME = "final_video.mp4"

# Browser-like User-Agent sent with scraping and download requests.
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"

# Create the temporary working folder if it does not exist yet.
os.makedirs(TEMP_FOLDER, exist_ok=True)
117
-
118
- # ---------------- Helper Functions ---------------- #
119
  def generate_script(user_input):
120
- """Generate documentary script with proper OpenRouter handling."""
121
  headers = {
122
  'Authorization': f'Bearer {OPENROUTER_API_KEY}',
123
  'HTTP-Referer': 'https://your-domain.com',
124
  'X-Title': 'AI Documentary Maker'
125
  }
126
-
127
  prompt = f"""Short Documentary Script GeneratorInstructions:
128
-
129
  If I say "use this," just output the script exactly as I gave it.
130
  If I only give topics, generate a script based on them.
131
  If I provide a full script, rewrite it without any changes. Make everything short simple and humarous funny and act as serious but humarous. And don't say anything off topic. Also alway say a funny statement to subscribe based on the video topic at the end. Use normal conversational text like a normal person talking and avoid AI phase make the statements humanize and normal conversational
132
  And always talk negative views don't really wants any positive statement and the full script will be one video about one topic so keep in mind
133
  Formatting Rules:
134
-
135
-
136
  Title in Square Brackets:
137
-
138
-
139
  Each section starts with a one-word title inside [ ] (max two words if necessary).
140
  This title will be used as a search term for Pexels footage.
141
-
142
-
143
-
144
  Casual & Funny Narration:
145
-
146
-
147
  Each section has 5-10 words of narration.
148
  Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).
149
-
150
-
151
-
152
  No Special Formatting:
153
-
154
-
155
  No bold, italics, or special characters. You are a assistant AI your task is to create script. You aren't a chatbot. So, don't write extra text
156
-
157
-
158
-
159
  Generalized Search Terms:
160
-
161
-
162
  If a term is too specific, make it more general for Pexels search.
163
-
164
-
165
-
166
  Scene-Specific Writing:
167
-
168
-
169
  Each section describes only what should be shown in the video.
170
-
171
-
172
-
173
  Output Only the Script, and also make it funny and humarous and helirous and also add to subscribe with a funny statement like subscribe now or .....
174
-
175
-
176
  No extra text, just the script.
177
-
178
-
179
-
180
  Example Output:
181
  [North Korea]
182
-
183
  Top 5 unknown facts about North Korea.
184
-
185
  [Invisibility]
186
-
187
  North Korea’s internet speed is so fast… it doesn’t exist.
188
-
189
  [Leadership]
190
-
191
  Kim Jong-un once won an election with 100% votes… against himself.
192
-
193
  [Magic]
194
-
195
  North Korea discovered time travel. That’s why their news is always from the past.
196
-
197
  [Warning]
198
-
199
  Subscribe now, or Kim Jong-un will send you a free one-way ticket… to North Korea.
200
-
201
  [Freedom]
202
-
203
  North Korean citizens can do anything… as long as it's government-approved.
204
  Now here is the Topic/scrip: {user_input}
205
  """
206
-
207
  data = {
208
  'model': OPENROUTER_MODEL,
209
  'messages': [{'role': 'user', 'content': prompt}],
210
  'temperature': 0.4,
211
  'max_tokens': 5000
212
  }
213
-
214
  try:
215
  response = requests.post(
216
  'https://openrouter.ai/api/v1/chat/completions',
@@ -218,677 +92,275 @@ Now here is the Topic/scrip: {user_input}
218
  json=data,
219
  timeout=30
220
  )
221
-
222
- # Debug: Print raw response
223
- print("API Response:", response.text)
224
-
225
  if response.status_code == 200:
226
  response_data = response.json()
227
  if 'choices' in response_data and len(response_data['choices']) > 0:
228
  return response_data['choices'][0]['message']['content']
229
- else:
230
- print("Unexpected response format:", response_data)
231
- return None
232
- else:
233
- print(f"API Error {response.status_code}: {response.text}")
234
- return None
235
-
236
- except Exception as e:
237
- print(f"Request failed: {str(e)}")
238
  return None
239
 
240
def parse_script(script_text):
    """Parse a bracket-titled script into alternating media and TTS elements.

    A section starts on a line beginning with ``[Title]``. Any text after the
    closing bracket on that line, plus all following lines up to the next
    title, forms the section's narration.

    Args:
        script_text: The generated script. ``None``, non-string or blank
            input yields an empty list.

    Returns:
        A list with two dicts per non-empty section, in script order:
        a ``{"type": "media", "prompt", "effects"}`` element followed by a
        ``{"type": "tts", "text", "voice", "duration"}`` element. Duration is
        0.5 seconds per word with a minimum of 3 seconds.
    """
    if not isinstance(script_text, str) or not script_text.strip():
        return []

    # Sections are kept in an ordered list rather than a dict keyed by title,
    # so a title that appears twice no longer overwrites the earlier section.
    sections = []
    for raw_line in script_text.splitlines():
        line = raw_line.strip()
        if line.startswith("[") and "]" in line:
            closing = line.find("]")
            title = line[1:closing].strip()
            trailing = line[closing + 1:].strip()
            sections.append((title, [trailing] if trailing else []))
        elif sections and line:
            sections[-1][1].append(line)

    elements = []
    for title, parts in sections:
        # Joining with a space keeps same-line text and following lines apart.
        narration = " ".join(parts)
        if not title or not narration:
            continue  # skip sections without a search term or narration
        duration = max(3, len(narration.split()) * 0.5)
        elements.append({"type": "media", "prompt": title, "effects": "fade-in"})
        elements.append({"type": "tts", "text": narration, "voice": "en", "duration": duration})
    return elements
287
 
288
def search_pexels_videos(query, pexels_api_key):
    """Search Pexels for videos matching ``query`` and return one HD link.

    Up to three result pages are fetched. For every video the first file
    whose quality is ``"hd"`` is collected, and one collected link is chosen
    at random.

    Args:
        query: Search term sent to the Pexels video search endpoint.
        pexels_api_key: Value sent in the ``Authorization`` header.

    Returns:
        The URL of a randomly selected HD video file, or ``None`` when no
        suitable video was found.
    """
    headers = {'Authorization': pexels_api_key}
    base_url = "https://api.pexels.com/videos/search"
    num_pages = 3
    videos_per_page = 15
    max_retries = 3

    all_videos = []
    for page in range(1, num_pages + 1):
        # Back-off restarts for each page so one slow page does not inflate
        # the waiting time of the following ones.
        retry_delay = 1
        params = {"query": query, "per_page": videos_per_page, "page": page}

        for attempt in range(max_retries):
            try:
                response = requests.get(base_url, headers=headers, params=params, timeout=10)

                if response.status_code == 200:
                    videos = response.json().get("videos", [])
                    if not videos:
                        print(f"No videos found on page {page}.")
                    for video in videos:
                        for video_file in video.get("video_files", []):
                            if video_file.get("quality") == "hd":
                                link = video_file.get("link")
                                if link:  # never offer a missing link as a candidate
                                    all_videos.append(link)
                                break  # only one file per video
                    break  # page handled, leave the retry loop

                if response.status_code == 429:
                    print(f"Rate limit hit (attempt {attempt+1}/{max_retries}).")
                else:
                    print(f"Error fetching videos: {response.status_code} {response.text}")
            except requests.exceptions.RequestException as e:
                print(f"Request exception: {e}")

            # Only wait when another attempt will actually follow.
            if attempt < max_retries - 1:
                print(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
                retry_delay *= 2

    if not all_videos:
        print("No suitable videos found after searching all pages.")
        return None

    random_video = random.choice(all_videos)
    print(f"Selected random video from {len(all_videos)} HD videos")
    return random_video
356
 
357
def search_pexels_images(query, pexels_api_key):
    """Search Pexels for a landscape photo matching ``query``.

    Args:
        query: Search term sent to the Pexels photo search endpoint.
        pexels_api_key: Value sent in the ``Authorization`` header.

    Returns:
        The ``original`` source URL of a photo picked at random from the
        (at most five) results, or ``None`` when nothing was found or every
        attempt failed.
    """
    headers = {'Authorization': pexels_api_key}
    url = "https://api.pexels.com/v1/search"
    params = {"query": query, "per_page": 5, "orientation": "landscape"}

    max_retries = 3
    retry_delay = 1

    for attempt in range(max_retries):
        try:
            response = requests.get(url, headers=headers, params=params, timeout=10)

            if response.status_code == 200:
                photos = response.json().get("photos", [])
                if not photos:
                    print(f"No images found for query: {query}")
                    return None
                photo = random.choice(photos[:5])
                return photo.get("src", {}).get("original")

            if response.status_code == 429:
                print(f"Rate limit hit (attempt {attempt+1}/{max_retries}).")
            else:
                print(f"Error fetching images: {response.status_code} {response.text}")
        except requests.exceptions.RequestException as e:
            print(f"Request exception: {e}")

        # Back off only when another attempt will follow; sleeping after the
        # final failure just delays the caller.
        if attempt < max_retries - 1:
            print(f"Retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)
            retry_delay *= 2

    print(f"No Pexels images found for query: {query} after all attempts")
    return None
403
 
404
def search_google_images(query):
    """Scrape a Google Images result page and return one image URL.

    Only ``<img>`` sources that start with ``http`` and do not contain
    ``gstatic`` are considered. With five or more candidates one of the first
    five is chosen at random; otherwise the first candidate is returned.

    Returns:
        An image URL, or ``None`` when no candidate exists or any error occurs.
    """
    try:
        page = requests.get(
            f"https://www.google.com/search?q={quote(query)}&tbm=isch",
            headers={"User-Agent": USER_AGENT},
            timeout=10,
        )
        document = BeautifulSoup(page.text, "html.parser")

        candidates = [
            source
            for source in (tag.get("src", "") for tag in document.find_all("img"))
            if source.startswith("http") and "gstatic" not in source
        ]

        if not candidates:
            print(f"No Google Images found for query: {query}")
            return None
        if len(candidates) >= 5:
            return random.choice(candidates[:5])
        return candidates[0]
    except Exception as e:
        print(f"Error in Google Images search: {e}")
        return None
430
 
431
def download_image(image_url, filename):
    """Download an image to ``filename`` and make sure it is a valid RGB image.

    The file is streamed to disk, verified with PIL and, when its mode is not
    ``RGB``, converted and saved back in place. On any failure the partially
    written file is removed.

    Args:
        image_url: URL to fetch.
        filename: Destination path on disk.

    Returns:
        ``filename`` on success, ``None`` on download or validation failure.
    """
    try:
        headers = {"User-Agent": USER_AGENT}
        print(f"Downloading image from: {image_url} to {filename}")

        # The context manager releases the streamed connection even if
        # writing the file fails part-way.
        with requests.get(image_url, headers=headers, stream=True, timeout=15) as response:
            response.raise_for_status()
            with open(filename, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        print(f"Image downloaded successfully to: {filename}")

        try:
            # verify() leaves the image object unusable, so the file is
            # opened a second time for the actual conversion.
            with Image.open(filename) as img:
                img.verify()
            with Image.open(filename) as img:
                rgb_copy = img.convert('RGB') if img.mode != 'RGB' else None
            if rgb_copy is not None:
                rgb_copy.save(filename)
            print(f"Image validated and processed: {filename}")
            return filename
        except Exception as e_validate:
            print(f"Downloaded file is not a valid image: {e_validate}")
            if os.path.exists(filename):
                os.remove(filename)
            return None

    except requests.exceptions.RequestException as e_download:
        print(f"Image download error: {e_download}")
        if os.path.exists(filename):
            os.remove(filename)
        return None
    except Exception as e_general:
        print(f"General error during image processing: {e_general}")
        if os.path.exists(filename):
            os.remove(filename)
        return None
472
 
473
def download_video(video_url, filename):
    """Stream a video from ``video_url`` into ``filename``.

    Returns:
        ``filename`` on success. On any error the partially written file is
        removed and ``None`` is returned.
    """
    try:
        # The context manager releases the streamed connection even if
        # writing the file fails part-way.
        with requests.get(video_url, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(filename, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        print(f"Video downloaded successfully to: {filename}")
        return filename
    except Exception as e:
        print(f"Video download error: {e}")
        if os.path.exists(filename):
            os.remove(filename)
        return None
488
 
489
def generate_media(prompt, user_image=None, current_index=0, total_segments=1):
    """Fetch a visual asset (video or image) for ``prompt``.

    Lookup order: Google Images for prompts containing "news", then a Pexels
    video with 25% probability, then a Pexels image for the prompt, and
    finally Pexels images for a fixed list of generic fallback terms.

    ``user_image``, ``current_index`` and ``total_segments`` are accepted but
    not used by this function.

    Returns:
        ``{"path": <file path>, "asset_type": "video" | "image"}`` for the
        first asset that downloads successfully, otherwise ``None``.
    """
    # Strip characters that are unsafe in a file name.
    safe_prompt = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')

    # News prompts go to Google Images first.
    if "news" in prompt.lower():
        print(f"News-related query detected: {prompt}. Using Google Images...")
        news_path = os.path.join(TEMP_FOLDER, f"{safe_prompt}_news.jpg")
        news_url = search_google_images(prompt)
        if not news_url:
            print(f"Google Images search failed for prompt: {prompt}")
        else:
            saved_news = download_image(news_url, news_path)
            if saved_news:
                print(f"News image saved to {saved_news}")
                return {"path": saved_news, "asset_type": "image"}

    # Videos are only attempted a quarter of the time to keep a media mix.
    if random.random() < 0.25:
        clip_path = os.path.join(TEMP_FOLDER, f"{safe_prompt}_video.mp4")
        clip_url = search_pexels_videos(prompt, PEXELS_API_KEY)
        if not clip_url:
            print(f"Pexels video search failed for prompt: {prompt}")
        else:
            saved_clip = download_video(clip_url, clip_path)
            if saved_clip:
                print(f"Video asset saved to {saved_clip}")
                return {"path": saved_clip, "asset_type": "video"}

    # Regular image for the prompt itself.
    picture_path = os.path.join(TEMP_FOLDER, f"{safe_prompt}.jpg")
    picture_url = search_pexels_images(prompt, PEXELS_API_KEY)
    if picture_url:
        saved_picture = download_image(picture_url, picture_path)
        if saved_picture:
            print(f"Image asset saved to {saved_picture}")
            return {"path": saved_picture, "asset_type": "image"}
        print(f"Pexels image download failed for prompt: {prompt}")

    # Last resort: generic stock terms.
    for term in ("nature", "people", "landscape", "technology", "business"):
        print(f"Trying fallback image search with term: {term}")
        stock_path = os.path.join(TEMP_FOLDER, f"fallback_{term}.jpg")
        stock_url = search_pexels_images(term, PEXELS_API_KEY)
        if not stock_url:
            print(f"Fallback image search failed for term: {term}")
            continue
        saved_stock = download_image(stock_url, stock_path)
        if saved_stock:
            print(f"Fallback image saved to {saved_stock}")
            return {"path": saved_stock, "asset_type": "image"}
        print(f"Fallback image download failed for term: {term}")

    print(f"Failed to generate visual asset for prompt: {prompt}")
    return None
552
 
553
- # ---------------- TTS Function Using Kokoro ---------------- #
554
-
555
def generate_silent_audio(duration, sample_rate=24000):
    """Write a silent WAV file lasting ``duration`` seconds.

    Args:
        duration: Length of the silence in seconds.
        sample_rate: Samples per second of the generated file.

    Returns:
        Path of the generated WAV file inside ``TEMP_FOLDER``.
    """
    import numpy as np
    import soundfile as sf

    num_samples = int(duration * sample_rate)
    silence = np.zeros(num_samples, dtype=np.float32)
    # The sample count is part of the name so that two calls within the same
    # second only share a file when their content is identical.
    silent_path = os.path.join(TEMP_FOLDER, f"silent_{int(time.time())}_{num_samples}.wav")
    sf.write(silent_path, silence, sample_rate)
    print(f"Silent audio generated: {silent_path}")
    return silent_path


def generate_tts(text, voice):
    """Generate narration audio for ``text`` and return the WAV path.

    Kokoro (the global ``pipeline``) is tried first, gTTS second, and if both
    fail a silent track of the estimated narration length is produced. A WAV
    that already exists for the same text and voice is reused.

    Args:
        text: Narration to synthesize.
        voice: Voice name; ``'en'`` is mapped to Kokoro's ``'af_heart'``.

    Returns:
        Path of a WAV file inside ``TEMP_FOLDER``.
    """
    import hashlib

    safe_text = re.sub(r'[^\w\s-]', '', text[:10]).strip().replace(' ', '_')
    # The readable 10-character prefix alone is not unique: two narrations
    # that start the same way would share a cache file and the second one
    # would play the first one's audio. A digest of voice + full text keeps
    # the cache key unique.
    digest = hashlib.sha256(f"{voice}\n{text}".encode("utf-8")).hexdigest()[:12]
    base_name = f"tts_{safe_text}_{digest}"
    file_path = os.path.join(TEMP_FOLDER, f"{base_name}.wav")

    if os.path.exists(file_path):
        print(f"Using cached TTS for text '{text[:10]}...'")
        return file_path

    try:
        kokoro_voice = 'af_heart' if voice == 'en' else voice
        generator = pipeline(text, voice=kokoro_voice, speed=0.9, split_pattern=r'\n+')
        audio_segments = [audio for _gs, _ps, audio in generator]
        # An empty result raises IndexError here and triggers the fallback.
        full_audio = np.concatenate(audio_segments) if len(audio_segments) > 1 else audio_segments[0]
        sf.write(file_path, full_audio, 24000)
        print(f"TTS audio saved to {file_path} (Kokoro)")
        return file_path
    except Exception as e:
        print(f"Error with Kokoro TTS: {e}")

    try:
        print("Falling back to gTTS...")
        from gtts import gTTS
        tts = gTTS(text=text, lang='en')
        mp3_path = os.path.join(TEMP_FOLDER, f"{base_name}.mp3")
        tts.save(mp3_path)
        # gTTS produces MP3; convert to WAV so callers always get one format.
        audio = AudioSegment.from_mp3(mp3_path)
        audio.export(file_path, format="wav")
        os.remove(mp3_path)
        print(f"Fallback TTS saved to {file_path} (gTTS)")
        return file_path
    except Exception as fallback_error:
        print(f"Both TTS methods failed: {fallback_error}")
        # Keep the video timeline intact with silence of the estimated length.
        return generate_silent_audio(duration=max(3, len(text.split()) * 0.5))
666
-
667
 
668
def apply_kenburns_effect(clip, target_resolution, effect_type=None):
    """Apply a Ken Burns style zoom or pan to ``clip``.

    The clip is scaled so that it covers ``target_resolution`` with a 15%
    margin. Each output frame is then a crop of that oversized frame whose
    size and centre move between a start and an end state with cosine
    easing, resized to the target resolution.

    Args:
        clip: Clip exposing ``w``, ``h``, ``duration``, ``resize`` and ``fl``.
        target_resolution: ``(width, height)`` of the output frames.
        effect_type: One of ``"zoom-in"``, ``"zoom-out"``, ``"pan-left"``,
            ``"pan-right"``, ``"up-left"``; ``None`` or ``"random"`` picks
            one at random.

    Returns:
        The transformed clip returned by ``clip.fl``.

    Raises:
        ValueError: If ``effect_type`` is not a supported effect name.
    """
    target_w, target_h = target_resolution

    # Size at which the source just covers the target frame.
    clip_aspect = clip.w / clip.h
    target_aspect = target_w / target_h
    if clip_aspect > target_aspect:
        fill_height = target_h
        fill_width = int(fill_height * clip_aspect)
    else:
        fill_width = target_w
        fill_height = int(fill_width / clip_aspect)

    # Extra margin that gives the crop window room to move. The clip is
    # resized once, straight to the final size, instead of being resampled
    # twice (fill size first, then the 1.15x enlargement).
    base_scale = 1.15
    new_width = int(fill_width * base_scale)
    new_height = int(fill_height * base_scale)
    clip = clip.resize(newsize=(new_width, new_height))

    max_offset_x = new_width - target_w
    max_offset_y = new_height - target_h
    frame_center = (new_width / 2, new_height / 2)
    mid_y = (max_offset_y // 2) + target_h / 2
    left_x = target_w / 2
    right_x = max_offset_x + target_w / 2

    # effect name -> (start_zoom, end_zoom, start_center, end_center)
    effects = {
        "zoom-in": (0.9, 1.1, frame_center, frame_center),
        "zoom-out": (1.1, 0.9, frame_center, frame_center),
        "pan-left": (1.0, 1.0, (right_x, mid_y), (left_x, mid_y)),
        "pan-right": (1.0, 1.0, (left_x, mid_y), (right_x, mid_y)),
        "up-left": (1.0, 1.0, (right_x, max_offset_y + target_h / 2), (left_x, target_h / 2)),
    }

    if effect_type is None or effect_type == "random":
        effect_type = random.choice(list(effects))
    if effect_type not in effects:
        raise ValueError(f"Unsupported effect_type: {effect_type}")
    start_zoom, end_zoom, start_center, end_center = effects[effect_type]

    duration = clip.duration

    def transform_frame(get_frame, t):
        frame = get_frame(t)
        # Cosine ease-in/out; a missing or zero duration pins the start state.
        ratio = t / duration if duration and duration > 0 else 0
        ratio = 0.5 - 0.5 * math.cos(math.pi * ratio)

        current_zoom = start_zoom + (end_zoom - start_zoom) * ratio
        crop_w = int(target_w / current_zoom)
        crop_h = int(target_h / current_zoom)

        current_center_x = start_center[0] + (end_center[0] - start_center[0]) * ratio
        current_center_y = start_center[1] + (end_center[1] - start_center[1]) * ratio

        # Clamp the centre so the crop window stays inside the frame.
        min_center_x = crop_w / 2
        max_center_x = new_width - crop_w / 2
        min_center_y = crop_h / 2
        max_center_y = new_height - crop_h / 2
        current_center_x = max(min_center_x, min(current_center_x, max_center_x))
        current_center_y = max(min_center_y, min(current_center_y, max_center_y))

        cropped_frame = cv2.getRectSubPix(frame, (crop_w, crop_h), (current_center_x, current_center_y))
        return cv2.resize(cropped_frame, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)

    return clip.fl(transform_frame)
770
 
771
-
772
-
773
-
774
-
775
- # Define target resolution (e.g., 1920x1080 for Full HD)
776
-
777
def resize_to_fill(clip, target_resolution):
    """Scale and centre-crop ``clip`` so it exactly covers the target size.

    Args:
        clip: Clip exposing ``w``, ``h``, ``resize`` and ``crop``.
        target_resolution: ``(width, height)`` to fill.

    Returns:
        The resized and cropped clip.
    """
    target_w, target_h = target_resolution
    wider_than_target = (clip.w / clip.h) > (target_w / target_h)

    if wider_than_target:
        # Match the height, then trim equal amounts from the left and right.
        scaled = clip.resize(height=target_h)
        margin = (scaled.w - target_w) / 2
        return scaled.crop(x1=margin, x2=scaled.w - margin, y1=0, y2=scaled.h)

    # Match the width, then trim equal amounts from the top and bottom.
    scaled = clip.resize(width=target_w)
    margin = (scaled.h - target_h) / 2
    return scaled.crop(x1=0, x2=scaled.w, y1=margin, y2=scaled.h - margin)
804
 
805
- # Function to find any MP3 file in the directory tree
806
def find_mp3_files():
    """Return the path of the first MP3 file under the current directory.

    The tree is walked in sorted order so the result does not depend on the
    order in which the filesystem lists entries, and the walk stops at the
    first match. The extension check is case-insensitive.

    Returns:
        Path to the first MP3 file found, or ``None`` when there is none.
    """
    for root, dirs, files in os.walk('.'):
        dirs.sort()  # in-place sort makes os.walk descend deterministically
        for file_name in sorted(files):
            if file_name.lower().endswith('.mp3'):
                mp3_path = os.path.join(root, file_name)
                print(f"Found MP3 file: {mp3_path}")
                return mp3_path
    return None
825
-
826
- # Update the add_background_music function to use the first found MP3
827
def add_background_music(final_video, bg_music_volume=0.08):
    """Mix a background music track under the video's existing audio.

    The track is the MP3 returned by ``find_mp3_files()``. It is looped when
    shorter than the video, trimmed to the video length and scaled by
    ``bg_music_volume``. This is best-effort: on any error the video is
    returned unchanged.

    Args:
        final_video: Clip exposing ``duration``, ``audio`` and ``set_audio``.
        bg_music_volume: Volume multiplier applied to the music.

    Returns:
        The video with mixed audio, or the original video when no MP3 exists
        or mixing fails.
    """
    try:
        # These names are not part of the module's moviepy import list; without
        # this import the resulting NameError was swallowed by the handler below
        # and background music was never added.
        from moviepy.editor import CompositeAudioClip, concatenate_audioclips

        bg_music_path = find_mp3_files()
        if not (bg_music_path and os.path.exists(bg_music_path)):
            print("No MP3 files found, skipping background music")
            return final_video

        print(f"Adding background music from: {bg_music_path}")
        bg_music = AudioFileClip(bg_music_path)

        # Loop the music if it is shorter than the video.
        if bg_music.duration < final_video.duration:
            loops_needed = math.ceil(final_video.duration / bg_music.duration)
            bg_music = concatenate_audioclips([bg_music] * loops_needed)

        # Trim to the video length and lower the volume.
        bg_music = bg_music.subclip(0, final_video.duration)
        bg_music = bg_music.volumex(bg_music_volume)

        # A video without narration simply gets the music as its only track.
        video_audio = final_video.audio
        if video_audio is None:
            mixed_audio = bg_music
        else:
            mixed_audio = CompositeAudioClip([video_audio, bg_music])

        final_video = final_video.set_audio(mixed_audio)
        print("Background music added successfully")
        return final_video

    except Exception as e:
        print(f"Error adding background music: {e}")
        print("Continuing without background music")
        return final_video
866
-
867
- # Update the subtitle positioning in the create_clip function
868
- # Find the section in create_clip that handles subtitles, and modify the positioning:
869
 
870
  def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, narration_text=None, segment_index=0):
871
- """Create a video clip with synchronized subtitles and properly timed narration."""
872
  try:
873
- print(f"Creating clip #{segment_index} with asset_type: {asset_type}, media_path: {media_path}")
874
-
875
  if not os.path.exists(media_path) or not os.path.exists(tts_path):
876
- print("Missing media or TTS file")
877
  return None
878
-
879
- # Load and process audio
880
  audio_clip = AudioFileClip(tts_path).audio_fadeout(0.2)
881
- audio_duration = audio_clip.duration
882
- target_duration = audio_duration + 0.2
883
-
884
- # Process visual asset
885
  if asset_type == "video":
886
  clip = VideoFileClip(media_path)
887
  clip = resize_to_fill(clip, TARGET_RESOLUTION)
888
- if clip.duration < target_duration:
889
- clip = clip.loop(duration=target_duration)
890
- else:
891
- clip = clip.subclip(0, target_duration)
892
  elif asset_type == "image":
893
  img = Image.open(media_path)
894
  if img.mode != 'RGB':
@@ -896,190 +368,64 @@ def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, n
896
  img.convert('RGB').save(temp.name)
897
  media_path = temp.name
898
  img.close()
899
-
900
  clip = ImageClip(media_path).set_duration(target_duration)
901
  clip = apply_kenburns_effect(clip, TARGET_RESOLUTION)
902
  clip = clip.fadein(0.3).fadeout(0.3)
903
  else:
904
  return None
905
-
906
- # Add subtitles with shorter chunks (4-5 words per line) and position at 2/3 of screen height
907
  if narration_text and CAPTION_COLOR != "transparent":
908
- try:
909
- # Create SRT-style subtitles
910
- words = narration_text.split()
911
- chunks = []
912
- current_chunk = []
913
-
914
- # Create chunks of 4-5 words for better readability
915
- for word in words:
916
- current_chunk.append(word)
917
- if len(current_chunk) >= 5: # Maximum 5 words per chunk
918
- chunks.append(' '.join(current_chunk))
919
- current_chunk = []
920
-
921
- # Add the last chunk if it exists
922
- if current_chunk:
923
- chunks.append(' '.join(current_chunk))
924
-
925
- # Calculate timing for each chunk based on audio duration
926
- chunk_duration = audio_duration / len(chunks)
927
- subtitle_clips = []
928
-
929
- # Position subtitles at 1/3 of the screen height instead of bottom
930
- subtitle_y_position = int(TARGET_RESOLUTION[1] * 0.70)
931
-
932
- for i, chunk_text in enumerate(chunks):
933
- start_time = i * chunk_duration
934
- end_time = (i + 1) * chunk_duration
935
-
936
- # Create text clip for this chunk
937
- txt_clip = TextClip(
938
- chunk_text,
939
- fontsize=45,
940
- font='Arial-Bold',
941
- color=CAPTION_COLOR,
942
- bg_color='rgba(0, 0, 0, 0.25)',
943
- method='caption',
944
- align='center',
945
- stroke_width=2, # Light shadow
946
- stroke_color=CAPTION_COLOR, # Must be set if stroke is used
947
- size=(TARGET_RESOLUTION[0] * 0.8, None) # Width for better readability
948
- ).set_start(start_time).set_end(end_time)
949
-
950
-
951
- txt_clip = txt_clip.set_position(('center', subtitle_y_position))
952
- subtitle_clips.append(txt_clip)
953
-
954
- # Combine all subtitle chunks with the main clip
955
- clip = CompositeVideoClip([clip] + subtitle_clips)
956
-
957
- except Exception as sub_error:
958
- print(f"Subtitle error: {sub_error}")
959
- # Fallback to a simpler method if the chunk approach fails
960
  txt_clip = TextClip(
961
- narration_text,
962
- fontsize=28,
 
963
  color=CAPTION_COLOR,
 
 
964
  align='center',
965
- size=(TARGET_RESOLUTION[0] * 0.7, None)
966
- ).set_position(('center', int(TARGET_RESOLUTION[2] / 3))).set_duration(clip.duration)
967
- clip = CompositeVideoClip([clip, txt_clip])
968
-
 
 
969
  clip = clip.set_audio(audio_clip)
970
- print(f"Clip created: {clip.duration:.1f}s")
971
  return clip
972
-
973
- except Exception as e:
974
- print(f"Error in create_clip: {str(e)}")
975
  return None
976
 
977
-
978
-
979
- def fix_imagemagick_policy():
980
- """Comprehensive fix for ImageMagick security policies"""
981
- try:
982
- print("Attempting to fix ImageMagick security policies...")
983
-
984
- # Find all possible policy.xml locations
985
- policy_paths = [
986
- "/etc/ImageMagick-6/policy.xml",
987
- "/etc/ImageMagick-7/policy.xml",
988
- "/etc/ImageMagick/policy.xml",
989
- "/usr/local/etc/ImageMagick-7/policy.xml"
990
- ]
991
-
992
- found_policy = None
993
- for path in policy_paths:
994
- if os.path.exists(path):
995
- found_policy = path
996
- break
997
-
998
- if not found_policy:
999
- print("No policy.xml found. Using alternative subtitle method.")
1000
- return False
1001
-
1002
- print(f"Modifying policy file at {found_policy}")
1003
-
1004
- # Create backup
1005
- backup_path = f"{found_policy}.bak"
1006
- os.system(f"sudo cp {found_policy} {backup_path}")
1007
-
1008
- # Apply security policy modifications
1009
- os.system(f"sudo sed -i 's/rights=\"none\"/rights=\"read|write\"/g' {found_policy}")
1010
- os.system(f"sudo sed -i 's/<policy domain=\"path\" pattern=\"@\*\"[^>]*>/<policy domain=\"path\" pattern=\"@*\" rights=\"read|write\"/g' {found_policy}")
1011
- os.system(f"sudo sed -i 's/<policy domain=\"coder\" rights=\"none\" pattern=\"PDF\"[^>]*>/<!-- <policy domain=\"coder\" rights=\"none\" pattern=\"PDF\"> -->/g' {found_policy}")
1012
-
1013
- print("ImageMagick policies updated successfully.")
1014
- return True
1015
-
1016
- except Exception as e:
1017
- print(f"Error fixing policies: {e}")
1018
- return False
1019
-
1020
-
1021
-
1022
-
1023
-
1024
-
1025
- # ---------------- Main Function ---------------- #
1026
-
1027
- import os
1028
- import shutil
1029
- import webbrowser
1030
-
1031
- def main_fixed():
1032
- # Fix ImageMagick policy first
1033
- fix_success = fix_imagemagick_policy()
1034
- if not fix_success:
1035
- print("Will use alternative methods if needed")
1036
-
1037
- # Create temp folder if not exists
1038
- if not os.path.exists(TEMP_FOLDER):
1039
- os.makedirs(TEMP_FOLDER)
1040
-
1041
- user_input = input("Enter your video concept: ")
1042
-
1043
- print("Generating script from Gemini API...")
1044
- script = generate_script(user_input)
1045
  if not script:
1046
- print("Failed to generate script.")
1047
- return
1048
- print("Generated Script:\n", script)
1049
  elements = parse_script(script)
1050
  if not elements:
1051
- print("Failed to parse script into elements.")
1052
- return
1053
- print(f"Parsed {len(elements)//2} script segments.")
1054
-
1055
- # Pair media elements with their corresponding TTS elements
1056
- paired_elements = []
1057
- for i in range(0, len(elements), 2):
1058
- if i+1 < len(elements):
1059
- paired_elements.append((elements[i], elements[i+1]))
1060
-
1061
  if not paired_elements:
1062
- print("No valid script segments found. Exiting.")
1063
- return
1064
-
1065
- # Process each paired segment to create video clips
1066
  clips = []
1067
  for idx, (media_elem, tts_elem) in enumerate(paired_elements):
1068
- print(f"\nProcessing segment {idx+1}/{len(paired_elements)} with prompt: '{media_elem['prompt']}'")
1069
-
1070
- # Generate the visual asset (video or image) based on the prompt
1071
  media_asset = generate_media(media_elem['prompt'], current_index=idx, total_segments=len(paired_elements))
1072
  if not media_asset:
1073
- print(f"Skipping segment {idx+1} due to missing media asset.")
1074
  continue
1075
-
1076
- # Generate the TTS audio for the narration
1077
  tts_path = generate_tts(tts_elem['text'], tts_elem['voice'])
1078
  if not tts_path:
1079
- print(f"Skipping segment {idx+1} due to TTS generation failure.")
1080
  continue
1081
-
1082
- # Create the video clip using the media asset and the TTS audio
1083
  clip = create_clip(
1084
  media_path=media_asset['path'],
1085
  asset_type=media_asset['asset_type'],
@@ -1091,142 +437,23 @@ def main_fixed():
1091
  )
1092
  if clip:
1093
  clips.append(clip)
1094
- else:
1095
- print(f"Clip creation failed for segment {idx+1}.")
1096
-
1097
  if not clips:
1098
- print("No clips were successfully created. Exiting.")
1099
- return
1100
-
1101
- # Concatenate all the clips into one final video
1102
- print("\nConcatenating clips...")
1103
  final_video = concatenate_videoclips(clips, method="compose")
1104
-
1105
- # Add background music before exporting
1106
  final_video = add_background_music(final_video, bg_music_volume=0.08)
1107
-
1108
- # Write the final video to a file with the veryfast preset
1109
- print(f"Exporting final video to {OUTPUT_VIDEO_FILENAME} with veryfast rendering preset...")
1110
  final_video.write_videofile(OUTPUT_VIDEO_FILENAME, codec='libx264', fps=24, preset='veryfast')
1111
- print(f"Final video saved as {OUTPUT_VIDEO_FILENAME}")
1112
-
1113
- # Auto-open the video file (acts as an auto-download/view feature)
1114
- try:
1115
- webbrowser.open(OUTPUT_VIDEO_FILENAME)
1116
- print("Video is being opened for download/viewing.")
1117
- except Exception as e:
1118
- print("Failed to auto-download/open the video:", e)
1119
-
1120
- # Clean up temporary files so they don't interfere with the next run
1121
- print("Cleaning up temporary files...")
1122
  shutil.rmtree(TEMP_FOLDER)
1123
- print("Temporary files removed.")
1124
 
1125
- if __name__ == "__main__":
1126
- main_fixed()
1127
- # --------------- GRADIO INTERFACE --------------- #
1128
-
1129
- def run_pipeline(user_input, resolution, clip_amount, caption_enabled):
1130
- global TARGET_RESOLUTION, CAPTION_COLOR
1131
-
1132
- # Set parameters based on user input
1133
- TARGET_RESOLUTION = (1920, 1080) if resolution == "Full" else (1080, 1920)
1134
- CAPTION_COLOR = "white" if caption_enabled else "transparent"
1135
- CLIPS_AMMOUNT = int(clip_amount)
1136
-
1137
- try:
1138
- # Generate script
1139
- script = generate_script(user_input)
1140
- if not script:
1141
- raise gr.Error("Failed to generate script")
1142
-
1143
- # Parse elements
1144
- elements = parse_script(script)
1145
- if not elements:
1146
- raise gr.Error("Failed to parse script")
1147
-
1148
- # Process elements
1149
- paired_elements = []
1150
- for i in range(0, len(elements), 2):
1151
- if i+1 < len(elements):
1152
- paired_elements.append((elements[i], elements[i+1]))
1153
-
1154
- # Limit clips
1155
- paired_elements = paired_elements[:CLIPS_AMMOUNT]
1156
-
1157
- # Create clips
1158
- clips = []
1159
- for idx, (media_elem, tts_elem) in enumerate(paired_elements):
1160
- media_asset = generate_media(media_elem['prompt'])
1161
- if not media_asset:
1162
- continue
1163
-
1164
- tts_path = generate_tts(tts_elem['text'], tts_elem['voice'])
1165
- if not tts_path:
1166
- continue
1167
-
1168
- clip = create_clip(
1169
- media_path=media_asset['path'],
1170
- asset_type=media_asset['asset_type'],
1171
- tts_path=tts_path,
1172
- narration_text=tts_elem['text'],
1173
- segment_index=idx
1174
- )
1175
- if clip:
1176
- clips.append(clip)
1177
-
1178
- # Create final video
1179
- if not clips:
1180
- raise gr.Error("Failed to create any video clips")
1181
-
1182
- final_video = concatenate_videoclips(clips, method="compose")
1183
- final_video = add_background_music(final_video)
1184
-
1185
- # Save output
1186
- output_path = "final_video.mp4"
1187
- final_video.write_videofile(output_path, codec='libx264', fps=24, preset='veryfast', threads=4)
1188
-
1189
- # Cleanup
1190
- for clip in clips:
1191
- clip.close()
1192
- final_video.close()
1193
-
1194
- return output_path
1195
-
1196
- except Exception as e:
1197
- raise gr.Error(f"Error: {str(e)}")
1198
- finally:
1199
- # Clean temporary files
1200
- for f in os.listdir(TEMP_FOLDER):
1201
- os.remove(os.path.join(TEMP_FOLDER, f))
1202
-
1203
- # --------------- GRADIO UI --------------- #
1204
-
1205
- with gr.Blocks(title="AI Documentary Maker", theme=gr.themes.Default()) as demo:
1206
- gr.Markdown("""
1207
- # 🎥 AI Documentary Maker
1208
- Create viral documentary-style videos with AI!
1209
- """)
1210
-
1211
  with gr.Row():
1212
- with gr.Column(scale=1):
1213
- user_input = gr.Textbox(label="Documentary Topic",
1214
- placeholder="Enter your topic or script...")
1215
- resolution = gr.Dropdown(["Full (1920x1080)", "Short (1080x1920)"],
1216
- label="Video Format", value="Short (1080x1920)")
1217
- clip_amount = gr.Slider(1, 10, value=5, step=1,
1218
- label="Number of Clips")
1219
- caption_enabled = gr.Checkbox(label="Enable Subtitles", value=True)
1220
- generate_btn = gr.Button("Generate Video", variant="primary")
1221
-
1222
- with gr.Column(scale=2):
1223
- output_video = gr.Video(label="Generated Video", format="mp4")
1224
-
1225
- generate_btn.click(
1226
- fn=run_pipeline,
1227
- inputs=[user_input, resolution, clip_amount, caption_enabled],
1228
- outputs=output_video
1229
- )
1230
-
1231
- if __name__ == "__main__":
1232
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  import os
2
+ import shutil
 
 
 
 
 
 
 
3
  import requests
4
  import re
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  import random
6
+ import time
7
  import math
 
8
  from moviepy.editor import (
9
  VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
10
+ CompositeVideoClip, TextClip, CompositeAudioClip
11
  )
12
  import moviepy.video.fx.all as vfx
13
  import moviepy.config as mpy_config
14
  from pydub import AudioSegment
15
+ from gtts import gTTS
 
16
  from PIL import Image, ImageDraw, ImageFont
17
  import numpy as np
18
  from bs4 import BeautifulSoup
 
19
  from urllib.parse import quote
20
  import pysrt
21
+ import soundfile as sf
22
+ from kokoro import KPipeline
23
+ import cv2
24
+ import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
+ # Initialize Kokoro TTS pipeline
27
+ pipeline = KPipeline(lang_code='a') # 'a' is from original code; adjust if needed
28
 
29
+ # Set ImageMagick binary
30
+ mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
31
 
32
+ # Global Configuration
33
# Credentials are read from the environment rather than committed to source.
# Set PEXELS_API_KEY and OPENROUTER_API_KEY (e.g. as Space secrets) before
# launching. When a key is missing it is an empty string, the corresponding
# API call is rejected, and the search/script helpers report failure through
# their normal None return path.
# NOTE(review): the keys previously hard-coded here are public in the commit
# history and should be rotated.
PEXELS_API_KEY = os.environ.get("PEXELS_API_KEY", "")
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")

# Model identifier sent to the OpenRouter chat-completions endpoint.
OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"

# Scratch directory for downloaded media and generated audio.
TEMP_FOLDER = "temp_video_processing"

# File name of the rendered video.
OUTPUT_VIDEO_FILENAME = "final_video.mp4"

# Browser-like User-Agent sent with image scraping and download requests.
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
39
 
40
+ # Helper Functions
 
 
 
 
41
  def generate_script(user_input):
 
42
  headers = {
43
  'Authorization': f'Bearer {OPENROUTER_API_KEY}',
44
  'HTTP-Referer': 'https://your-domain.com',
45
  'X-Title': 'AI Documentary Maker'
46
  }
 
47
  prompt = f"""Short Documentary Script GeneratorInstructions:
 
48
  If I say "use this," just output the script exactly as I gave it.
49
  If I only give topics, generate a script based on them.
50
  If I provide a full script, rewrite it without any changes. Make everything short simple and humarous funny and act as serious but humarous. And don't say anything off topic. Also alway say a funny statement to subscribe based on the video topic at the end. Use normal conversational text like a normal person talking and avoid AI phase make the statements humanize and normal conversational
51
  And always talk negative views don't really wants any positive statement and the full script will be one video about one topic so keep in mind
52
  Formatting Rules:
 
 
53
  Title in Square Brackets:
 
 
54
  Each section starts with a one-word title inside [ ] (max two words if necessary).
55
  This title will be used as a search term for Pexels footage.
 
 
 
56
  Casual & Funny Narration:
 
 
57
  Each section has 5-10 words of narration.
58
  Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).
 
 
 
59
  No Special Formatting:
 
 
60
  No bold, italics, or special characters. You are a assistant AI your task is to create script. You aren't a chatbot. So, don't write extra text
 
 
 
61
  Generalized Search Terms:
 
 
62
  If a term is too specific, make it more general for Pexels search.
 
 
 
63
  Scene-Specific Writing:
 
 
64
  Each section describes only what should be shown in the video.
 
 
 
65
  Output Only the Script, and also make it funny and humarous and helirous and also add to subscribe with a funny statement like subscribe now or .....
 
 
66
  No extra text, just the script.
 
 
 
67
  Example Output:
68
  [North Korea]
 
69
  Top 5 unknown facts about North Korea.
 
70
  [Invisibility]
 
71
  North Korea’s internet speed is so fast… it doesn’t exist.
 
72
  [Leadership]
 
73
  Kim Jong-un once won an election with 100% votes… against himself.
 
74
  [Magic]
 
75
  North Korea discovered time travel. That’s why their news is always from the past.
 
76
  [Warning]
 
77
  Subscribe now, or Kim Jong-un will send you a free one-way ticket… to North Korea.
 
78
  [Freedom]
 
79
  North Korean citizens can do anything… as long as it's government-approved.
80
  Now here is the Topic/scrip: {user_input}
81
  """
 
82
  data = {
83
  'model': OPENROUTER_MODEL,
84
  'messages': [{'role': 'user', 'content': prompt}],
85
  'temperature': 0.4,
86
  'max_tokens': 5000
87
  }
 
88
  try:
89
  response = requests.post(
90
  'https://openrouter.ai/api/v1/chat/completions',
 
92
  json=data,
93
  timeout=30
94
  )
 
 
 
 
95
  if response.status_code == 200:
96
  response_data = response.json()
97
  if 'choices' in response_data and len(response_data['choices']) > 0:
98
  return response_data['choices'][0]['message']['content']
99
+ return None
100
+ except Exception:
 
 
 
 
 
 
 
101
  return None
102
 
103
def parse_script(script_text):
    """Turn a bracket-titled script into alternating media/TTS elements.

    A line that starts with ``[`` and contains ``]`` opens a new section; the
    text between the brackets is the section title. Any text after the closing
    bracket on that line, plus every following non-empty line up to the next
    header, forms the section's narration (fragments joined by single spaces).

    Sections are kept in script order and a repeated title produces its own
    section instead of overwriting the earlier one.

    Args:
        script_text: The raw script as a string.

    Returns:
        A flat list ``[media, tts, media, tts, ...]`` where each ``media`` dict
        is ``{"type": "media", "prompt": title, "effects": "fade-in"}`` and
        each ``tts`` dict is ``{"type": "tts", "text": narration,
        "voice": "en", "duration": seconds}``. Sections with an empty title or
        empty narration are dropped. Returns ``[]`` for non-string input.
    """
    if not isinstance(script_text, str):
        return []

    # Ordered list of (title, [narration fragments]); a list rather than a
    # dict so duplicate titles are preserved.
    sections = []
    for raw_line in script_text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith("[") and "]" in line:
            closing = line.index("]")
            title = line[1:closing]
            inline_text = line[closing + 1:].strip()
            sections.append((title, [inline_text] if inline_text else []))
        elif sections and sections[-1][0]:
            # Narration only attaches to a section with a non-empty title;
            # text before the first header is ignored.
            sections[-1][1].append(line)

    elements = []
    for title, fragments in sections:
        narration = " ".join(fragments)
        if not title or not narration:
            continue
        # Rough speaking-time estimate: half a second per word, 3 s minimum.
        duration = max(3, len(narration.split()) * 0.5)
        elements.append({"type": "media", "prompt": title, "effects": "fade-in"})
        elements.append(
            {"type": "tts", "text": narration, "voice": "en", "duration": duration}
        )
    return elements
135
 
136
def search_pexels_videos(query, pexels_api_key):
    """Pick a random HD video link for ``query`` from the Pexels video search.

    Requests the first three result pages (15 videos each). For every video,
    the link of its first file whose ``quality`` is ``"hd"`` is collected.
    A page that fails (exception or non-200 status) is skipped.

    Args:
        query: Search term sent to Pexels.
        pexels_api_key: Value sent in the ``Authorization`` header.

    Returns:
        One randomly chosen collected link, or ``None`` if nothing was found.
    """
    endpoint = "https://api.pexels.com/videos/search"
    auth_headers = {'Authorization': pexels_api_key}
    page_count = 3
    page_size = 15

    candidates = []
    page_number = 1
    while page_number <= page_count:
        try:
            reply = requests.get(
                endpoint,
                headers=auth_headers,
                params={"query": query, "per_page": page_size, "page": page_number},
                timeout=10,
            )
            if reply.status_code == 200:
                for entry in reply.json().get("videos", []):
                    # Take only the first HD rendition of each video.
                    hd_file = next(
                        (item for item in entry.get("video_files", [])
                         if item.get("quality") == "hd"),
                        None,
                    )
                    if hd_file is not None:
                        candidates.append(hd_file.get("link"))
        except Exception:
            pass
        page_number += 1

    if not candidates:
        return None
    return random.choice(candidates)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
def search_pexels_images(query, pexels_api_key):
    """Pick a random landscape photo URL for ``query`` from Pexels.

    Asks the Pexels photo search for up to five landscape results and returns
    the ``src.original`` URL of one of them, chosen at random.

    Args:
        query: Search term sent to Pexels.
        pexels_api_key: Value sent in the ``Authorization`` header.

    Returns:
        The chosen photo's original-size URL, or ``None`` on a non-200
        response, an empty result list, or any exception.
    """
    try:
        reply = requests.get(
            "https://api.pexels.com/v1/search",
            headers={'Authorization': pexels_api_key},
            params={"query": query, "per_page": 5, "orientation": "landscape"},
            timeout=10,
        )
        if reply.status_code != 200:
            return None
        results = reply.json().get("photos", [])
        if not results:
            return None
        chosen = random.choice(results[:5])
        return chosen.get("src", {}).get("original")
    except Exception:
        return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
def search_google_images(query):
    """Scrape a Google Images result page and return one image URL.

    Fetches the image-search page for ``query``, collects the ``src`` of every
    ``<img>`` tag that starts with ``http`` and does not contain ``gstatic``,
    and returns a random pick from the first five such URLs.

    Args:
        query: Search term; it is URL-quoted before being placed in the URL.

    Returns:
        An image URL, or ``None`` if none qualified or any exception occurred.
    """
    try:
        page = requests.get(
            f"https://www.google.com/search?q={quote(query)}&tbm=isch",
            headers={"User-Agent": USER_AGENT},
            timeout=10,
        )
        document = BeautifulSoup(page.text, "html.parser")

        found = []
        for tag in document.find_all("img"):
            source = tag.get("src", "")
            if not source.startswith("http"):
                continue
            if "gstatic" in source:
                continue
            found.append(source)

        if found:
            return random.choice(found[:5])
        return None
    except Exception:
        return None
186
 
187
def download_image(image_url, filename):
    """Download an image to ``filename``, validate it and normalise it to RGB.

    The body is streamed to disk in 8 KiB chunks, checked with Pillow's
    ``verify()``, then reopened and, if its mode is not ``RGB``, converted and
    saved back over the same path.

    The HTTP response and both Pillow file handles are closed via context
    managers, so no descriptor stays open on the file when it is re-saved or,
    on failure, removed.

    Args:
        image_url: URL to fetch.
        filename: Destination path; also the path returned on success.

    Returns:
        ``filename`` on success, otherwise ``None`` (any partially written
        file is deleted first).
    """
    try:
        headers = {"User-Agent": USER_AGENT}
        with requests.get(image_url, headers=headers, stream=True, timeout=15) as response:
            response.raise_for_status()
            with open(filename, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        # verify() leaves the image object unusable, hence the second open.
        with Image.open(filename) as probe:
            probe.verify()

        converted = None
        with Image.open(filename) as img:
            if img.mode != 'RGB':
                # convert() loads the pixel data, so the result stays valid
                # after the source handle is closed.
                converted = img.convert('RGB')
        if converted is not None:
            converted.save(filename)
        return filename
    except Exception:
        if os.path.exists(filename):
            os.remove(filename)
        return None
206
 
207
def download_video(video_url, filename):
    """Stream a video from ``video_url`` into ``filename``.

    Args:
        video_url: URL to fetch.
        filename: Destination path; also the path returned on success.

    Returns:
        ``filename`` once the body has been written, or ``None`` if the
        request or the write raised (a partially written file is removed).
    """
    try:
        reply = requests.get(video_url, stream=True, timeout=30)
        reply.raise_for_status()
        with open(filename, 'wb') as sink:
            for piece in reply.iter_content(chunk_size=8192):
                sink.write(piece)
    except Exception:
        # Do not leave a truncated download behind.
        if os.path.exists(filename):
            os.remove(filename)
        return None
    return filename
219
 
220
def generate_media(prompt, current_index=0, total_segments=1):
    """Find and download a visual asset for ``prompt`` into ``TEMP_FOLDER``.

    Sources are tried in this order, returning on the first success:
      1. If ``prompt`` contains "news": a Google Images result.
      2. With probability 0.25: a Pexels video.
      3. A Pexels photo for ``prompt``.
      4. A Pexels photo for each generic fallback term in turn.

    Args:
        prompt: Search term; also used (sanitised) in the file name.
        current_index: Accepted for caller compatibility; not used here.
        total_segments: Accepted for caller compatibility; not used here.

    Returns:
        ``{"path": <file>, "asset_type": "image" | "video"}``, or ``None``
        when every source failed.
    """
    def fetch_image(url, destination):
        # Wrap a successful image download in the asset dict, else None.
        if url and download_image(url, destination):
            return {"path": destination, "asset_type": "image"}
        return None

    safe_prompt = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')

    if "news" in prompt.lower():
        news_path = os.path.join(TEMP_FOLDER, f"{safe_prompt}_news.jpg")
        asset = fetch_image(search_google_images(prompt), news_path)
        if asset:
            return asset

    if random.random() < 0.25:
        video_path = os.path.join(TEMP_FOLDER, f"{safe_prompt}_video.mp4")
        video_link = search_pexels_videos(prompt, PEXELS_API_KEY)
        if video_link and download_video(video_link, video_path):
            return {"path": video_path, "asset_type": "video"}

    photo_path = os.path.join(TEMP_FOLDER, f"{safe_prompt}.jpg")
    asset = fetch_image(search_pexels_images(prompt, PEXELS_API_KEY), photo_path)
    if asset:
        return asset

    for generic in ("nature", "people", "landscape", "technology", "business"):
        generic_path = os.path.join(TEMP_FOLDER, f"fallback_{generic}.jpg")
        asset = fetch_image(search_pexels_images(generic, PEXELS_API_KEY), generic_path)
        if asset:
            return asset

    return None
243
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
def generate_tts(text, voice):
    """Synthesise ``text`` to a WAV file in ``TEMP_FOLDER`` and return its path.

    Engines are tried in order: the module-level Kokoro ``pipeline``, then
    gTTS (MP3 converted to WAV via pydub), and finally a block of silence so
    the caller always receives a usable audio file.

    The output name contains a digest of the full ``text`` and ``voice``.
    Naming the file after only the first ten characters made narrations with
    the same opening (e.g. several lines starting "North Korea ...") share one
    cached file, so later segments reused the first segment's audio.

    Args:
        text: Narration to speak.
        voice: ``'en'`` selects the Kokoro voice ``'af_heart'``; any other
            value is passed to Kokoro unchanged.

    Returns:
        Path of the WAV file (an existing file for the same text and voice is
        reused).
    """
    import hashlib  # function-scope import: only needed for the cache key

    safe_text = re.sub(r'[^\w\s-]', '', text[:10]).strip().replace(' ', '')
    digest = hashlib.sha256(f"{voice}\n{text}".encode("utf-8")).hexdigest()[:12]
    stem = f"tts{safe_text}_{digest}"
    file_path = os.path.join(TEMP_FOLDER, f"{stem}.wav")
    if os.path.exists(file_path):
        return file_path

    # 1) Kokoro
    try:
        kokoro_voice = 'af_heart' if voice == 'en' else voice
        generator = pipeline(text, voice=kokoro_voice, speed=0.9, split_pattern=r'\n+')
        audio_segments = [audio for _, _, audio in generator]
        if not audio_segments:
            raise ValueError("Kokoro produced no audio")
        full_audio = np.concatenate(audio_segments) if len(audio_segments) > 1 else audio_segments[0]
        sf.write(file_path, full_audio, 24000)
        return file_path
    except Exception:
        pass  # deliberate best-effort: fall through to gTTS

    # 2) gTTS
    try:
        tts = gTTS(text=text, lang='en')
        mp3_path = os.path.join(TEMP_FOLDER, f"{stem}.mp3")
        tts.save(mp3_path)
        audio = AudioSegment.from_mp3(mp3_path)
        audio.export(file_path, format="wav")
        os.remove(mp3_path)
        return file_path
    except Exception:
        pass  # deliberate best-effort: fall through to silence

    # 3) Silence sized like the narration estimate (0.5 s per word, 3 s min).
    num_samples = int(max(3, len(text.split()) * 0.5) * 24000)
    silence = np.zeros(num_samples, dtype=np.float32)
    sf.write(file_path, silence, 24000)
    return file_path
270
 
271
def apply_kenburns_effect(clip, target_resolution, effect_type=None):
    """Animate a clip with a slow zoom or pan cropped to ``target_resolution``.

    The clip is resized to cover the target frame, enlarged by a further 15 %,
    and then every frame is cropped around a moving centre and scaled back to
    the target size. Motion follows a cosine ramp from 0 to 1 over the clip's
    duration.

    Args:
        clip: Clip exposing ``w``, ``h``, ``duration``, ``resize`` and ``fl``.
        target_resolution: ``(width, height)`` of the output frames.
        effect_type: One of ``"zoom-in"``, ``"zoom-out"``, ``"pan-left"``,
            ``"pan-right"``, ``"up-left"``. A falsy value or ``"random"``
            picks one of these at random.

    Returns:
        The clip with the per-frame transform applied.

    Raises:
        ValueError: If ``effect_type`` is none of the supported names.
    """
    target_w, target_h = target_resolution

    # Scale so the image covers the whole target frame.
    source_aspect = clip.w / clip.h
    if source_aspect > target_w / target_h:
        cover_h = target_h
        cover_w = int(cover_h * source_aspect)
    else:
        cover_w = target_w
        cover_h = int(cover_w / source_aspect)
    clip = clip.resize(newsize=(cover_w, cover_h))

    # Extra margin so the crop window has room to move.
    base_scale = 1.15
    new_width = int(cover_w * base_scale)
    new_height = int(cover_h * base_scale)
    clip = clip.resize(newsize=(new_width, new_height))

    slack_x = new_width - target_w
    slack_y = new_height - target_h

    if not effect_type or effect_type == "random":
        effect_type = random.choice(
            ["zoom-in", "zoom-out", "pan-left", "pan-right", "up-left"]
        )

    frame_centre = (new_width / 2, new_height / 2)
    mid_row = (slack_y // 2) + target_h / 2
    left_x = target_w / 2
    right_x = slack_x + target_w / 2
    # (name, (start zoom, end zoom, start centre, end centre))
    motions = (
        ("zoom-in", (0.9, 1.1, frame_centre, frame_centre)),
        ("zoom-out", (1.1, 0.9, frame_centre, frame_centre)),
        ("pan-left", (1.0, 1.0, (right_x, mid_row), (left_x, mid_row))),
        ("pan-right", (1.0, 1.0, (left_x, mid_row), (right_x, mid_row))),
        ("up-left", (1.0, 1.0, (right_x, slack_y + target_h / 2), (left_x, target_h / 2))),
    )
    for name, motion in motions:
        if effect_type == name:
            start_zoom, end_zoom, start_center, end_center = motion
            break
    else:
        raise ValueError(f"Unsupported effect_type: {effect_type}")

    def transform_frame(get_frame, t):
        frame = get_frame(t)
        if clip.duration > 0:
            progress = 0.5 - 0.5 * math.cos(math.pi * t / clip.duration)
        else:
            progress = 0
        zoom = start_zoom + (end_zoom - start_zoom) * progress
        crop_w = int(target_w / zoom)
        crop_h = int(target_h / zoom)
        centre_x = start_center[0] + (end_center[0] - start_center[0]) * progress
        centre_y = start_center[1] + (end_center[1] - start_center[1]) * progress
        # Keep the crop window inside the enlarged frame.
        centre_x = max(crop_w / 2, min(centre_x, new_width - crop_w / 2))
        centre_y = max(crop_h / 2, min(centre_y, new_height - crop_h / 2))
        patch = cv2.getRectSubPix(frame, (crop_w, crop_h), (centre_x, centre_y))
        return cv2.resize(patch, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)

    return clip.fl(transform_frame)
324
 
 
 
 
 
 
 
325
def resize_to_fill(clip, target_resolution):
    """Scale a clip to cover ``target_resolution`` and centre-crop the excess.

    A clip wider than the target aspect ratio is matched on height and trimmed
    equally on the left and right; otherwise it is matched on width and
    trimmed equally on the top and bottom.

    Args:
        clip: Clip exposing ``w``, ``h``, ``resize`` and ``crop``.
        target_resolution: ``(width, height)`` to fill.

    Returns:
        The resized and cropped clip.
    """
    target_w, target_h = target_resolution
    source_ratio = clip.w / clip.h
    wanted_ratio = target_w / target_h

    if source_ratio > wanted_ratio:
        fitted = clip.resize(height=target_h)
        side_trim = (fitted.w - target_w) / 2
        return fitted.crop(x1=side_trim, x2=fitted.w - side_trim, y1=0, y2=fitted.h)

    fitted = clip.resize(width=target_w)
    edge_trim = (fitted.h - target_h) / 2
    return fitted.crop(x1=0, x2=fitted.w, y1=edge_trim, y2=fitted.h - edge_trim)
338
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
def add_background_music(final_video, bg_music_volume=0.08):
    """Mix ``background_music.mp3`` under the video's audio, if the file exists.

    The music is repeated as often as needed to span the video, trimmed to the
    video's duration, scaled by ``bg_music_volume`` and composited with the
    existing audio track.

    Args:
        final_video: Clip exposing ``duration``, ``audio`` and ``set_audio``.
        bg_music_volume: Gain applied to the music (0.08 = 8 %).

    Returns:
        The video with the mixed audio, or ``final_video`` unchanged when the
        music file is missing or has no usable duration.
    """
    # concatenate_audioclips is not in the file's top-level moviepy import
    # list, so bring it into scope here; without it the looping branch below
    # raised NameError whenever the track was shorter than the video.
    from moviepy.editor import concatenate_audioclips

    bg_music_path = "background_music.mp3"
    if not os.path.exists(bg_music_path):
        return final_video

    bg_music = AudioFileClip(bg_music_path)
    if not bg_music.duration:
        # A zero/unknown length track cannot be looped (division by zero).
        bg_music.close()
        return final_video

    if bg_music.duration < final_video.duration:
        loops_needed = math.ceil(final_video.duration / bg_music.duration)
        bg_music = concatenate_audioclips([bg_music] * loops_needed)
    bg_music = bg_music.subclip(0, final_video.duration)
    bg_music = bg_music.volumex(bg_music_volume)

    # The video may have no audio track at all; only mix tracks that exist.
    tracks = [track for track in (final_video.audio, bg_music) if track is not None]
    return final_video.set_audio(CompositeAudioClip(tracks))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
 
354
  def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, narration_text=None, segment_index=0):
 
355
  try:
 
 
356
  if not os.path.exists(media_path) or not os.path.exists(tts_path):
 
357
  return None
 
 
358
  audio_clip = AudioFileClip(tts_path).audio_fadeout(0.2)
359
+ target_duration = audio_clip.duration + 0.2
 
 
 
360
  if asset_type == "video":
361
  clip = VideoFileClip(media_path)
362
  clip = resize_to_fill(clip, TARGET_RESOLUTION)
363
+ clip = clip.loop(duration=target_duration) if clip.duration < target_duration else clip.subclip(0, target_duration)
 
 
 
364
  elif asset_type == "image":
365
  img = Image.open(media_path)
366
  if img.mode != 'RGB':
 
368
  img.convert('RGB').save(temp.name)
369
  media_path = temp.name
370
  img.close()
 
371
  clip = ImageClip(media_path).set_duration(target_duration)
372
  clip = apply_kenburns_effect(clip, TARGET_RESOLUTION)
373
  clip = clip.fadein(0.3).fadeout(0.3)
374
  else:
375
  return None
376
+ subtitle_clips = []
 
377
  if narration_text and CAPTION_COLOR != "transparent":
378
+ words = narration_text.split()
379
+ chunks = [' '.join(words[i:i+5]) for i in range(0, len(words), 5)]
380
+ chunk_duration = audio_clip.duration / len(chunks)
381
+ subtitle_y_position = int(TARGET_RESOLUTION[1] * 0.70)
382
+ for i, chunk_text in enumerate(chunks):
383
+ start_time = i * chunk_duration
384
+ end_time = (i + 1) * chunk_duration
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
  txt_clip = TextClip(
386
+ chunk_text,
387
+ fontsize=45,
388
+ font='Arial-Bold',
389
  color=CAPTION_COLOR,
390
+ bg_color='rgba(0, 0, 0, 0.25)',
391
+ method='caption',
392
  align='center',
393
+ stroke_width=2,
394
+ stroke_color=CAPTION_COLOR,
395
+ size=(TARGET_RESOLUTION[0] * 0.8, None)
396
+ ).set_start(start_time).set_end(end_time).set_position(('center', subtitle_y_position))
397
+ subtitle_clips.append(txt_clip)
398
+ clip = CompositeVideoClip([clip] + subtitle_clips)
399
  clip = clip.set_audio(audio_clip)
 
400
  return clip
401
+ except Exception:
 
 
402
  return None
403
 
404
+ # Main Gradio Function
405
+ def generate_video(video_concept, resolution, caption_option):
406
+ global TARGET_RESOLUTION, CAPTION_COLOR
407
+ TARGET_RESOLUTION = (1920, 1080) if resolution == "Full" else (1080, 1920)
408
+ CAPTION_COLOR = "white" if caption_option == "Yes" else "transparent"
409
+ if os.path.exists(TEMP_FOLDER):
410
+ shutil.rmtree(TEMP_FOLDER)
411
+ os.makedirs(TEMP_FOLDER)
412
+ script = generate_script(video_concept)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
413
  if not script:
414
+ return "Failed to generate script."
 
 
415
  elements = parse_script(script)
416
  if not elements:
417
+ return "Failed to parse script."
418
+ paired_elements = [(elements[i], elements[i+1]) for i in range(0, len(elements), 2) if i+1 < len(elements)]
 
 
 
 
 
 
 
 
419
  if not paired_elements:
420
+ return "No valid script segments found."
 
 
 
421
  clips = []
422
  for idx, (media_elem, tts_elem) in enumerate(paired_elements):
 
 
 
423
  media_asset = generate_media(media_elem['prompt'], current_index=idx, total_segments=len(paired_elements))
424
  if not media_asset:
 
425
  continue
 
 
426
  tts_path = generate_tts(tts_elem['text'], tts_elem['voice'])
427
  if not tts_path:
 
428
  continue
 
 
429
  clip = create_clip(
430
  media_path=media_asset['path'],
431
  asset_type=media_asset['asset_type'],
 
437
  )
438
  if clip:
439
  clips.append(clip)
 
 
 
440
  if not clips:
441
+ return "No clips were successfully created."
 
 
 
 
442
  final_video = concatenate_videoclips(clips, method="compose")
 
 
443
  final_video = add_background_music(final_video, bg_music_volume=0.08)
 
 
 
444
  final_video.write_videofile(OUTPUT_VIDEO_FILENAME, codec='libx264', fps=24, preset='veryfast')
 
 
 
 
 
 
 
 
 
 
 
445
  shutil.rmtree(TEMP_FOLDER)
446
+ return OUTPUT_VIDEO_FILENAME
447
 
448
+ # Gradio Interface
449
+ with gr.Blocks() as demo:
450
+ gr.Markdown("# AI Documentary Video Generator")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
451
  with gr.Row():
452
+ video_concept = gr.Textbox(label="Video Concept", placeholder="Enter your video concept here...")
453
+ resolution = gr.Dropdown(["Full", "Short"], label="Resolution", value="Full")
454
+ caption_option = gr.Dropdown(["Yes", "No"], label="Caption", value="Yes")
455
+ generate_btn = gr.Button("Generate Video")
456
+ output_video = gr.Video(label="Generated Video")
457
+ generate_btn.click(generate_video, inputs=[video_concept, resolution, caption_option], outputs=output_video)
458
+
459
+ demo.launch()