Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -1,7 +1,3 @@
|
|
1 |
-
|
2 |
-
print("β
Dependencies installed.")
|
3 |
-
|
4 |
-
# @title Import Libraries and Define Globals
|
5 |
import gradio as gr
|
6 |
import os
|
7 |
import shutil
|
@@ -15,137 +11,824 @@ import tempfile
|
|
15 |
import traceback
|
16 |
import numpy as np
|
17 |
import soundfile as sf
|
18 |
-
import pysrt
|
19 |
-
import cv2
|
20 |
-
from kokoro import KPipeline
|
21 |
from moviepy.editor import (
|
22 |
VideoFileClip, AudioFileClip, ImageClip, concatenate_videoclips,
|
23 |
-
CompositeVideoClip, TextClip, CompositeAudioClip
|
24 |
)
|
25 |
import moviepy.video.fx.all as vfx
|
26 |
-
import moviepy.config as mpy_config
|
27 |
from pydub import AudioSegment
|
28 |
from PIL import Image, ImageDraw, ImageFont
|
29 |
from bs4 import BeautifulSoup
|
30 |
from urllib.parse import quote
|
31 |
from gtts import gTTS
|
|
|
32 |
|
33 |
# --- Configuration ---
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
# --- Helper Functions ---
|
50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
try:
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
#
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
try:
|
81 |
-
|
82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
if not script:
|
84 |
-
|
85 |
-
|
86 |
-
|
|
|
|
|
|
|
|
|
|
|
87 |
elements = parse_script(script)
|
88 |
if not elements:
|
89 |
-
|
90 |
-
|
|
|
|
|
|
|
|
|
91 |
clips = []
|
|
|
|
|
|
|
92 |
for i in range(0, len(elements), 2):
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
continue
|
106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
clip = create_clip(
|
108 |
-
|
109 |
-
|
110 |
-
tts_path,
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
|
|
115 |
)
|
|
|
116 |
if clip:
|
117 |
clips.append(clip)
|
118 |
-
|
|
|
|
|
|
|
|
|
|
|
119 |
if not clips:
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
except Exception as e:
|
130 |
-
|
|
|
|
|
|
|
|
|
131 |
finally:
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
|
151 |
-
iface.launch(debug=True)
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import os
|
3 |
import shutil
|
|
|
11 |
import traceback
|
12 |
import numpy as np
|
13 |
import soundfile as sf
|
14 |
+
# import pysrt # Not strictly needed if embedding captions directly
|
15 |
+
import cv2 # OpenCV for potential image processing, though PIL is often enough
|
|
|
16 |
from moviepy.editor import (
|
17 |
VideoFileClip, AudioFileClip, ImageClip, concatenate_videoclips,
|
18 |
+
CompositeVideoClip, TextClip, CompositeAudioClip, ColorClip
|
19 |
)
|
20 |
import moviepy.video.fx.all as vfx
|
|
|
21 |
from pydub import AudioSegment
|
22 |
from PIL import Image, ImageDraw, ImageFont
|
23 |
from bs4 import BeautifulSoup
|
24 |
from urllib.parse import quote
|
25 |
from gtts import gTTS
|
26 |
+
import logging
|
27 |
|
28 |
# --- Configuration ---
|
29 |
+
# IMPORTANT: Use Hugging Face Secrets for API keys in a real Space
|
30 |
+
PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna' # Replace with your Pexels API Key
|
31 |
+
OPENROUTER_API_KEY = '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' # Replace with your OpenRouter API Key
|
32 |
+
OPENROUTER_MODEL = "mistralai/mistral-7b-instruct:free" # Using a known free model
|
33 |
+
# OPENROUTER_MODEL = "mistralai/mistral-small-latest" # Or a small paid one if needed
|
34 |
+
|
35 |
+
TEMP_FOLDER_BASE = "/tmp/ai_doc_generator"
|
36 |
+
OUTPUT_VIDEO_FILENAME = "final_documentary.mp4"
|
37 |
+
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
|
38 |
+
DEFAULT_FONT = "DejaVuSans.ttf" # A common font available in many Linux distros, adjust if needed
|
39 |
+
BGM_FILE = "background_music.mp3" # Optional: Place a royalty-free mp3 here
|
40 |
+
BGM_VOLUME = 0.1 # Background music volume multiplier (0.0 to 1.0)
|
41 |
+
|
42 |
+
# --- Logging Setup ---
|
43 |
+
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
44 |
+
|
45 |
+
# --- Kokoro TTS Initialization (Optional) ---
|
46 |
+
KOKORO_ENABLED = False
|
47 |
+
pipeline = None
|
48 |
+
# try:
|
49 |
+
# from kokoro import KPipeline
|
50 |
+
# # Check for GPU availability if desired, default to CPU
|
51 |
+
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
52 |
+
# pipeline = KPipeline(lang_code='a', device=device) # 'a' for multilingual? Check Kokoro docs
|
53 |
+
# KOKORO_ENABLED = True
|
54 |
+
# logging.info("β
Kokoro TTS Initialized.")
|
55 |
+
# except ImportError:
|
56 |
+
# logging.warning("Kokoro library not found. Falling back to gTTS.")
|
57 |
+
# pipeline = None
|
58 |
+
# except Exception as e:
|
59 |
+
# logging.warning(f"β οΈ Error initializing Kokoro TTS: {e}. Using gTTS fallback.")
|
60 |
+
# pipeline = None
|
61 |
|
62 |
# --- Helper Functions ---
|
63 |
+
|
64 |
+
def generate_script(topic, api_key, model):
|
65 |
+
"""Generates a documentary script using OpenRouter API."""
|
66 |
+
logging.info(f"Generating script for topic: {topic}")
|
67 |
+
prompt = f"""Create a short documentary script about '{topic}'.
|
68 |
+
The script should be structured as a sequence of scenes and narrations.
|
69 |
+
Each scene description should be enclosed in [SCENE: description] tags. The description should be concise and suggest visuals (e.g., 'drone shot of mountains', 'close up of a historical artifact', 'archival footage of protests').
|
70 |
+
Each narration segment should follow its corresponding scene and be enclosed in [NARRATION: text] tags. The narration should be engaging and informative, broken into short sentences suitable for ~5-10 second clips.
|
71 |
+
Keep the total number of scenes between 5 and 8.
|
72 |
+
|
73 |
+
Example:
|
74 |
+
[SCENE: Time-lapse of a bustling city street at night]
|
75 |
+
[NARRATION: Cities are centers of human activity, constantly evolving.]
|
76 |
+
[SCENE: Close up on intricate gears of an old clock]
|
77 |
+
[NARRATION: But how do we measure the relentless march of time?]
|
78 |
+
|
79 |
+
Generate the script now:
|
80 |
+
"""
|
81 |
+
headers = {
|
82 |
+
"Authorization": f"Bearer {api_key}",
|
83 |
+
"Content-Type": "application/json"
|
84 |
+
}
|
85 |
+
data = {
|
86 |
+
"model": model,
|
87 |
+
"messages": [{"role": "user", "content": prompt}],
|
88 |
+
"max_tokens": 1000, # Adjust as needed
|
89 |
+
}
|
90 |
+
try:
|
91 |
+
response = requests.post("https://openrouter.ai/api/v1/chat/completions", headers=headers, json=data, timeout=60)
|
92 |
+
response.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)
|
93 |
+
result = response.json()
|
94 |
+
script_content = result['choices'][0]['message']['content']
|
95 |
+
logging.info("β
Script generated successfully.")
|
96 |
+
# Basic validation
|
97 |
+
if "[SCENE:" not in script_content or "[NARRATION:" not in script_content:
|
98 |
+
logging.error("β Script generation failed: Output format incorrect.")
|
99 |
+
logging.debug(f"Raw script output: {script_content}")
|
100 |
+
return None
|
101 |
+
return script_content
|
102 |
+
except requests.exceptions.RequestException as e:
|
103 |
+
logging.error(f"β Script generation failed: API request error: {e}")
|
104 |
+
return None
|
105 |
+
except (KeyError, IndexError) as e:
|
106 |
+
logging.error(f"β Script generation failed: Unexpected API response format: {e}")
|
107 |
+
logging.debug(f"Raw API response: {response.text}")
|
108 |
+
return None
|
109 |
+
except Exception as e:
|
110 |
+
logging.error(f"β Script generation failed: An unexpected error occurred: {e}")
|
111 |
+
traceback.print_exc()
|
112 |
+
return None
|
113 |
+
|
114 |
+
def parse_script(script_text):
|
115 |
+
"""Parses the generated script into scene prompts and narration text."""
|
116 |
+
logging.info("Parsing script...")
|
117 |
+
if not script_text:
|
118 |
+
return None
|
119 |
+
|
120 |
+
# Regex to find scene and narration blocks
|
121 |
+
pattern = re.compile(r"\[SCENE:\s*(.*?)\s*\]\s*\[NARRATION:\s*(.*?)\s*\]", re.DOTALL | re.IGNORECASE)
|
122 |
+
matches = pattern.findall(script_text)
|
123 |
+
|
124 |
+
if not matches:
|
125 |
+
logging.error("β Script parsing failed: No valid [SCENE]/[NARRATION] pairs found.")
|
126 |
+
logging.debug(f"Script content for parsing: {script_text}")
|
127 |
+
return None
|
128 |
+
|
129 |
+
elements = []
|
130 |
+
for scene_desc, narration_text in matches:
|
131 |
+
scene_desc = scene_desc.strip()
|
132 |
+
narration_text = narration_text.strip().replace('\n', ' ') # Clean up narration
|
133 |
+
if scene_desc and narration_text:
|
134 |
+
elements.append({"type": "scene", "prompt": scene_desc})
|
135 |
+
elements.append({"type": "narration", "text": narration_text})
|
136 |
+
else:
|
137 |
+
logging.warning(f"β οΈ Skipping invalid pair: Scene='{scene_desc}', Narration='{narration_text}'")
|
138 |
+
|
139 |
+
|
140 |
+
logging.info(f"β
Script parsed into {len(elements)//2} scene/narration pairs.")
|
141 |
+
return elements
|
142 |
+
|
143 |
+
def search_pexels(query, api_key, media_type="videos", per_page=5):
|
144 |
+
"""Searches Pexels API for videos or photos."""
|
145 |
+
if not api_key or api_key == "YOUR_PEXELS_API_KEY_HERE":
|
146 |
+
logging.warning("β οΈ Pexels API key not configured. Skipping search.")
|
147 |
+
return []
|
148 |
+
logging.info(f"Searching Pexels {media_type} for: {query}")
|
149 |
+
base_url = f"https://api.pexels.com/{media_type}/search"
|
150 |
+
headers = {"Authorization": api_key}
|
151 |
+
params = {"query": query, "per_page": per_page, "orientation": "landscape"} # Default landscape
|
152 |
+
|
153 |
+
try:
|
154 |
+
response = requests.get(base_url, headers=headers, params=params, timeout=20)
|
155 |
+
response.raise_for_status()
|
156 |
+
data = response.json()
|
157 |
+
|
158 |
+
results = []
|
159 |
+
media_key = 'videos' if media_type == 'videos' else 'photos'
|
160 |
+
link_key = 'video_files' if media_type == 'videos' else 'src'
|
161 |
+
|
162 |
+
for item in data.get(media_key, []):
|
163 |
+
if media_type == 'videos':
|
164 |
+
# Find HD or highest quality video link
|
165 |
+
video_links = sorted(item.get(link_key, []), key=lambda x: x.get('width', 0), reverse=True)
|
166 |
+
if video_links:
|
167 |
+
# Prefer HD (1920x1080) or similar if available
|
168 |
+
hd_link = next((link['link'] for link in video_links if link.get('quality') == 'hd' and link.get('width') == 1920), None)
|
169 |
+
if hd_link:
|
170 |
+
results.append({'url': hd_link, 'type': 'video'})
|
171 |
+
elif video_links[0].get('link'): # Fallback to highest available
|
172 |
+
results.append({'url': video_links[0]['link'], 'type': 'video'})
|
173 |
+
else: # photos
|
174 |
+
# Get large or original image link
|
175 |
+
img_links = item.get(link_key, {})
|
176 |
+
if img_links.get('large2x'):
|
177 |
+
results.append({'url': img_links['large2x'], 'type': 'image'})
|
178 |
+
elif img_links.get('large'):
|
179 |
+
results.append({'url': img_links['large'], 'type': 'image'})
|
180 |
+
elif img_links.get('original'):
|
181 |
+
results.append({'url': img_links['original'], 'type': 'image'})
|
182 |
+
|
183 |
+
logging.info(f"β
Found {len(results)} Pexels {media_type} results.")
|
184 |
+
return results
|
185 |
+
|
186 |
+
except requests.exceptions.RequestException as e:
|
187 |
+
logging.error(f"β Pexels API request error: {e}")
|
188 |
+
return []
|
189 |
+
except Exception as e:
|
190 |
+
logging.error(f"β Error processing Pexels response: {e}")
|
191 |
+
traceback.print_exc()
|
192 |
+
return []
|
193 |
+
|
194 |
+
def download_media(url, save_dir):
|
195 |
+
"""Downloads media (video or image) from a URL."""
|
196 |
+
logging.info(f"Downloading media from: {url}")
|
197 |
+
try:
|
198 |
+
response = requests.get(url, stream=True, timeout=60, headers={'User-Agent': USER_AGENT})
|
199 |
+
response.raise_for_status()
|
200 |
+
|
201 |
+
# Try to get filename from URL or Content-Disposition
|
202 |
+
filename = url.split('/')[-1].split('?')[0]
|
203 |
+
if not filename or '.' not in filename: # Basic check for extension
|
204 |
+
# Look for content-disposition header
|
205 |
+
cd = response.headers.get('content-disposition')
|
206 |
+
if cd:
|
207 |
+
fname = re.findall('filename="?(.+)"?', cd)
|
208 |
+
if fname:
|
209 |
+
filename = fname[0]
|
210 |
+
# If still no good filename, generate one based on type
|
211 |
+
if not filename or '.' not in filename:
|
212 |
+
content_type = response.headers.get('content-type', '').lower()
|
213 |
+
ext = '.jpg' # default
|
214 |
+
if 'video' in content_type:
|
215 |
+
ext = '.mp4'
|
216 |
+
elif 'jpeg' in content_type or 'jpg' in content_type:
|
217 |
+
ext = '.jpg'
|
218 |
+
elif 'png' in content_type:
|
219 |
+
ext = '.png'
|
220 |
+
filename = f"media_{int(time.time())}{ext}"
|
221 |
+
|
222 |
+
|
223 |
+
save_path = os.path.join(save_dir, filename)
|
224 |
+
|
225 |
+
with open(save_path, 'wb') as f:
|
226 |
+
for chunk in response.iter_content(chunk_size=8192):
|
227 |
+
f.write(chunk)
|
228 |
+
|
229 |
+
logging.info(f"β
Media downloaded successfully to: {save_path}")
|
230 |
+
return save_path
|
231 |
+
except requests.exceptions.RequestException as e:
|
232 |
+
logging.error(f"β Media download failed: Request error: {e}")
|
233 |
+
return None
|
234 |
+
except Exception as e:
|
235 |
+
logging.error(f"β Media download failed: An unexpected error occurred: {e}")
|
236 |
+
traceback.print_exc()
|
237 |
+
return None
|
238 |
+
|
239 |
+
def generate_tts(text, lang, save_dir, segment_index):
|
240 |
+
"""Generates TTS audio using Kokoro (if enabled) or gTTS."""
|
241 |
+
filename = f"narration_{segment_index}.mp3"
|
242 |
+
filepath = os.path.join(save_dir, filename)
|
243 |
+
logging.info(f"Generating TTS for segment {segment_index}: '{text[:50]}...'")
|
244 |
+
|
245 |
+
audio_duration = 0
|
246 |
+
success = False
|
247 |
+
|
248 |
+
# Try Kokoro first if enabled and initialized
|
249 |
+
# if KOKORO_ENABLED and pipeline:
|
250 |
+
# try:
|
251 |
+
# logging.info("Attempting TTS generation with Kokoro...")
|
252 |
+
# # Assuming Kokoro outputs a numpy array and sample rate
|
253 |
+
# wav, sr = pipeline.tts(text=text)
|
254 |
+
# sf.write(filepath, wav, sr)
|
255 |
+
# audio_duration = len(wav) / sr
|
256 |
+
# logging.info(f"β
Kokoro TTS generated successfully ({audio_duration:.2f}s).")
|
257 |
+
# success = True
|
258 |
+
# except Exception as e:
|
259 |
+
# logging.warning(f"β οΈ Kokoro TTS failed: {e}. Falling back to gTTS.")
|
260 |
+
|
261 |
+
# Fallback to gTTS
|
262 |
+
if not success:
|
263 |
+
try:
|
264 |
+
logging.info("Attempting TTS generation with gTTS...")
|
265 |
+
tts = gTTS(text=text, lang=lang)
|
266 |
+
tts.save(filepath)
|
267 |
+
# Get duration using soundfile
|
268 |
+
try:
|
269 |
+
audio_info = sf.info(filepath)
|
270 |
+
audio_duration = audio_info.duration
|
271 |
+
except Exception as e_dur:
|
272 |
+
logging.warning(f"β οΈ Could not get duration using soundfile ({e_dur}), trying pydub...")
|
273 |
+
try:
|
274 |
+
audio_seg = AudioSegment.from_mp3(filepath)
|
275 |
+
audio_duration = len(audio_seg) / 1000.0
|
276 |
+
except Exception as e_dur_pd:
|
277 |
+
logging.error(f"β Failed to get duration with pydub as well ({e_dur_pd}). Setting duration to estimated.")
|
278 |
+
# Estimate duration based on words (very rough)
|
279 |
+
words_per_minute = 150
|
280 |
+
num_words = len(text.split())
|
281 |
+
audio_duration = (num_words / words_per_minute) * 60
|
282 |
+
if audio_duration < 2: audio_duration = 2 # Minimum duration
|
283 |
+
|
284 |
+
logging.info(f"β
gTTS generated successfully ({audio_duration:.2f}s).")
|
285 |
+
success = True
|
286 |
+
except Exception as e:
|
287 |
+
logging.error(f"β gTTS failed: {e}")
|
288 |
+
traceback.print_exc()
|
289 |
+
success = False
|
290 |
+
|
291 |
+
return filepath if success else None, audio_duration if success else 0
|
292 |
+
|
293 |
+
def resize_media_to_fill(clip, target_size):
|
294 |
+
"""Resizes a MoviePy clip (video or image) to fill the target size, cropping if necessary."""
|
295 |
+
# target_size = (width, height)
|
296 |
+
target_w, target_h = target_size
|
297 |
+
target_aspect = target_w / target_h
|
298 |
+
|
299 |
+
clip_w, clip_h = clip.size
|
300 |
+
clip_aspect = clip_w / clip_h
|
301 |
+
|
302 |
+
if abs(clip_aspect - target_aspect) < 0.01: # Aspect ratios are close enough
|
303 |
+
return clip.resize(width=target_w) # Or height=target_h
|
304 |
+
|
305 |
+
if clip_aspect > target_aspect:
|
306 |
+
# Clip is wider than target, resize to target height and crop width
|
307 |
+
resized_clip = clip.resize(height=target_h)
|
308 |
+
crop_width = resized_clip.w
|
309 |
+
crop_x_center = crop_width / 2
|
310 |
+
crop_x1 = int(crop_x_center - target_w / 2)
|
311 |
+
crop_x2 = int(crop_x_center + target_w / 2)
|
312 |
+
# Ensure crop coordinates are within bounds
|
313 |
+
crop_x1 = max(0, crop_x1)
|
314 |
+
crop_x2 = min(resized_clip.w, crop_x2)
|
315 |
+
# Adjust if calculated width is slightly off due to rounding
|
316 |
+
if crop_x2 - crop_x1 != target_w:
|
317 |
+
crop_x2 = crop_x1 + target_w # Prioritize target width
|
318 |
+
|
319 |
+
return resized_clip.fx(vfx.crop, x1=crop_x1, y1=0, x2=crop_x2, y2=target_h)
|
320 |
+
else:
|
321 |
+
# Clip is taller than target, resize to target width and crop height
|
322 |
+
resized_clip = clip.resize(width=target_w)
|
323 |
+
crop_height = resized_clip.h
|
324 |
+
crop_y_center = crop_height / 2
|
325 |
+
crop_y1 = int(crop_y_center - target_h / 2)
|
326 |
+
crop_y2 = int(crop_y_center + target_h / 2)
|
327 |
+
# Ensure crop coordinates are within bounds
|
328 |
+
crop_y1 = max(0, crop_y1)
|
329 |
+
crop_y2 = min(resized_clip.h, crop_y2)
|
330 |
+
# Adjust if calculated height is slightly off
|
331 |
+
if crop_y2 - crop_y1 != target_h:
|
332 |
+
crop_y2 = crop_y1 + target_h
|
333 |
+
|
334 |
+
return resized_clip.fx(vfx.crop, x1=0, y1=crop_y1, x2=target_w, y2=crop_y2)
|
335 |
+
|
336 |
+
|
337 |
+
def apply_ken_burns(image_clip, duration, target_size, zoom_factor=1.1):
|
338 |
+
"""Applies a subtle zoom-out Ken Burns effect to an ImageClip."""
|
339 |
+
# Ensure the input clip already matches the target size
|
340 |
+
if image_clip.size != target_size:
|
341 |
+
logging.warning("Applying Ken Burns to an image not matching target size, resizing first.")
|
342 |
+
image_clip = resize_media_to_fill(image_clip, target_size)
|
343 |
+
|
344 |
+
# Define the resize function based on time `t`
|
345 |
+
def resize_func(t):
|
346 |
+
# Zoom out: start at zoom_factor, end at 1.0
|
347 |
+
current_zoom = 1 + (zoom_factor - 1) * (1 - t / duration)
|
348 |
+
return current_zoom
|
349 |
+
|
350 |
+
# Apply the resize effect over time
|
351 |
+
# Need to center the zoom effect
|
352 |
+
zoomed_clip = image_clip.fx(vfx.resize, resize_func)
|
353 |
+
# Crop back to target size, centered
|
354 |
+
final_clip = zoomed_clip.fx(vfx.crop, x_center=zoomed_clip.w/2, y_center=zoomed_clip.h/2, width=target_size[0], height=target_size[1])
|
355 |
+
|
356 |
+
return final_clip.set_duration(duration)
|
357 |
+
|
358 |
+
|
359 |
+
def create_caption_clip(text, duration, clip_size, font_size=None, font_path=DEFAULT_FONT, color='white', stroke_color='black', stroke_width=1.5, position=('center', 'bottom'), margin=20):
|
360 |
+
"""Creates a MoviePy TextClip for captions with basic wrapping."""
|
361 |
+
width, height = clip_size
|
362 |
+
max_text_width = width * 0.8 # Allow text to occupy 80% of the width
|
363 |
+
|
364 |
+
if font_size is None:
|
365 |
+
font_size = max(20, int(height / 25)) # Dynamic font size based on height
|
366 |
+
|
367 |
+
# Basic word wrapping
|
368 |
+
try:
|
369 |
+
# Attempt to load the font to estimate size
|
370 |
+
pil_font = ImageFont.truetype(font_path, font_size)
|
371 |
+
except IOError:
|
372 |
+
logging.warning(f"Font '{font_path}' not found. Using MoviePy default.")
|
373 |
+
pil_font = None # Use MoviePy default if specified font fails
|
374 |
+
|
375 |
+
words = text.split()
|
376 |
+
lines = []
|
377 |
+
current_line = ""
|
378 |
+
line_width_func = lambda txt: pil_font.getbbox(txt)[2] if pil_font else len(txt) * font_size * 0.6 # Estimate width
|
379 |
+
|
380 |
+
for word in words:
|
381 |
+
test_line = f"{current_line} {word}".strip()
|
382 |
+
# Estimate width (this is approximate)
|
383 |
+
if line_width_func(test_line) <= max_text_width:
|
384 |
+
current_line = test_line
|
385 |
+
else:
|
386 |
+
if current_line: # Add the previous line if it wasn't empty
|
387 |
+
lines.append(current_line)
|
388 |
+
current_line = word # Start new line with the current word
|
389 |
+
# Handle case where a single word is too long
|
390 |
+
if line_width_func(current_line) > max_text_width:
|
391 |
+
logging.warning(f"Word '{current_line}' is too long for caption width.")
|
392 |
+
# Could implement character-level wrapping here if needed
|
393 |
+
|
394 |
+
if current_line: # Add the last line
|
395 |
+
lines.append(current_line)
|
396 |
+
|
397 |
+
wrapped_text = "\n".join(lines)
|
398 |
+
|
399 |
+
# Create the TextClip
|
400 |
+
try:
|
401 |
+
caption = TextClip(
|
402 |
+
wrapped_text,
|
403 |
+
fontsize=font_size,
|
404 |
+
color=color,
|
405 |
+
font=font_path, # MoviePy might handle font lookup differently
|
406 |
+
stroke_color=stroke_color,
|
407 |
+
stroke_width=stroke_width,
|
408 |
+
method='caption', # Use caption method for better wrapping if available
|
409 |
+
size=(int(max_text_width), None), # Constrain width for wrapping
|
410 |
+
align='center'
|
411 |
+
)
|
412 |
+
except Exception as e:
|
413 |
+
logging.error(f"Error creating TextClip (maybe font issue?): {e}. Using simpler TextClip.")
|
414 |
+
# Fallback to simpler TextClip without stroke/specific font if needed
|
415 |
+
caption = TextClip(wrapped_text, fontsize=font_size, color=color, method='caption', size=(int(max_text_width), None), align='center')
|
416 |
+
|
417 |
+
|
418 |
+
# Set position with margin
|
419 |
+
pos_x, pos_y = position
|
420 |
+
final_pos = list(caption.pos(pos_x, pos_y)) # Get numeric position
|
421 |
+
|
422 |
+
if 'bottom' in pos_y:
|
423 |
+
final_pos[1] -= margin
|
424 |
+
elif 'top' in pos_y:
|
425 |
+
final_pos[1] += margin
|
426 |
+
if 'right' in pos_x:
|
427 |
+
final_pos[0] -= margin
|
428 |
+
elif 'left' in pos_x:
|
429 |
+
final_pos[0] += margin
|
430 |
+
|
431 |
+
caption = caption.set_position(tuple(final_pos)).set_duration(duration)
|
432 |
+
return caption
|
433 |
+
|
434 |
+
|
435 |
+
def create_clip(media_path, media_type, audio_path, audio_duration, target_size, add_captions, narration_text, segment_index):
|
436 |
+
"""Creates a single video clip from media, audio, and optional captions."""
|
437 |
+
logging.info(f"Creating clip {segment_index} - Type: {media_type}, Duration: {audio_duration:.2f}s")
|
438 |
+
|
439 |
try:
|
440 |
+
# Load Audio
|
441 |
+
audio_clip = AudioFileClip(audio_path)
|
442 |
+
# Verify audio duration (sometimes file reading is slightly off)
|
443 |
+
if abs(audio_clip.duration - audio_duration) > 0.1:
|
444 |
+
logging.warning(f"Audio file duration ({audio_clip.duration:.2f}s) differs from expected ({audio_duration:.2f}s). Using file duration.")
|
445 |
+
audio_duration = audio_clip.duration
|
446 |
+
# Ensure minimum duration to avoid issues
|
447 |
+
if audio_duration < 0.1:
|
448 |
+
logging.warning(f"Audio duration is very short ({audio_duration:.2f}s). Setting minimum 0.5s.")
|
449 |
+
audio_duration = 0.5
|
450 |
+
audio_clip = audio_clip.subclip(0, audio_duration)
|
451 |
+
|
452 |
+
|
453 |
+
# Load Media (Video or Image)
|
454 |
+
if media_type == 'video':
|
455 |
+
try:
|
456 |
+
video_clip = VideoFileClip(media_path, target_resolution=(target_size[1], target_size[0])) # height, width
|
457 |
+
# Trim or loop video to match audio duration
|
458 |
+
if video_clip.duration >= audio_duration:
|
459 |
+
video_clip = video_clip.subclip(0, audio_duration)
|
460 |
+
else:
|
461 |
+
# Loop the video if it's shorter than the audio
|
462 |
+
logging.warning(f"Video duration ({video_clip.duration:.2f}s) shorter than audio ({audio_duration:.2f}s). Looping video.")
|
463 |
+
# video_clip = video_clip.fx(vfx.loop, duration=audio_duration) # Loop is simpler
|
464 |
+
# Alternatively freeze last frame:
|
465 |
+
num_loops = math.ceil(audio_duration / video_clip.duration)
|
466 |
+
video_clip = concatenate_videoclips([video_clip] * num_loops).subclip(0, audio_duration)
|
467 |
+
|
468 |
+
|
469 |
+
main_clip = resize_media_to_fill(video_clip, target_size)
|
470 |
+
|
471 |
+
except Exception as e:
|
472 |
+
logging.error(f"β Failed to load or process video file '{media_path}': {e}. Creating black clip.")
|
473 |
+
main_clip = ColorClip(size=target_size, color=(0,0,0), duration=audio_duration)
|
474 |
+
|
475 |
+
elif media_type == 'image':
|
476 |
+
try:
|
477 |
+
# Load image, resize to fill target, apply Ken Burns
|
478 |
+
img_clip_base = ImageClip(media_path)
|
479 |
+
img_clip_resized = resize_media_to_fill(img_clip_base, target_size)
|
480 |
+
main_clip = apply_ken_burns(img_clip_resized, audio_duration, target_size)
|
481 |
+
|
482 |
+
except Exception as e:
|
483 |
+
logging.error(f"β Failed to load or process image file '{media_path}': {e}. Creating black clip.")
|
484 |
+
main_clip = ColorClip(size=target_size, color=(0,0,0), duration=audio_duration)
|
485 |
+
else:
|
486 |
+
logging.error(f"β Unknown media type: {media_type}. Creating black clip.")
|
487 |
+
main_clip = ColorClip(size=target_size, color=(0,0,0), duration=audio_duration)
|
488 |
+
|
489 |
+
# Set duration definitively and add audio
|
490 |
+
main_clip = main_clip.set_duration(audio_duration).set_audio(audio_clip)
|
491 |
+
|
492 |
+
# Add Captions if enabled
|
493 |
+
if add_captions and narration_text:
|
494 |
+
caption_clip = create_caption_clip(narration_text, audio_duration, target_size)
|
495 |
+
final_clip = CompositeVideoClip([main_clip, caption_clip], size=target_size)
|
496 |
+
else:
|
497 |
+
final_clip = main_clip
|
498 |
+
|
499 |
+
logging.info(f"β
Clip {segment_index} created successfully.")
|
500 |
+
return final_clip
|
501 |
+
|
502 |
+
except Exception as e:
|
503 |
+
logging.error(f"β Failed to create clip {segment_index}: {e}")
|
504 |
+
traceback.print_exc()
|
505 |
+
return None
|
506 |
+
|
507 |
+
def add_background_music(video_clip, music_file=BGM_FILE, volume=BGM_VOLUME):
|
508 |
+
"""Adds background music to the final video clip."""
|
509 |
+
if not os.path.exists(music_file):
|
510 |
+
logging.warning(f"Background music file '{music_file}' not found. Skipping BGM.")
|
511 |
+
return video_clip
|
512 |
+
|
513 |
+
logging.info(f"Adding background music from {music_file}")
|
514 |
+
try:
|
515 |
+
bgm_clip = AudioFileClip(music_file)
|
516 |
+
video_duration = video_clip.duration
|
517 |
+
|
518 |
+
# Loop or trim BGM to match video duration
|
519 |
+
if bgm_clip.duration < video_duration:
|
520 |
+
# Loop BGM - Use audio_loop fx
|
521 |
+
bgm_clip = bgm_clip.fx(afx.audio_loop, duration=video_duration)
|
522 |
+
# Alternative manual loop:
|
523 |
+
# num_loops = math.ceil(video_duration / bgm_clip.duration)
|
524 |
+
# bgm_clip = concatenate_audioclips([bgm_clip] * num_loops).subclip(0, video_duration)
|
525 |
+
else:
|
526 |
+
bgm_clip = bgm_clip.subclip(0, video_duration)
|
527 |
+
|
528 |
+
# Adjust volume
|
529 |
+
bgm_clip = bgm_clip.volumex(volume)
|
530 |
+
|
531 |
+
# Combine with existing audio
|
532 |
+
original_audio = video_clip.audio
|
533 |
+
if original_audio:
|
534 |
+
combined_audio = CompositeAudioClip([original_audio, bgm_clip])
|
535 |
+
else:
|
536 |
+
# Handle case where video might not have narration audio (e.g., if all TTS failed)
|
537 |
+
logging.warning("Video clip has no primary audio. Adding BGM only.")
|
538 |
+
combined_audio = bgm_clip
|
539 |
+
|
540 |
+
video_clip = video_clip.set_audio(combined_audio)
|
541 |
+
logging.info("β
Background music added.")
|
542 |
+
return video_clip
|
543 |
+
|
544 |
+
except Exception as e:
|
545 |
+
logging.error(f"β Failed to add background music: {e}")
|
546 |
+
traceback.print_exc()
|
547 |
+
return video_clip # Return original clip on failure
|
548 |
+
|
549 |
+
|
550 |
+
# --- Main Gradio Function ---
def generate_video_process(topic, resolution_choice, add_captions_option, add_bgm_option, progress=gr.Progress()):
    """Generate a short documentary video for *topic*; entry point called by Gradio.

    Pipeline: generate script -> parse into (scene, narration) pairs -> per
    segment: TTS audio + Pexels media search/download + clip creation ->
    concatenate -> optional background music -> encode the final video.

    Args:
        topic: User-supplied documentary topic.
        resolution_choice: "Full HD (16:9)" for landscape, otherwise portrait.
        add_captions_option: Burn captions into each clip when True.
        add_bgm_option: Mix background music under the narration when True.
        progress: Gradio progress tracker (injected automatically by Gradio).

    Returns:
        Tuple of (status log text, path to the final video, or None on failure).
    """
    start_time = time.time()
    status_log = []
    temp_dir = None
    final_video_path = None

    # Create a unique temporary directory for this run
    try:
        temp_dir = tempfile.mkdtemp(prefix=TEMP_FOLDER_BASE + "_")
        status_log.append(f"Temporary directory created: {temp_dir}")
        logging.info(f"Using temp directory: {temp_dir}")

        # --- 1. Generate Script ---
        progress(0.1, desc="Generating script...")
        status_log.append("📝 Generating script...")
        script = generate_script(topic, OPENROUTER_API_KEY, OPENROUTER_MODEL)
        if not script:
            status_log.append("❌ Script generation failed. Check API key and model.")
            return "\n".join(status_log), None
        status_log.append("✅ Script generated.")

        # --- 2. Parse Script ---
        progress(0.2, desc="Parsing script...")
        status_log.append("🔍 Parsing script...")
        elements = parse_script(script)
        if not elements:
            status_log.append("❌ Script parsing failed. Check script format.")
            return "\n".join(status_log), None
        # Elements alternate (scene, narration), so one segment per pair.
        num_segments = len(elements) // 2
        status_log.append(f"✅ Script parsed into {num_segments} segments.")

        # --- 3. Process Segments (Media Search, Download, TTS, Clip Creation) ---
        clips = []
        target_size = (1920, 1080) if resolution_choice == "Full HD (16:9)" else (1080, 1920)  # (W, H)
        status_log.append(f"Target resolution: {target_size[0]}x{target_size[1]}")

        for i in range(0, len(elements), 2):
            segment_index = i // 2
            # Map segment work onto the 0.2-0.8 portion of the progress bar.
            current_progress = 0.2 + (0.6 * (segment_index / num_segments))
            progress(current_progress, desc=f"Processing segment {segment_index + 1}/{num_segments}")

            scene_elem = elements[i]
            narration_elem = elements[i + 1]
            scene_prompt = scene_elem['prompt']
            narration_text = narration_elem['text']

            status_log.append(f"\n--- Segment {segment_index + 1}/{num_segments} ---")
            status_log.append(f"Scene Prompt: {scene_prompt}")
            status_log.append(f"Narration: {narration_text[:100]}...")

            # 3a. Generate TTS narration for this segment.
            status_log.append("🎤 Generating narration audio...")
            tts_path, tts_duration = generate_tts(narration_text, 'en', temp_dir, segment_index)
            if not tts_path or tts_duration <= 0.1:  # Check for valid duration
                status_log.append(f"⚠️ TTS generation failed for segment {segment_index + 1}. Skipping segment.")
                logging.warning(f"Skipping segment {segment_index+1} due to TTS failure.")
                continue
            status_log.append(f"✅ Narration audio generated ({tts_duration:.2f}s): {os.path.basename(tts_path)}")

            # 3b. Search for media — try Pexels video first, then photos.
            status_log.append("🔍 Searching for media...")
            media_path = None
            media_type = None

            video_results = search_pexels(scene_prompt, PEXELS_API_KEY, media_type="videos")
            if video_results:
                selected_media = random.choice(video_results)
                status_log.append(f"📥 Downloading Pexels video: {selected_media['url']}")
                media_path = download_media(selected_media['url'], temp_dir)
                if media_path:
                    media_type = 'video'
                else:
                    status_log.append("⚠️ Video download failed.")

            # Try Pexels image if video search or download failed.
            if not media_path:
                status_log.append("🔍 No suitable video found/downloaded. Searching Pexels images...")
                image_results = search_pexels(scene_prompt, PEXELS_API_KEY, media_type="photos")
                if image_results:
                    selected_media = random.choice(image_results)
                    status_log.append(f"📥 Downloading Pexels image: {selected_media['url']}")
                    media_path = download_media(selected_media['url'], temp_dir)
                    if media_path:
                        media_type = 'image'
                    else:
                        status_log.append("⚠️ Image download failed.")

            # Fallback: plain black screen when no media could be fetched.
            if not media_path:
                status_log.append(f"⚠️ No suitable media found for '{scene_prompt}'. Using black screen.")
                media_type = 'color'  # Special type for ColorClip
                media_path = None  # No path needed for color clip

            # 3c. Build the segment clip (visual + narration + optional captions).
            status_log.append(f"🎬 Creating video clip for segment {segment_index + 1}...")
            clip = create_clip(
                media_path=media_path if media_type != 'color' else None,  # Pass None if color
                media_type=media_type,
                audio_path=tts_path,
                audio_duration=tts_duration,
                target_size=target_size,
                add_captions=add_captions_option,
                narration_text=narration_text,
                segment_index=segment_index
            )

            if clip:
                clips.append(clip)
                status_log.append(f"✅ Clip {segment_index + 1} created.")
            else:
                status_log.append(f"❌ Failed to create clip for segment {segment_index + 1}. Skipping.")
                logging.error(f"Failed to create clip {segment_index+1}, skipping.")

        if not clips:
            status_log.append("\n❌ No valid clips were created. Cannot generate video.")
            return "\n".join(status_log), None

        # --- 4. Concatenate Clips ---
        progress(0.85, desc="Combining video clips...")
        status_log.append("\n🔗 Combining video clips...")
        try:
            final_clip = concatenate_videoclips(clips, method="compose")
            status_log.append("✅ Clips combined successfully.")
        except Exception as e:
            status_log.append(f"❌ Error concatenating clips: {e}")
            logging.error(f"Concatenation failed: {e}")
            traceback.print_exc()
            # Attempt cleanup even on error
            for clip in clips:
                clip.close()
            return "\n".join(status_log), None

        # --- 5. Add Background Music (Optional) ---
        if add_bgm_option:
            progress(0.9, desc="Adding background music...")
            status_log.append("🎵 Adding background music...")
            final_clip = add_background_music(final_clip, music_file=BGM_FILE, volume=BGM_VOLUME)

        # --- 6. Write Final Video ---
        progress(0.95, desc="Writing final video file...")
        status_log.append("💾 Writing final video file (this may take time)...")
        output_path = os.path.join(temp_dir, OUTPUT_VIDEO_FILENAME)
        try:
            # 'medium' preset balances quality/size; libx264 + aac for wide
            # compatibility; threads=4 speeds up encoding on multi-core hosts.
            final_clip.write_videofile(
                output_path,
                codec='libx264',
                audio_codec='aac',
                fps=24,
                preset='medium',
                threads=4,
                logger='bar'  # Use None for less verbose output
            )
            # BUGFIX: temp_dir is removed in the finally block below, which
            # previously deleted the finished video before Gradio could serve
            # the returned path. Move the file somewhere that survives cleanup.
            persistent_path = os.path.join(
                tempfile.gettempdir(),
                f"final_{int(time.time())}_{OUTPUT_VIDEO_FILENAME}"
            )
            shutil.move(output_path, persistent_path)
            status_log.append(f"✅ Final video saved to: {persistent_path}")
            final_video_path = persistent_path  # Set the path to be returned
        except Exception as e:
            status_log.append(f"❌ Error writing final video file: {e}")
            logging.error(f"Final video write failed: {e}")
            traceback.print_exc()
            final_video_path = None  # Ensure no path is returned on failure
        finally:
            # Ensure MoviePy resources are released
            final_clip.close()
            for clip in clips:
                try:
                    clip.close()
                    if clip.audio:
                        clip.audio.close()
                except Exception:
                    pass  # Ignore errors during cleanup

    except Exception as e:
        status_log.append(f"\n❌ An unexpected error occurred during video generation: {e}")
        logging.error("An unexpected error occurred in generate_video_process:")
        logging.error(traceback.format_exc())
        final_video_path = None  # Ensure failure state

    finally:
        # --- 7. Cleanup ---
        if temp_dir and os.path.exists(temp_dir):
            try:
                shutil.rmtree(temp_dir)
                status_log.append(f"🧹 Temporary directory cleaned up: {temp_dir}")
                logging.info(f"Cleaned up temp directory: {temp_dir}")
            except Exception as e:
                status_log.append(f"⚠️ Error cleaning up temporary directory {temp_dir}: {e}")
                logging.warning(f"Cleanup failed for {temp_dir}: {e}")

    end_time = time.time()
    total_time = end_time - start_time
    status_log.append(f"\n--- Generation Complete ---")
    status_log.append(f"Total time: {total_time:.2f} seconds")

    progress(1.0, desc="Finished!")
    return "\n".join(status_log), final_video_path
+
# --- Gradio Interface Definition ---
# Components must be constructed inside the Blocks context in this order:
# Gradio wires layout and event handlers based on construction position.
with gr.Blocks() as iface:
    gr.Markdown("# 🤖 AI Documentary Generator")
    gr.Markdown("Enter a topic, choose your settings, and let the AI create a short video documentary!")

    with gr.Row():
        # Left column: user inputs and the generate trigger.
        with gr.Column(scale=1):
            topic_input = gr.Textbox(
                label="Video Topic",
                placeholder="e.g., The History of Coffee, The Secrets of the Deep Ocean, The Rise of Quantum Computing",
                lines=2
            )
            resolution_input = gr.Radio(
                label="Video Format",
                choices=["Short (9:16)", "Full HD (16:9)"],
                value="Short (9:16)"
            )
            captions_input = gr.Checkbox(label="Add Captions", value=True)
            # BGM checkbox is disabled (interactive=False) when no BGM file
            # exists on disk, and its label reflects that state.
            bgm_input = gr.Checkbox(label=f"Add Background Music ({os.path.basename(BGM_FILE) if os.path.exists(BGM_FILE) else 'No BGM file found'})", value=True, interactive=os.path.exists(BGM_FILE))

            generate_button = gr.Button("Generate Video", variant="primary")

        # Right column (wider): live status log plus the rendered video.
        with gr.Column(scale=2):
            status_output = gr.Textbox(label="Status Log", lines=15, interactive=False)
            video_output = gr.Video(label="Generated Video")

    # Wire the button to the generation pipeline; inputs/outputs must match
    # generate_video_process's signature and (log, video_path) return.
    generate_button.click(
        fn=generate_video_process,
        inputs=[topic_input, resolution_input, captions_input, bgm_input],
        outputs=[status_output, video_output]
    )

    # Clickable example rows that pre-fill the four inputs.
    gr.Examples(
        examples=[
            ["The lifecycle of a butterfly", "Short (9:16)", True, True],
            ["Ancient Roman Engineering", "Full HD (16:9)", True, False],
            ["The impact of social media", "Short (9:16)", False, True],
        ],
        inputs=[topic_input, resolution_input, captions_input, bgm_input]
    )
+
# --- Launch the App ---
if __name__ == "__main__":
    # Startup sanity checks: warn loudly (but do not abort) when an API key
    # is absent or still set to its placeholder value.
    key_checks = [
        (PEXELS_API_KEY, "YOUR_PEXELS_API_KEY_HERE",
         "PEXELS_API_KEY is not set. Media search will be limited."),
        (OPENROUTER_API_KEY, "YOUR_OPENROUTER_API_KEY_HERE",
         "OPENROUTER_API_KEY is not set. Script generation will fail."),
    ]
    for key_value, placeholder, message in key_checks:
        if not key_value or key_value == placeholder:
            logging.warning(message)
            print(f"WARNING: {message}")

    # Guarantee a BGM file exists so the mixer always has something to read:
    # export one second of silence as a placeholder when the file is missing.
    if not os.path.exists(BGM_FILE):
        logging.warning(f"Background music file '{BGM_FILE}' not found. Creating a silent placeholder.")
        try:
            AudioSegment.silent(duration=1000).export(BGM_FILE, format="mp3")
            logging.info(f"Created silent placeholder BGM file: {BGM_FILE}")
        except Exception as e:
            logging.error(f"Could not create placeholder BGM file: {e}")

    # NOTE(review): text rendering may need an ImageMagick policy adjustment
    # (rights for PS/LABEL/TEXT patterns) on some hosts — adjust manually if
    # caption rendering fails.

    iface.launch(debug=True, share=True)  # share=True exposes a public link
|