Update app.py
app.py CHANGED
@@ -1,38 +1,21 @@
import gradio as gr
+from gtts import gTTS
import os
from PIL import Image
from pydub import AudioSegment
import subprocess
import shutil
import math
-
-
-
-# --- Diccionario de Voces Disponibles ---
-# Formato: "Nombre para mostrar": "ID de la voz en Edge TTS"
-VOICES = {
-    "Jorge (México - Masculino)": "es-MX-JorgeForTTS",
-    "Dalia (México - Femenino)": "es-MX-DaliaForTTS",
-    "Alvaro (España - Masculino)": "es-ES-AlvaroForTTS",
-    "Elvira (España - Femenino)": "es-ES-ElviraForTTS",
-}
-
-# --- Funciones Auxiliares ---
-
-async def text_to_speech(text: str, voice_id: str, output_filename="audio.mp3"):
-    """
-    Convierte texto a voz usando Microsoft Edge TTS.
-    Esta función es asíncrona.
-    """
+
+def text_to_speech(text: str, output_filename="audio.mp3"):
    try:
-
-
+        tts = gTTS(text=text, lang='es')
+        tts.save(output_filename)
        return output_filename
    except Exception as e:
-        raise Exception(f"Error al generar el audio con
+        raise Exception(f"Error al generar el audio con gTTS: {e}")

def get_audio_duration(audio_path):
    if not os.path.exists(audio_path) or os.path.getsize(audio_path) == 0:
        return 0
    try:
@@ -42,10 +25,6 @@ def get_audio_duration(audio_path):
        raise Exception(f"Error al obtener la duración del audio: {e}")

def process_image(img_path, target_width, target_height, output_folder, index):
-    """
-    Procesa una imagen: la recorta para ajustarse a la relación de aspecto
-    y la redimensiona al tamaño del video final.
-    """
    try:
        img = Image.open(img_path).convert("RGB")
        original_width, original_height = img.size
@@ -66,14 +45,9 @@ def process_image(img_path, target_width, target_height, output_folder, index):
        img.save(output_path)
        return output_path
    except Exception as e:
-        print(f"Error procesando imagen {img_path}: {e}")
        return None

def create_video_with_ken_burns(processed_images, audio_duration, fps, video_size, output_filename):
-    """
-    Crea un video a partir de imágenes con efecto Ken Burns (zoom y paneo),
-    repitiendo las imágenes si la duración del audio es mayor.
-    """
    if not processed_images:
        raise ValueError("No hay imágenes procesadas para crear el video.")

@@ -89,15 +63,15 @@ def create_video_with_ken_burns(processed_images, audio_duration, fps, video_size, output_filename):
    input_commands = []
    for img_path in processed_images * num_loops:
        input_commands.extend(["-i", img_path])
-
+
    for i in range(total_clips):
        zoom = 1.2
-        filter_complex_chains.append(f"[{i}:v]scale={width*zoom}:{height*zoom},zoompan=z='min(zoom+0.0015,1.5)':d=
+        filter_complex_chains.append(f"[{i}:v]scale={width*zoom}:{height*zoom},zoompan=z='min(zoom+0.0015,1.5)':d={fps*IMAGE_DURATION}:x='iw/2-(iw/zoom/2)':y='ih/2-(ih/zoom/2)':s={width}x{height},fade=t=in:st=0:d=1,fade=t=out:st={IMAGE_DURATION-1}:d=1[v{i}]")
        video_clips.append(f"[v{i}]")

    concat_filter = f"{''.join(video_clips)}concat=n={total_clips}:v=1:a=0,format=yuv420p[v]"
    filter_complex = ";".join(filter_complex_chains) + ";" + concat_filter
-
+
    command = ["ffmpeg", "-y"]
    command.extend(input_commands)
    command.extend([
@@ -108,127 +82,91 @@ def create_video_with_ken_burns(processed_images, audio_duration, fps, video_size, output_filename):
        "-pix_fmt", "yuv420p",
        output_filename
    ])
-
-    print(f"DEBUG: FFmpeg Ken Burns command: {' '.join(command)}")
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
-        print(f"DEBUG: FFmpeg Ken Burns Error Stderr: {e.stderr}")
        raise Exception(f"Error al crear video con efecto Ken Burns: {e.stderr}")

def combine_video_and_audio(video_path, audio_path, output_path):
-    """Combina un archivo de video y uno de audio."""
    command = ["ffmpeg", "-y", "-i", video_path, "-i", audio_path, "-c:v", "copy", "-c:a", "aac", "-map", "0:v:0", "-map", "1:a:0", "-shortest", output_path]
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
-        print(f"DEBUG: FFmpeg Combine Error Stderr: {e.stderr}")
        raise Exception(f"Error al combinar video y audio: {e.stderr}")

-
-
-async def generate_tts_only(news_text_input, voice_name):
-    """Genera solo el audio usando la voz seleccionada."""
+def generate_tts_only(news_text_input):
    if not news_text_input:
        return "Por favor, escribe una noticia para generar el audio.", None
    try:
-
-        audio_file = await text_to_speech(news_text_input, voice_id, "audio_temp_preview.mp3")
+        audio_file = text_to_speech(news_text_input, "audio_temp_preview.mp3")
        return "Audio generado con éxito.", audio_file
    except Exception as e:
        return f"Ocurrió un error al generar solo el audio: {e}", None

-
-    """Orquesta la creación del video completo."""
+def create_news_video_app(news_text_input, image_files, video_ratio, input_audio_file):
    processed_image_folder = "temp_processed_images"
    final_output_video_path = "video_noticia_final.mp4"
    temp_video_no_audio_path = "video_sin_audio.mp4"
    temp_audio_file = "audio_para_video.mp3"

-    # Limpieza inicial
    if os.path.exists(processed_image_folder): shutil.rmtree(processed_image_folder)
    os.makedirs(processed_image_folder)

    try:
        if not image_files: raise ValueError("Por favor, sube al menos una imagen.")
        if not news_text_input and not input_audio_file: raise ValueError("Escribe una noticia o genera el audio primero.")
-
-        # Generar audio si no se proporciona uno
+
        if input_audio_file and os.path.exists(input_audio_file) and os.path.getsize(input_audio_file) > 0:
            shutil.copy(input_audio_file, temp_audio_file)
        else:
-
-
-
+            text_to_speech(news_text_input, temp_audio_file)
+
        audio_duration = get_audio_duration(temp_audio_file)
-        if audio_duration == 0: raise ValueError("La duración del audio es cero
+        if audio_duration == 0: raise ValueError("La duración del audio es cero.")

-        # Procesar imágenes
        target_width, target_height = (720, 1280) if video_ratio == "9:16" else (1280, 720)
        processed_images_paths = [process_image(f.name, target_width, target_height, processed_image_folder, i) for i, f in enumerate(image_files)]
        processed_images_paths = [p for p in processed_images_paths if p]
        if not processed_images_paths: raise ValueError("No se pudieron procesar las imágenes.")

-        # Crear video y combinar
        create_video_with_ken_burns(processed_images_paths, audio_duration, 30, (target_width, target_height), temp_video_no_audio_path)
        combine_video_and_audio(temp_video_no_audio_path, temp_audio_file, final_output_video_path)

        return "Video generado con éxito.", final_output_video_path

    except Exception as e:
-        print(f"ERROR: {e}")
        return f"Ocurrió un error: {e}", None
    finally:
-        # Limpieza final
        if os.path.exists(processed_image_folder): shutil.rmtree(processed_image_folder)
        if os.path.exists(temp_video_no_audio_path): os.remove(temp_video_no_audio_path)
        if os.path.exists(temp_audio_file): os.remove(temp_audio_file)
        if os.path.exists("audio_temp_preview.mp3"): os.remove("audio_temp_preview.mp3")

-# ==============================================================================
-# 3. DEFINICIÓN DE LA INTERFAZ DE GRADIO
-# ==============================================================================
with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown(
-        """
-        # 🎥 Creador de Videos de Noticias con Voz Profesional 🎙️
-        Escribe una noticia, elige una voz, sube tus imágenes y selecciona el formato para generar un video dinámico.
-        """
-    )
+    gr.Markdown("# 🎥 Creador de Videos de Noticias")
    with gr.Row():
        with gr.Column(scale=2):
            news_input = gr.Textbox(label="1. Escribe tu noticia aquí", lines=5)
-
-
-                label="2. Elige una Voz",
-                choices=list(VOICES.keys()),
-                value=list(VOICES.keys())[0], # Valor por defecto
-                interactive=True
-            )
-
-            image_upload = gr.File(label="3. Sube tus imágenes", file_count="multiple", type="filepath", file_types=[".jpg", ".jpeg", ".png"])
-            video_ratio_dropdown = gr.Dropdown(label="4. Elige el Formato del Video", choices=["16:9", "9:16"], value="9:16", interactive=True)
-
+            image_upload = gr.File(label="2. Sube tus imágenes", file_count="multiple", type="filepath", file_types=[".jpg", ".jpeg", ".png"])
+            video_ratio_dropdown = gr.Dropdown(label="3. Elige el Formato del Video", choices=["16:9", "9:16"], value="9:16", interactive=True)
            with gr.Accordion("Opciones de Audio (Opcional)", open=False):
                generate_audio_button = gr.Button("Generar Solo Audio (Vista Previa)")
                audio_status_message = gr.Textbox(label="Estado del Audio", interactive=False)
                audio_output_preview = gr.Audio(label="Audio de Noticia (Vista Previa)", interactive=False)
-
            generate_video_button = gr.Button("🎬 Generar Video Completo", variant="primary")
-
        with gr.Column(scale=3):
            output_message = gr.Textbox(label="Estado del Proceso", interactive=False)
            video_output = gr.Video(label="Video de la Noticia Generado")

    generate_audio_button.click(
        fn=generate_tts_only,
-        inputs=[news_input
+        inputs=[news_input],
        outputs=[audio_status_message, audio_output_preview]
    )
    generate_video_button.click(
        fn=create_news_video_app,
-        inputs=[news_input,
+        inputs=[news_input, image_upload, video_ratio_dropdown, audio_output_preview],
        outputs=[output_message, video_output]
    )

-demo.launch(
+demo.launch()
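
For reviewers who want to exercise the new synchronous TTS path outside the Space, here is a minimal sketch. It assumes gTTS is installed (pip install gTTS) and that network access is available; the sample phrase and the output filename ejemplo.mp3 are placeholders for illustration only, not part of the commit.

    # Standalone check of the gTTS-based helper introduced in this change.
    from gtts import gTTS

    def text_to_speech(text: str, output_filename="audio.mp3"):
        try:
            # gTTS synthesizes Spanish speech and writes an MP3 file.
            tts = gTTS(text=text, lang='es')
            tts.save(output_filename)
            return output_filename
        except Exception as e:
            raise Exception(f"Error al generar el audio con gTTS: {e}")

    if __name__ == "__main__":
        # Placeholder text and output path, only for a quick local test.
        print(text_to_speech("Esta es una prueba de la nueva voz.", "ejemplo.mp3"))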