salomonsky committed
Commit ffd7703 · verified · 1 Parent(s): 80d9746

Update app.py

Files changed (1)
  1. app.py +22 -84
app.py CHANGED
@@ -1,38 +1,21 @@
 import gradio as gr
+from gtts import gTTS
 import os
 from PIL import Image
 from pydub import AudioSegment
 import subprocess
 import shutil
 import math
-import asyncio
-import edge_tts
-
-# --- Diccionario de Voces Disponibles ---
-# Formato: "Nombre para mostrar": "ID de la voz en Edge TTS"
-VOICES = {
-    "Jorge (México - Masculino)": "es-MX-JorgeForTTS",
-    "Dalia (México - Femenino)": "es-MX-DaliaForTTS",
-    "Alvaro (España - Masculino)": "es-ES-AlvaroForTTS",
-    "Elvira (España - Femenino)": "es-ES-ElviraForTTS",
-}
-
-# --- Funciones Auxiliares ---
-
-async def text_to_speech(text: str, voice_id: str, output_filename="audio.mp3"):
-    """
-    Convierte texto a voz usando Microsoft Edge TTS.
-    Esta función es asíncrona.
-    """
+
+def text_to_speech(text: str, output_filename="audio.mp3"):
     try:
-        communicate = edge_tts.Communicate(text, voice_id)
-        await communicate.save(output_filename)
+        tts = gTTS(text=text, lang='es')
+        tts.save(output_filename)
         return output_filename
     except Exception as e:
-        raise Exception(f"Error al generar el audio con Edge TTS: {e}")
+        raise Exception(f"Error al generar el audio con gTTS: {e}")
 
 def get_audio_duration(audio_path):
-    """Obtiene la duración de un archivo de audio en segundos."""
     if not os.path.exists(audio_path) or os.path.getsize(audio_path) == 0:
         return 0
     try:
@@ -42,10 +25,6 @@ def get_audio_duration(audio_path):
         raise Exception(f"Error al obtener la duración del audio: {e}")
 
 def process_image(img_path, target_width, target_height, output_folder, index):
-    """
-    Procesa una imagen: la recorta para ajustarse a la relación de aspecto
-    y la redimensiona al tamaño del video final.
-    """
     try:
         img = Image.open(img_path).convert("RGB")
         original_width, original_height = img.size
@@ -66,14 +45,9 @@ def create_video_with_ken_burns(processed_images, audio_duration, fps, video_size, output_filename):
         img.save(output_path)
         return output_path
     except Exception as e:
-        print(f"Error procesando imagen {img_path}: {e}")
         return None
 
 def create_video_with_ken_burns(processed_images, audio_duration, fps, video_size, output_filename):
-    """
-    Crea un video a partir de imágenes con efecto Ken Burns (zoom y paneo),
-    repitiendo las imágenes si la duración del audio es mayor.
-    """
     if not processed_images:
         raise ValueError("No hay imágenes procesadas para crear el video.")
 
@@ -89,15 +63,15 @@ def create_video_with_ken_burns(processed_images, audio_duration, fps, video_size, output_filename):
     input_commands = []
     for img_path in processed_images * num_loops:
         input_commands.extend(["-i", img_path])
-
+
     for i in range(total_clips):
         zoom = 1.2
-        filter_complex_chains.append(f"[{i}:v]scale={width*zoom}:{height*zoom},zoompan=z='min(zoom+0.0015,1.5)':d=1:x='iw/2-(iw/zoom/2)':y='ih/2-(ih/zoom/2)':s={width}x{height},fade=t=in:st=0:d=1,fade=t=out:st={IMAGE_DURATION-1}:d=1[v{i}]")
+        filter_complex_chains.append(f"[{i}:v]scale={width*zoom}:{height*zoom},zoompan=z='min(zoom+0.0015,1.5)':d={fps*IMAGE_DURATION}:x='iw/2-(iw/zoom/2)':y='ih/2-(ih/zoom/2)':s={width}x{height},fade=t=in:st=0:d=1,fade=t=out:st={IMAGE_DURATION-1}:d=1[v{i}]")
         video_clips.append(f"[v{i}]")
 
     concat_filter = f"{''.join(video_clips)}concat=n={total_clips}:v=1:a=0,format=yuv420p[v]"
     filter_complex = ";".join(filter_complex_chains) + ";" + concat_filter
-
+
     command = ["ffmpeg", "-y"]
     command.extend(input_commands)
     command.extend([
@@ -108,127 +82,91 @@ def create_video_with_ken_burns(processed_images, audio_duration, fps, video_size, output_filename):
         "-pix_fmt", "yuv420p",
         output_filename
     ])
-
-    print(f"DEBUG: FFmpeg Ken Burns command: {' '.join(command)}")
     try:
         subprocess.run(command, check=True, capture_output=True, text=True)
     except subprocess.CalledProcessError as e:
-        print(f"DEBUG: FFmpeg Ken Burns Error Stderr: {e.stderr}")
         raise Exception(f"Error al crear video con efecto Ken Burns: {e.stderr}")
 
 def combine_video_and_audio(video_path, audio_path, output_path):
-    """Combina un archivo de video y uno de audio."""
     command = ["ffmpeg", "-y", "-i", video_path, "-i", audio_path, "-c:v", "copy", "-c:a", "aac", "-map", "0:v:0", "-map", "1:a:0", "-shortest", output_path]
     try:
         subprocess.run(command, check=True, capture_output=True, text=True)
     except subprocess.CalledProcessError as e:
-        print(f"DEBUG: FFmpeg Combine Error Stderr: {e.stderr}")
         raise Exception(f"Error al combinar video y audio: {e.stderr}")
 
-# --- Funciones Principales de Gradio (Ahora Asíncronas) ---
-
-async def generate_tts_only(news_text_input, voice_name):
-    """Genera solo el audio usando la voz seleccionada."""
+def generate_tts_only(news_text_input):
     if not news_text_input:
         return "Por favor, escribe una noticia para generar el audio.", None
     try:
-        voice_id = VOICES[voice_name]
-        audio_file = await text_to_speech(news_text_input, voice_id, "audio_temp_preview.mp3")
+        audio_file = text_to_speech(news_text_input, "audio_temp_preview.mp3")
        return "Audio generado con éxito.", audio_file
     except Exception as e:
        return f"Ocurrió un error al generar solo el audio: {e}", None
 
-async def create_news_video_app(news_text_input, voice_name, image_files, video_ratio, input_audio_file):
-    """Orquesta la creación del video completo."""
+def create_news_video_app(news_text_input, image_files, video_ratio, input_audio_file):
     processed_image_folder = "temp_processed_images"
     final_output_video_path = "video_noticia_final.mp4"
     temp_video_no_audio_path = "video_sin_audio.mp4"
     temp_audio_file = "audio_para_video.mp3"
 
-    # Limpieza inicial
     if os.path.exists(processed_image_folder): shutil.rmtree(processed_image_folder)
     os.makedirs(processed_image_folder)
 
     try:
         if not image_files: raise ValueError("Por favor, sube al menos una imagen.")
         if not news_text_input and not input_audio_file: raise ValueError("Escribe una noticia o genera el audio primero.")
-
-        # Generar audio si no se proporciona uno
+
         if input_audio_file and os.path.exists(input_audio_file) and os.path.getsize(input_audio_file) > 0:
             shutil.copy(input_audio_file, temp_audio_file)
         else:
-            voice_id = VOICES[voice_name]
-            await text_to_speech(news_text_input, voice_id, temp_audio_file)
-
+            text_to_speech(news_text_input, temp_audio_file)
+
         audio_duration = get_audio_duration(temp_audio_file)
-        if audio_duration == 0: raise ValueError("La duración del audio es cero, no se puede generar el video.")
+        if audio_duration == 0: raise ValueError("La duración del audio es cero.")
 
-        # Procesar imágenes
         target_width, target_height = (720, 1280) if video_ratio == "9:16" else (1280, 720)
         processed_images_paths = [process_image(f.name, target_width, target_height, processed_image_folder, i) for i, f in enumerate(image_files)]
         processed_images_paths = [p for p in processed_images_paths if p]
         if not processed_images_paths: raise ValueError("No se pudieron procesar las imágenes.")
 
-        # Crear video y combinar
         create_video_with_ken_burns(processed_images_paths, audio_duration, 30, (target_width, target_height), temp_video_no_audio_path)
         combine_video_and_audio(temp_video_no_audio_path, temp_audio_file, final_output_video_path)
 
         return "Video generado con éxito.", final_output_video_path
 
     except Exception as e:
-        print(f"ERROR: {e}")
         return f"Ocurrió un error: {e}", None
     finally:
-        # Limpieza final
         if os.path.exists(processed_image_folder): shutil.rmtree(processed_image_folder)
         if os.path.exists(temp_video_no_audio_path): os.remove(temp_video_no_audio_path)
         if os.path.exists(temp_audio_file): os.remove(temp_audio_file)
         if os.path.exists("audio_temp_preview.mp3"): os.remove("audio_temp_preview.mp3")
 
-# ==============================================================================
-# 3. DEFINICIÓN DE LA INTERFAZ DE GRADIO
-# ==============================================================================
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown(
-        """
-        # 🎥 Creador de Videos de Noticias con Voz Profesional 🎙️
-        Escribe una noticia, elige una voz, sube tus imágenes y selecciona el formato para generar un video dinámico.
-        """
-    )
+    gr.Markdown("# 🎥 Creador de Videos de Noticias")
     with gr.Row():
         with gr.Column(scale=2):
             news_input = gr.Textbox(label="1. Escribe tu noticia aquí", lines=5)
-
-            voice_selector = gr.Dropdown(
-                label="2. Elige una Voz",
-                choices=list(VOICES.keys()),
-                value=list(VOICES.keys())[0], # Valor por defecto
-                interactive=True
-            )
-
-            image_upload = gr.File(label="3. Sube tus imágenes", file_count="multiple", type="filepath", file_types=[".jpg", ".jpeg", ".png"])
-            video_ratio_dropdown = gr.Dropdown(label="4. Elige el Formato del Video", choices=["16:9", "9:16"], value="9:16", interactive=True)
-
+            image_upload = gr.File(label="2. Sube tus imágenes", file_count="multiple", type="filepath", file_types=[".jpg", ".jpeg", ".png"])
+            video_ratio_dropdown = gr.Dropdown(label="3. Elige el Formato del Video", choices=["16:9", "9:16"], value="9:16", interactive=True)
             with gr.Accordion("Opciones de Audio (Opcional)", open=False):
                 generate_audio_button = gr.Button("Generar Solo Audio (Vista Previa)")
                 audio_status_message = gr.Textbox(label="Estado del Audio", interactive=False)
                 audio_output_preview = gr.Audio(label="Audio de Noticia (Vista Previa)", interactive=False)
-
             generate_video_button = gr.Button("🎬 Generar Video Completo", variant="primary")
-
         with gr.Column(scale=3):
             output_message = gr.Textbox(label="Estado del Proceso", interactive=False)
             video_output = gr.Video(label="Video de la Noticia Generado")
 
     generate_audio_button.click(
         fn=generate_tts_only,
-        inputs=[news_input, voice_selector],
+        inputs=[news_input],
         outputs=[audio_status_message, audio_output_preview]
     )
     generate_video_button.click(
         fn=create_news_video_app,
-        inputs=[news_input, voice_selector, image_upload, video_ratio_dropdown, audio_output_preview],
+        inputs=[news_input, image_upload, video_ratio_dropdown, audio_output_preview],
         outputs=[output_message, video_output]
     )
 
-demo.launch(share=True, debug=True)
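
Note on the TTS swap above: the commit drops the asynchronous edge-tts calls and the VOICES dropdown in favor of gTTS, which selects the voice from the language code alone, so both Gradio callbacks become plain synchronous functions. A minimal standalone sketch of what the new text_to_speech wraps, together with the pydub duration check the video length depends on (this assumes gTTS and pydub are installed, network access for gTTS, and ffmpeg available so pydub can decode MP3; it is an illustration, not part of the commit):

    from gtts import gTTS
    from pydub import AudioSegment

    def synth_and_measure(text: str, out_path: str = "audio.mp3") -> float:
        # Generate Spanish speech with gTTS (no voice_id: one voice per language code)...
        gTTS(text=text, lang="es").save(out_path)
        # ...and return the clip duration in seconds, as get_audio_duration does via pydub.
        return AudioSegment.from_file(out_path).duration_seconds

    print(synth_and_measure("Esta es una prueba de la nueva voz."))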
 
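The other functional change is in create_video_with_ken_burns: the zoompan duration moves from d=1 (a single output frame per input image) to d={fps*IMAGE_DURATION}, so each still is actually animated for IMAGE_DURATION seconds. A small Python sketch of the per-image filter string that loop builds after the fix, with illustrative values (fps=30 matches the call site; IMAGE_DURATION=5 and the 9:16 dimensions are assumptions here, since IMAGE_DURATION is defined in a part of app.py the diff does not show):

    fps = 30                # value passed in by create_news_video_app
    IMAGE_DURATION = 5      # assumed for illustration; the real value is set elsewhere in app.py
    width, height = 720, 1280
    zoom = 1.2
    i = 0
    chain = (
        f"[{i}:v]scale={width*zoom}:{height*zoom},"
        f"zoompan=z='min(zoom+0.0015,1.5)':d={fps*IMAGE_DURATION}"  # was d=1 before this commit
        f":x='iw/2-(iw/zoom/2)':y='ih/2-(ih/zoom/2)':s={width}x{height},"
        f"fade=t=in:st=0:d=1,fade=t=out:st={IMAGE_DURATION-1}:d=1[v{i}]"
    )
    print(chain)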