salomonsky committed
Commit ffd7703 · verified · 1 Parent(s): 80d9746

Update app.py

Files changed (1)
  1. app.py +22 -84
app.py CHANGED
@@ -1,38 +1,21 @@
 import gradio as gr
+from gtts import gTTS
 import os
 from PIL import Image
 from pydub import AudioSegment
 import subprocess
 import shutil
 import math
-import asyncio
-import edge_tts
-
-# --- Diccionario de Voces Disponibles ---
-# Formato: "Nombre para mostrar": "ID de la voz en Edge TTS"
-VOICES = {
-    "Jorge (México - Masculino)": "es-MX-JorgeForTTS",
-    "Dalia (México - Femenino)": "es-MX-DaliaForTTS",
-    "Alvaro (España - Masculino)": "es-ES-AlvaroForTTS",
-    "Elvira (España - Femenino)": "es-ES-ElviraForTTS",
-}
-
-# --- Funciones Auxiliares ---
-
-async def text_to_speech(text: str, voice_id: str, output_filename="audio.mp3"):
-    """
-    Convierte texto a voz usando Microsoft Edge TTS.
-    Esta función es asíncrona.
-    """
+
+def text_to_speech(text: str, output_filename="audio.mp3"):
     try:
-        communicate = edge_tts.Communicate(text, voice_id)
-        await communicate.save(output_filename)
+        tts = gTTS(text=text, lang='es')
+        tts.save(output_filename)
         return output_filename
     except Exception as e:
-        raise Exception(f"Error al generar el audio con Edge TTS: {e}")
+        raise Exception(f"Error al generar el audio con gTTS: {e}")
 
 def get_audio_duration(audio_path):
-    """Obtiene la duración de un archivo de audio en segundos."""
     if not os.path.exists(audio_path) or os.path.getsize(audio_path) == 0:
         return 0
     try:
@@ -42,10 +25,6 @@ def get_audio_duration(audio_path):
         raise Exception(f"Error al obtener la duración del audio: {e}")
 
 def process_image(img_path, target_width, target_height, output_folder, index):
-    """
-    Procesa una imagen: la recorta para ajustarse a la relación de aspecto
-    y la redimensiona al tamaño del video final.
-    """
     try:
         img = Image.open(img_path).convert("RGB")
         original_width, original_height = img.size
@@ -66,14 +45,9 @@ def create_video_with_ken_burns(processed_images, audio_duration, fps, video_size, output_filename):
         img.save(output_path)
         return output_path
     except Exception as e:
-        print(f"Error procesando imagen {img_path}: {e}")
         return None
 
 def create_video_with_ken_burns(processed_images, audio_duration, fps, video_size, output_filename):
-    """
-    Crea un video a partir de imágenes con efecto Ken Burns (zoom y paneo),
-    repitiendo las imágenes si la duración del audio es mayor.
-    """
     if not processed_images:
         raise ValueError("No hay imágenes procesadas para crear el video.")
 
@@ -89,15 +63,15 @@ def create_video_with_ken_burns(processed_images, audio_duration, fps, video_size, output_filename):
     input_commands = []
     for img_path in processed_images * num_loops:
         input_commands.extend(["-i", img_path])
-
+
     for i in range(total_clips):
         zoom = 1.2
-        filter_complex_chains.append(f"[{i}:v]scale={width*zoom}:{height*zoom},zoompan=z='min(zoom+0.0015,1.5)':d=1:x='iw/2-(iw/zoom/2)':y='ih/2-(ih/zoom/2)':s={width}x{height},fade=t=in:st=0:d=1,fade=t=out:st={IMAGE_DURATION-1}:d=1[v{i}]")
+        filter_complex_chains.append(f"[{i}:v]scale={width*zoom}:{height*zoom},zoompan=z='min(zoom+0.0015,1.5)':d={fps*IMAGE_DURATION}:x='iw/2-(iw/zoom/2)':y='ih/2-(ih/zoom/2)':s={width}x{height},fade=t=in:st=0:d=1,fade=t=out:st={IMAGE_DURATION-1}:d=1[v{i}]")
         video_clips.append(f"[v{i}]")
 
     concat_filter = f"{''.join(video_clips)}concat=n={total_clips}:v=1:a=0,format=yuv420p[v]"
     filter_complex = ";".join(filter_complex_chains) + ";" + concat_filter
-
+
     command = ["ffmpeg", "-y"]
     command.extend(input_commands)
     command.extend([
@@ -108,127 +82,91 @@ def create_video_with_ken_burns(processed_images, audio_duration, fps, video_size, output_filename):
         "-pix_fmt", "yuv420p",
         output_filename
     ])
-
-    print(f"DEBUG: FFmpeg Ken Burns command: {' '.join(command)}")
     try:
         subprocess.run(command, check=True, capture_output=True, text=True)
     except subprocess.CalledProcessError as e:
-        print(f"DEBUG: FFmpeg Ken Burns Error Stderr: {e.stderr}")
         raise Exception(f"Error al crear video con efecto Ken Burns: {e.stderr}")
 
 def combine_video_and_audio(video_path, audio_path, output_path):
-    """Combina un archivo de video y uno de audio."""
     command = ["ffmpeg", "-y", "-i", video_path, "-i", audio_path, "-c:v", "copy", "-c:a", "aac", "-map", "0:v:0", "-map", "1:a:0", "-shortest", output_path]
     try:
         subprocess.run(command, check=True, capture_output=True, text=True)
     except subprocess.CalledProcessError as e:
-        print(f"DEBUG: FFmpeg Combine Error Stderr: {e.stderr}")
         raise Exception(f"Error al combinar video y audio: {e.stderr}")
 
-# --- Funciones Principales de Gradio (Ahora Asíncronas) ---
-
-async def generate_tts_only(news_text_input, voice_name):
-    """Genera solo el audio usando la voz seleccionada."""
+def generate_tts_only(news_text_input):
     if not news_text_input:
         return "Por favor, escribe una noticia para generar el audio.", None
     try:
-        voice_id = VOICES[voice_name]
-        audio_file = await text_to_speech(news_text_input, voice_id, "audio_temp_preview.mp3")
+        audio_file = text_to_speech(news_text_input, "audio_temp_preview.mp3")
        return "Audio generado con éxito.", audio_file
     except Exception as e:
        return f"Ocurrió un error al generar solo el audio: {e}", None
 
-async def create_news_video_app(news_text_input, voice_name, image_files, video_ratio, input_audio_file):
-    """Orquesta la creación del video completo."""
+def create_news_video_app(news_text_input, image_files, video_ratio, input_audio_file):
     processed_image_folder = "temp_processed_images"
     final_output_video_path = "video_noticia_final.mp4"
     temp_video_no_audio_path = "video_sin_audio.mp4"
     temp_audio_file = "audio_para_video.mp3"
 
-    # Limpieza inicial
     if os.path.exists(processed_image_folder): shutil.rmtree(processed_image_folder)
     os.makedirs(processed_image_folder)
 
     try:
         if not image_files: raise ValueError("Por favor, sube al menos una imagen.")
         if not news_text_input and not input_audio_file: raise ValueError("Escribe una noticia o genera el audio primero.")
-
-        # Generar audio si no se proporciona uno
+
         if input_audio_file and os.path.exists(input_audio_file) and os.path.getsize(input_audio_file) > 0:
             shutil.copy(input_audio_file, temp_audio_file)
         else:
-            voice_id = VOICES[voice_name]
-            await text_to_speech(news_text_input, voice_id, temp_audio_file)
-
+            text_to_speech(news_text_input, temp_audio_file)
+
         audio_duration = get_audio_duration(temp_audio_file)
-        if audio_duration == 0: raise ValueError("La duración del audio es cero, no se puede generar el video.")
+        if audio_duration == 0: raise ValueError("La duración del audio es cero.")
 
-        # Procesar imágenes
         target_width, target_height = (720, 1280) if video_ratio == "9:16" else (1280, 720)
         processed_images_paths = [process_image(f.name, target_width, target_height, processed_image_folder, i) for i, f in enumerate(image_files)]
         processed_images_paths = [p for p in processed_images_paths if p]
         if not processed_images_paths: raise ValueError("No se pudieron procesar las imágenes.")
 
-        # Crear video y combinar
         create_video_with_ken_burns(processed_images_paths, audio_duration, 30, (target_width, target_height), temp_video_no_audio_path)
         combine_video_and_audio(temp_video_no_audio_path, temp_audio_file, final_output_video_path)
 
         return "Video generado con éxito.", final_output_video_path
 
     except Exception as e:
-        print(f"ERROR: {e}")
         return f"Ocurrió un error: {e}", None
     finally:
-        # Limpieza final
         if os.path.exists(processed_image_folder): shutil.rmtree(processed_image_folder)
         if os.path.exists(temp_video_no_audio_path): os.remove(temp_video_no_audio_path)
         if os.path.exists(temp_audio_file): os.remove(temp_audio_file)
         if os.path.exists("audio_temp_preview.mp3"): os.remove("audio_temp_preview.mp3")
 
-# ==============================================================================
-# 3. DEFINICIÓN DE LA INTERFAZ DE GRADIO
-# ==============================================================================
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown(
-        """
-        # 🎥 Creador de Videos de Noticias con Voz Profesional 🎙️
-        Escribe una noticia, elige una voz, sube tus imágenes y selecciona el formato para generar un video dinámico.
-        """
-    )
+    gr.Markdown("# 🎥 Creador de Videos de Noticias")
     with gr.Row():
         with gr.Column(scale=2):
             news_input = gr.Textbox(label="1. Escribe tu noticia aquí", lines=5)
-
-            voice_selector = gr.Dropdown(
-                label="2. Elige una Voz",
-                choices=list(VOICES.keys()),
-                value=list(VOICES.keys())[0], # Valor por defecto
-                interactive=True
-            )
-
-            image_upload = gr.File(label="3. Sube tus imágenes", file_count="multiple", type="filepath", file_types=[".jpg", ".jpeg", ".png"])
-            video_ratio_dropdown = gr.Dropdown(label="4. Elige el Formato del Video", choices=["16:9", "9:16"], value="9:16", interactive=True)
-
+            image_upload = gr.File(label="2. Sube tus imágenes", file_count="multiple", type="filepath", file_types=[".jpg", ".jpeg", ".png"])
+            video_ratio_dropdown = gr.Dropdown(label="3. Elige el Formato del Video", choices=["16:9", "9:16"], value="9:16", interactive=True)
             with gr.Accordion("Opciones de Audio (Opcional)", open=False):
                 generate_audio_button = gr.Button("Generar Solo Audio (Vista Previa)")
                 audio_status_message = gr.Textbox(label="Estado del Audio", interactive=False)
                 audio_output_preview = gr.Audio(label="Audio de Noticia (Vista Previa)", interactive=False)
-
             generate_video_button = gr.Button("🎬 Generar Video Completo", variant="primary")
-
         with gr.Column(scale=3):
             output_message = gr.Textbox(label="Estado del Proceso", interactive=False)
             video_output = gr.Video(label="Video de la Noticia Generado")
 
     generate_audio_button.click(
         fn=generate_tts_only,
-        inputs=[news_input, voice_selector],
+        inputs=[news_input],
         outputs=[audio_status_message, audio_output_preview]
     )
     generate_video_button.click(
         fn=create_news_video_app,
-        inputs=[news_input, voice_selector, image_upload, video_ratio_dropdown, audio_output_preview],
+        inputs=[news_input, image_upload, video_ratio_dropdown, audio_output_preview],
         outputs=[output_message, video_output]
     )
 
-demo.launch(share=True, debug=True)
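
Note on the TTS swap above: the commit drops the asynchronous edge-tts calls and the VOICES dropdown in favor of gTTS, which selects the voice from the language code alone, so both Gradio callbacks become plain synchronous functions. A minimal standalone sketch of what the new text_to_speech wraps, together with the pydub duration check the video length depends on (this assumes gTTS and pydub are installed, network access for gTTS, and ffmpeg available so pydub can decode MP3; it is an illustration, not part of the commit):

    from gtts import gTTS
    from pydub import AudioSegment

    def synth_and_measure(text: str, out_path: str = "audio.mp3") -> float:
        # Generate Spanish speech with gTTS (no voice_id: one voice per language code)...
        gTTS(text=text, lang="es").save(out_path)
        # ...and return the clip duration in seconds, as get_audio_duration does via pydub.
        return AudioSegment.from_file(out_path).duration_seconds

    print(synth_and_measure("Esta es una prueba de la nueva voz."))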
 
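The other functional change is in create_video_with_ken_burns: the zoompan duration moves from d=1 (a single output frame per input image) to d={fps*IMAGE_DURATION}, so each still is actually animated for IMAGE_DURATION seconds. A small Python sketch of the per-image filter string that loop builds after the fix, with illustrative values (fps=30 matches the call site; IMAGE_DURATION=5 and the 9:16 dimensions are assumptions here, since IMAGE_DURATION is defined in a part of app.py the diff does not show):

    fps = 30                # value passed in by create_news_video_app
    IMAGE_DURATION = 5      # assumed for illustration; the real value is set elsewhere in app.py
    width, height = 720, 1280
    zoom = 1.2
    i = 0
    chain = (
        f"[{i}:v]scale={width*zoom}:{height*zoom},"
        f"zoompan=z='min(zoom+0.0015,1.5)':d={fps*IMAGE_DURATION}"  # was d=1 before this commit
        f":x='iw/2-(iw/zoom/2)':y='ih/2-(ih/zoom/2)':s={width}x{height},"
        f"fade=t=in:st=0:d=1,fade=t=out:st={IMAGE_DURATION-1}:d=1[v{i}]"
    )
    print(chain)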