# news3/app.py
import gradio as gr
from gtts import gTTS
import os
from PIL import Image
from pydub import AudioSegment
import subprocess
import shutil
import math
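
# Runtime assumptions: the ffmpeg binary must be on PATH (pydub decoding and
# the subprocess calls below shell out to it), and gTTS needs network access
# to Google's text-to-speech endpoint.
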
def text_to_speech(text: str, output_filename="audio.mp3"):
    try:
        tts = gTTS(text=text, lang='es')
        tts.save(output_filename)
        return output_filename
    except Exception as e:
        raise Exception(f"Error al generar el audio con gTTS: {e}")
def get_audio_duration(audio_path):
    if not os.path.exists(audio_path) or os.path.getsize(audio_path) == 0:
        return 0
    try:
        audio = AudioSegment.from_file(audio_path)
        return audio.duration_seconds
    except Exception as e:
        raise Exception(f"Error al obtener la duración del audio: {e}")
def process_image(img_path, target_width, target_height, output_folder, index):
    try:
        img = Image.open(img_path).convert("RGB")
        original_width, original_height = img.size
        target_ratio = target_width / target_height
        image_ratio = original_width / original_height
        if image_ratio > target_ratio:
            new_width = int(original_height * target_ratio)
            left = (original_width - new_width) / 2
            img = img.crop((left, 0, left + new_width, original_height))
        elif image_ratio < target_ratio:
            new_height = int(original_width / target_ratio)
            top = (original_height - new_height) / 2
            img = img.crop((0, top, original_width, top + new_height))
        img = img.resize((target_width, target_height), Image.Resampling.LANCZOS)
        output_path = os.path.join(output_folder, f"processed_image_{index:03d}.png")
        img.save(output_path)
        return output_path
    except Exception as e:
        return None
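# Worked example of the center-crop above (illustrative numbers): a 4000x3000
# photo targeted at 720x1280 (ratio 0.5625) has image_ratio 1.333 > 0.5625, so
# new_width = int(3000 * 0.5625) = 1687 and the crop keeps the middle
# 1687x3000 strip before the LANCZOS resize down to 720x1280.
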
def create_video_with_ken_burns(processed_images, audio_duration, fps, video_size, output_filename):
    if not processed_images:
        raise ValueError("No hay imágenes procesadas para crear el video.")
    IMAGE_DURATION = 3
    num_images = len(processed_images)
    width, height = video_size
    num_loops = math.ceil(audio_duration / (num_images * IMAGE_DURATION)) if (num_images * IMAGE_DURATION) > 0 else 1
    filter_complex_chains = []
    video_clips = []
    total_clips = num_images * num_loops
    input_commands = []
    for img_path in processed_images * num_loops:
        input_commands.extend(["-i", img_path])
    for i in range(total_clips):
        zoom = 1.2
        # Oversize each still, then let zoompan pan/zoom and fade it in and out.
        filter_complex_chains.append(
            f"[{i}:v]scale={int(width * zoom)}:{int(height * zoom)},"
            f"zoompan=z='min(zoom+0.0015,1.5)':d={fps * IMAGE_DURATION}:"
            f"x='iw/2-(iw/zoom/2)':y='ih/2-(ih/zoom/2)':s={width}x{height},"
            f"fade=t=in:st=0:d=1,fade=t=out:st={IMAGE_DURATION - 1}:d=1[v{i}]"
        )
        video_clips.append(f"[v{i}]")
    concat_filter = f"{''.join(video_clips)}concat=n={total_clips}:v=1:a=0,format=yuv420p[v]"
    filter_complex = ";".join(filter_complex_chains) + ";" + concat_filter
    command = ["ffmpeg", "-y"]
    command.extend(input_commands)
    command.extend([
        "-filter_complex", filter_complex,
        "-map", "[v]",
        "-t", str(audio_duration),
        "-c:v", "libx264",
        "-pix_fmt", "yuv420p",
        output_filename
    ])
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        raise Exception(f"Error al crear video con efecto Ken Burns: {e.stderr}")
def combine_video_and_audio(video_path, audio_path, output_path):
    command = ["ffmpeg", "-y", "-i", video_path, "-i", audio_path, "-c:v", "copy", "-c:a", "aac", "-map", "0:v:0", "-map", "1:a:0", "-shortest", output_path]
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        raise Exception(f"Error al combinar video y audio: {e.stderr}")
def generate_tts_only(news_text_input):
    if not news_text_input:
        return "Por favor, escribe una noticia para generar el audio.", None
    try:
        audio_file = text_to_speech(news_text_input, "audio_temp_preview.mp3")
        return "Audio generado con éxito.", audio_file
    except Exception as e:
        return f"Ocurrió un error al generar solo el audio: {e}", None
def create_news_video_app(news_text_input, image_files, video_ratio, input_audio_file):
    processed_image_folder = "temp_processed_images"
    final_output_video_path = "video_noticia_final.mp4"
    temp_video_no_audio_path = "video_sin_audio.mp4"
    temp_audio_file = "audio_para_video.mp3"
    if os.path.exists(processed_image_folder): shutil.rmtree(processed_image_folder)
    os.makedirs(processed_image_folder)
    try:
        if not image_files: raise ValueError("Por favor, sube al menos una imagen.")
        if not news_text_input and not input_audio_file: raise ValueError("Escribe una noticia o genera el audio primero.")
        if input_audio_file and os.path.exists(input_audio_file) and os.path.getsize(input_audio_file) > 0:
            shutil.copy(input_audio_file, temp_audio_file)
        else:
            text_to_speech(news_text_input, temp_audio_file)
        audio_duration = get_audio_duration(temp_audio_file)
        if audio_duration == 0: raise ValueError("La duración del audio es cero.")
        target_width, target_height = (720, 1280) if video_ratio == "9:16" else (1280, 720)
        # Accept both plain path strings and tempfile-like objects with a .name
        # attribute, since different Gradio versions return either from gr.File.
        processed_images_paths = [
            process_image(f if isinstance(f, str) else f.name, target_width, target_height, processed_image_folder, i)
            for i, f in enumerate(image_files)
        ]
        processed_images_paths = [p for p in processed_images_paths if p]
        if not processed_images_paths: raise ValueError("No se pudieron procesar las imágenes.")
        create_video_with_ken_burns(processed_images_paths, audio_duration, 30, (target_width, target_height), temp_video_no_audio_path)
        combine_video_and_audio(temp_video_no_audio_path, temp_audio_file, final_output_video_path)
        return "Video generado con éxito.", final_output_video_path
    except Exception as e:
        return f"Ocurrió un error: {e}", None
    finally:
        if os.path.exists(processed_image_folder): shutil.rmtree(processed_image_folder)
        if os.path.exists(temp_video_no_audio_path): os.remove(temp_video_no_audio_path)
        if os.path.exists(temp_audio_file): os.remove(temp_audio_file)
        if os.path.exists("audio_temp_preview.mp3"): os.remove("audio_temp_preview.mp3")
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎥 Creador de Videos de Noticias")
    with gr.Row():
        with gr.Column(scale=2):
            news_input = gr.Textbox(label="1. Escribe tu noticia aquí", lines=5)
            image_upload = gr.File(label="2. Sube tus imágenes", file_count="multiple", type="filepath", file_types=[".jpg", ".jpeg", ".png"])
            video_ratio_dropdown = gr.Dropdown(label="3. Elige el Formato del Video", choices=["16:9", "9:16"], value="9:16", interactive=True)
            with gr.Accordion("Opciones de Audio (Opcional)", open=False):
                generate_audio_button = gr.Button("Generar Solo Audio (Vista Previa)")
                audio_status_message = gr.Textbox(label="Estado del Audio", interactive=False)
                # type="filepath" so the preview audio is handed back to
                # create_news_video_app as a path instead of raw samples.
                audio_output_preview = gr.Audio(label="Audio de Noticia (Vista Previa)", type="filepath", interactive=False)
            generate_video_button = gr.Button("🎬 Generar Video Completo", variant="primary")
        with gr.Column(scale=3):
            output_message = gr.Textbox(label="Estado del Proceso", interactive=False)
            video_output = gr.Video(label="Video de la Noticia Generado")
    generate_audio_button.click(
        fn=generate_tts_only,
        inputs=[news_input],
        outputs=[audio_status_message, audio_output_preview]
    )
    generate_video_button.click(
        fn=create_news_video_app,
        inputs=[news_input, image_upload, video_ratio_dropdown, audio_output_preview],
        outputs=[output_message, video_output]
    )

demo.launch()