import gradio as gr
from gtts import gTTS
import os
from PIL import Image
from pydub import AudioSegment
import subprocess
import shutil
import math
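# Assumed runtime dependencies for this Space (not pinned here): gradio, gTTS, pydub and
# Pillow via requirements.txt, plus an ffmpeg binary on PATH (on Hugging Face Spaces this
# is typically provided through packages.txt) for pydub decoding and the subprocess calls below.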
def text_to_speech(text: str, output_filename="audio.mp3"):
    try:
        tts = gTTS(text=text, lang='es')
        tts.save(output_filename)
        return output_filename
    except Exception as e:
        raise Exception(f"Error al generar el audio con gTTS: {e}")
def get_audio_duration(audio_path):
    if not os.path.exists(audio_path) or os.path.getsize(audio_path) == 0:
        return 0
    try:
        audio = AudioSegment.from_file(audio_path)
        return audio.duration_seconds
    except Exception as e:
        raise Exception(f"Error al obtener la duración del audio: {e}")
def process_image(img_path, target_width, target_height, output_folder, index):
    try:
        img = Image.open(img_path).convert("RGB")
        original_width, original_height = img.size
        target_ratio = target_width / target_height
        image_ratio = original_width / original_height
        # Center-crop to the target aspect ratio before resizing, so nothing is distorted.
        if image_ratio > target_ratio:
            new_width = int(original_height * target_ratio)
            left = (original_width - new_width) / 2
            img = img.crop((left, 0, left + new_width, original_height))
        elif image_ratio < target_ratio:
            new_height = int(original_width / target_ratio)
            top = (original_height - new_height) / 2
            img = img.crop((0, top, original_width, top + new_height))
        img = img.resize((target_width, target_height), Image.Resampling.LANCZOS)
        output_path = os.path.join(output_folder, f"processed_image_{index:03d}.png")
        img.save(output_path)
        return output_path
    except Exception as e:
        # An image that cannot be processed is skipped; the caller filters out the None.
        return None
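# Worked example of the center-crop logic above, assuming a 1920x1080 photo and the
# 9:16 (720x1280) target: image_ratio (about 1.78) > target_ratio (0.5625), so the crop keeps
# the full 1080 px height and a centered int(1080 * 0.5625) = 607 px wide strip, which is
# then resized to 720x1280 with LANCZOS resampling.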
def create_video_with_ken_burns(processed_images, audio_duration, fps, video_size, output_filename):
    if not processed_images:
        raise ValueError("No hay imágenes procesadas para crear el video.")
    IMAGE_DURATION = 3  # seconds each image stays on screen
    num_images = len(processed_images)
    width, height = video_size
    # Repeat the image list enough times to cover the full audio duration.
    num_loops = math.ceil(audio_duration / (num_images * IMAGE_DURATION)) if (num_images * IMAGE_DURATION) > 0 else 1
    filter_complex_chains = []
    video_clips = []
    total_clips = num_images * num_loops
    input_commands = []
    for img_path in processed_images * num_loops:
        input_commands.extend(["-i", img_path])
    for i in range(total_clips):
        zoom = 1.2
        # Upscale first so zoompan has headroom, then zoom in slowly and fade each clip in/out.
        # int() keeps the scale dimensions whole numbers (e.g. 720 * 1.2 -> 864).
        filter_complex_chains.append(
            f"[{i}:v]scale={int(width * zoom)}:{int(height * zoom)},"
            f"zoompan=z='min(zoom+0.0015,1.5)':d={fps * IMAGE_DURATION}:"
            f"x='iw/2-(iw/zoom/2)':y='ih/2-(ih/zoom/2)':s={width}x{height},"
            f"fade=t=in:st=0:d=1,fade=t=out:st={IMAGE_DURATION - 1}:d=1[v{i}]"
        )
        video_clips.append(f"[v{i}]")
    concat_filter = f"{''.join(video_clips)}concat=n={total_clips}:v=1:a=0,format=yuv420p[v]"
    filter_complex = ";".join(filter_complex_chains) + ";" + concat_filter
    command = ["ffmpeg", "-y"]
    command.extend(input_commands)
    command.extend([
        "-filter_complex", filter_complex,
        "-map", "[v]",
        "-t", str(audio_duration),
        "-c:v", "libx264",
        "-pix_fmt", "yuv420p",
        output_filename
    ])
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        raise Exception(f"Error al crear video con efecto Ken Burns: {e.stderr}")
def combine_video_and_audio(video_path, audio_path, output_path):
    command = ["ffmpeg", "-y", "-i", video_path, "-i", audio_path, "-c:v", "copy", "-c:a", "aac", "-map", "0:v:0", "-map", "1:a:0", "-shortest", output_path]
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        raise Exception(f"Error al combinar video y audio: {e.stderr}")
def generate_tts_only(news_text_input):
    if not news_text_input:
        return "Por favor, escribe una noticia para generar el audio.", None
    try:
        audio_file = text_to_speech(news_text_input, "audio_temp_preview.mp3")
        return "Audio generado con éxito.", audio_file
    except Exception as e:
        return f"Ocurrió un error al generar solo el audio: {e}", None
def create_news_video_app(news_text_input, image_files, video_ratio, input_audio_file):
    processed_image_folder = "temp_processed_images"
    final_output_video_path = "video_noticia_final.mp4"
    temp_video_no_audio_path = "video_sin_audio.mp4"
    temp_audio_file = "audio_para_video.mp3"
    if os.path.exists(processed_image_folder):
        shutil.rmtree(processed_image_folder)
    os.makedirs(processed_image_folder)
    try:
        if not image_files:
            raise ValueError("Por favor, sube al menos una imagen.")
        if not news_text_input and not input_audio_file:
            raise ValueError("Escribe una noticia o genera el audio primero.")
        # Reuse the preview audio when it exists; otherwise synthesize it from the text.
        if input_audio_file and os.path.exists(input_audio_file) and os.path.getsize(input_audio_file) > 0:
            shutil.copy(input_audio_file, temp_audio_file)
        else:
            text_to_speech(news_text_input, temp_audio_file)
        audio_duration = get_audio_duration(temp_audio_file)
        if audio_duration == 0:
            raise ValueError("La duración del audio es cero.")
        target_width, target_height = (720, 1280) if video_ratio == "9:16" else (1280, 720)
        # gr.File(type="filepath") yields plain path strings; older Gradio versions return
        # tempfile wrappers with a .name attribute, so accept both.
        image_paths = [f if isinstance(f, str) else f.name for f in image_files]
        processed_images_paths = [
            process_image(path, target_width, target_height, processed_image_folder, i)
            for i, path in enumerate(image_paths)
        ]
        processed_images_paths = [p for p in processed_images_paths if p]
        if not processed_images_paths:
            raise ValueError("No se pudieron procesar las imágenes.")
        create_video_with_ken_burns(processed_images_paths, audio_duration, 30, (target_width, target_height), temp_video_no_audio_path)
        combine_video_and_audio(temp_video_no_audio_path, temp_audio_file, final_output_video_path)
        return "Video generado con éxito.", final_output_video_path
    except Exception as e:
        return f"Ocurrió un error: {e}", None
    finally:
        if os.path.exists(processed_image_folder):
            shutil.rmtree(processed_image_folder)
        if os.path.exists(temp_video_no_audio_path):
            os.remove(temp_video_no_audio_path)
        if os.path.exists(temp_audio_file):
            os.remove(temp_audio_file)
        if os.path.exists("audio_temp_preview.mp3"):
            os.remove("audio_temp_preview.mp3")
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎥 Creador de Videos de Noticias")
    with gr.Row():
        with gr.Column(scale=2):
            news_input = gr.Textbox(label="1. Escribe tu noticia aquí", lines=5)
            image_upload = gr.File(label="2. Sube tus imágenes", file_count="multiple", type="filepath", file_types=[".jpg", ".jpeg", ".png"])
            video_ratio_dropdown = gr.Dropdown(label="3. Elige el Formato del Video", choices=["16:9", "9:16"], value="9:16", interactive=True)
            with gr.Accordion("Opciones de Audio (Opcional)", open=False):
                generate_audio_button = gr.Button("Generar Solo Audio (Vista Previa)")
                audio_status_message = gr.Textbox(label="Estado del Audio", interactive=False)
                # type="filepath" so the preview path can be passed straight back into
                # create_news_video_app instead of a (sample_rate, ndarray) tuple.
                audio_output_preview = gr.Audio(label="Audio de Noticia (Vista Previa)", type="filepath", interactive=False)
            generate_video_button = gr.Button("🎬 Generar Video Completo", variant="primary")
        with gr.Column(scale=3):
            output_message = gr.Textbox(label="Estado del Proceso", interactive=False)
            video_output = gr.Video(label="Video de la Noticia Generado")

    generate_audio_button.click(
        fn=generate_tts_only,
        inputs=[news_input],
        outputs=[audio_status_message, audio_output_preview]
    )
    generate_video_button.click(
        fn=create_news_video_app,
        inputs=[news_input, image_upload, video_ratio_dropdown, audio_output_preview],
        outputs=[output_message, video_output]
    )

demo.launch()
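# demo.launch() with no arguments is all a Hugging Face Space needs; for local runs,
# standard Gradio options such as demo.launch(debug=True) or demo.launch(share=True)
# can be used instead.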