import os
import io
import json
import asyncio

import requests
import edge_tts
import gradio as gr
from moviepy.editor import *
from PIL import Image


# 1. Fetch stock images/videos from Pexels
def get_stock_media(query, is_video=False):
    API_KEY = os.getenv("PEXELS_API_KEY")  # Set this in your Hugging Face Secrets
    if not API_KEY:
        raise ValueError("Missing Pexels API key! Add it in your Hugging Face Secrets.")
    # Photo search lives under /v1/search; video search under /videos/search.
    url = "https://api.pexels.com/videos/search" if is_video else "https://api.pexels.com/v1/search"
    headers = {"Authorization": API_KEY}
    params = {"query": query, "per_page": 1}  # let requests handle URL encoding
    response = requests.get(url, headers=headers, params=params).json()
    if is_video:
        if not response.get("videos"):
            raise ValueError(f"Pexels returned no videos for '{query}'.")
        video_url = response["videos"][0]["video_files"][0]["link"]
        return requests.get(video_url).content
    if not response.get("photos"):
        raise ValueError(f"Pexels returned no photos for '{query}'.")
    image_url = response["photos"][0]["src"]["large"]
    return Image.open(io.BytesIO(requests.get(image_url).content))


# 2. Generate narration with Edge TTS
async def generate_voice(text, voice="es-ES-AlvaroNeural", output_path="voice.mp3"):
    communicate = edge_tts.Communicate(text=text, voice=voice)
    await communicate.save(output_path)


# 3. Add background music (20% volume, looped to match the narration)
def add_background_music(audio_clip, music_path=None, volume=0.2):
    if not music_path:
        return audio_clip
    music = AudioFileClip(music_path).volumex(volume)
    if music.duration < audio_clip.duration:
        # AudioFileClip has no .loop() method; audio_loop is the audio fx.
        music = music.fx(afx.audio_loop, duration=audio_clip.duration)
    else:
        music = music.subclip(0, audio_clip.duration)
    return CompositeAudioClip([audio_clip, music.set_start(0)])


# 4. Motion/zoom effect for still images (simple Ken Burns-style zoom)
def apply_effects(clip, zoom_factor=1.05):
    # Grow from 1.0 to zoom_factor over the clip's duration. The original
    # fx(scroll, h=50, w=50) call cropped the frame to a 50x50 window.
    duration = clip.duration
    return clip.resize(lambda t: 1 + (zoom_factor - 1) * t / duration).set_position("center")


# 5. Dynamic subtitles (words appear progressively)
def generate_subtitles(text, duration, fontsize=30, color="white", stroke_color="black"):
    # TextClip requires ImageMagick to be installed on the host.
    words = text.split()
    word_duration = duration / max(len(words), 1)  # avoid division by zero
    clips = []
    for i, word in enumerate(words):
        # The default "label" method auto-sizes the clip; "caption" would
        # require an explicit width.
        txt_clip = TextClip(
            " ".join(words[: i + 1]),
            fontsize=fontsize,
            color=color,
            stroke_color=stroke_color,
            font="Arial-Bold",
        ).set_duration(word_duration)
        clips.append(txt_clip)
    # concatenate_videoclips plays the clips back to back, so no set_start is
    # needed; the caller positions the resulting clip when compositing.
    return concatenate_videoclips(clips)


# 6. Main pipeline
async def generate_video(script_json, voice_model, music_file=None):
    try:
        script = json.loads(script_json)
    except json.JSONDecodeError:
        raise gr.Error("Invalid script format! Use JSON as in the example.")

    clips = []
    for i, scene in enumerate(script):
        img = get_stock_media(scene["prompt"])
        img_path = f"scene_{i}.jpg"
        img.save(img_path)

        voice_path = f"voice_{i}.mp3"
        await generate_voice(scene["text"], voice_model, voice_path)
        audio = AudioFileClip(voice_path)

        clip = ImageClip(img_path).set_duration(audio.duration)
        clip = apply_effects(clip)
        subtitles = generate_subtitles(scene["text"], audio.duration)
        final_clip = CompositeVideoClip(
            [clip, subtitles.set_position(("center", "bottom"))]
        ).set_audio(audio)
        clips.append(final_clip)

    final_video = concatenate_videoclips(clips)
    if music_file:
        final_video = final_video.set_audio(
            add_background_music(final_video.audio, music_file)
        )

    output_path = "final_video.mp4"
    final_video.write_videofile(output_path, fps=24, codec="libx264", threads=4)
    return output_path
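
# Optional smoke test: exercise the pipeline once without the UI. A minimal
# sketch, assuming PEXELS_API_KEY is set in the environment; the SMOKE_TEST
# gate, the sample prompt, and the voice below are illustrative choices, not
# part of the app, so a normal Space launch skips this block entirely.
if os.getenv("SMOKE_TEST"):
    sample_script = json.dumps(
        [{"prompt": "mountain landscape", "text": "Welcome to the mountains."}]
    )
    print("Wrote", asyncio.run(generate_video(sample_script, "es-ES-AlvaroNeural")))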

# 7. Gradio interface
def ui(script_json, voice_model, music_file=None):
    try:
        # asyncio.run creates and tears down an event loop per call
        return asyncio.run(generate_video(script_json, voice_model, music_file))
    except Exception as e:
        raise gr.Error(f"Error: {str(e)}")


# Edge TTS voices (you can add more). list_voices() is a coroutine that
# returns dicts; the dropdown needs the "ShortName" strings.
voices = [v["ShortName"] for v in asyncio.run(edge_tts.list_voices())]

with gr.Blocks(title="AI Video Generator") as demo:
    gr.Markdown("## 🎥 AI Video Generator (Free)")
    with gr.Row():
        script_input = gr.Textbox(
            label="Script (JSON)",
            placeholder='[{"prompt": "landscape", "text": "Your text here..."}]',
            lines=5,
        )
    with gr.Row():
        voice_dropdown = gr.Dropdown(
            choices=voices, label="Voice", value="es-ES-AlvaroNeural"
        )
        music_upload = gr.File(label="Background music (optional)", type="filepath")
    generate_btn = gr.Button("Generate Video")
    output_video = gr.Video(label="Result", format="mp4")
    gr.Examples(
        examples=[[
            '[{"prompt": "futuristic city", "text": "Welcome to the future."}]',
            "es-ES-AlvaroNeural",
            None,
        ]],
        inputs=[script_input, voice_dropdown, music_upload],
    )
    generate_btn.click(
        fn=ui,
        inputs=[script_input, voice_dropdown, music_upload],
        outputs=output_video,
    )

demo.launch(debug=True)
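
# Assumed dependencies for a Hugging Face Space running this script — a sketch
# derived from the imports above, not a pinned, tested set:
#
#   requirements.txt: edge-tts, gradio, moviepy, requests, Pillow
#   packages.txt:     imagemagick, ffmpeg
#
# TextClip renders through ImageMagick and write_videofile encodes through
# FFmpeg, so both system packages must be available on the host.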