File size: 4,610 Bytes
43fcbe8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import os
import requests
import edge_tts
import gradio as gr
from moviepy.editor import *
from moviepy.editor import AudioFileClip, CompositeVideoClip, concatenate_videoclips, ImageClip, TextClip
from moviepy.video.fx.all import resize, scroll
from PIL import Image
import numpy as np
import io
import asyncio

# 1. Download stock images/videos (Pexels)
def get_stock_media(query, is_video=False):
    """Fetch the first Pexels search result for ``query``.

    Args:
        query: Free-text search term sent to the Pexels search API.
        is_video: When true, search videos instead of photos.

    Returns:
        A ``PIL.Image.Image`` for photo searches, or the raw bytes of the
        first video file for video searches.

    Raises:
        RuntimeError: If the ``PEXELS_API_KEY`` environment variable is unset.
        LookupError: If the search returns no results.
        requests.HTTPError: If the search or the media download answers with
            an error status.
    """
    api_key = os.getenv("PEXELS_API_KEY")  # Configure this in HF Secrets
    if not api_key:
        raise RuntimeError("PEXELS_API_KEY environment variable is not set")

    # Photo search and video search live under different roots in the
    # Pexels API, so the endpoint cannot be built by swapping one path segment.
    if is_video:
        search_url = "https://api.pexels.com/videos/search"
    else:
        search_url = "https://api.pexels.com/v1/search"

    # Passing the query via ``params`` lets requests URL-encode spaces and
    # special characters instead of splicing raw text into the URL.
    search = requests.get(
        search_url,
        headers={"Authorization": api_key},
        params={"query": query, "per_page": 1},
        timeout=30,
    )
    search.raise_for_status()

    results = search.json().get("videos" if is_video else "photos") or []
    if not results:
        raise LookupError(f"No Pexels results for query: {query!r}")

    if is_video:
        media_url = results[0]["video_files"][0]["link"]
    else:
        media_url = results[0]["src"]["large"]

    media = requests.get(media_url, timeout=60)
    media.raise_for_status()
    if is_video:
        return media.content
    return Image.open(io.BytesIO(media.content))

# 2. Generate speech with Edge TTS (any voice model)
async def generate_voice(text, voice="es-ES-AlvaroNeural", output_path="voice.mp3"):
    """Synthesize ``text`` with the given Edge TTS voice and save it to ``output_path``."""
    await edge_tts.Communicate(text=text, voice=voice).save(output_path)

# 3. Add background music (looped if necessary)
def add_background_music(audio_clip, music_path="background.mp3", volume=0.2):
    """Mix a background music file underneath ``audio_clip``.

    Args:
        audio_clip: The foreground audio; its duration defines the length of
            the resulting mix.
        music_path: Path of the music file to load.
        volume: Multiplier applied to the music's volume.

    Returns:
        A ``CompositeAudioClip`` containing ``audio_clip`` and the music,
        with the music fitted to ``audio_clip``'s duration.
    """
    target_duration = audio_clip.duration
    music = AudioFileClip(music_path).volumex(volume)

    if music.duration < target_duration:
        # Audio clips are looped with the audio effect ``audio_loop``;
        # ``loop`` is a video effect and is not available on audio clips.
        music = afx.audio_loop(music, duration=target_duration)
    elif music.duration > target_duration:
        # Trim longer tracks so the mix does not outlast the foreground audio.
        music = music.set_duration(target_duration)

    return CompositeAudioClip([audio_clip, music.set_start(0)])

# 4. Motion/zoom effects for images
def apply_effects(clip, zoom_factor=1.1, effect_duration=2):
    """Apply a centered zoom-in to ``clip``.

    The scale grows linearly from 1 to ``zoom_factor`` over the first
    ``effect_duration`` seconds and then stays at ``zoom_factor``.

    Args:
        clip: Clip exposing MoviePy's ``resize`` and ``set_position`` methods.
        zoom_factor: Final scale relative to the clip's original size.
        effect_duration: Seconds taken to reach ``zoom_factor``. A value of
            zero or less applies ``zoom_factor`` as a constant scale.

    Returns:
        The resized clip, positioned at the center of its parent composition.
    """
    if effect_duration <= 0:
        # No ramp requested: fall back to a fixed scale.
        return clip.resize(zoom_factor).set_position("center")

    growth = zoom_factor - 1

    def scale_at(t):
        # Clamp progress at 1 so the zoom holds once the ramp has finished.
        return 1 + growth * min(t / effect_duration, 1)

    # A cropping scroll with fixed h/w and zero speed would only cut a static
    # window out of the frame, so the motion comes from the time-based resize.
    return clip.resize(scale_at).set_position("center")

# 5. Build dynamic (karaoke-style) subtitles
def generate_subtitles(text, duration, fontsize=30, color="white", stroke_color="black"):
    """Build a subtitle clip that reveals ``text`` one word at a time.

    ``duration`` is split evenly across the whitespace-separated words; each
    segment shows every word up to and including the current one.

    Args:
        text: Subtitle text.
        duration: Total time the subtitles should span.
        fontsize: Font size passed to ``TextClip``.
        color: Text color passed to ``TextClip``.
        stroke_color: Outline color passed to ``TextClip``.

    Returns:
        The concatenated text clips, positioned at the bottom center.

    Raises:
        ValueError: If ``text`` contains no words.
    """
    words = text.split()
    if not words:
        # Without this guard the per-word duration below divides by zero.
        raise ValueError("text must contain at least one word")

    word_duration = duration / len(words)
    segments = [
        TextClip(
            " ".join(words[:shown]),
            fontsize=fontsize,
            color=color,
            stroke_color=stroke_color,
            font="Arial-Bold",
        ).set_start((shown - 1) * word_duration).set_duration(word_duration)
        for shown in range(1, len(words) + 1)
    ]
    return concatenate_videoclips(segments).set_position(("center", "bottom"))

# 6. Main function that generates the video
async def generate_video(script, voice_model, music_file=None):
    """Render a narrated video from a list of scenes.

    For each scene a stock image is downloaded, the text is voiced, and an
    image clip with zoom effect and word-by-word subtitles is built. The
    scene clips are concatenated and written to ``final_video.mp4``.

    Args:
        script: Sequence of scenes; each scene is a mapping with a
            ``"prompt"`` (image search term) and a ``"text"`` (narration).
        voice_model: Edge TTS voice name used for every scene.
        music_file: Optional background music, given either as an object
            with a ``name`` attribute holding the path or as a path string.

    Returns:
        The path of the written video file.

    Raises:
        ValueError: If ``script`` is empty.
    """
    if not script:
        raise ValueError("script must contain at least one scene")

    voice_tracks = []  # kept so the audio readers can be closed afterwards
    try:
        scene_clips = []
        for i, scene in enumerate(script):
            # Download the image and generate the narration
            img = get_stock_media(scene["prompt"])
            img.save(f"scene_{i}.jpg")
            await generate_voice(scene["text"], voice_model, f"voice_{i}.mp3")

            # Build the clip with effects
            audio = AudioFileClip(f"voice_{i}.mp3")
            voice_tracks.append(audio)
            clip = ImageClip(f"scene_{i}.jpg").set_duration(audio.duration)
            clip = apply_effects(clip)  # Zoom/motion effect

            # Dynamic subtitles
            subtitles = generate_subtitles(scene["text"], audio.duration)
            scene_clips.append(CompositeVideoClip([clip, subtitles]).set_audio(audio))

        # Join the clips and add music
        final_video = concatenate_videoclips(scene_clips)
        if music_file:
            # Accept both an uploaded-file object and a plain path string.
            music_path = getattr(music_file, "name", music_file)
            final_video.audio = add_background_music(final_video.audio, music_path)

        # Export
        output_path = "final_video.mp4"
        final_video.write_videofile(output_path, fps=24, codec="libx264")
    finally:
        # Release the narration file readers even when rendering fails.
        for track in voice_tracks:
            track.close()
    return output_path

# 7. Gradio interface
def ui(script, voice_model, music_file):
    """Synchronous entry point that runs ``generate_video`` to completion.

    Args:
        script: The scenes, either as a JSON string (a list of objects with
            ``"prompt"`` and ``"text"`` keys) or as an already-parsed list.
        voice_model: Edge TTS voice name.
        music_file: Optional background music upload.

    Returns:
        The path of the generated video.

    Raises:
        ValueError: If ``script`` is a string that is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    import json

    # The script arrives as raw text from the textbox; iterating a string
    # would yield characters rather than scenes, so parse it first.
    scenes = json.loads(script) if isinstance(script, str) else script

    # asyncio.run creates a fresh event loop and closes it when done,
    # so no loop is leaked per call.
    return asyncio.run(generate_video(scenes, voice_model, music_file))

# Edge TTS voice list (sample)
voices = ["es-ES-AlvaroNeural", "es-MX-DaliaNeural", "en-US-JennyNeural", "fr-FR-HenriNeural"]

# Interface layout
with gr.Blocks() as demo:
    gr.Markdown("## 🎬 Generador de Videos con IA (100% Gratis)")
    with gr.Row():
        script_input = gr.Textbox(label="Script (JSON)", placeholder='[{"prompt": "futuristic city", "text": "Texto aquí..."}]')
        voice_dropdown = gr.Dropdown(choices=voices, label="Voz Edge TTS", value="es-ES-AlvaroNeural")
        music_upload = gr.File(label="Música de fondo (opcional)", type="file")
    generate_btn = gr.Button("Generar Video")
    output_video = gr.Video(label="Video Generado")

    # Clicking the button passes the three inputs to ``ui`` and shows the
    # returned value in the video component.
    generate_btn.click(
        fn=ui,
        inputs=[script_input, voice_dropdown, music_upload],
        outputs=output_video,
    )

# Start the server only when executed as a script, so importing this module
# (for example to reuse the helpers) does not launch the app.
if __name__ == "__main__":
    demo.launch()