File size: 12,236 Bytes
8b274aa
15e8c2d
bafc5cd
15e8c2d
b8bd6c3
6bc8144
15e8c2d
 
6bc8144
 
 
 
3a7d955
6bc8144
 
 
cdba26b
 
b8bd6c3
0b39b27
b8bd6c3
b82e6a6
1829fd6
0b39b27
2b5730b
6bc8144
cdba26b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2b5730b
cdba26b
6bc8144
 
 
 
 
cdba26b
6bc8144
2b5730b
cdba26b
1829fd6
6bc8144
cdba26b
 
6bc8144
2b5730b
6bc8144
b8bd6c3
cdba26b
 
6bc8144
cdba26b
2b5730b
6bc8144
cdba26b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b39b27
cdba26b
 
 
 
0b39b27
6bc8144
0b39b27
cdba26b
6bc8144
cdba26b
 
6bc8144
 
 
2b5730b
6bc8144
cdba26b
2b5730b
6bc8144
cdba26b
0b39b27
 
 
 
 
cdba26b
 
 
 
0b39b27
 
cdba26b
0b39b27
 
 
 
 
cdba26b
0b39b27
 
 
 
 
 
cdba26b
0b39b27
cdba26b
 
 
 
0b39b27
cdba26b
 
0b39b27
cdba26b
 
 
 
 
0b39b27
cdba26b
 
 
0b39b27
cdba26b
 
 
 
2b5730b
cdba26b
 
6bc8144
cdba26b
 
 
0b39b27
cdba26b
 
0b39b27
cdba26b
 
0b39b27
 
 
 
cdba26b
 
 
 
 
 
 
 
 
0b39b27
cdba26b
3a7d955
cdba26b
 
 
0b39b27
cdba26b
50a2015
cdba26b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b39b27
cdba26b
 
 
 
 
 
 
 
0b39b27
50a2015
cdba26b
 
 
 
0b39b27
 
cdba26b
 
 
0b39b27
cdba26b
 
 
0b39b27
50a2015
cdba26b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b39b27
2b5730b
0b39b27
 
6bc8144
 
0b39b27
cdba26b
6bc8144
0b39b27
 
cdba26b
163c0da
cdba26b
 
0b39b27
cdba26b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b39b27
cdba26b
 
0b39b27
cdba26b
0b39b27
cdba26b
b82e6a6
cdba26b
 
 
0b39b27
cdba26b
0b39b27
 
 
 
 
 
 
 
cdba26b
0b39b27
15763cd
cdba26b
0b39b27
 
 
 
cdba26b
15763cd
cdba26b
0b39b27
 
cdba26b
 
 
0b39b27
 
cdba26b
6bc8144
 
cdba26b
0b39b27
6bc8144
cdba26b
0b39b27
cdba26b
 
0b39b27
 
 
 
cdba26b
0b39b27
cdba26b
0b39b27
 
 
cdba26b
0b39b27
 
 
163c0da
15e8c2d
2b5730b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
import os
import asyncio
import logging
import tempfile
import requests
from datetime import datetime
import edge_tts
import gradio as gr
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from keybert import KeyBERT
from moviepy.editor import VideoFileClip, concatenate_videoclips, AudioFileClip, CompositeAudioClip
import subprocess
import re
import math
from pydub import AudioSegment
from collections import Counter
import shutil

# Logging configuration: INFO level, "timestamp - level - message" format
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Pexels API key, read from the environment (None when PEXELS_API_KEY is unset)
PEXELS_API_KEY = os.environ.get("PEXELS_API_KEY")

# Search for videos on Pexels through its REST API
def buscar_videos_pexels(query, api_key, per_page=5):
    """Search Pexels for landscape videos matching ``query``.

    Args:
        query: Search term sent to the Pexels video-search endpoint.
        api_key: Pexels API key, sent in the ``Authorization`` header.
        per_page: Maximum number of results requested.

    Returns:
        The list stored under ``"videos"`` in the JSON response. An empty
        list is returned when the key or query is missing, the request
        fails, or the response is not a JSON object with a ``"videos"`` list.
    """
    if not api_key:
        # Fail fast with a clear message instead of sending a request
        # that carries no Authorization header.
        logger.error("Error buscando videos en Pexels: falta la clave PEXELS_API_KEY")
        return []
    if not query or not str(query).strip():
        return []

    try:
        response = requests.get(
            "https://api.pexels.com/videos/search",
            headers={"Authorization": api_key},
            params={"query": query, "per_page": per_page, "orientation": "landscape"},
            timeout=15,
        )
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers network/HTTP failures; ValueError covers
        # a body that is not valid JSON.
        logger.error(f"Error buscando videos en Pexels: {e}")
        return []

    if not isinstance(payload, dict):
        logger.error("Error buscando videos en Pexels: respuesta inesperada")
        return []

    # ``or []`` also covers an explicit ``"videos": null`` in the payload,
    # so callers can always iterate/extend with the result.
    return payload.get("videos") or []

# Model initialisation.
# Both loaders run at import time. On failure the corresponding globals are
# set to None so the rest of the module can fall back instead of crashing.
MODEL_NAME = "datificate/gpt2-small-spanish"  # Spanish-language model
try:
    tokenizer = GPT2Tokenizer.from_pretrained(MODEL_NAME)
    model = GPT2LMHeadModel.from_pretrained(MODEL_NAME).eval()
    # Reuse the end-of-sequence token for padding when the tokenizer has no pad token
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    logger.info("Modelo GPT-2 en español cargado")
except Exception as e:
    logger.error(f"Error al cargar modelo GPT-2: {e}")
    tokenizer = model = None

try:
    kw_model = KeyBERT('distilbert-base-multilingual-cased')  # Multilingual model
    logger.info("KeyBERT cargado")
except Exception as e:
    logger.error(f"Error al cargar KeyBERT: {e}")
    kw_model = None

# Script generation with the GPT-2 model
def generate_script(prompt, max_length=150):
    """Generate a one-sentence script about ``prompt``.

    The topic is wrapped in a fixed Spanish instruction, fed to the
    module-level GPT-2 model, and the first sentence of the decoded text is
    returned. The instruction itself is stripped from the result so it is
    never narrated.

    Args:
        prompt: Topic text supplied by the user.
        max_length: Upper bound on the number of newly generated tokens.

    Returns:
        The first sentence of the generated text, terminated with ".".
        ``prompt`` is returned unchanged when the model is unavailable,
        generation fails, or nothing usable is generated.
    """
    if not tokenizer or not model:
        return prompt  # Fall back to the original prompt

    instruction = "Escribe un guion corto y coherente sobre:"

    try:
        enhanced_prompt = f"{instruction} {prompt}"

        inputs = tokenizer(enhanced_prompt, return_tensors="pt", truncation=True, max_length=512)

        outputs = model.generate(
            **inputs,
            # max_new_tokens budgets only the continuation, so a long prompt
            # cannot use up the whole generation allowance.
            max_new_tokens=max_length,
            do_sample=True,
            top_p=0.9,
            top_k=40,
            temperature=0.7,
            repetition_penalty=1.5,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id
        )
        text = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Remove any leftover tag-like tokens such as "<...>"
        text = re.sub(r'<[^>]+>', '', text)

        # The decoded sequence starts with the prompt we sent; drop the
        # instruction so only the topic and its continuation remain.
        text = text.strip()
        if text.startswith(instruction):
            text = text[len(instruction):]

        # Keep only the first sentence
        first_sentence = text.split(".")[0].strip()
        if not first_sentence:
            return prompt
        return first_sentence + "."
    except Exception as e:
        logger.error(f"Error generando guion: {e}")
        return prompt  # Fall back to the original prompt

# Speech synthesis
async def text_to_speech(text, output_path, voice="es-ES-ElviraNeural"):
    """Synthesise ``text`` with edge-tts and save the audio to ``output_path``.

    Args:
        text: Text to narrate.
        output_path: Destination file for the generated audio.
        voice: Name of the edge-tts voice to use.

    Returns:
        True when the audio was saved, False when any error occurred
        (the error is logged, not raised).
    """
    try:
        await edge_tts.Communicate(text, voice).save(output_path)
    except Exception as err:
        logger.error(f"Error en TTS: {err}")
        return False
    else:
        return True

# Video download
def download_video_file(url, temp_dir):
    """Download ``url`` into ``temp_dir`` and return the local file path.

    Args:
        url: Direct link to the video file. A falsy value short-circuits.
        temp_dir: Existing directory where the file is written.

    Returns:
        Path of the downloaded ``.mp4`` file, or None when ``url`` is empty
        or the download fails (the error is logged, not raised).
    """
    if not url:
        return None

    output_path = None
    try:
        file_name = f"video_{datetime.now().strftime('%H%M%S%f')}.mp4"
        output_path = os.path.join(temp_dir, file_name)

        # The context manager releases the streamed connection even when
        # writing fails half-way through.
        with requests.get(url, stream=True, timeout=30) as response:
            # Without this, an HTTP error page would be saved as "video".
            response.raise_for_status()
            with open(output_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        return output_path
    except Exception as e:
        logger.error(f"Error descargando video: {e}")
        # Do not leave a truncated file behind for later steps to pick up.
        if output_path:
            try:
                os.remove(output_path)
            except OSError:
                pass
        return None

# Audio looping
def loop_audio_to_length(audio_clip, target_duration):
    """Return ``audio_clip`` trimmed or repeated to last ``target_duration``.

    Args:
        audio_clip: Clip exposing ``duration`` and ``subclip(start, end)``.
        target_duration: Desired length in seconds.

    Returns:
        A clip of length ``target_duration``: the original trimmed when it
        is long enough, otherwise the original repeated back-to-back and
        then trimmed.

    Raises:
        ValueError: If the clip has no positive duration and therefore
            cannot be repeated.
    """
    clip_duration = audio_clip.duration
    if not clip_duration or clip_duration <= 0:
        # Guards the division below against a zero/unknown duration.
        raise ValueError("La pista de audio no tiene una duración válida")

    if clip_duration >= target_duration:
        return audio_clip.subclip(0, target_duration)

    # Audio clips must be joined with the audio concatenation helper; the
    # video one expects video clips. Imported locally because only this
    # branch needs it.
    from moviepy.editor import concatenate_audioclips

    loops = int(target_duration / clip_duration) + 1
    return concatenate_audioclips([audio_clip] * loops).subclip(0, target_duration)

# Spanish stop words shared by both keyword-extraction strategies below
_SPANISH_STOP_WORDS = frozenset({
    "el", "la", "los", "las", "de", "del", "en", "y", "o", "a", "al", "que",
    "es", "un", "una", "unos", "unas", "con", "por", "para", "como", "pero",
    "más", "este", "esta", "estos", "estas", "ese", "esa", "esos", "esas",
    "sobre", "entre", "cuando", "donde", "desde", "hasta", "también", "muy",
    "sin", "son", "ser", "está", "están", "hay", "su", "sus", "se", "lo",
    "le", "les", "nos", "mi", "tu", "ya", "no", "si", "sí", "porque",
    "todo", "toda", "todos", "todas", "cada", "otro", "otra", "otros",
    "otras", "qué", "cómo",
})


# Robust keyword extraction
def extract_visual_keywords_from_script(script_text):
    """Pick up to three search keywords from ``script_text``.

    KeyBERT is tried first when the module-level ``kw_model`` is available.
    If it is missing, fails, or yields nothing, the most frequent words
    longer than three characters (excluding Spanish stop words) are used.

    Args:
        script_text: The script to analyse.

    Returns:
        A non-empty list of at most three keywords; ``["naturaleza"]`` when
        no candidate word is found.
    """
    # Lower-case and strip punctuation
    clean_text = re.sub(r'[^\w\sáéíóúñ]', '', script_text.lower())

    # Strategy 1: KeyBERT, if it was loaded
    if kw_model:
        try:
            keywords = kw_model.extract_keywords(
                clean_text,
                keyphrase_ngram_range=(1, 1),
                # An explicit list is passed because the underlying
                # vectorizer only knows "english" as a named stop list.
                stop_words=sorted(_SPANISH_STOP_WORDS),
                top_n=3
            )
            phrases = [kw[0].replace(" ", "+") for kw in keywords if kw and kw[0]]
            if phrases:
                return phrases
        except Exception as e:
            # Best effort: report the failure and use the fallback below.
            logger.warning(f"KeyBERT no pudo extraer palabras clave: {e}")

    # Strategy 2: word frequency (fallback)
    candidates = [
        word for word in clean_text.split()
        if len(word) > 3 and word not in _SPANISH_STOP_WORDS
    ]

    if not candidates:
        return ["naturaleza"]  # Default keyword

    # Keep the three most frequent candidates
    return [word for word, _ in Counter(candidates).most_common(3)]

# Main video-creation pipeline
def crear_video(prompt_type, input_text, musica_file=None):
    """Build a narrated video from a topic or a ready-made script.

    Steps: obtain the script, synthesise the narration, extract keywords,
    download matching Pexels clips, assemble them, optionally mix in
    background music, and render the result.

    Args:
        prompt_type: "Generar Guion con IA" to generate the script from
            ``input_text``; any other value uses ``input_text`` as the script.
        input_text: Topic or full script, depending on ``prompt_type``.
        musica_file: Optional path to a background-music file.

    Returns:
        Path to the rendered ``.mp4``. The file is created outside the
        working directory so it still exists after cleanup; the caller is
        responsible for it from then on.

    Raises:
        ValueError: If the script is empty, narration cannot be generated,
            or no usable footage is found.
        Exception: Any other failure is logged and re-raised.
    """
    logger.info(f"Iniciando creación de video: {prompt_type}")

    # 1. Generate the script or use the one provided
    if prompt_type == "Generar Guion con IA":
        guion = generate_script(input_text)
    else:
        guion = input_text

    logger.info(f"Guion: {guion[:100]}...")

    if not guion.strip():
        raise ValueError("El guion está vacío")

    # Working directory for intermediate files; removed in ``finally``
    temp_dir = tempfile.mkdtemp()
    open_clips = []     # file-backed clips that must be closed before cleanup
    output_path = None  # set once the final file has been created

    try:
        # 2. Narration audio
        voz_path = os.path.join(temp_dir, "voz.mp3")
        if not asyncio.run(text_to_speech(guion, voz_path)):
            raise ValueError("Error generando voz")

        audio_tts = AudioFileClip(voz_path)
        open_clips.append(audio_tts)
        audio_duration = audio_tts.duration

        # 3. Keywords
        keywords = extract_visual_keywords_from_script(guion)
        logger.info(f"Palabras clave: {keywords}")

        # 4. Search and download footage
        videos_data = []
        for keyword in keywords:
            videos_data.extend(buscar_videos_pexels(keyword, PEXELS_API_KEY, per_page=2) or [])

        video_paths = []
        for video in videos_data:
            candidates = [f for f in (video.get('video_files') or []) if f.get('link')]
            if not candidates:
                continue
            # Missing/null dimensions count as 0 instead of raising TypeError
            best_quality = max(
                candidates,
                key=lambda x: (x.get('width') or 0) * (x.get('height') or 0)
            )
            path = download_video_file(best_quality['link'], temp_dir)
            if path:
                video_paths.append(path)

        if not video_paths:
            raise ValueError("No se encontraron videos adecuados")

        # 5. Load clips (at most 10 s each) until the narration is covered
        clips = []
        current_duration = 0

        for path in video_paths:
            if current_duration >= audio_duration:
                break

            try:
                clip = VideoFileClip(path)
                open_clips.append(clip)
                usable_duration = min(clip.duration, 10)
                if usable_duration <= 0:
                    continue
                clips.append(clip.subclip(0, usable_duration))
                current_duration += usable_duration
            except Exception as e:
                logger.warning(f"Error procesando video: {e}")

        if not clips:
            raise ValueError("No hay clips válidos")

        # Repeat the footage when it is shorter than the narration so the
        # picture does not run out before the voice does.
        if current_duration < audio_duration:
            clips = clips * math.ceil(audio_duration / current_duration)

        video_base = concatenate_videoclips(clips, method="compose")

        # 6. Optional background music
        final_audio = audio_tts

        if musica_file:
            try:
                # Work on a private copy of the uploaded file
                music_path = os.path.join(temp_dir, "musica.mp3")
                shutil.copyfile(musica_file, music_path)

                musica_audio = AudioFileClip(music_path)
                open_clips.append(musica_audio)
                musica_loop = loop_audio_to_length(musica_audio, audio_duration)

                final_audio = CompositeAudioClip([
                    musica_loop.volumex(0.3),
                    audio_tts.volumex(1.0)
                ])
            except Exception as e:
                # Music is optional: keep going with narration only
                logger.warning(f"Error procesando música: {e}")

        # 7. Render the final video
        video_final = video_base.set_audio(final_audio).subclip(0, audio_duration)

        # The result must outlive temp_dir (deleted below), so it gets its
        # own temporary file outside that directory.
        fd, output_path = tempfile.mkstemp(prefix="video_final_", suffix=".mp4")
        os.close(fd)

        video_final.write_videofile(
            output_path,
            fps=24,
            threads=4,
            codec="libx264",
            audio_codec="aac",
            preset="medium",
            logger=None
        )

        return output_path

    except Exception as e:
        logger.error(f"Error creando video: {e}")
        # Do not leave a half-written result behind
        if output_path and os.path.exists(output_path):
            try:
                os.remove(output_path)
            except OSError:
                pass
        raise
    finally:
        # Close the file-backed clips first so their files can be removed
        for clip in open_clips:
            try:
                clip.close()
            except Exception as e:
                logger.debug(f"Error cerrando clip: {e}")
        shutil.rmtree(temp_dir, ignore_errors=True)

# Gradio callback that runs the pipeline
def run_app(prompt_type, prompt_ia, prompt_manual, musica_file):
    """Validate the form input, run ``crear_video`` and report the outcome.

    Args:
        prompt_type: Selected mode; "Generar Guion con IA" picks
            ``prompt_ia``, anything else picks ``prompt_manual``.
        prompt_ia: Topic text for AI script generation.
        prompt_manual: Full script typed by the user.
        musica_file: Optional path to a background-music file.

    Returns:
        A ``(video_path, status_message)`` tuple. ``video_path`` is None
        when the input is empty or video creation fails.
    """
    input_text = prompt_ia if prompt_type == "Generar Guion con IA" else prompt_manual

    # The selected field may be None as well as empty/blank; all three are
    # treated as "no text" instead of failing on ``None.strip()``.
    if not input_text or not input_text.strip():
        return None, "Por favor ingresa texto"

    try:
        video_path = crear_video(prompt_type, input_text, musica_file)
        return video_path, "✅ Video generado exitosamente"
    except ValueError as ve:
        # Expected, user-facing problems (empty script, no footage, ...)
        return None, f"⚠️ Error: {ve}"
    except Exception as e:
        return None, f"❌ Error crítico: {str(e)}"

# Gradio interface
with gr.Blocks(title="Generador de Videos con IA", theme="soft") as app:
    gr.Markdown("## 🎬 Generador Automático de Videos con IA")
    
    with gr.Tab("Generador de Video"):
        # Mode selector: generate the script with the model, or use the user's own
        with gr.Row():
            prompt_type = gr.Radio(
                ["Generar Guion con IA", "Usar Mi Guion"], 
                label="Método",
                value="Generar Guion con IA"
            )
        
        # Topic input, visible by default (matches the radio's default value)
        with gr.Column(visible=True) as ia_guion_column:
            prompt_ia = gr.Textbox(
                label="Tema para IA", 
                lines=2,
                placeholder="Ej: Un paisaje natural con montañas y ríos..."
            )
        
        # Full-script input, hidden until "Usar Mi Guion" is selected
        with gr.Column(visible=False) as manual_guion_column:
            prompt_manual = gr.Textbox(
                label="Tu Guion Completo", 
                lines=5,
                placeholder="Ej: En este video exploraremos los misterios del océano..."
            )
        
        # Optional background music, passed to the callback as a file path
        musica_input = gr.Audio(
            label="Música de fondo (opcional)",
            type="filepath"
        )
        
        boton = gr.Button("✨ Generar Video", variant="primary")
        
        # Outputs: the rendered video and a status line
        with gr.Column():
            salida_video = gr.Video(label="Video Generado", interactive=False)
            estado_mensaje = gr.Textbox(label="Estado", interactive=False)

    # Toggle column visibility: show only the column matching the selected mode
    prompt_type.change(
        lambda x: (gr.update(visible=x == "Generar Guion con IA"), 
                   gr.update(visible=x == "Usar Mi Guion")),
        inputs=prompt_type,
        outputs=[ia_guion_column, manual_guion_column]
    )

    # Generation logic: first clear the video and show a progress message,
    # then run the pipeline and display its result
    boton.click(
        lambda: (None, "⏳ Procesando... (puede tardar varios minutos)"),
        outputs=[salida_video, estado_mensaje],
        queue=False
    ).then(
        run_app,
        inputs=[prompt_type, prompt_ia, prompt_manual, musica_input],
        outputs=[salida_video, estado_mensaje]
    )

# When run as a script, serve the app on all network interfaces at port 7860
if __name__ == "__main__":
    app.launch(server_name="0.0.0.0", server_port=7860)