Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -9,7 +9,8 @@ import gradio as gr
|
|
9 |
import torch
|
10 |
from transformers import GPT2Tokenizer, GPT2LMHeadModel
|
11 |
from keybert import KeyBERT
|
12 |
-
|
|
|
13 |
import re
|
14 |
import math
|
15 |
import shutil
|
@@ -101,7 +102,7 @@ def generate_script(prompt, max_length=150):
|
|
101 |
logger.info(f"Generando guión | Prompt: '{prompt[:50]}...' | Longitud máxima: {max_length}")
|
102 |
if not tokenizer or not model:
|
103 |
logger.warning("Modelos GPT-2 no disponibles - Usando prompt original como guion.")
|
104 |
-
return prompt.strip()
|
105 |
|
106 |
# Frase de instrucción que se le da a la IA
|
107 |
instruction_phrase_start = "Escribe un guion corto, interesante y coherente sobre:"
|
@@ -342,7 +343,8 @@ def extract_visual_keywords_from_script(script_text):
|
|
342 |
|
343 |
logger.debug("Extrayendo palabras clave con método simple...")
|
344 |
words = clean_text.lower().split()
|
345 |
-
stop_words = {"el", "la", "los", "las", "de", "en", "y", "a", "que", "es", "un", "una", "con", "para", "del", "al", "por", "su", "sus", "se", "lo", "le", "me", "te", "nos", "os", "les", "mi", "tu",
|
|
|
346 |
"nuestros", "vuestros", "estas", "esas", "aquellas", "si", "no", "más", "menos", "sin", "sobre", "bajo", "entre", "hasta", "desde", "durante", "mediante", "según", "versus", "via", "cada", "todo", "todos", "toda", "todas", "poco", "pocos", "poca", "pocas", "mucho", "muchos", "mucha", "muchas", "varios", "varias", "otro", "otros", "otra", "otras", "mismo", "misma", "mismos", "mismas", "tan", "tanto", "tanta", "tantos", "tantas", "tal", "tales", "cual", "cuales", "cuyo", "cuya", "cuyos", "cuyas", "quien", "quienes", "cuan", "cuanto", "cuanta", "cuantos", "cuantas", "como", "donde", "cuando", "porque", "aunque", "mientras", "siempre", "nunca", "jamás", "muy", "casi", "solo", "solamente", "incluso", "apenas", "quizás", "tal vez", "acaso", "claro", "cierto", "obvio", "evidentemente", "realmente", "simplemente", "generalmente", "especialmente", "principalmente", "posiblemente", "probablemente", "difícilmente", "fácilmente", "rápidamente", "lentamente", "bien", "mal", "mejor", "peor", "arriba", "abajo", "adelante", "atrás", "cerca", "lejos", "dentro", "fuera", "encima", "debajo", "frente", "detrás", "antes", "después", "luego", "pronto", "tarde", "todavía", "ya", "aun", "aún", "quizá"}
|
347 |
|
348 |
valid_words = [word for word in words if len(word) > 3 and word not in stop_words]
|
@@ -371,8 +373,8 @@ def crear_video(prompt_type, input_text, musica_file=None):
|
|
371 |
|
372 |
audio_tts_original = None
|
373 |
musica_audio_original = None
|
374 |
-
audio_tts = None
|
375 |
-
musica_audio = None
|
376 |
video_base = None
|
377 |
video_final = None
|
378 |
source_clips = []
|
@@ -398,7 +400,7 @@ def crear_video(prompt_type, input_text, musica_file=None):
|
|
398 |
# 2. Generar audio de voz
|
399 |
logger.info("Generando audio de voz...")
|
400 |
voz_path = os.path.join(temp_dir_intermediate, "voz.mp3")
|
401 |
-
#
|
402 |
if not asyncio.run(text_to_speech(guion, voz_path, voice="es-ES-JuanNeural")):
|
403 |
logger.error("Fallo en generación de voz")
|
404 |
raise ValueError("Error generando voz a partir del guion.")
|
@@ -407,14 +409,14 @@ def crear_video(prompt_type, input_text, musica_file=None):
|
|
407 |
audio_tts_original = AudioFileClip(voz_path)
|
408 |
|
409 |
if audio_tts_original.reader is None or audio_tts_original.duration is None or audio_tts_original.duration <= 0:
|
410 |
-
logger.critical("
|
411 |
try: audio_tts_original.close()
|
412 |
except: pass
|
413 |
audio_tts_original = None
|
414 |
-
raise ValueError("
|
415 |
|
416 |
-
audio_tts = audio_tts_original
|
417 |
-
audio_duration = audio_tts_original.duration
|
418 |
logger.info(f"Duración audio voz: {audio_duration:.2f} segundos")
|
419 |
|
420 |
if audio_duration < 1.0:
|
@@ -517,7 +519,7 @@ def crear_video(prompt_type, input_text, musica_file=None):
|
|
517 |
source_clips.append(clip)
|
518 |
|
519 |
if clip.reader is None or clip.duration is None or clip.duration <= 0:
|
520 |
-
logger.warning(f"[{i+1}/{len(video_paths)}] Clip fuente {path} parece inválido (reader is None o
|
521 |
continue
|
522 |
|
523 |
remaining_needed = audio_duration - current_duration
|
@@ -588,8 +590,7 @@ def crear_video(prompt_type, input_text, musica_file=None):
|
|
588 |
num_full_repeats = int(audio_duration // final_video_base.duration)
|
589 |
remaining_duration = audio_duration % final_video_base.duration
|
590 |
|
591 |
-
repeated_clips_list = [final_video_base] * num_full_repeats
|
592 |
-
|
593 |
if remaining_duration > 0:
|
594 |
try:
|
595 |
remaining_clip = final_video_base.subclip(0, remaining_duration)
|
@@ -627,7 +628,7 @@ def crear_video(prompt_type, input_text, musica_file=None):
|
|
627 |
finally:
|
628 |
if 'repeated_clips_list' in locals():
|
629 |
for clip in repeated_clips_list:
|
630 |
-
if clip is not final_video_base:
|
631 |
try: clip.close()
|
632 |
except: pass
|
633 |
|
@@ -685,7 +686,7 @@ def crear_video(prompt_type, input_text, musica_file=None):
|
|
685 |
except: pass
|
686 |
musica_audio_original = None
|
687 |
else:
|
688 |
-
#
|
689 |
musica_audio_looped = loop_audio_to_length(musica_audio_original, video_base.duration)
|
690 |
logger.debug(f"Música ajustada a duración del video: {musica_audio_looped.duration:.2f}s")
|
691 |
|
@@ -697,11 +698,10 @@ def crear_video(prompt_type, input_text, musica_file=None):
|
|
697 |
|
698 |
|
699 |
if musica_audio_looped:
|
700 |
-
#
|
701 |
-
# CAMBIO: Usar volumex(1.0) para la voz (ya estaba en 1.0, confirmamos)
|
702 |
composite_audio = CompositeAudioClip([
|
703 |
-
musica_audio_looped.volumex(0.2),
|
704 |
-
audio_tts_original.volumex(1.0)
|
705 |
])
|
706 |
|
707 |
if composite_audio.duration is None or composite_audio.duration <= 0:
|
@@ -712,7 +712,7 @@ def crear_video(prompt_type, input_text, musica_file=None):
|
|
712 |
else:
|
713 |
logger.info("Mezcla de audio completada (voz + música).")
|
714 |
final_audio = composite_audio
|
715 |
-
musica_audio = musica_audio_looped
|
716 |
|
717 |
except Exception as e:
|
718 |
logger.warning(f"Error procesando música de fondo: {str(e)}", exc_info=True)
|
@@ -726,7 +726,7 @@ def crear_video(prompt_type, input_text, musica_file=None):
|
|
726 |
try:
|
727 |
if final_audio.duration > video_base.duration:
|
728 |
trimmed_final_audio = final_audio.subclip(0, video_base.duration)
|
729 |
-
if trimmed_final_audio
|
730 |
logger.warning("Audio final recortado es inválido. Usando audio final original.")
|
731 |
try: trimmed_final_audio.close()
|
732 |
except: pass
|
@@ -789,50 +789,44 @@ def crear_video(prompt_type, input_text, musica_file=None):
|
|
789 |
except Exception as e:
|
790 |
logger.warning(f"Error cerrando segmento de video en finally: {str(e)}")
|
791 |
|
792 |
-
# Cerrar
|
793 |
-
|
794 |
-
|
795 |
-
|
796 |
-
|
797 |
-
|
798 |
-
except Exception as e:
|
799 |
-
logger.warning(f"Error cerrando musica_audio (procesada) en finally: {str(e)}")
|
800 |
-
|
801 |
-
if musica_audio_original is not None and musica_audio_original is not musica_audio:
|
802 |
-
try:
|
803 |
-
musica_audio_original.close()
|
804 |
-
except Exception as e:
|
805 |
-
logger.warning(f"Error cerrando musica_audio_original en finally: {str(e)}")
|
806 |
-
|
807 |
-
# audio_tts currently only holds audio_tts_original, but keep structure
|
808 |
-
if audio_tts is not None and audio_tts is not audio_tts_original:
|
809 |
-
try:
|
810 |
-
audio_tts.close()
|
811 |
-
except Exception as e:
|
812 |
-
logger.warning(f"Error cerrando audio_tts (procesada) en finally: {str(e)}")
|
813 |
|
814 |
-
|
815 |
-
|
816 |
-
|
817 |
-
|
818 |
-
|
819 |
|
|
|
|
|
|
|
|
|
|
|
|
|
820 |
|
821 |
-
|
822 |
-
|
823 |
-
|
824 |
-
|
825 |
-
|
826 |
-
logger.warning(f"Error cerrando video_final en finally: {str(e)}")
|
827 |
-
elif video_base is not None and video_base is not video_final: # Asegurarse de que video_base no es el mismo objeto que video_final
|
828 |
-
try:
|
829 |
-
video_base.close()
|
830 |
-
except Exception as e:
|
831 |
-
logger.warning(f"Error cerrando video_base en finally: {str(e)}")
|
832 |
|
833 |
|
834 |
-
|
835 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
836 |
|
837 |
# Limpiar archivos intermedios, pero NO el archivo de video final
|
838 |
if temp_dir_intermediate and os.path.exists(temp_dir_intermediate):
|
|
|
9 |
import torch
|
10 |
from transformers import GPT2Tokenizer, GPT2LMHeadModel
|
11 |
from keybert import KeyBERT
|
12 |
+
# CORRECCIÓN CRÍTICA: Eliminar 'concatenate_videoclip' (singular) de la importación
|
13 |
+
from moviepy.editor import VideoFileClip, concatenate_videoclips, AudioFileClip, CompositeAudioClip, concatenate_audioclips, AudioClip
|
14 |
import re
|
15 |
import math
|
16 |
import shutil
|
|
|
102 |
logger.info(f"Generando guión | Prompt: '{prompt[:50]}...' | Longitud máxima: {max_length}")
|
103 |
if not tokenizer or not model:
|
104 |
logger.warning("Modelos GPT-2 no disponibles - Usando prompt original como guion.")
|
105 |
+
return prompt.strip() # Return original prompt clean
|
106 |
|
107 |
# Frase de instrucción que se le da a la IA
|
108 |
instruction_phrase_start = "Escribe un guion corto, interesante y coherente sobre:"
|
|
|
343 |
|
344 |
logger.debug("Extrayendo palabras clave con método simple...")
|
345 |
words = clean_text.lower().split()
|
346 |
+
stop_words = {"el", "la", "los", "las", "de", "en", "y", "a", "que", "es", "un", "una", "con", "para", "del", "al", "por", "su", "sus", "se", "lo", "le", "me", "te", "nos", "os", "les", "mi", "tu", # Añadir stop words aquí si es necesario, la lista estaba incompleta en la versión anterior
|
347 |
+
"nuestro", "vuestro", "este", "ese", "aquel", "esta", "esa", "aquella", "esto", "eso", "aquello", "mis", "tus",
|
348 |
"nuestros", "vuestros", "estas", "esas", "aquellas", "si", "no", "más", "menos", "sin", "sobre", "bajo", "entre", "hasta", "desde", "durante", "mediante", "según", "versus", "via", "cada", "todo", "todos", "toda", "todas", "poco", "pocos", "poca", "pocas", "mucho", "muchos", "mucha", "muchas", "varios", "varias", "otro", "otros", "otra", "otras", "mismo", "misma", "mismos", "mismas", "tan", "tanto", "tanta", "tantos", "tantas", "tal", "tales", "cual", "cuales", "cuyo", "cuya", "cuyos", "cuyas", "quien", "quienes", "cuan", "cuanto", "cuanta", "cuantos", "cuantas", "como", "donde", "cuando", "porque", "aunque", "mientras", "siempre", "nunca", "jamás", "muy", "casi", "solo", "solamente", "incluso", "apenas", "quizás", "tal vez", "acaso", "claro", "cierto", "obvio", "evidentemente", "realmente", "simplemente", "generalmente", "especialmente", "principalmente", "posiblemente", "probablemente", "difícilmente", "fácilmente", "rápidamente", "lentamente", "bien", "mal", "mejor", "peor", "arriba", "abajo", "adelante", "atrás", "cerca", "lejos", "dentro", "fuera", "encima", "debajo", "frente", "detrás", "antes", "después", "luego", "pronto", "tarde", "todavía", "ya", "aun", "aún", "quizá"}
|
349 |
|
350 |
valid_words = [word for word in words if len(word) > 3 and word not in stop_words]
|
|
|
373 |
|
374 |
audio_tts_original = None
|
375 |
musica_audio_original = None
|
376 |
+
audio_tts = None
|
377 |
+
musica_audio = None
|
378 |
video_base = None
|
379 |
video_final = None
|
380 |
source_clips = []
|
|
|
400 |
# 2. Generar audio de voz
|
401 |
logger.info("Generando audio de voz...")
|
402 |
voz_path = os.path.join(temp_dir_intermediate, "voz.mp3")
|
403 |
+
# Usar voz de Juan
|
404 |
if not asyncio.run(text_to_speech(guion, voz_path, voice="es-ES-JuanNeural")):
|
405 |
logger.error("Fallo en generación de voz")
|
406 |
raise ValueError("Error generando voz a partir del guion.")
|
|
|
409 |
audio_tts_original = AudioFileClip(voz_path)
|
410 |
|
411 |
if audio_tts_original.reader is None or audio_tts_original.duration is None or audio_tts_original.duration <= 0:
|
412 |
+
logger.critical("Clip de audio TTS inicial es inválido (reader is None o duración <= 0).")
|
413 |
try: audio_tts_original.close()
|
414 |
except: pass
|
415 |
audio_tts_original = None
|
416 |
+
raise ValueError("Audio de voz generado es inválido.")
|
417 |
|
418 |
+
audio_tts = audio_tts_original # Usar el clip TTS válido original para la mezcla
|
419 |
+
audio_duration = audio_tts_original.duration # Usar duración original para la longitud del video
|
420 |
logger.info(f"Duración audio voz: {audio_duration:.2f} segundos")
|
421 |
|
422 |
if audio_duration < 1.0:
|
|
|
519 |
source_clips.append(clip)
|
520 |
|
521 |
if clip.reader is None or clip.duration is None or clip.duration <= 0:
|
522 |
+
logger.warning(f"[{i+1}/{len(video_paths)}] Clip fuente {path} parece inválido (reader is None o duración <= 0). Saltando.")
|
523 |
continue
|
524 |
|
525 |
remaining_needed = audio_duration - current_duration
|
|
|
590 |
num_full_repeats = int(audio_duration // final_video_base.duration)
|
591 |
remaining_duration = audio_duration % final_video_base.duration
|
592 |
|
593 |
+
repeated_clips_list = [final_video_base] * num_full_repeats # Lista contiene duplicados del mismo objeto clip
|
|
|
594 |
if remaining_duration > 0:
|
595 |
try:
|
596 |
remaining_clip = final_video_base.subclip(0, remaining_duration)
|
|
|
628 |
finally:
|
629 |
if 'repeated_clips_list' in locals():
|
630 |
for clip in repeated_clips_list:
|
631 |
+
if clip is not final_video_base: # No cerrar si es el clip final
|
632 |
try: clip.close()
|
633 |
except: pass
|
634 |
|
|
|
686 |
except: pass
|
687 |
musica_audio_original = None
|
688 |
else:
|
689 |
+
# Usar la duración correcta del video base para loopear la música
|
690 |
musica_audio_looped = loop_audio_to_length(musica_audio_original, video_base.duration)
|
691 |
logger.debug(f"Música ajustada a duración del video: {musica_audio_looped.duration:.2f}s")
|
692 |
|
|
|
698 |
|
699 |
|
700 |
if musica_audio_looped:
|
701 |
+
# Usar la música loopeada y el audio TTS original para la composición
|
|
|
702 |
composite_audio = CompositeAudioClip([
|
703 |
+
musica_audio_looped.volumex(0.2), # Volumen 20% para música
|
704 |
+
audio_tts_original.volumex(1.0) # Volumen 100% para voz
|
705 |
])
|
706 |
|
707 |
if composite_audio.duration is None or composite_audio.duration <= 0:
|
|
|
712 |
else:
|
713 |
logger.info("Mezcla de audio completada (voz + música).")
|
714 |
final_audio = composite_audio
|
715 |
+
musica_audio = musica_audio_looped # Asignar para limpieza
|
716 |
|
717 |
except Exception as e:
|
718 |
logger.warning(f"Error procesando música de fondo: {str(e)}", exc_info=True)
|
|
|
726 |
try:
|
727 |
if final_audio.duration > video_base.duration:
|
728 |
trimmed_final_audio = final_audio.subclip(0, video_base.duration)
|
729 |
+
if trimmed_final_audio is None or trimmed_final_audio.duration <= 0:
|
730 |
logger.warning("Audio final recortado es inválido. Usando audio final original.")
|
731 |
try: trimmed_final_audio.close()
|
732 |
except: pass
|
|
|
789 |
except Exception as e:
|
790 |
logger.warning(f"Error cerrando segmento de video en finally: {str(e)}")
|
791 |
|
792 |
+
# Cerrar clips de audio en orden: música loopeada, música original (si es diferente), TTS original
|
793 |
+
if musica_audio is not None: # musica_audio holds the potentially looped clip
|
794 |
+
try:
|
795 |
+
musica_audio.close()
|
796 |
+
except Exception as e:
|
797 |
+
logger.warning(f"Error cerrando musica_audio (procesada) en finally: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
798 |
|
799 |
+
if musica_audio_original is not None and musica_audio_original is not musica_audio:
|
800 |
+
try:
|
801 |
+
musica_audio_original.close()
|
802 |
+
except Exception as e:
|
803 |
+
logger.warning(f"Error cerrando musica_audio_original en finally: {str(e)}")
|
804 |
|
805 |
+
# audio_tts actualmente solo contiene audio_tts_original, pero se mantiene la estructura
|
806 |
+
if audio_tts is not None and audio_tts is not audio_tts_original:
|
807 |
+
try:
|
808 |
+
audio_tts.close()
|
809 |
+
except Exception as e:
|
810 |
+
logger.warning(f"Error cerrando audio_tts (procesada) en finally: {str(e)}")
|
811 |
|
812 |
+
if audio_tts_original is not None:
|
813 |
+
try:
|
814 |
+
audio_tts_original.close()
|
815 |
+
except Exception as e:
|
816 |
+
logger.warning(f"Error cerrando audio_tts_original en finally: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
817 |
|
818 |
|
819 |
+
# Cerrar clips de video en orden: video_final (debería cerrar sus componentes), luego video_base (si es diferente de video_final)
|
820 |
+
if video_final is not None:
|
821 |
+
try:
|
822 |
+
video_final.close()
|
823 |
+
except Exception as e:
|
824 |
+
logger.warning(f"Error cerrando video_final en finally: {str(e)}")
|
825 |
+
elif video_base is not None and video_base is not video_final: # Asegurarse de que video_base no es el mismo objeto que video_final
|
826 |
+
try:
|
827 |
+
video_base.close()
|
828 |
+
except Exception as e:
|
829 |
+
logger.warning(f"Error cerrando video_base en finally: {str(e)}")
|
830 |
|
831 |
# Limpiar archivos intermedios, pero NO el archivo de video final
|
832 |
if temp_dir_intermediate and os.path.exists(temp_dir_intermediate):
|