Spaces:
Runtime error
Update app.py
app.py
CHANGED
@@ -2,133 +2,243 @@ import os
 import asyncio
 import logging
 import tempfile
-import
-
-from
 import edge_tts
 import gradio as gr
 
-
-
-
-
-
-
-
-tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
-model = GPT2LMHeadModel.from_pretrained("gpt2").eval()
-
-def generate_script(prompt, max_length=300):
-    logger.info("Generando guion...")
-    inputs = tokenizer(prompt, return_tensors="pt", truncation=False)
-    with torch.no_grad():
-        outputs = model.generate(
-            **inputs,
-            max_length=max_length,
-            do_sample=True,
-            top_p=0.95,
-            top_k=60,
-            temperature=0.9,
-            pad_token_id=tokenizer.eos_token_id
-        )
-    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    logger.info(f"Guion generado, longitud: {len(text)} caracteres")
-    return text
-
-async def text_to_speech(text, voice="es-ES-ElviraNeural", output_path="voz.mp3"):
-    logger.info("Generando audio TTS...")
-    communicate = edge_tts.Communicate(text, voice)
-    await communicate.save(output_path)
-    logger.info(f"Audio guardado en {output_path}")
-
-def download_video_sample(url):
-    logger.info(f"Descargando video de ejemplo: {url}")
-    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
-    response = requests.get(url, stream=True)
-    for chunk in response.iter_content(chunk_size=1024*1024):
-        tmp.write(chunk)
-    tmp.close()
-    return tmp.name
-
-def loop_audio_to_length(audio_clip, target_duration):
-    if audio_clip.duration >= target_duration:
-        return audio_clip.subclip(0, target_duration)
-    loops = int(target_duration // audio_clip.duration) + 1
-    audios = [audio_clip] * loops
-    concatenated = concatenate_videoclips(audios, method="compose")
-    return concatenated.subclip(0, target_duration)
-
-def crear_video(prompt, musica_url=None):
-    # 1. Generate the script
-    guion = generate_script(prompt, max_length=300)
-
-    # 2. TTS
-    voz_archivo = "voz.mp3"
-    asyncio.run(text_to_speech(guion, output_path=voz_archivo))
-
-    # 3. Download sample videos (replace with your real search)
-    # Three example clips here (substitute your own URLs)
-    video_urls = [
-        "https://sample-videos.com/video123/mp4/240/big_buck_bunny_240p_1mb.mp4",
-        "https://sample-videos.com/video123/mp4/240/big_buck_bunny_240p_1mb.mp4",
-        "https://sample-videos.com/video123/mp4/240/big_buck_bunny_240p_1mb.mp4"
-    ]
-
-    clips = []
-    for url in video_urls:
-        video_path = download_video_sample(url)
-        clip = VideoFileClip(video_path).subclip(0, 10)  # at most 10 seconds
-        clips.append(clip)
-
-    # 4. Concatenate the videos
-    video_final = concatenate_videoclips(clips, method="compose")
-
-    # 5. Load the TTS audio
-    audio_tts = AudioFileClip(voz_archivo)
-
-    # 6. Loop background music if one is provided
-    if musica_url:
-        musica_path = download_video_sample(musica_url)
-        musica_audio = AudioFileClip(musica_path)
-        # Loop the music to the voice duration
-        musica_loop = loop_audio_to_length(musica_audio, audio_tts.duration)
-        # Mix the TTS audio with the music
-        mezcla = CompositeAudioClip([musica_loop.volumex(0.3), audio_tts.volumex(1.0)])
-    else:
-        mezcla = audio_tts
-
-    # 7. Attach the audio to the video
-    video_final = video_final.set_audio(mezcla).subclip(0, audio_tts.duration)
-
-    # 8. Write the final video
-    output_path = f"video_output_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4"
-    video_final.write_videofile(output_path, fps=24, threads=2, logger=None)
-
-    # 9. Clean up temporary files
-    os.remove(voz_archivo)
-    for clip in clips:
-        clip.close()
 
-
 
-def
-
     try:
-
-
-
     except Exception as e:
-        logger.error(f"Error
-
 
-
-
-
-
-
-
 
-
 
 if __name__ == "__main__":
-
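The removed import lines near the top of this hunk are truncated. Based only on the names the deleted code references (requests.get, torch.no_grad, datetime.now, the GPT-2 classes, and the MoviePy clip helpers), they presumably covered roughly the following; the exact grouping and order are assumptions:

# Hedged reconstruction of the truncated imports in the removed code; grouping is assumed.
import requests
import torch
from datetime import datetime
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from moviepy.editor import (VideoFileClip, AudioFileClip,
                            CompositeAudioClip, concatenate_videoclips)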
 import asyncio
 import logging
 import tempfile
+import time
+import shutil
+from datetime import datetime, timedelta
+from moviepy.editor import VideoFileClip, AudioFileClip, CompositeAudioClip, ColorClip, TextClip, CompositeVideoClip
 import edge_tts
 import gradio as gr
 
+# Advanced logging configuration
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.FileHandler("video_generator.log"),
+        logging.StreamHandler()
+    ]
+)
+logger = logging.getLogger(__name__)
 
+# Custom temporary directory with automatic cleanup
+TEMP_DIR = "temp_media"
+os.makedirs(TEMP_DIR, exist_ok=True)
+
+def clean_old_files():
+    """Delete temporary files older than 24 hours"""
+    now = time.time()
+    cutoff = now - (24 * 3600)
+
+    for filename in os.listdir(TEMP_DIR):
+        file_path = os.path.join(TEMP_DIR, filename)
+        if os.path.isfile(file_path):
+            file_time = os.path.getmtime(file_path)
+            if file_time < cutoff:
+                try:
+                    os.remove(file_path)
+                    logger.info(f"Eliminado archivo antiguo: {filename}")
+                except Exception as e:
+                    logger.error(f"Error al eliminar {filename}: {e}")
+
+async def text_to_speech(text, voice="es-ES-ElviraNeural"):
+    """Convert text to speech and save it to a temporary file"""
+    clean_old_files()
+    output_path = os.path.join(TEMP_DIR, f"tts_{datetime.now().strftime('%Y%m%d%H%M%S')}.mp3")
+
+    try:
+        logger.info(f"Generando TTS para texto de {len(text)} caracteres")
+        communicate = edge_tts.Communicate(text, voice)
+        await communicate.save(output_path)
+        return output_path
+    except Exception as e:
+        logger.error(f"Error en TTS: {e}")
+        raise
 
+def create_audio_loop(audio_path, target_duration):
+    """Loop an audio clip until it reaches the target duration"""
     try:
+        audio = AudioFileClip(audio_path)
+        if audio.duration >= target_duration:
+            return audio.subclip(0, target_duration)
+
+        loops_needed = int(target_duration // audio.duration) + 1
+        clips = [audio] * loops_needed
+        looped_audio = concatenate_audioclips(clips).subclip(0, target_duration)
+        return looped_audio
     except Exception as e:
+        logger.error(f"Error al crear loop de audio: {e}")
+        raise
 
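Note that create_audio_loop calls concatenate_audioclips, which is not among the names imported from moviepy.editor earlier in this diff, so the background-music path would likely fail with a NameError at runtime. A minimal, self-contained sketch of the same loop helper with the import made explicit (MoviePy 1.x API assumed; the function name here is hypothetical):

# Sketch: audio looping with the concatenate_audioclips import made explicit.
from moviepy.editor import AudioFileClip, concatenate_audioclips

def loop_audio(audio_path, target_duration):
    """Repeat an audio file until it covers target_duration seconds."""
    audio = AudioFileClip(audio_path)
    if audio.duration >= target_duration:
        return audio.subclip(0, target_duration)
    loops_needed = int(target_duration // audio.duration) + 1
    return concatenate_audioclips([audio] * loops_needed).subclip(0, target_duration)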
+def create_video_with_text(text, duration, size=(1280, 720)):
+    """Create a simple video with centered text"""
+    try:
+        # Video background
+        bg_clip = ColorClip(size, color=(30, 30, 30), duration=duration)
+
+        # Text with automatic size adjustment
+        text_clip = TextClip(
+            text,
+            fontsize=28,
+            color='white',
+            font='Arial-Bold',
+            size=(size[0]-100, size[1]-100),
+            method='caption',
+            align='center'
+        ).set_position('center').set_duration(duration)
+
+        return CompositeVideoClip([bg_clip, text_clip])
+    except Exception as e:
+        logger.error(f"Error al crear video con texto: {e}")
+        raise
 
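One caveat for create_video_with_text: in MoviePy 1.x, TextClip with method='caption' renders through ImageMagick, and a missing convert binary is a plausible cause of this Space's runtime error if that system package is not installed in the container. A fallback that avoids TextClip entirely is sketched below (background only, with the content carried by the audio track; the function name is hypothetical):

# Sketch: background-only clip for environments without ImageMagick; the size and
# color mirror the defaults used in the committed create_video_with_text.
from moviepy.editor import ColorClip

def create_plain_background(duration, size=(1280, 720)):
    """Solid dark background used when TextClip/ImageMagick is unavailable."""
    return ColorClip(size, color=(30, 30, 30), duration=duration)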
+async def generate_video_content(text, background_music=None, use_tts=True):
+    """Main function that generates the video content"""
+    try:
+        clean_old_files()
+
+        # 1. Process the main audio
+        if use_tts:
+            voice_path = await text_to_speech(text)
+            main_audio = AudioFileClip(voice_path)
+        else:
+            # Without TTS, create a silent audio track of the estimated duration
+            estimated_duration = max(5, len(text.split()) / 3)  # Word-count-based estimate
+            main_audio = AudioFileClip(lambda t: 0, duration=estimated_duration)
+
+        duration = main_audio.duration
+
+        # 2. Process the background music
+        final_audio = main_audio
+        if background_music:
+            try:
+                bg_music_loop = create_audio_loop(background_music, duration).volumex(0.2)
+                final_audio = CompositeAudioClip([bg_music_loop, main_audio])
+            except Exception as e:
+                logger.error(f"Error al procesar música de fondo, continuando sin ella: {e}")
+
+        # 3. Create the video
+        video_clip = create_video_with_text(text, duration)
+        video_clip = video_clip.set_audio(final_audio)
+
+        # 4. Save the result
+        output_path = os.path.join(TEMP_DIR, f"video_{datetime.now().strftime('%Y%m%d%H%M%S')}.mp4")
+        video_clip.write_videofile(
+            output_path,
+            fps=24,
+            threads=4,
+            codec='libx264',
+            audio_codec='aac',
+            preset='fast',
+            logger=None
+        )
+
+        return output_path
+    except Exception as e:
+        logger.error(f"Error en generate_video_content: {e}")
+        raise
+    finally:
+        # Close all clips to free resources
+        if 'main_audio' in locals():
+            main_audio.close()
+        if 'bg_music_loop' in locals():
+            bg_music_loop.close()
+        if 'video_clip' in locals():
+            video_clip.close()
 
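In the non-TTS branch above, AudioFileClip(lambda t: 0, duration=...) passes a callable where MoviePy's AudioFileClip expects a media file path, so that branch would likely fail at runtime. A silent placeholder track is normally built with AudioClip, which does take a frame function; a minimal sketch under that assumption (the helper name is hypothetical):

# Sketch: silent placeholder audio via MoviePy's AudioClip (frame function + explicit fps).
from moviepy.editor import AudioClip

def silent_audio(duration_seconds, fps=44100):
    """Mono silence of the given length; 0 * t keeps the frame shape aligned with t."""
    return AudioClip(lambda t: 0 * t, duration=duration_seconds, fps=fps)

# Usage mirroring the estimate in the committed code:
# main_audio = silent_audio(max(5, len(text.split()) / 3))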
+# Improved Gradio interface
+with gr.Blocks(title="Generador de Videos Avanzado", theme="soft") as app:
+    gr.Markdown("""
+    # 🎥 Generador de Videos Automático
+    Crea videos con voz sintetizada y música de fondo
+    """)
+
+    with gr.Tab("Configuración Principal"):
+        with gr.Row():
+            with gr.Column():
+                text_input = gr.Textbox(
+                    label="Texto del Video",
+                    placeholder="Escribe aquí el contenido de tu video...",
+                    lines=5,
+                    max_lines=20
+                )
+
+                with gr.Accordion("Opciones Avanzadas", open=False):
+                    use_tts = gr.Checkbox(
+                        label="Usar Texto a Voz (TTS)",
+                        value=True
+                    )
+                    voice_selector = gr.Dropdown(
+                        label="Voz TTS",
+                        choices=["es-ES-ElviraNeural", "es-MX-DaliaNeural", "es-US-AlonsoNeural"],
+                        value="es-ES-ElviraNeural",
+                        visible=True
+                    )
+
+                    bg_music = gr.Audio(
+                        label="Música de Fondo",
+                        type="filepath",
+                        sources=["upload"],
+                        format="mp3"
+                    )
+
+                generate_btn = gr.Button("Generar Video", variant="primary")
+
+            with gr.Column():
+                video_output = gr.Video(
+                    label="Video Resultante",
+                    format="mp4",
+                    interactive=False
+                )
+                status_output = gr.Textbox(
+                    label="Estado",
+                    interactive=False
+                )
+
+    # Logic to show/hide the voice selector
+    use_tts.change(
+        fn=lambda x: gr.Dropdown(visible=x),
+        inputs=use_tts,
+        outputs=voice_selector
+    )
+
+    # Main generation function
+    def generate_video(text, use_tts, voice, bg_music):
+        try:
+            if not text.strip():
+                raise ValueError("Por favor ingresa un texto para el video")
+
+            # Initial cleanup
+            clean_old_files()
+
+            # Video generation
+            video_path = asyncio.run(
+                generate_video_content(
+                    text=text,
+                    use_tts=use_tts,
+                    background_music=bg_music
+                )
+            )
+
+            return video_path, "✅ Video generado con éxito"
+        except Exception as e:
+            logger.error(f"Error en la generación: {str(e)}")
+            return None, f"❌ Error: {str(e)}"
+
+    generate_btn.click(
+        fn=generate_video,
+        inputs=[text_input, use_tts, voice_selector, bg_music],
+        outputs=[video_output, status_output]
+    )
 
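A wiring note on the block above: generate_video receives the dropdown value as voice but never forwards it, so changing the voice in the UI has no effect and the TTS call always uses the default es-ES-ElviraNeural. For the selector to matter, the voice string would have to travel through generate_video_content into text_to_speech. As a self-contained illustration of the edge-tts end of that path (the function name and output filename here are hypothetical; the Communicate/save calls mirror the committed text_to_speech):

# Sketch: the voice string chosen in the Gradio dropdown is what edge-tts ultimately needs.
import asyncio
import edge_tts

async def synthesize(text: str, voice: str, output_path: str = "demo.mp3") -> str:
    """Save TTS audio for `text` using the requested edge-tts voice."""
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(output_path)
    return output_path

if __name__ == "__main__":
    asyncio.run(synthesize("Hola, este es un ejemplo.", "es-MX-DaliaNeural"))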
 if __name__ == "__main__":
+    # Initial cleanup of old files
+    clean_old_files()
+
+    # Server configuration
+    app.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_error=True,
+        share=False,
+        favicon_path=None
+    )