Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ import tempfile
|
|
5 |
import requests
|
6 |
from datetime import datetime
|
7 |
import edge_tts
|
|
|
8 |
import gradio as gr
|
9 |
import torch
|
10 |
from transformers import GPT2Tokenizer, GPT2LMHeadModel
|
@@ -104,15 +105,15 @@ def get_voice_choices():
|
|
104 |
|
105 |
# Obtener las voces al inicio del script
|
106 |
AVAILABLE_VOICES = get_voice_choices()
|
107 |
-
DEFAULT_VOICE_ID = "es-
|
108 |
DEFAULT_VOICE_NAME = DEFAULT_VOICE_ID
|
109 |
for text, voice_id in AVAILABLE_VOICES:
|
110 |
if voice_id == DEFAULT_VOICE_ID:
|
111 |
DEFAULT_VOICE_NAME = text
|
112 |
break
|
113 |
if DEFAULT_VOICE_ID not in [v[1] for v in AVAILABLE_VOICES]:
|
114 |
-
DEFAULT_VOICE_ID = AVAILABLE_VOICES[0][1] if AVAILABLE_VOICES else "
|
115 |
-
DEFAULT_VOICE_NAME = AVAILABLE_VOICES[0][0] if AVAILABLE_VOICES else "
|
116 |
logger.info(f"Voz por defecto seleccionada (ID): {DEFAULT_VOICE_ID}")
|
117 |
|
118 |
# Clave API de Pexels
|
@@ -243,12 +244,22 @@ async def text_to_speech(text, output_path, voice):
|
|
243 |
communicate = edge_tts.Communicate(text, voice)
|
244 |
await communicate.save(output_path)
|
245 |
if os.path.exists(output_path) and os.path.getsize(output_path) > 100:
|
246 |
-
logger.info(f"Audio guardado exitosamente en: {output_path}")
|
247 |
return True
|
248 |
-
logger.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
249 |
return False
|
250 |
except Exception as e:
|
251 |
-
logger.error(f"Error en
|
252 |
return False
|
253 |
|
254 |
def download_video_file(url, temp_dir):
|
@@ -364,22 +375,41 @@ async def crear_video_async(prompt_type, input_text, selected_voice, musica_file
|
|
364 |
|
365 |
# 2. Generar audio de voz
|
366 |
voz_path = os.path.join(temp_dir_intermediate, "voz.mp3")
|
367 |
-
tts_voices_to_try = [selected_voice, "es-MX-DaliaNeural"]
|
368 |
tts_success = False
|
|
|
|
|
|
|
369 |
|
370 |
for current_voice in tts_voices_to_try:
|
371 |
logger.info(f"Intentando TTS con voz: {current_voice}")
|
372 |
try:
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
378 |
except Exception as e:
|
379 |
logger.error(f"Error en TTS con voz '{current_voice}': {str(e)}")
|
380 |
|
381 |
if not tts_success or not os.path.exists(voz_path) or os.path.getsize(voz_path) <= 100:
|
382 |
-
raise ValueError(f"Error generando voz. Intentos con {tts_voices_to_try} fallaron.")
|
383 |
|
384 |
temp_intermediate_files.append(voz_path)
|
385 |
audio_tts_original = AudioFileClip(voz_path)
|
@@ -477,7 +507,7 @@ async def crear_video_async(prompt_type, input_text, selected_voice, musica_file
|
|
477 |
if musica_file:
|
478 |
try:
|
479 |
music_path = os.path.join(temp_dir_intermediate, "musica_bg.mp3")
|
480 |
-
shutil.copyfile(musica_file, music_path)
|
481 |
temp_intermediate_files.append(music_path)
|
482 |
musica_audio_original = AudioFileClip(music_path)
|
483 |
if musica_audio_original.duration > 0:
|
@@ -497,7 +527,9 @@ async def crear_video_async(prompt_type, input_text, selected_voice, musica_file
|
|
497 |
video_final = video_base.set_audio(final_audio)
|
498 |
output_filename = f"video_{int(datetime.now().timestamp())}.mp4"
|
499 |
output_path = os.path.join(temp_dir_intermediate, output_filename)
|
500 |
-
|
|
|
|
|
501 |
|
502 |
video_final.write_videofile(
|
503 |
output_path,
|
@@ -510,11 +542,13 @@ async def crear_video_async(prompt_type, input_text, selected_voice, musica_file
|
|
510 |
logger='bar'
|
511 |
)
|
512 |
|
513 |
-
shutil.
|
514 |
-
|
|
|
|
|
515 |
total_time = (datetime.now() - start_time).total_seconds()
|
516 |
logger.info(f"Video generado en {total_time:.2f}s")
|
517 |
-
return
|
518 |
|
519 |
except ValueError as ve:
|
520 |
logger.error(f"Error controlado: {str(ve)}")
|
@@ -554,11 +588,16 @@ async def crear_video_async(prompt_type, input_text, selected_voice, musica_file
|
|
554 |
except:
|
555 |
pass
|
556 |
for path in temp_intermediate_files:
|
557 |
-
if os.path.isfile(path) and path !=
|
558 |
try:
|
559 |
os.remove(path)
|
560 |
except:
|
561 |
logger.warning(f"No se pudo eliminar {path}")
|
|
|
|
|
|
|
|
|
|
|
562 |
|
563 |
async def run_app_async(prompt_type, prompt_ia, prompt_manual, musica_file, selected_voice):
|
564 |
logger.info("="*80)
|
@@ -579,12 +618,12 @@ async def run_app_async(prompt_type, prompt_ia, prompt_manual, musica_file, sele
|
|
579 |
|
580 |
try:
|
581 |
logger.info("Iniciando generación de video...")
|
582 |
-
video_path = await crear_video_async(prompt_type, input_text, selected_voice, musica_file)
|
583 |
if video_path and os.path.exists(video_path):
|
584 |
output_video = video_path
|
585 |
output_file = video_path
|
586 |
-
status_msg = gr.update(value=f"✅ Video generado exitosamente. Descarga
|
587 |
-
logger.info(f"Retornando video_path: {video_path}")
|
588 |
else:
|
589 |
status_msg = gr.update(value="❌ Error: Falló la generación del video.")
|
590 |
logger.error("No se generó video_path válido.")
|
@@ -658,22 +697,21 @@ with gr.Blocks(title="Generador de Videos con IA", theme=gr.themes.Soft()) as ap
|
|
658 |
)
|
659 |
|
660 |
prompt_type.change(
|
661 |
-
lambda x: (gr.update(visible=x == "Generar Guion con IA"), gr.update(visible=x == "Usar Mi Guion")),
|
662 |
inputs=prompt_type,
|
663 |
outputs=[ia_guion_column, manual_guion_column]
|
664 |
)
|
665 |
|
666 |
generate_btn.click(
|
667 |
-
lambda: (None, None, gr.update(value="⏳ Procesando... Esto puede tomar hasta 1 hora.")),
|
668 |
outputs=[video_output, file_output, status_output]
|
669 |
).then(
|
670 |
-
run_app,
|
671 |
inputs=[prompt_type, prompt_ia, prompt_manual, musica_input, voice_dropdown],
|
672 |
outputs=[video_output, file_output, status_output],
|
673 |
-
queue=True
|
674 |
-
_js="() => { setTimeout(() => window.location.reload(), 3600000); }"
|
675 |
).then(
|
676 |
-
lambda video_path, file_output, status_msg: gr.update(visible=file_output.value is not None),
|
677 |
inputs=[video_output, file_output, status_output],
|
678 |
outputs=[file_output]
|
679 |
)
|
@@ -685,7 +723,7 @@ with gr.Blocks(title="Generador de Videos con IA", theme=gr.themes.Soft()) as ap
|
|
685 |
3. Sube música (opcional).
|
686 |
4. Selecciona la voz.
|
687 |
5. Haz clic en "✨ Generar Video".
|
688 |
-
6. Revisa el estado. Si el video se genera, estará disponible en /
|
689 |
7. Consulta `video_generator_full.log` para detalles.
|
690 |
""")
|
691 |
|
|
|
5 |
import requests
|
6 |
from datetime import datetime
|
7 |
import edge_tts
|
8 |
+
from gtts import gTTS
|
9 |
import gradio as gr
|
10 |
import torch
|
11 |
from transformers import GPT2Tokenizer, GPT2LMHeadModel
|
|
|
105 |
|
106 |
# Obtener las voces al inicio del script
|
107 |
AVAILABLE_VOICES = get_voice_choices()
|
108 |
+
DEFAULT_VOICE_ID = "es-MX-DaliaNeural" # Cambiado a una voz más estable
|
109 |
DEFAULT_VOICE_NAME = DEFAULT_VOICE_ID
|
110 |
for text, voice_id in AVAILABLE_VOICES:
|
111 |
if voice_id == DEFAULT_VOICE_ID:
|
112 |
DEFAULT_VOICE_NAME = text
|
113 |
break
|
114 |
if DEFAULT_VOICE_ID not in [v[1] for v in AVAILABLE_VOICES]:
|
115 |
+
DEFAULT_VOICE_ID = AVAILABLE_VOICES[0][1] if AVAILABLE_VOICES else "es-MX-DaliaNeural"
|
116 |
+
DEFAULT_VOICE_NAME = AVAILABLE_VOICES[0][0] if AVAILABLE_VOICES else "Dalia (México) - Femenino"
|
117 |
logger.info(f"Voz por defecto seleccionada (ID): {DEFAULT_VOICE_ID}")
|
118 |
|
119 |
# Clave API de Pexels
|
|
|
244 |
communicate = edge_tts.Communicate(text, voice)
|
245 |
await communicate.save(output_path)
|
246 |
if os.path.exists(output_path) and os.path.getsize(output_path) > 100:
|
247 |
+
logger.info(f"Audio guardado exitosamente con edge_tts en: {output_path}")
|
248 |
return True
|
249 |
+
logger.warning(f"edge_tts falló, intentando gTTS...")
|
250 |
+
except Exception as e:
|
251 |
+
logger.error(f"Error en edge_tts con voz '{voice}': {str(e)}")
|
252 |
+
|
253 |
+
try:
|
254 |
+
tts = gTTS(text=text, lang='es')
|
255 |
+
tts.save(output_path)
|
256 |
+
if os.path.exists(output_path) and os.path.getsize(output_path) > 100:
|
257 |
+
logger.info(f"Audio guardado exitosamente con gTTS en: {output_path}")
|
258 |
+
return True
|
259 |
+
logger.error(f"gTTS falló o archivo vacío en: {output_path}")
|
260 |
return False
|
261 |
except Exception as e:
|
262 |
+
logger.error(f"Error en gTTS: {str(e)}")
|
263 |
return False
|
264 |
|
265 |
def download_video_file(url, temp_dir):
|
|
|
375 |
|
376 |
# 2. Generar audio de voz
|
377 |
voz_path = os.path.join(temp_dir_intermediate, "voz.mp3")
|
378 |
+
tts_voices_to_try = [selected_voice, "es-MX-DaliaNeural"]
|
379 |
tts_success = False
|
380 |
+
max_chunk_length = 1000
|
381 |
+
text_chunks = [guion[i:i + max_chunk_length] for i in range(0, len(guion), max_chunk_length)]
|
382 |
+
logger.info(f"Texto dividido en {len(text_chunks)} fragmentos para TTS")
|
383 |
|
384 |
for current_voice in tts_voices_to_try:
|
385 |
logger.info(f"Intentando TTS con voz: {current_voice}")
|
386 |
try:
|
387 |
+
temp_audio_files = []
|
388 |
+
for i, chunk in enumerate(text_chunks):
|
389 |
+
temp_path = os.path.join(temp_dir_intermediate, f"voz_chunk_{i}.mp3")
|
390 |
+
tts_success = await text_to_speech(chunk, temp_path, current_voice)
|
391 |
+
if tts_success and os.path.exists(temp_path) and os.path.getsize(temp_path) > 100:
|
392 |
+
temp_audio_files.append(temp_path)
|
393 |
+
else:
|
394 |
+
logger.warning(f"TTS falló para fragmento {i} con voz: {current_voice}")
|
395 |
+
break
|
396 |
+
if len(temp_audio_files) == len(text_chunks):
|
397 |
+
audio_clips = [AudioFileClip(f) for f in temp_audio_files]
|
398 |
+
concatenated_audio = concatenate_audioclips(audio_clips)
|
399 |
+
concatenated_audio.write_audiofile(voz_path, codec='mp3')
|
400 |
+
concatenated_audio.close()
|
401 |
+
for clip in audio_clips:
|
402 |
+
clip.close()
|
403 |
+
tts_success = os.path.exists(voz_path) and os.path.getsize(voz_path) > 100
|
404 |
+
temp_intermediate_files.extend(temp_audio_files)
|
405 |
+
if tts_success:
|
406 |
+
logger.info(f"TTS exitoso con voz: {current_voice}")
|
407 |
+
break
|
408 |
except Exception as e:
|
409 |
logger.error(f"Error en TTS con voz '{current_voice}': {str(e)}")
|
410 |
|
411 |
if not tts_success or not os.path.exists(voz_path) or os.path.getsize(voz_path) <= 100:
|
412 |
+
raise ValueError(f"Error generando voz. Intentos con {tts_voices_to_try} y gTTS fallaron.")
|
413 |
|
414 |
temp_intermediate_files.append(voz_path)
|
415 |
audio_tts_original = AudioFileClip(voz_path)
|
|
|
507 |
if musica_file:
|
508 |
try:
|
509 |
music_path = os.path.join(temp_dir_intermediate, "musica_bg.mp3")
|
510 |
+
shutil.copyfile(musica_file.name if hasattr(musica_file, 'name') else musica_file, music_path)
|
511 |
temp_intermediate_files.append(music_path)
|
512 |
musica_audio_original = AudioFileClip(music_path)
|
513 |
if musica_audio_original.duration > 0:
|
|
|
527 |
video_final = video_base.set_audio(final_audio)
|
528 |
output_filename = f"video_{int(datetime.now().timestamp())}.mp4"
|
529 |
output_path = os.path.join(temp_dir_intermediate, output_filename)
|
530 |
+
persistent_dir = "/data"
|
531 |
+
os.makedirs(persistent_dir, exist_ok=True)
|
532 |
+
persistent_path = os.path.join(persistent_dir, output_filename)
|
533 |
|
534 |
video_final.write_videofile(
|
535 |
output_path,
|
|
|
542 |
logger='bar'
|
543 |
)
|
544 |
|
545 |
+
shutil.move(output_path, persistent_path)
|
546 |
+
download_url = f"https://gnosticdev-invideo-basic.hf.space/file={persistent_path}"
|
547 |
+
logger.info(f"Video guardado en: {persistent_path}")
|
548 |
+
logger.info(f"URL de descarga: {download_url}")
|
549 |
total_time = (datetime.now() - start_time).total_seconds()
|
550 |
logger.info(f"Video generado en {total_time:.2f}s")
|
551 |
+
return persistent_path, download_url
|
552 |
|
553 |
except ValueError as ve:
|
554 |
logger.error(f"Error controlado: {str(ve)}")
|
|
|
588 |
except:
|
589 |
pass
|
590 |
for path in temp_intermediate_files:
|
591 |
+
if os.path.isfile(path) and path != persistent_path:
|
592 |
try:
|
593 |
os.remove(path)
|
594 |
except:
|
595 |
logger.warning(f"No se pudo eliminar {path}")
|
596 |
+
try:
|
597 |
+
if os.path.exists(temp_dir_intermediate):
|
598 |
+
shutil.rmtree(temp_dir_intermediate)
|
599 |
+
except:
|
600 |
+
logger.warning(f"No se pudo eliminar directorio temporal {temp_dir_intermediate}")
|
601 |
|
602 |
async def run_app_async(prompt_type, prompt_ia, prompt_manual, musica_file, selected_voice):
|
603 |
logger.info("="*80)
|
|
|
618 |
|
619 |
try:
|
620 |
logger.info("Iniciando generación de video...")
|
621 |
+
video_path, download_url = await crear_video_async(prompt_type, input_text, selected_voice, musica_file)
|
622 |
if video_path and os.path.exists(video_path):
|
623 |
output_video = video_path
|
624 |
output_file = video_path
|
625 |
+
status_msg = gr.update(value=f"✅ Video generado exitosamente. Descarga: {download_url}")
|
626 |
+
logger.info(f"Retornando video_path: {video_path}, URL: {download_url}")
|
627 |
else:
|
628 |
status_msg = gr.update(value="❌ Error: Falló la generación del video.")
|
629 |
logger.error("No se generó video_path válido.")
|
|
|
697 |
)
|
698 |
|
699 |
prompt_type.change(
|
700 |
+
fn=lambda x: (gr.update(visible=x == "Generar Guion con IA"), gr.update(visible=x == "Usar Mi Guion")),
|
701 |
inputs=prompt_type,
|
702 |
outputs=[ia_guion_column, manual_guion_column]
|
703 |
)
|
704 |
|
705 |
generate_btn.click(
|
706 |
+
fn=lambda: (None, None, gr.update(value="⏳ Procesando... Esto puede tomar hasta 1 hora.")),
|
707 |
outputs=[video_output, file_output, status_output]
|
708 |
).then(
|
709 |
+
fn=run_app,
|
710 |
inputs=[prompt_type, prompt_ia, prompt_manual, musica_input, voice_dropdown],
|
711 |
outputs=[video_output, file_output, status_output],
|
712 |
+
queue=True
|
|
|
713 |
).then(
|
714 |
+
fn=lambda video_path, file_output, status_msg: gr.update(visible=file_output.value is not None),
|
715 |
inputs=[video_output, file_output, status_output],
|
716 |
outputs=[file_output]
|
717 |
)
|
|
|
723 |
3. Sube música (opcional).
|
724 |
4. Selecciona la voz.
|
725 |
5. Haz clic en "✨ Generar Video".
|
726 |
+
6. Revisa el estado. Si el video se genera, estará disponible en /data.
|
727 |
7. Consulta `video_generator_full.log` para detalles.
|
728 |
""")
|
729 |
|