gnosticdev committed on
Commit
711e3d2
·
verified ·
1 Parent(s): ac35a46

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -29
app.py CHANGED
@@ -5,6 +5,7 @@ import tempfile
5
  import requests
6
  from datetime import datetime
7
  import edge_tts
 
8
  import gradio as gr
9
  import torch
10
  from transformers import GPT2Tokenizer, GPT2LMHeadModel
@@ -104,15 +105,15 @@ def get_voice_choices():
104
 
105
  # Obtener las voces al inicio del script
106
  AVAILABLE_VOICES = get_voice_choices()
107
- DEFAULT_VOICE_ID = "es-ES-JuanNeural"
108
  DEFAULT_VOICE_NAME = DEFAULT_VOICE_ID
109
  for text, voice_id in AVAILABLE_VOICES:
110
  if voice_id == DEFAULT_VOICE_ID:
111
  DEFAULT_VOICE_NAME = text
112
  break
113
  if DEFAULT_VOICE_ID not in [v[1] for v in AVAILABLE_VOICES]:
114
- DEFAULT_VOICE_ID = AVAILABLE_VOICES[0][1] if AVAILABLE_VOICES else "en-US-AriaNeural"
115
- DEFAULT_VOICE_NAME = AVAILABLE_VOICES[0][0] if AVAILABLE_VOICES else "Aria (United States) - Female"
116
  logger.info(f"Voz por defecto seleccionada (ID): {DEFAULT_VOICE_ID}")
117
 
118
  # Clave API de Pexels
@@ -243,12 +244,22 @@ async def text_to_speech(text, output_path, voice):
243
  communicate = edge_tts.Communicate(text, voice)
244
  await communicate.save(output_path)
245
  if os.path.exists(output_path) and os.path.getsize(output_path) > 100:
246
- logger.info(f"Audio guardado exitosamente en: {output_path}")
247
  return True
248
- logger.error(f"TTS guardó un archivo pequeño o vacío en: {output_path}")
 
 
 
 
 
 
 
 
 
 
249
  return False
250
  except Exception as e:
251
- logger.error(f"Error en TTS con voz '{voice}': {str(e)}")
252
  return False
253
 
254
  def download_video_file(url, temp_dir):
@@ -364,22 +375,41 @@ async def crear_video_async(prompt_type, input_text, selected_voice, musica_file
364
 
365
  # 2. Generar audio de voz
366
  voz_path = os.path.join(temp_dir_intermediate, "voz.mp3")
367
- tts_voices_to_try = [selected_voice, "es-MX-DaliaNeural"] # Cambié fallback a una voz más estable
368
  tts_success = False
 
 
 
369
 
370
  for current_voice in tts_voices_to_try:
371
  logger.info(f"Intentando TTS con voz: {current_voice}")
372
  try:
373
- tts_success = await text_to_speech(guion, voz_path, current_voice)
374
- if tts_success and os.path.exists(voz_path) and os.path.getsize(voz_path) > 100:
375
- logger.info(f"TTS exitoso con voz: {current_voice}")
376
- break
377
- logger.warning(f"TTS falló o archivo inválido con voz: {current_voice}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
378
  except Exception as e:
379
  logger.error(f"Error en TTS con voz '{current_voice}': {str(e)}")
380
 
381
  if not tts_success or not os.path.exists(voz_path) or os.path.getsize(voz_path) <= 100:
382
- raise ValueError(f"Error generando voz. Intentos con {tts_voices_to_try} fallaron.")
383
 
384
  temp_intermediate_files.append(voz_path)
385
  audio_tts_original = AudioFileClip(voz_path)
@@ -477,7 +507,7 @@ async def crear_video_async(prompt_type, input_text, selected_voice, musica_file
477
  if musica_file:
478
  try:
479
  music_path = os.path.join(temp_dir_intermediate, "musica_bg.mp3")
480
- shutil.copyfile(musica_file, music_path)
481
  temp_intermediate_files.append(music_path)
482
  musica_audio_original = AudioFileClip(music_path)
483
  if musica_audio_original.duration > 0:
@@ -497,7 +527,9 @@ async def crear_video_async(prompt_type, input_text, selected_voice, musica_file
497
  video_final = video_base.set_audio(final_audio)
498
  output_filename = f"video_{int(datetime.now().timestamp())}.mp4"
499
  output_path = os.path.join(temp_dir_intermediate, output_filename)
500
- permanent_path = f"/tmp/{output_filename}"
 
 
501
 
502
  video_final.write_videofile(
503
  output_path,
@@ -510,11 +542,13 @@ async def crear_video_async(prompt_type, input_text, selected_voice, musica_file
510
  logger='bar'
511
  )
512
 
513
- shutil.copy(output_path, permanent_path)
514
- logger.info(f"Video guardado en: {permanent_path}")
 
 
515
  total_time = (datetime.now() - start_time).total_seconds()
516
  logger.info(f"Video generado en {total_time:.2f}s")
517
- return permanent_path
518
 
519
  except ValueError as ve:
520
  logger.error(f"Error controlado: {str(ve)}")
@@ -554,11 +588,16 @@ async def crear_video_async(prompt_type, input_text, selected_voice, musica_file
554
  except:
555
  pass
556
  for path in temp_intermediate_files:
557
- if os.path.isfile(path) and path != permanent_path:
558
  try:
559
  os.remove(path)
560
  except:
561
  logger.warning(f"No se pudo eliminar {path}")
 
 
 
 
 
562
 
563
  async def run_app_async(prompt_type, prompt_ia, prompt_manual, musica_file, selected_voice):
564
  logger.info("="*80)
@@ -579,12 +618,12 @@ async def run_app_async(prompt_type, prompt_ia, prompt_manual, musica_file, sele
579
 
580
  try:
581
  logger.info("Iniciando generación de video...")
582
- video_path = await crear_video_async(prompt_type, input_text, selected_voice, musica_file)
583
  if video_path and os.path.exists(video_path):
584
  output_video = video_path
585
  output_file = video_path
586
- status_msg = gr.update(value=f"✅ Video generado exitosamente. Descarga disponible en {video_path}")
587
- logger.info(f"Retornando video_path: {video_path}")
588
  else:
589
  status_msg = gr.update(value="❌ Error: Falló la generación del video.")
590
  logger.error("No se generó video_path válido.")
@@ -658,22 +697,21 @@ with gr.Blocks(title="Generador de Videos con IA", theme=gr.themes.Soft()) as ap
658
  )
659
 
660
  prompt_type.change(
661
- lambda x: (gr.update(visible=x == "Generar Guion con IA"), gr.update(visible=x == "Usar Mi Guion")),
662
  inputs=prompt_type,
663
  outputs=[ia_guion_column, manual_guion_column]
664
  )
665
 
666
  generate_btn.click(
667
- lambda: (None, None, gr.update(value="⏳ Procesando... Esto puede tomar hasta 1 hora.")),
668
  outputs=[video_output, file_output, status_output]
669
  ).then(
670
- run_app,
671
  inputs=[prompt_type, prompt_ia, prompt_manual, musica_input, voice_dropdown],
672
  outputs=[video_output, file_output, status_output],
673
- queue=True,
674
- _js="() => { setTimeout(() => window.location.reload(), 3600000); }"
675
  ).then(
676
- lambda video_path, file_output, status_msg: gr.update(visible=file_output.value is not None),
677
  inputs=[video_output, file_output, status_output],
678
  outputs=[file_output]
679
  )
@@ -685,7 +723,7 @@ with gr.Blocks(title="Generador de Videos con IA", theme=gr.themes.Soft()) as ap
685
  3. Sube música (opcional).
686
  4. Selecciona la voz.
687
  5. Haz clic en "✨ Generar Video".
688
- 6. Revisa el estado. Si el video se genera, estará disponible en /tmp.
689
  7. Consulta `video_generator_full.log` para detalles.
690
  """)
691
 
 
5
  import requests
6
  from datetime import datetime
7
  import edge_tts
8
+ from gtts import gTTS
9
  import gradio as gr
10
  import torch
11
  from transformers import GPT2Tokenizer, GPT2LMHeadModel
 
105
 
106
  # Obtener las voces al inicio del script
107
  AVAILABLE_VOICES = get_voice_choices()
108
+ DEFAULT_VOICE_ID = "es-MX-DaliaNeural" # Cambiado a una voz más estable
109
  DEFAULT_VOICE_NAME = DEFAULT_VOICE_ID
110
  for text, voice_id in AVAILABLE_VOICES:
111
  if voice_id == DEFAULT_VOICE_ID:
112
  DEFAULT_VOICE_NAME = text
113
  break
114
  if DEFAULT_VOICE_ID not in [v[1] for v in AVAILABLE_VOICES]:
115
+ DEFAULT_VOICE_ID = AVAILABLE_VOICES[0][1] if AVAILABLE_VOICES else "es-MX-DaliaNeural"
116
+ DEFAULT_VOICE_NAME = AVAILABLE_VOICES[0][0] if AVAILABLE_VOICES else "Dalia (México) - Femenino"
117
  logger.info(f"Voz por defecto seleccionada (ID): {DEFAULT_VOICE_ID}")
118
 
119
  # Clave API de Pexels
 
244
  communicate = edge_tts.Communicate(text, voice)
245
  await communicate.save(output_path)
246
  if os.path.exists(output_path) and os.path.getsize(output_path) > 100:
247
+ logger.info(f"Audio guardado exitosamente con edge_tts en: {output_path}")
248
  return True
249
+ logger.warning(f"edge_tts falló, intentando gTTS...")
250
+ except Exception as e:
251
+ logger.error(f"Error en edge_tts con voz '{voice}': {str(e)}")
252
+
253
+ try:
254
+ tts = gTTS(text=text, lang='es')
255
+ tts.save(output_path)
256
+ if os.path.exists(output_path) and os.path.getsize(output_path) > 100:
257
+ logger.info(f"Audio guardado exitosamente con gTTS en: {output_path}")
258
+ return True
259
+ logger.error(f"gTTS falló o archivo vacío en: {output_path}")
260
  return False
261
  except Exception as e:
262
+ logger.error(f"Error en gTTS: {str(e)}")
263
  return False
264
 
265
  def download_video_file(url, temp_dir):
 
375
 
376
  # 2. Generar audio de voz
377
  voz_path = os.path.join(temp_dir_intermediate, "voz.mp3")
378
+ tts_voices_to_try = [selected_voice, "es-MX-DaliaNeural"]
379
  tts_success = False
380
+ max_chunk_length = 1000
381
+ text_chunks = [guion[i:i + max_chunk_length] for i in range(0, len(guion), max_chunk_length)]
382
+ logger.info(f"Texto dividido en {len(text_chunks)} fragmentos para TTS")
383
 
384
  for current_voice in tts_voices_to_try:
385
  logger.info(f"Intentando TTS con voz: {current_voice}")
386
  try:
387
+ temp_audio_files = []
388
+ for i, chunk in enumerate(text_chunks):
389
+ temp_path = os.path.join(temp_dir_intermediate, f"voz_chunk_{i}.mp3")
390
+ tts_success = await text_to_speech(chunk, temp_path, current_voice)
391
+ if tts_success and os.path.exists(temp_path) and os.path.getsize(temp_path) > 100:
392
+ temp_audio_files.append(temp_path)
393
+ else:
394
+ logger.warning(f"TTS falló para fragmento {i} con voz: {current_voice}")
395
+ break
396
+ if len(temp_audio_files) == len(text_chunks):
397
+ audio_clips = [AudioFileClip(f) for f in temp_audio_files]
398
+ concatenated_audio = concatenate_audioclips(audio_clips)
399
+ concatenated_audio.write_audiofile(voz_path, codec='mp3')
400
+ concatenated_audio.close()
401
+ for clip in audio_clips:
402
+ clip.close()
403
+ tts_success = os.path.exists(voz_path) and os.path.getsize(voz_path) > 100
404
+ temp_intermediate_files.extend(temp_audio_files)
405
+ if tts_success:
406
+ logger.info(f"TTS exitoso con voz: {current_voice}")
407
+ break
408
  except Exception as e:
409
  logger.error(f"Error en TTS con voz '{current_voice}': {str(e)}")
410
 
411
  if not tts_success or not os.path.exists(voz_path) or os.path.getsize(voz_path) <= 100:
412
+ raise ValueError(f"Error generando voz. Intentos con {tts_voices_to_try} y gTTS fallaron.")
413
 
414
  temp_intermediate_files.append(voz_path)
415
  audio_tts_original = AudioFileClip(voz_path)
 
507
  if musica_file:
508
  try:
509
  music_path = os.path.join(temp_dir_intermediate, "musica_bg.mp3")
510
+ shutil.copyfile(musica_file.name if hasattr(musica_file, 'name') else musica_file, music_path)
511
  temp_intermediate_files.append(music_path)
512
  musica_audio_original = AudioFileClip(music_path)
513
  if musica_audio_original.duration > 0:
 
527
  video_final = video_base.set_audio(final_audio)
528
  output_filename = f"video_{int(datetime.now().timestamp())}.mp4"
529
  output_path = os.path.join(temp_dir_intermediate, output_filename)
530
+ persistent_dir = "/data"
531
+ os.makedirs(persistent_dir, exist_ok=True)
532
+ persistent_path = os.path.join(persistent_dir, output_filename)
533
 
534
  video_final.write_videofile(
535
  output_path,
 
542
  logger='bar'
543
  )
544
 
545
+ shutil.move(output_path, persistent_path)
546
+ download_url = f"https://gnosticdev-invideo-basic.hf.space/file={persistent_path}"
547
+ logger.info(f"Video guardado en: {persistent_path}")
548
+ logger.info(f"URL de descarga: {download_url}")
549
  total_time = (datetime.now() - start_time).total_seconds()
550
  logger.info(f"Video generado en {total_time:.2f}s")
551
+ return persistent_path, download_url
552
 
553
  except ValueError as ve:
554
  logger.error(f"Error controlado: {str(ve)}")
 
588
  except:
589
  pass
590
  for path in temp_intermediate_files:
591
+ if os.path.isfile(path) and path != persistent_path:
592
  try:
593
  os.remove(path)
594
  except:
595
  logger.warning(f"No se pudo eliminar {path}")
596
+ try:
597
+ if os.path.exists(temp_dir_intermediate):
598
+ shutil.rmtree(temp_dir_intermediate)
599
+ except:
600
+ logger.warning(f"No se pudo eliminar directorio temporal {temp_dir_intermediate}")
601
 
602
  async def run_app_async(prompt_type, prompt_ia, prompt_manual, musica_file, selected_voice):
603
  logger.info("="*80)
 
618
 
619
  try:
620
  logger.info("Iniciando generación de video...")
621
+ video_path, download_url = await crear_video_async(prompt_type, input_text, selected_voice, musica_file)
622
  if video_path and os.path.exists(video_path):
623
  output_video = video_path
624
  output_file = video_path
625
+ status_msg = gr.update(value=f"✅ Video generado exitosamente. Descarga: {download_url}")
626
+ logger.info(f"Retornando video_path: {video_path}, URL: {download_url}")
627
  else:
628
  status_msg = gr.update(value="❌ Error: Falló la generación del video.")
629
  logger.error("No se generó video_path válido.")
 
697
  )
698
 
699
  prompt_type.change(
700
+ fn=lambda x: (gr.update(visible=x == "Generar Guion con IA"), gr.update(visible=x == "Usar Mi Guion")),
701
  inputs=prompt_type,
702
  outputs=[ia_guion_column, manual_guion_column]
703
  )
704
 
705
  generate_btn.click(
706
+ fn=lambda: (None, None, gr.update(value="⏳ Procesando... Esto puede tomar hasta 1 hora.")),
707
  outputs=[video_output, file_output, status_output]
708
  ).then(
709
+ fn=run_app,
710
  inputs=[prompt_type, prompt_ia, prompt_manual, musica_input, voice_dropdown],
711
  outputs=[video_output, file_output, status_output],
712
+ queue=True
 
713
  ).then(
714
+ fn=lambda video_path, file_output, status_msg: gr.update(visible=file_output.value is not None),
715
  inputs=[video_output, file_output, status_output],
716
  outputs=[file_output]
717
  )
 
723
  3. Sube música (opcional).
724
  4. Selecciona la voz.
725
  5. Haz clic en "✨ Generar Video".
726
+ 6. Revisa el estado. Si el video se genera, estará disponible en /data.
727
  7. Consulta `video_generator_full.log` para detalles.
728
  """)
729