# interfaceV3.py
import gradio as gr
import shutil
import os
import subprocess
import sys

# Run the Wav2Lip setup step with the SAME interpreter running this script:
# a bare "python" from PATH may resolve to a different environment that is
# missing the project's dependencies.
subprocess.run([sys.executable, "src/setup_wav2lip.py"])

# Make the project-local `src` directory importable.
sys.path.append(os.path.abspath("./src"))
from whisper_audio_transcriber import transcribe_audio, guardar_transcripcion
from call_openai_api import moni as rtff

# Filesystem layout: recorded/static inputs live under ./assets,
# generated artifacts under ./results, helper scripts under ./src.
AUDIO_RECORD_PATH = os.path.abspath("./assets/audio/grabacion_gradio.wav")
VIDEO_PATH = os.path.abspath("./assets/video/data_video_sun.mp4")
TRANSCRIPTION_TEXT_PATH = os.path.abspath("./results/transcripcion.txt")
RESULT_AUDIO_TEMP_PATH = os.path.abspath("./results/audiov2.wav")
RESULT_AUDIO_FINAL_PATH = os.path.abspath("./assets/audio/audio.wav")
RESULT_VIDEO_PATH = os.path.abspath("./results/result_voice.mp4")
TEXT_TO_SPEECH_PATH = os.path.abspath("./src/text_to_speech.py")
RUN_INFERENCE_PATH = os.path.abspath("./src/run_inference.py")
def transcribir_con_progreso(audio_path, progreso=gr.Progress()):
    """Transcribe ``audio_path`` with Whisper, reporting progress to Gradio.

    Gradio only injects and tracks a progress bar when ``gr.Progress()``
    appears as a *default argument* of the handler, so it is taken as a
    parameter here instead of being instantiated in the body.  Progress
    values are fractions in [0, 1], as the Gradio API expects (the previous
    25/75/100 values were out of range).

    Args:
        audio_path: Path to the WAV file to transcribe.
        progreso: Gradio progress tracker (injected by the framework).

    Returns:
        The transcription text; it is also persisted to
        TRANSCRIPTION_TEXT_PATH as a side effect.
    """
    progreso(0.0, "Iniciando transcripción...")
    model_name = "openai/whisper-large"
    progreso(0.25, "Cargando modelo Whisper...")
    transcripcion = transcribe_audio(audio_path, model_name)
    progreso(0.75, "Guardando transcripción...")
    guardar_transcripcion(transcripcion, filename=TRANSCRIPTION_TEXT_PATH)
    progreso(1.0, "Transcripción completada.")
    return transcripcion
def generar_audio_desde_texto():
    """Run the TTS helper script and publish its output WAV.

    Executes src/text_to_speech.py with the current interpreter, raising on
    a non-zero exit, then copies the temporary result to the location the
    lip-sync step expects.

    Returns:
        The final audio path, or None when the script produced no file.

    Raises:
        RuntimeError: if the TTS subprocess exits with a non-zero status.
    """
    print("Ejecutando text_to_speech...")
    proceso = subprocess.run(
        [sys.executable, TEXT_TO_SPEECH_PATH],
        capture_output=True,
        text=True,
    )
    print("stdout:", proceso.stdout)
    print("stderr:", proceso.stderr)
    if proceso.returncode != 0:
        raise RuntimeError(f"Error ejecutando text_to_speech.py: {proceso.stderr}")

    # Guard clause: nothing to publish if the script left no temp audio.
    if not os.path.exists(RESULT_AUDIO_TEMP_PATH):
        print("Audio temporal no encontrado")
        return None

    os.makedirs(os.path.dirname(RESULT_AUDIO_FINAL_PATH), exist_ok=True)
    shutil.copy(RESULT_AUDIO_TEMP_PATH, RESULT_AUDIO_FINAL_PATH)
    print(f"Audio copiado a: {RESULT_AUDIO_FINAL_PATH}")
    return RESULT_AUDIO_FINAL_PATH
def procesar_video_audio():
    """Run Wav2Lip inference to lip-sync the stock video to the final audio.

    Launches src/run_inference.py with the current interpreter and — unlike
    before — checks its exit status (mirroring generar_audio_desde_texto),
    so a failed inference surfaces as an exception with the subprocess
    stderr instead of a silent "no video" message.

    Returns:
        The generated video path, or None when no video file was produced.

    Raises:
        RuntimeError: if the inference subprocess exits with a non-zero status.
    """
    print("Iniciando procesamiento de video...")
    print("Audio de entrada:", RESULT_AUDIO_FINAL_PATH)
    print("Video de entrada:", VIDEO_PATH)
    result = subprocess.run(
        [sys.executable, RUN_INFERENCE_PATH, "--audio", RESULT_AUDIO_FINAL_PATH, "--video", VIDEO_PATH],
        capture_output=True,
        text=True
    )
    print("stdout:", result.stdout)
    print("stderr:", result.stderr)
    # Previously a non-zero exit fell through to the "no video" branch with
    # no indication of the cause; raise so the caller's error path reports it.
    if result.returncode != 0:
        raise RuntimeError(f"Error ejecutando run_inference.py: {result.stderr}")
    if os.path.exists(RESULT_VIDEO_PATH):
        print("Video generado:", RESULT_VIDEO_PATH)
        return RESULT_VIDEO_PATH
    else:
        print("No se generó el video")
        return None
def flujo_completo(audio_file_path):
    """End-to-end pipeline: record → transcribe → LLM reply → TTS → lip-sync.

    Args:
        audio_file_path: Path of the audio file recorded by the Gradio widget.

    Returns:
        A 5-tuple matching the Gradio outputs:
        (status message, recorded audio path, transcription text,
         TTS audio path, processed video path).
        On any failure the tuple carries error placeholders instead of raising.
    """
    try:
        # Persist the recording where the rest of the pipeline expects it.
        os.makedirs(os.path.dirname(AUDIO_RECORD_PATH), exist_ok=True)
        shutil.copy(audio_file_path, AUDIO_RECORD_PATH)
        print("Audio grabado copiado a:", AUDIO_RECORD_PATH)

        transcripcion = transcribir_con_progreso(AUDIO_RECORD_PATH)
        print("Texto transcrito:", transcripcion)

        respuesta_openai = rtff(TRANSCRIPTION_TEXT_PATH)
        print("Respuesta de OpenAI:", respuesta_openai)

        audio_generado = generar_audio_desde_texto()
        video_path = procesar_video_audio()

        return ("Grabación recibida", AUDIO_RECORD_PATH, transcripcion,
                audio_generado, video_path)
    except Exception as e:
        # The UI shows errors inline rather than crashing the handler.
        mensaje = str(e)
        return (f"Error durante el flujo completo: {mensaje}",
                None,
                f"Error: {mensaje}",
                None,
                None)
def interfaz():
    """Build the Gradio Blocks UI and wire the full pipeline to audio input.

    Returns:
        The constructed gr.Blocks demo (not launched).
    """
    with gr.Blocks() as demo:
        with gr.Row():
            # Left column: looping presenter video plus the microphone input.
            with gr.Column():
                gr.Video(VIDEO_PATH, loop=True, autoplay=True, height=300, width=500)
                audio_input = gr.Audio(label="Graba tu voz", type="filepath", format="wav")
                estado_grabacion = gr.Textbox(label="Estado", interactive=False)
            # Right column: every artifact the pipeline produces.
            with gr.Column():
                output_audio = gr.Audio(label="Audio grabado", interactive=False)
                output_audio_speech = gr.Audio(label="Audio TTS", interactive=False)
                video_resultado = gr.Video(label="Video procesado", interactive=False)
                texto_transcripcion = gr.Textbox(label="Texto transcrito")
        # A new recording triggers the whole pipeline; output order must match
        # the 5-tuple returned by flujo_completo.
        audio_input.change(
            flujo_completo,
            inputs=audio_input,
            outputs=[estado_grabacion, output_audio, texto_transcripcion,
                     output_audio_speech, video_resultado],
        )
    return demo
if __name__ == "__main__":
    # Launch the UI only when executed as a script, not on import.
    interfaz().launch()