# interfaceV3.py
import gradio as gr
import shutil
import os
import subprocess
import sys
# AÑADIR RUTA DEL MÓDULO CUSTOM
sys.path.append(os.path.abspath("./src"))
from whisper_audio_transcriber import transcribe_audio, guardar_transcripcion
from call_openai_api import moni as rtff
# Rutas
AUDIO_RECORD_PATH = os.path.abspath("./assets/audio/grabacion_gradio.wav")
VIDEO_PATH = os.path.abspath("./assets/video/data_video_sun.mp4")
TRANSCRIPTION_TEXT_PATH = os.path.abspath("./results/transcripcion.txt")
RESULT_AUDIO_TEMP_PATH = os.path.abspath("./results/audiov2.wav")
RESULT_AUDIO_FINAL_PATH = os.path.abspath("./assets/audio/audio.wav")
RESULT_VIDEO_PATH = os.path.abspath("./results/result_voice.mp4")
TEXT_TO_SPEECH_PATH = os.path.abspath("./src/text_to_speech.py")
RUN_INFERENCE_PATH = os.path.abspath("./src/run_inference.py")
def transcribir_con_progreso(audio_path):
    """Transcribe the audio file at *audio_path* with Whisper, reporting progress.

    Saves the resulting text to TRANSCRIPTION_TEXT_PATH and returns it.

    Args:
        audio_path: Path to the WAV file to transcribe.

    Returns:
        The transcription text produced by ``transcribe_audio``.
    """
    # NOTE(review): a gr.Progress() constructed manually like this is not
    # injected/tracked by Gradio the way a `progress=gr.Progress()` default
    # parameter would be — confirm whether the progress bar actually renders.
    progreso = gr.Progress()
    # gr.Progress.__call__(progress, total=None, ..., desc=None): the label
    # must go in `desc=` — passed positionally it would be taken as `total`.
    # Progress values are fractions in [0, 1], not percentages.
    progreso(0.0, desc="Iniciando transcripción...")
    model_name = "openai/whisper-large"
    progreso(0.25, desc="Cargando modelo Whisper...")
    transcripcion = transcribe_audio(audio_path, model_name)
    progreso(0.75, desc="Guardando transcripción...")
    guardar_transcripcion(transcripcion, filename=TRANSCRIPTION_TEXT_PATH)
    progreso(1.0, desc="Transcripción completada.")
    return transcripcion
def generar_audio_desde_texto():
    """Run the text-to-speech script and publish the resulting audio.

    Launches src/text_to_speech.py as a subprocess. If the script produced
    RESULT_AUDIO_TEMP_PATH, copies it to RESULT_AUDIO_FINAL_PATH and returns
    that final path; returns None when no audio file was generated.

    Raises:
        RuntimeError: if the subprocess exits with a non-zero return code.
    """
    proceso = subprocess.run(
        [sys.executable, TEXT_TO_SPEECH_PATH],
        capture_output=True,
        text=True,
    )
    if proceso.returncode != 0:
        raise RuntimeError(f"Error ejecutando text_to_speech.py: {proceso.stderr}")
    if not os.path.exists(RESULT_AUDIO_TEMP_PATH):
        return None
    # Make sure the destination directory exists before copying the audio over.
    os.makedirs(os.path.dirname(RESULT_AUDIO_FINAL_PATH), exist_ok=True)
    shutil.copy(RESULT_AUDIO_TEMP_PATH, RESULT_AUDIO_FINAL_PATH)
    return RESULT_AUDIO_FINAL_PATH
def procesar_video_audio():
    """Run the lip-sync inference script and return the resulting video path.

    Launches src/run_inference.py with the final TTS audio and the base video.

    Returns:
        RESULT_VIDEO_PATH if the output video exists after the run,
        otherwise None.

    Raises:
        RuntimeError: if the inference subprocess exits with a non-zero code.
    """
    result = subprocess.run(
        [sys.executable, RUN_INFERENCE_PATH, "--audio", RESULT_AUDIO_FINAL_PATH, "--video", VIDEO_PATH],
        capture_output=True,
        text=True,
    )
    # Mirror generar_audio_desde_texto(): surface subprocess failures instead
    # of silently returning a possibly stale RESULT_VIDEO_PATH left over from
    # an earlier successful run.
    if result.returncode != 0:
        raise RuntimeError(f"Error ejecutando run_inference.py: {result.stderr}")
    if os.path.exists(RESULT_VIDEO_PATH):
        return RESULT_VIDEO_PATH
    return None
def flujo_completo(audio_file_path):
    """Full pipeline: copy the recording, transcribe, call OpenAI, TTS, lip-sync.

    Args:
        audio_file_path: Path of the recording delivered by the Gradio widget.

    Returns:
        A 5-tuple matching the Gradio outputs: (status message, recorded
        audio path, transcription text, generated TTS audio path, processed
        video path). On any failure the media slots are None and both text
        slots carry the error description.
    """
    try:
        shutil.copy(audio_file_path, AUDIO_RECORD_PATH)
        texto = transcribir_con_progreso(AUDIO_RECORD_PATH)
        # OpenAI call; return value unused — presumably its output feeds the
        # TTS step through a file. TODO confirm against call_openai_api.
        rtff(TRANSCRIPTION_TEXT_PATH)
        audio_tts = generar_audio_desde_texto()
        video = procesar_video_audio()
        return "Grabación recibida", AUDIO_RECORD_PATH, texto, audio_tts, video
    except Exception as e:
        mensaje = f"Error durante el flujo completo: {str(e)}"
        return mensaje, None, f"Error: {str(e)}", None, None
def interfaz():
    """Build the Gradio Blocks UI.

    Left column: looping base video, microphone recorder and status box.
    Right column: recorded audio, TTS audio and the processed video.
    A new recording triggers flujo_completo, whose 5-tuple return must match
    the outputs list order below.
    """
    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column():
                gr.Video(VIDEO_PATH, loop=True, autoplay=True, height=300, width=500)
                grabacion = gr.Audio(source="microphone", type="filepath", label="Graba tu voz")
                estado = gr.Textbox(label="Estado", interactive=False)
            with gr.Column():
                audio_grabado = gr.Audio(label="Audio grabado", interactive=False)
                audio_tts = gr.Audio(label="Audio TTS", interactive=False)
                video_final = gr.Video(label="Video procesado", interactive=False)
        # NOTE(review): source indentation was lost; this textbox may have
        # belonged inside the right-hand column — confirm intended layout.
        transcripcion = gr.Textbox(label="Texto transcrito")
        grabacion.change(
            flujo_completo,
            inputs=grabacion,
            outputs=[estado, audio_grabado, transcripcion, audio_tts, video_final],
        )
    return demo
if __name__ == "__main__":
    # Build the UI and start the Gradio server.
    interfaz().launch()