File size: 4,639 Bytes
aeb448f
5959802
 
 
 
 
3c49341
561a65c
 
aeb448f
 
3c49341
aeb448f
 
3c49341
d71aaa4
 
 
 
 
 
 
 
 
 
 
5959802
 
d71aaa4
5959802
d71aaa4
5959802
d71aaa4
5959802
d71aaa4
5959802
 
d71aaa4
5959802
e5873a4
5959802
 
 
 
 
e5873a4
 
 
5959802
 
 
 
 
 
e5873a4
5959802
 
e5873a4
5959802
 
 
d71aaa4
e5873a4
 
 
 
5959802
d71aaa4
5959802
 
 
e5873a4
 
 
 
5959802
e5873a4
5959802
 
e5873a4
5959802
 
d71aaa4
 
 
e5873a4
d71aaa4
e5873a4
 
d71aaa4
e5873a4
 
d71aaa4
e5873a4
 
d71aaa4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5959802
 
 
 
 
7655430
d71aaa4
5959802
 
d71aaa4
 
 
5959802
d71aaa4
 
 
 
 
 
5959802
 
 
d71aaa4
5959802
 
d71aaa4
e5873a4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# interfaceV3.py
import gradio as gr
import shutil
import os
import subprocess
import sys

# One-time environment setup: prepare the Wav2Lip dependencies before the UI
# starts. Use the interpreter currently running this script (consistent with
# the sys.executable subprocess calls below) instead of whatever "python"
# happens to resolve to on PATH.
subprocess.run([sys.executable, "src/setup_wav2lip.py"])

# ADD CUSTOM MODULE PATH so the project-local helpers are importable.
sys.path.append(os.path.abspath("./src"))

from whisper_audio_transcriber import transcribe_audio, guardar_transcripcion
from call_openai_api import moni as rtff

# Project paths (resolved against the current working directory at startup).
AUDIO_RECORD_PATH = os.path.abspath("./assets/audio/grabacion_gradio.wav")
VIDEO_PATH = os.path.abspath("./assets/video/data_video_sun.mp4")
TRANSCRIPTION_TEXT_PATH = os.path.abspath("./results/transcripcion.txt")
RESULT_AUDIO_TEMP_PATH = os.path.abspath("./results/audiov2.wav")
RESULT_AUDIO_FINAL_PATH = os.path.abspath("./assets/audio/audio.wav")
RESULT_VIDEO_PATH = os.path.abspath("./results/result_voice.mp4")
TEXT_TO_SPEECH_PATH = os.path.abspath("./src/text_to_speech.py")
RUN_INFERENCE_PATH = os.path.abspath("./src/run_inference.py")


def transcribir_con_progreso(audio_path):
    """Transcribe *audio_path* with Whisper, reporting progress in the UI.

    Writes the transcription to TRANSCRIPTION_TEXT_PATH and returns the
    transcribed text.
    """
    avance = gr.Progress()
    avance(0, "Iniciando transcripción...")
    modelo = "openai/whisper-large"
    avance(25, "Cargando modelo Whisper...")
    texto = transcribe_audio(audio_path, modelo)
    avance(75, "Guardando transcripción...")
    guardar_transcripcion(texto, filename=TRANSCRIPTION_TEXT_PATH)
    avance(100, "Transcripción completada.")
    return texto


def generar_audio_desde_texto():
    """Run the text-to-speech script and publish the audio it produces.

    Executes TEXT_TO_SPEECH_PATH in a subprocess. On success, copies the
    temporary audio file to RESULT_AUDIO_FINAL_PATH and returns that path;
    returns None when the expected audio file was not produced.

    Raises RuntimeError when the subprocess exits with a non-zero status.
    """
    print("Ejecutando text_to_speech...")
    proceso = subprocess.run(
        [sys.executable, TEXT_TO_SPEECH_PATH],
        capture_output=True,
        text=True,
    )
    print("stdout:", proceso.stdout)
    print("stderr:", proceso.stderr)

    if proceso.returncode != 0:
        raise RuntimeError(f"Error ejecutando text_to_speech.py: {proceso.stderr}")

    # Guard clause: bail out early if the TTS script produced no audio.
    if not os.path.exists(RESULT_AUDIO_TEMP_PATH):
        print("Audio temporal no encontrado")
        return None

    # Make sure the destination folder exists before copying.
    os.makedirs(os.path.dirname(RESULT_AUDIO_FINAL_PATH), exist_ok=True)
    shutil.copy(RESULT_AUDIO_TEMP_PATH, RESULT_AUDIO_FINAL_PATH)
    print(f"Audio copiado a: {RESULT_AUDIO_FINAL_PATH}")
    return RESULT_AUDIO_FINAL_PATH


def procesar_video_audio():
    """Run lip-sync inference over the final audio and the base video.

    Executes RUN_INFERENCE_PATH in a subprocess with the generated audio
    (RESULT_AUDIO_FINAL_PATH) and the base video (VIDEO_PATH). Returns
    RESULT_VIDEO_PATH when the output video exists, otherwise None.

    Raises RuntimeError when the subprocess exits with a non-zero status.
    """
    print("Iniciando procesamiento de video...")
    print("Audio de entrada:", RESULT_AUDIO_FINAL_PATH)
    print("Video de entrada:", VIDEO_PATH)

    result = subprocess.run(
        [sys.executable, RUN_INFERENCE_PATH, "--audio", RESULT_AUDIO_FINAL_PATH, "--video", VIDEO_PATH],
        capture_output=True,
        text=True
    )

    print("stdout:", result.stdout)
    print("stderr:", result.stderr)

    # Surface inference failures explicitly, matching the error handling of
    # generar_audio_desde_texto, instead of silently returning None below.
    if result.returncode != 0:
        raise RuntimeError(f"Error ejecutando run_inference.py: {result.stderr}")

    if os.path.exists(RESULT_VIDEO_PATH):
        print("Video generado:", RESULT_VIDEO_PATH)
        return RESULT_VIDEO_PATH
    else:
        print("No se generó el video")
        return None


def flujo_completo(audio_file_path):
    """End-to-end pipeline: recording -> transcription -> OpenAI -> TTS -> video.

    Copies the recorded audio into place, transcribes it, sends the
    transcription to the OpenAI helper, synthesizes speech and produces the
    lip-synced video. Returns a 5-tuple
    (status message, recorded audio path, transcription, TTS audio path,
    video path); on any failure the tuple carries the error message and
    None placeholders instead.
    """
    try:
        os.makedirs(os.path.dirname(AUDIO_RECORD_PATH), exist_ok=True)
        shutil.copy(audio_file_path, AUDIO_RECORD_PATH)
        print("Audio grabado copiado a:", AUDIO_RECORD_PATH)

        texto = transcribir_con_progreso(AUDIO_RECORD_PATH)
        print("Texto transcrito:", texto)

        respuesta = rtff(TRANSCRIPTION_TEXT_PATH)
        print("Respuesta de OpenAI:", respuesta)

        audio_tts = generar_audio_desde_texto()
        video_final = procesar_video_audio()

        return "Grabación recibida", AUDIO_RECORD_PATH, texto, audio_tts, video_final

    except Exception as exc:
        mensaje = str(exc)
        return (
            f"Error durante el flujo completo: {mensaje}",
            None,
            f"Error: {mensaje}",
            None,
            None,
        )


def interfaz():
    """Build the Gradio Blocks UI and wire the recorder to the pipeline."""
    with gr.Blocks() as demo:
        with gr.Row():
            # Left column: looping reference video plus the microphone input.
            with gr.Column():
                gr.Video(VIDEO_PATH, loop=True, autoplay=True, height=300, width=500)
                grabacion = gr.Audio(label="Graba tu voz", type="filepath", format="wav")
                estado = gr.Textbox(label="Estado", interactive=False)

            # Right column: all pipeline outputs, read-only.
            with gr.Column():
                audio_grabado = gr.Audio(label="Audio grabado", interactive=False)
                audio_tts = gr.Audio(label="Audio TTS", interactive=False)
                video_salida = gr.Video(label="Video procesado", interactive=False)
                transcripcion = gr.Textbox(label="Texto transcrito")

        # Any new recording kicks off the full pipeline.
        grabacion.change(
            flujo_completo,
            inputs=grabacion,
            outputs=[estado, audio_grabado, transcripcion, audio_tts, video_salida],
        )

    return demo


if __name__ == "__main__":
    # Build and serve the UI only when executed as a script.
    interfaz().launch()