Update app.py
app.py CHANGED
@@ -1,65 +1,37 @@
 # interfaceV2.py
 
 import gradio as gr
-import sounddevice as sd
-from scipy.io.wavfile import write
-import tempfile
 import shutil
 import os
 import subprocess
 import sys
 from whisper_audio_transcriber import transcribe_audio, guardar_transcripcion
-from call_openai_api import
-
-
-
-
-
-
-
-
-
-
-
-
-
-# Function to record 8-second audio
-def grabar_audio(duration=8, sample_rate=44100):
-    print("Starting recording...")
-    audio_data = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1)
-    print(f"Recording in progress for {duration} seconds...")
-    sd.wait()
-    print("Recording completed.")
-
-    temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
-    write(temp_audio.name, sample_rate, audio_data)
-    print("Audio temporarily saved at:", temp_audio.name)
-    temp_audio.close()  # Make sure to close it before using it
-    os.makedirs(os.path.dirname(AUDIO_RECORD_PATH), exist_ok=True)
-    shutil.copy(temp_audio.name, AUDIO_RECORD_PATH)
-    print(f"Recording copied to: {AUDIO_RECORD_PATH}")
-
-    return AUDIO_RECORD_PATH, "Recording completed."
-
-# Function to transcribe audio with Whisper
+from call_openai_api import moni as rtff
+
+# Paths
+AUDIO_RECORD_PATH = os.path.abspath("./assets/audio/grabacion_gradio.wav")
+VIDEO_PATH = os.path.abspath("./assets/video/data_video_sun.mp4")
+TRANSCRIPTION_TEXT_PATH = os.path.abspath("./results/transcripcion.txt")
+RESULT_AUDIO_TEMP_PATH = os.path.abspath("./results/audiov2.wav")
+RESULT_AUDIO_FINAL_PATH = os.path.abspath("./assets/audio/audio.wav")
+RESULT_VIDEO_PATH = os.path.abspath("./results/result_voice.mp4")
+TEXT_TO_SPEECH_PATH = os.path.abspath("./src/text_to_speech.py")
+RUN_INFERENCE_PATH = os.path.abspath("./src/run_inference.py")
+
+
 def transcribir_con_progreso(audio_path):
     progreso = gr.Progress()
-    progreso(0, "
+    progreso(0, "Iniciando transcripción...")
     model_name = "openai/whisper-large"
-    progreso(25, "
-
+    progreso(25, "Cargando modelo Whisper...")
     transcripcion = transcribe_audio(audio_path, model_name)
-    progreso(75, "
+    progreso(75, "Guardando transcripción...")
     guardar_transcripcion(transcripcion, filename=TRANSCRIPTION_TEXT_PATH)
-    progreso(100, "
-    if not os.path.exists(TRANSCRIPTION_TEXT_PATH):
-        raise FileNotFoundError(f"El archivo {TRANSCRIPTION_TEXT_PATH} no se generó.")
-
+    progreso(100, "Transcripción completada.")
     return transcripcion
 
-
+
 def generar_audio_desde_texto():
-    print("Generating audio from text...")
     result = subprocess.run(
         [sys.executable, TEXT_TO_SPEECH_PATH],
         capture_output=True,
@@ -67,117 +39,70 @@ def generar_audio_desde_texto():
     )
     if result.returncode != 0:
         raise RuntimeError(f"Error ejecutando text_to_speech.py: {result.stderr}")
-    if result.stdout:
-        print("Output:", result.stdout)
-    if result.stderr:
-        print("Errors:", result.stderr)
 
     if os.path.exists(RESULT_AUDIO_TEMP_PATH):
-        print(f"Temporary audio generated at: {RESULT_AUDIO_TEMP_PATH}")
-
         os.makedirs(os.path.dirname(RESULT_AUDIO_FINAL_PATH), exist_ok=True)
         shutil.copy(RESULT_AUDIO_TEMP_PATH, RESULT_AUDIO_FINAL_PATH)
-        print(f"Final audio copied to: {RESULT_AUDIO_FINAL_PATH}")
-
         return RESULT_AUDIO_FINAL_PATH
     else:
-        print(f"Error: Audio file was not generated in {RESULT_AUDIO_FINAL_PATH} ")
         return None
 
-# Function to process video and audio using run_inference.py with the generated audio file
-def procesar_video_audio():
-    print("Starting video and audio processing...")
-    run_inference_path = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/src/run_inference.py")
 
+def procesar_video_audio():
     result = subprocess.run(
-        [sys.executable,
+        [sys.executable, RUN_INFERENCE_PATH, "--audio", RESULT_AUDIO_FINAL_PATH, "--video", VIDEO_PATH],
         capture_output=True,
         text=True
     )
-
-    if result.stdout:
-        print("Output:", result.stdout)
-    if result.stderr:
-        print("Errors:", result.stderr)
-
     if os.path.exists(RESULT_VIDEO_PATH):
-        print(f"Processed video saved at: {RESULT_VIDEO_PATH}")
         return RESULT_VIDEO_PATH
     else:
-        print("Error: Video file was not generated at 'results/result_voice.mp4'")
         return None
 
-
+
+def flujo_completo(audio_file_path):
+    try:
+        shutil.copy(audio_file_path, AUDIO_RECORD_PATH)
+        transcripcion = transcribir_con_progreso(AUDIO_RECORD_PATH)
+        respuesta_openai = rtff(TRANSCRIPTION_TEXT_PATH)
+        audio_generado = generar_audio_desde_texto()
+        video_path = procesar_video_audio()
+
+        return "Grabación recibida", AUDIO_RECORD_PATH, transcripcion, audio_generado, video_path
+
+    except Exception as e:
+        return (
+            f"Error durante el flujo completo: {str(e)}",
+            None,
+            f"Error: {str(e)}",
+            None,
+            None
+        )
+
+
 def interfaz():
     with gr.Blocks() as demo:
         with gr.Row():
             with gr.Column():
                 gr.Video(VIDEO_PATH, loop=True, autoplay=True, height=300, width=500)
-
-                estado_grabacion = gr.Textbox(label="
+                audio_input = gr.Audio(source="microphone", type="filepath", label="Graba tu voz")
+                estado_grabacion = gr.Textbox(label="Estado", interactive=False)
 
             with gr.Column():
-                output_audio = gr.Audio(
-                output_audio_speech = gr.Audio(
-                video_resultado = gr.Video(
+                output_audio = gr.Audio(label="Audio grabado", interactive=False)
+                output_audio_speech = gr.Audio(label="Audio TTS", interactive=False)
+                video_resultado = gr.Video(label="Video procesado", interactive=False)
                 texto_transcripcion = gr.Textbox(label="Texto transcrito")
-
-
-
-
-
-
-            transcripcion = transcribir_con_progreso(AUDIO_RECORD_PATH)
-            audio_generado = generar_audio_desde_texto()
-            video_path = procesar_video_audio()
-
-            # Ensure function always returns 5 outputs for Gradio, even in error cases
-            if video_path and audio_generado:
-                return mensaje_grabacion, AUDIO_RECORD_PATH, transcripcion, audio_generado, video_path
-            else:
-                return mensaje_grabacion, AUDIO_RECORD_PATH, transcripcion, audio_generado or "Audio generation failed", video_path or "Video generation failed"
-        """
-        def flujo_completo():
-            try:
-                print("Inicio del flujo completo...")
-                # Record audio
-                audio_path, mensaje_grabacion = grabar_audio()
-                print("Audio grabado en:", audio_path)
-                # Transcribe audio
-                transcripcion = transcribir_con_progreso(audio_path)
-                print("Transcripción completada:", transcripcion)
-
-                #respuesta_openai = rtff(transcripcion)
-                respuesta_openai = rtff(TRANSCRIPTION_TEXT_PATH)
-                print("Respuesta generada por OpenAI")
-
-                # Generate audio from text
-                audio_generado = generar_audio_desde_texto()
-                print("Audio generado:", audio_generado)
-                # Process video and audio
-                video_path = procesar_video_audio()
-                print("Video procesado en:", video_path)
-                # Return results if everything succeeded
-                return mensaje_grabacion, audio_path, transcripcion, audio_generado, video_path
-
-            except Exception as e:
-                # Print the error in the terminal and return error messages to the interface
-                print("Error detectado en flujo completo:", str(e))
-                return (
-                    "Error durante el flujo completo",
-                    None,  # Recorded audio
-                    f"Error: {str(e)}",  # Transcription
-                    None,  # Generated audio
-                    None  # Processed video
-                )
-
-        grabar_button.click(
-            flujo_completo,
-            outputs=[estado_grabacion, output_audio, texto_transcripcion, output_audio_speech, video_resultado]
-        )
+
+        audio_input.change(
+            flujo_completo,
+            inputs=audio_input,
+            outputs=[estado_grabacion, output_audio, texto_transcripcion, output_audio_speech, video_resultado]
+        )
 
     return demo
 
+
 if __name__ == "__main__":
     demo = interfaz()
-    demo.launch(
+    demo.launch()
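
One portability note on the new recording input: `gr.Audio(source="microphone", type="filepath")` and the `.change()` trigger match the Gradio 3.x API. In Gradio 4.x the `source` parameter was renamed to `sources` and takes a list, and `gr.Audio` exposes a dedicated `stop_recording` event, which fires once per finished recording (`change` also fires when a handler updates the component's value, which would re-enter `flujo_completo`). Below is a minimal sketch of the same wiring, assuming Gradio 4.x; the `flujo_completo` stub here is a placeholder for the real pipeline committed above:

```python
import gradio as gr

def flujo_completo(audio_file_path):
    # Placeholder for the committed pipeline (transcription, OpenAI call,
    # TTS, lip-sync); it must return the same five outputs as above.
    return "Grabación recibida", audio_file_path, "", None, None

with gr.Blocks() as demo:
    # Gradio 4.x: `sources` is a list; `source="microphone"` raises a TypeError.
    audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Graba tu voz")
    estado_grabacion = gr.Textbox(label="Estado", interactive=False)
    output_audio = gr.Audio(label="Audio grabado", interactive=False)
    texto_transcripcion = gr.Textbox(label="Texto transcrito")
    output_audio_speech = gr.Audio(label="Audio TTS", interactive=False)
    video_resultado = gr.Video(label="Video procesado", interactive=False)

    # stop_recording fires once when the user finishes recording, so the
    # pipeline is not re-triggered by programmatic value updates.
    audio_input.stop_recording(
        flujo_completo,
        inputs=audio_input,
        outputs=[estado_grabacion, output_audio, texto_transcripcion,
                 output_audio_speech, video_resultado],
    )

if __name__ == "__main__":
    demo.launch()
```

On Gradio 3.x the committed `source="microphone"` / `.change()` wiring works as-is, so this only matters when upgrading the Space's `gradio` pin.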