fireedman committed on
Commit
d71aaa4
·
verified ·
1 Parent(s): d463f10

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -129
app.py CHANGED
@@ -1,65 +1,37 @@
1
  # interfaceV2.py
2
 
3
  import gradio as gr
4
- import sounddevice as sd
5
- from scipy.io.wavfile import write
6
- import tempfile
7
  import shutil
8
  import os
9
  import subprocess
10
  import sys
11
  from whisper_audio_transcriber import transcribe_audio, guardar_transcripcion
12
- from call_openai_api import moni as rtff # Asegúrate de que el archivo call_open_api.py esté en el mismo directorio
13
-
14
-
15
- # Paths to files (adjusted as per your specified structure)
16
- AUDIO_RECORD_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/assets/audio/grabacion_gradio.wav")
17
- #VIDEO_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/assets/video/data_video_sun_5s.mp4")
18
- VIDEO_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/assets/video/data_video_sun.mp4")
19
- #TRANSCRIPTION_TEXT_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/results/transcripcion.txt")
20
- TRANSCRIPTION_TEXT_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/results/transcripcion.txt")
21
- RESULT_AUDIO_TEMP_PATH = os.path.abspath( "C:/programacionEjercicios/miwav2lipv6/results/audiov2.wav")
22
- RESULT_AUDIO_FINAL_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/assets/audio/audio.wav")
23
- RESULT_VIDEO_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/results/result_voice.mp4")
24
- TEXT_TO_SPEECH_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/src/text_to_speech.py")
25
-
26
- # Function to record 8-second audio
27
- def grabar_audio(duration=8, sample_rate=44100):
28
- print("Starting recording...")
29
- audio_data = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1)
30
- print(f"Recording in progress for {duration} seconds...")
31
- sd.wait()
32
- print("Recording completed.")
33
-
34
- temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
35
- write(temp_audio.name, sample_rate, audio_data)
36
- print("Audio temporarily saved at:", temp_audio.name)
37
- temp_audio.close() # Asegúrate de cerrarlo antes de usarlo
38
- os.makedirs(os.path.dirname(AUDIO_RECORD_PATH), exist_ok=True)
39
- shutil.copy(temp_audio.name, AUDIO_RECORD_PATH)
40
- print(f"Recording copied to: {AUDIO_RECORD_PATH}")
41
-
42
- return AUDIO_RECORD_PATH, "Recording completed."
43
-
44
- # Function to transcribe audio with Whisper
45
  def transcribir_con_progreso(audio_path):
46
  progreso = gr.Progress()
47
- progreso(0, "Starting transcription...")
48
  model_name = "openai/whisper-large"
49
- progreso(25, "Loading Whisper model...")
50
-
51
  transcripcion = transcribe_audio(audio_path, model_name)
52
- progreso(75, "Saving transcription...")
53
  guardar_transcripcion(transcripcion, filename=TRANSCRIPTION_TEXT_PATH)
54
- progreso(100, "Transcription completed.")
55
- if not os.path.exists(TRANSCRIPTION_TEXT_PATH):
56
- raise FileNotFoundError(f"El archivo {TRANSCRIPTION_TEXT_PATH} no se generó.")
57
-
58
  return transcripcion
59
 
60
- # Function to convert text to audio using text_to_speech.py
61
  def generar_audio_desde_texto():
62
- print("Generating audio from text...")
63
  result = subprocess.run(
64
  [sys.executable, TEXT_TO_SPEECH_PATH],
65
  capture_output=True,
@@ -67,117 +39,70 @@ def generar_audio_desde_texto():
67
  )
68
  if result.returncode != 0:
69
  raise RuntimeError(f"Error ejecutando text_to_speech.py: {result.stderr}")
70
- if result.stdout:
71
- print("Output:", result.stdout)
72
- if result.stderr:
73
- print("Errors:", result.stderr)
74
 
75
  if os.path.exists(RESULT_AUDIO_TEMP_PATH):
76
- print(f"Temporary audio generated at: {RESULT_AUDIO_TEMP_PATH}")
77
-
78
  os.makedirs(os.path.dirname(RESULT_AUDIO_FINAL_PATH), exist_ok=True)
79
  shutil.copy(RESULT_AUDIO_TEMP_PATH, RESULT_AUDIO_FINAL_PATH)
80
- print(f"Final audio copied to: {RESULT_AUDIO_FINAL_PATH}")
81
-
82
  return RESULT_AUDIO_FINAL_PATH
83
  else:
84
- print(f"Error: Audio file was not generated in {RESULT_AUDIO_FINAL_PATH} ")
85
  return None
86
 
87
- # Function to process video and audio using run_inference.py with the generated audio file
88
- def procesar_video_audio():
89
- print("Starting video and audio processing...")
90
- run_inference_path = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/src/run_inference.py")
91
 
 
92
  result = subprocess.run(
93
- [sys.executable, run_inference_path, "--audio", RESULT_AUDIO_FINAL_PATH, "--video", VIDEO_PATH],
94
  capture_output=True,
95
  text=True
96
  )
97
-
98
- if result.stdout:
99
- print("Output:", result.stdout)
100
- if result.stderr:
101
- print("Errors:", result.stderr)
102
-
103
  if os.path.exists(RESULT_VIDEO_PATH):
104
- print(f"Processed video saved at: {RESULT_VIDEO_PATH}")
105
  return RESULT_VIDEO_PATH
106
  else:
107
- print("Error: Video file was not generated at 'results/result_voice.mp4'")
108
  return None
109
 
110
- # Gradio Interface Configuration
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  def interfaz():
112
  with gr.Blocks() as demo:
113
  with gr.Row():
114
  with gr.Column():
115
  gr.Video(VIDEO_PATH, loop=True, autoplay=True, height=300, width=500)
116
- grabar_button = gr.Button("Comenzando la grabacion de audio")
117
- estado_grabacion = gr.Textbox(label="Recording Status", interactive=False)
118
 
119
  with gr.Column():
120
- output_audio = gr.Audio(AUDIO_RECORD_PATH, label="Audio Grabado", interactive=False)
121
- output_audio_speech = gr.Audio(RESULT_AUDIO_FINAL_PATH, label="Audio TTS", interactive=False)
122
- video_resultado = gr.Video(RESULT_VIDEO_PATH,label="Video procesado", interactive=False)
123
  texto_transcripcion = gr.Textbox(label="Texto transcrito")
124
- progreso_transcripcion = gr.Textbox(label="Transcription Status", interactive=False)
125
-
126
- # Full flow: recording, transcription, text-to-speech, and video processing
127
- """
128
- def flujo_completo():
129
- _, mensaje_grabacion = grabar_audio()
130
- transcripcion = transcribir_con_progreso(AUDIO_RECORD_PATH)
131
- audio_generado = generar_audio_desde_texto()
132
- video_path = procesar_video_audio()
133
-
134
- # Ensure function always returns 5 outputs for Gradio, even in error cases
135
- if video_path and audio_generado:
136
- return mensaje_grabacion, AUDIO_RECORD_PATH, transcripcion, audio_generado, video_path
137
- else:
138
- return mensaje_grabacion, AUDIO_RECORD_PATH, transcripcion, audio_generado or "Audio generation failed", video_path or "Video generation failed"
139
- """
140
- def flujo_completo():
141
- try:
142
- print("Inicio del flujo completo...")
143
- # Grabar audio
144
- audio_path, mensaje_grabacion = grabar_audio()
145
- print("Audio grabado en:", audio_path)
146
- # Transcribir audio
147
- transcripcion = transcribir_con_progreso(audio_path)
148
- print("Transcripción completada:", transcripcion)
149
-
150
- #respuesta_openai = rtff(transcripcion)
151
- respuesta_openai = rtff(TRANSCRIPTION_TEXT_PATH)
152
- print("Respuesta generada por OpenAI")
153
-
154
- # Generar audio desde texto
155
- audio_generado = generar_audio_desde_texto()
156
- print("Audio generado:", audio_generado)
157
- # Procesar video y audio
158
- video_path = procesar_video_audio()
159
- print("Video procesado en:", video_path)
160
- # Devolver resultados si todo fue exitoso
161
- return mensaje_grabacion, audio_path, transcripcion, audio_generado, video_path
162
-
163
- except Exception as e:
164
- # Imprime el error en la terminal y regresa mensajes de error a la interfaz
165
- print("Error detectado en flujo completo:", str(e))
166
- return (
167
- "Error durante el flujo completo",
168
- None, # Audio grabado
169
- f"Error: {str(e)}", # Transcripción
170
- None, # Audio generado
171
- None # Video procesado
172
- )
173
-
174
- grabar_button.click(
175
- flujo_completo,
176
- outputs=[estado_grabacion, output_audio, texto_transcripcion, output_audio_speech, video_resultado]
177
- )
178
 
179
  return demo
180
 
 
181
  if __name__ == "__main__":
182
  demo = interfaz()
183
- demo.launch(allowed_paths=["C:/programacionEjercicios/miwav2lipv6/assets", "C:/programacionEjercicios/miwav2lipv6/results"])
 
1
# interfaceV2.py
#
# Gradio front-end for a record -> transcribe -> LLM reply -> TTS -> lip-sync
# pipeline.  The heavy steps (TTS, video inference) run as subprocess scripts.

import gradio as gr
import shutil
import os
import subprocess
import sys
from whisper_audio_transcriber import transcribe_audio, guardar_transcripcion
from call_openai_api import moni as rtff  # project helper that queries the OpenAI API

# Paths, resolved to absolute form from the current working directory.
AUDIO_RECORD_PATH = os.path.abspath("./assets/audio/grabacion_gradio.wav")  # copy of the user's recording
VIDEO_PATH = os.path.abspath("./assets/video/data_video_sun.mp4")           # base talking-head video
TRANSCRIPTION_TEXT_PATH = os.path.abspath("./results/transcripcion.txt")    # Whisper transcription output
RESULT_AUDIO_TEMP_PATH = os.path.abspath("./results/audiov2.wav")           # raw TTS output
RESULT_AUDIO_FINAL_PATH = os.path.abspath("./assets/audio/audio.wav")       # staged TTS audio fed to inference
RESULT_VIDEO_PATH = os.path.abspath("./results/result_voice.mp4")           # lip-synced result video
TEXT_TO_SPEECH_PATH = os.path.abspath("./src/text_to_speech.py")            # TTS script (run via subprocess)
RUN_INFERENCE_PATH = os.path.abspath("./src/run_inference.py")              # lip-sync inference script
20
+
21
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
def transcribir_con_progreso(audio_path):
    """Transcribe *audio_path* with Whisper while reporting progress to the UI.

    The transcription is persisted to TRANSCRIPTION_TEXT_PATH and returned.
    """
    # NOTE(review): gr.Progress() is instantiated manually rather than taken as
    # an event-handler parameter — confirm this Gradio version tracks it.
    barra = gr.Progress()
    barra(0, "Iniciando transcripción...")
    modelo = "openai/whisper-large"
    barra(25, "Cargando modelo Whisper...")
    texto = transcribe_audio(audio_path, modelo)
    barra(75, "Guardando transcripción...")
    guardar_transcripcion(texto, filename=TRANSCRIPTION_TEXT_PATH)
    barra(100, "Transcripción completada.")
    return texto
32
 
33
+
34
def generar_audio_desde_texto():
    """Run the external text-to-speech script and stage its output audio.

    Executes TEXT_TO_SPEECH_PATH with the current interpreter; that script is
    expected to write its audio to RESULT_AUDIO_TEMP_PATH.  On success the
    file is copied to RESULT_AUDIO_FINAL_PATH (creating the directory if
    needed) and that path is returned.

    Returns:
        str | None: RESULT_AUDIO_FINAL_PATH, or None if no audio was produced.

    Raises:
        RuntimeError: if the TTS subprocess exits with a non-zero status.
    """
    result = subprocess.run(
        [sys.executable, TEXT_TO_SPEECH_PATH],
        capture_output=True,
        # Decode stdout/stderr as text so result.stderr interpolates cleanly
        # below (matches the sibling procesar_video_audio call).
        text=True,
    )
    if result.returncode != 0:
        raise RuntimeError(f"Error ejecutando text_to_speech.py: {result.stderr}")

    if os.path.exists(RESULT_AUDIO_TEMP_PATH):
        os.makedirs(os.path.dirname(RESULT_AUDIO_FINAL_PATH), exist_ok=True)
        shutil.copy(RESULT_AUDIO_TEMP_PATH, RESULT_AUDIO_FINAL_PATH)
        return RESULT_AUDIO_FINAL_PATH
    else:
        # The script ran but produced no file; callers treat None as failure.
        return None
49
 
 
 
 
 
50
 
51
def procesar_video_audio():
    """Lip-sync the base video to the staged TTS audio via run_inference.py.

    Returns RESULT_VIDEO_PATH when the expected output video exists,
    otherwise None.
    """
    comando = [
        sys.executable,
        RUN_INFERENCE_PATH,
        "--audio", RESULT_AUDIO_FINAL_PATH,
        "--video", VIDEO_PATH,
    ]
    subprocess.run(comando, capture_output=True, text=True)
    # Success is judged by the presence of the output file, not by the
    # subprocess exit code (preserving the original behaviour).
    return RESULT_VIDEO_PATH if os.path.exists(RESULT_VIDEO_PATH) else None
61
 
62
+
63
def flujo_completo(audio_file_path):
    """Full pipeline: copy recording -> transcribe -> LLM reply -> TTS -> video.

    Triggered by the Gradio audio input's change event.  Always returns the
    5-tuple the UI expects: (status message, recorded-audio path,
    transcription text, generated-audio path, processed-video path); on
    failure the tuple carries the error message plus None placeholders.
    """
    # Gradio fires .change with None/empty when the audio widget is cleared;
    # return a neutral state instead of failing on the copy below.
    if not audio_file_path:
        return "Esperando una grabación...", None, "", None, None

    try:
        # Stage the recording where the rest of the pipeline expects it.
        os.makedirs(os.path.dirname(AUDIO_RECORD_PATH), exist_ok=True)
        shutil.copy(audio_file_path, AUDIO_RECORD_PATH)
        transcripcion = transcribir_con_progreso(AUDIO_RECORD_PATH)
        # Side effect only: produces the reply text consumed by the TTS script.
        rtff(TRANSCRIPTION_TEXT_PATH)
        audio_generado = generar_audio_desde_texto()
        video_path = procesar_video_audio()
        return "Grabación recibida", AUDIO_RECORD_PATH, transcripcion, audio_generado, video_path

    except Exception as e:
        # UI boundary: surface the error in the interface instead of crashing,
        # keeping the output arity Gradio requires.
        return (
            f"Error durante el flujo completo: {str(e)}",
            None,
            f"Error: {str(e)}",
            None,
            None
        )
81
+
82
+
83
def interfaz():
    """Build the Gradio Blocks UI and wire the microphone to the pipeline."""
    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column():
                gr.Video(VIDEO_PATH, loop=True, autoplay=True, height=300, width=500)
                mic = gr.Audio(source="microphone", type="filepath", label="Graba tu voz")
                estado = gr.Textbox(label="Estado", interactive=False)

            with gr.Column():
                audio_grabado = gr.Audio(label="Audio grabado", interactive=False)
                audio_tts = gr.Audio(label="Audio TTS", interactive=False)
                video_final = gr.Video(label="Video procesado", interactive=False)
                transcrito = gr.Textbox(label="Texto transcrito")

        # Every new recording kicks off the whole pipeline.
        mic.change(
            fn=flujo_completo,
            inputs=mic,
            outputs=[estado, audio_grabado, transcrito, audio_tts, video_final],
        )

    return demo
104
 
105
+
106
if __name__ == "__main__":
    # Build and launch the UI when executed as a script.
    interfaz().launch()