fireedman committed
Commit 5959802 · verified · 1 Parent(s): d4757ae

Create app.py

Files changed (1)
app.py +183 -0
app.py ADDED
@@ -0,0 +1,183 @@
+ # interfaceV2.py
+
+ import gradio as gr
+ import sounddevice as sd
+ from scipy.io.wavfile import write
+ import tempfile
+ import shutil
+ import os
+ import subprocess
+ import sys
+ from whisper_audio_transcriber import transcribe_audio, guardar_transcripcion
+ from call_openai_api import moni as rtff  # Make sure call_openai_api.py is in the same directory
+
+
+ # Paths to project files
+ AUDIO_RECORD_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/assets/audio/grabacion_gradio.wav")
+ #VIDEO_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/assets/video/data_video_sun_5s.mp4")
+ VIDEO_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/assets/video/data_video_sun.mp4")
+ TRANSCRIPTION_TEXT_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/results/transcripcion.txt")
+ RESULT_AUDIO_TEMP_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/results/audiov2.wav")
+ RESULT_AUDIO_FINAL_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/assets/audio/audio.wav")
+ RESULT_VIDEO_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/results/result_voice.mp4")
+ TEXT_TO_SPEECH_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/src/text_to_speech.py")
+
+ # Function to record 8-second audio
+ def grabar_audio(duration=8, sample_rate=44100):
+     print("Starting recording...")
+     audio_data = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1)
+     print(f"Recording in progress for {duration} seconds...")
+     sd.wait()
+     print("Recording completed.")
+
+     # Close the temporary file handle before writing to it by name (required on Windows)
+     temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
+     temp_audio.close()
+     write(temp_audio.name, sample_rate, audio_data)
+     print("Audio temporarily saved at:", temp_audio.name)
+     os.makedirs(os.path.dirname(AUDIO_RECORD_PATH), exist_ok=True)
+     shutil.copy(temp_audio.name, AUDIO_RECORD_PATH)
+     print(f"Recording copied to: {AUDIO_RECORD_PATH}")
+
+     return AUDIO_RECORD_PATH, "Recording completed."
+
+ # Function to transcribe audio with Whisper
+ def transcribir_con_progreso(audio_path, progreso=gr.Progress()):
+     progreso(0, "Starting transcription...")
+     model_name = "openai/whisper-large"
+     progreso(0.25, "Loading Whisper model...")
+
+     transcripcion = transcribe_audio(audio_path, model_name)
+     progreso(0.75, "Saving transcription...")
+     guardar_transcripcion(transcripcion, filename=TRANSCRIPTION_TEXT_PATH)
+     progreso(1.0, "Transcription completed.")
+     if not os.path.exists(TRANSCRIPTION_TEXT_PATH):
+         raise FileNotFoundError(f"The file {TRANSCRIPTION_TEXT_PATH} was not generated.")
+
+     return transcripcion
+
+ # Function to convert text to audio using text_to_speech.py
+ def generar_audio_desde_texto():
+     print("Generating audio from text...")
+     result = subprocess.run(
+         [sys.executable, TEXT_TO_SPEECH_PATH],
+         capture_output=True,
+         text=True
+     )
+     if result.returncode != 0:
+         raise RuntimeError(f"Error running text_to_speech.py: {result.stderr}")
+     if result.stdout:
+         print("Output:", result.stdout)
+     if result.stderr:
+         print("Errors:", result.stderr)
+
+     if os.path.exists(RESULT_AUDIO_TEMP_PATH):
+         print(f"Temporary audio generated at: {RESULT_AUDIO_TEMP_PATH}")
+
+         os.makedirs(os.path.dirname(RESULT_AUDIO_FINAL_PATH), exist_ok=True)
+         shutil.copy(RESULT_AUDIO_TEMP_PATH, RESULT_AUDIO_FINAL_PATH)
+         print(f"Final audio copied to: {RESULT_AUDIO_FINAL_PATH}")
+
+         return RESULT_AUDIO_FINAL_PATH
+     else:
+         print(f"Error: Audio file was not generated at {RESULT_AUDIO_TEMP_PATH}")
+         return None
+
+ # Function to process video and audio using run_inference.py with the generated audio file
+ def procesar_video_audio():
+     print("Starting video and audio processing...")
+     run_inference_path = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/src/run_inference.py")
+
+     result = subprocess.run(
+         [sys.executable, run_inference_path, "--audio", RESULT_AUDIO_FINAL_PATH, "--video", VIDEO_PATH],
+         capture_output=True,
+         text=True
+     )
+
+     if result.stdout:
+         print("Output:", result.stdout)
+     if result.stderr:
+         print("Errors:", result.stderr)
+
+     if os.path.exists(RESULT_VIDEO_PATH):
+         print(f"Processed video saved at: {RESULT_VIDEO_PATH}")
+         return RESULT_VIDEO_PATH
+     else:
+         print(f"Error: Video file was not generated at {RESULT_VIDEO_PATH}")
+         return None
+
+ # Gradio Interface Configuration
+ def interfaz():
+     with gr.Blocks() as demo:
+         with gr.Row():
+             with gr.Column():
+                 gr.Video(VIDEO_PATH, loop=True, autoplay=True, height=300, width=500)
+                 grabar_button = gr.Button("Start audio recording")
+                 estado_grabacion = gr.Textbox(label="Recording Status", interactive=False)
+
+             with gr.Column():
+                 output_audio = gr.Audio(AUDIO_RECORD_PATH, label="Recorded Audio", interactive=False)
+                 output_audio_speech = gr.Audio(RESULT_AUDIO_FINAL_PATH, label="TTS Audio", interactive=False)
+                 video_resultado = gr.Video(RESULT_VIDEO_PATH, label="Processed Video", interactive=False)
+                 texto_transcripcion = gr.Textbox(label="Transcribed Text")
+                 progreso_transcripcion = gr.Textbox(label="Transcription Status", interactive=False)
+
+         # Full flow: recording, transcription, text-to-speech, and video processing
+         """
+         def flujo_completo():
+             _, mensaje_grabacion = grabar_audio()
+             transcripcion = transcribir_con_progreso(AUDIO_RECORD_PATH)
+             audio_generado = generar_audio_desde_texto()
+             video_path = procesar_video_audio()
+
+             # Ensure function always returns 5 outputs for Gradio, even in error cases
+             if video_path and audio_generado:
+                 return mensaje_grabacion, AUDIO_RECORD_PATH, transcripcion, audio_generado, video_path
+             else:
+                 return mensaje_grabacion, AUDIO_RECORD_PATH, transcripcion, audio_generado or "Audio generation failed", video_path or "Video generation failed"
+         """
+         def flujo_completo():
+             try:
+                 print("Starting the full flow...")
+                 # Record audio
+                 audio_path, mensaje_grabacion = grabar_audio()
+                 print("Audio recorded at:", audio_path)
+                 # Transcribe audio
+                 transcripcion = transcribir_con_progreso(audio_path)
+                 print("Transcription completed:", transcripcion)
+
+                 # respuesta_openai = rtff(transcripcion)
+                 respuesta_openai = rtff(TRANSCRIPTION_TEXT_PATH)
+                 print("Response generated by OpenAI")
+
+                 # Generate audio from the text
+                 audio_generado = generar_audio_desde_texto()
+                 print("Audio generated:", audio_generado)
+                 # Process video and audio
+                 video_path = procesar_video_audio()
+                 print("Video processed at:", video_path)
+                 # Return the results if everything succeeded
+                 return mensaje_grabacion, audio_path, transcripcion, audio_generado, video_path
+
+             except Exception as e:
+                 # Print the error to the terminal and return error messages to the interface
+                 print("Error detected in the full flow:", str(e))
+                 return (
+                     "Error during the full flow",
+                     None,                # Recorded audio
+                     f"Error: {str(e)}",  # Transcription
+                     None,                # Generated audio
+                     None                 # Processed video
+                 )
+
+         grabar_button.click(
+             flujo_completo,
+             outputs=[estado_grabacion, output_audio, texto_transcripcion, output_audio_speech, video_resultado]
+         )
+
+     return demo
+
+ if __name__ == "__main__":
+     demo = interfaz()
+     demo.launch(allowed_paths=["C:/programacionEjercicios/miwav2lipv6/assets", "C:/programacionEjercicios/miwav2lipv6/results"])
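Note: the helpers imported at the top (whisper_audio_transcriber, call_openai_api, and the text_to_speech.py / run_inference.py scripts) are not part of this commit. For context only, here is a minimal sketch of what whisper_audio_transcriber might look like, assuming it simply wraps the Hugging Face transformers ASR pipeline and matches the call sites above (transcribe_audio(audio_path, model_name) and guardar_transcripcion(transcripcion, filename=...)); the actual module in the repo may differ.

# whisper_audio_transcriber.py -- hypothetical sketch, not the module used by this commit
from transformers import pipeline

def transcribe_audio(audio_path, model_name="openai/whisper-large"):
    # Load an ASR pipeline for the given Whisper checkpoint and transcribe the audio file
    asr = pipeline("automatic-speech-recognition", model=model_name)
    result = asr(audio_path)
    return result["text"]

def guardar_transcripcion(transcripcion, filename="transcripcion.txt"):
    # Persist the transcription so downstream scripts (e.g. call_openai_api) can read it
    with open(filename, "w", encoding="utf-8") as f:
        f.write(transcripcion)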