# interfaceV2.py
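"""Gradio front end for the full talking-avatar pipeline:

1. Record microphone audio.
2. Transcribe it with Whisper.
3. Send the transcription to the OpenAI helper (call_openai_api.moni).
4. Synthesize the response with text_to_speech.py.
5. Lip-sync the base video to the new audio via run_inference.py.
"""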
import gradio as gr
import sounddevice as sd
from scipy.io.wavfile import write
import tempfile
import shutil
import os
import subprocess
import sys
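
# Local helper modules, expected alongside this file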
from whisper_audio_transcriber import transcribe_audio, guardar_transcripcion
from call_openai_api import moni as rtff  # make sure call_openai_api.py is in this same directory

# Absolute paths to the pipeline's input/output files
AUDIO_RECORD_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/assets/audio/grabacion_gradio.wav")
#VIDEO_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/assets/video/data_video_sun_5s.mp4")
VIDEO_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/assets/video/data_video_sun.mp4")
TRANSCRIPTION_TEXT_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/results/transcripcion.txt")
RESULT_AUDIO_TEMP_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/results/audiov2.wav")
RESULT_AUDIO_FINAL_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/assets/audio/audio.wav")
RESULT_VIDEO_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/results/result_voice.mp4")
TEXT_TO_SPEECH_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/src/text_to_speech.py")
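
# NOTE: all paths above are absolute Windows paths for this machine;
# adjust them to match your own checkout.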

# Record audio from the default microphone (8 seconds by default)
def grabar_audio(duration=8, sample_rate=44100):
    print("Starting recording...")
    # Record mono 16-bit PCM so the resulting WAV is broadly compatible
    audio_data = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1, dtype="int16")
    print(f"Recording in progress for {duration} seconds...")
    sd.wait()
    print("Recording completed.")
    # Close the handle before writing: on Windows a NamedTemporaryFile cannot
    # be reopened by name while it is still open
    temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    temp_audio.close()
    write(temp_audio.name, sample_rate, audio_data)
    print("Audio temporarily saved at:", temp_audio.name)
    os.makedirs(os.path.dirname(AUDIO_RECORD_PATH), exist_ok=True)
    shutil.copy(temp_audio.name, AUDIO_RECORD_PATH)
    print(f"Recording copied to: {AUDIO_RECORD_PATH}")
    return AUDIO_RECORD_PATH, "Recording completed."

# Transcribe the recorded audio with Whisper and save the text to disk
def transcribir_con_progreso(audio_path):
    # gr.Progress expects progress values as fractions in [0, 1]
    progreso = gr.Progress()
    progreso(0, "Starting transcription...")
    model_name = "openai/whisper-large"
    progreso(0.25, "Loading Whisper model...")
    transcripcion = transcribe_audio(audio_path, model_name)
    progreso(0.75, "Saving transcription...")
    guardar_transcripcion(transcripcion, filename=TRANSCRIPTION_TEXT_PATH)
    progreso(1.0, "Transcription completed.")
    if not os.path.exists(TRANSCRIPTION_TEXT_PATH):
        raise FileNotFoundError(f"The file {TRANSCRIPTION_TEXT_PATH} was not generated.")
    return transcripcion

# Generate speech audio from the saved response text using text_to_speech.py
def generar_audio_desde_texto():
    print("Generating audio from text...")
    result = subprocess.run(
        [sys.executable, TEXT_TO_SPEECH_PATH],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        raise RuntimeError(f"Error running text_to_speech.py: {result.stderr}")
    if result.stdout:
        print("Output:", result.stdout)
    if result.stderr:
        print("Errors:", result.stderr)
    if os.path.exists(RESULT_AUDIO_TEMP_PATH):
        print(f"Temporary audio generated at: {RESULT_AUDIO_TEMP_PATH}")
        os.makedirs(os.path.dirname(RESULT_AUDIO_FINAL_PATH), exist_ok=True)
        shutil.copy(RESULT_AUDIO_TEMP_PATH, RESULT_AUDIO_FINAL_PATH)
        print(f"Final audio copied to: {RESULT_AUDIO_FINAL_PATH}")
        return RESULT_AUDIO_FINAL_PATH
    else:
        print(f"Error: audio file was not generated at {RESULT_AUDIO_TEMP_PATH}")
        return None

# Lip-sync the base video to the generated audio via run_inference.py
def procesar_video_audio():
    print("Starting video and audio processing...")
    run_inference_path = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/src/run_inference.py")
    result = subprocess.run(
        [sys.executable, run_inference_path, "--audio", RESULT_AUDIO_FINAL_PATH, "--video", VIDEO_PATH],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        raise RuntimeError(f"Error running run_inference.py: {result.stderr}")
    if result.stdout:
        print("Output:", result.stdout)
    if result.stderr:
        print("Errors:", result.stderr)
    if os.path.exists(RESULT_VIDEO_PATH):
        print(f"Processed video saved at: {RESULT_VIDEO_PATH}")
        return RESULT_VIDEO_PATH
    else:
        print(f"Error: video file was not generated at {RESULT_VIDEO_PATH}")
        return None

# Gradio interface configuration
def interfaz():
    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column():
                gr.Video(VIDEO_PATH, loop=True, autoplay=True, height=300, width=500)
                grabar_button = gr.Button("Start audio recording")
                estado_grabacion = gr.Textbox(label="Recording Status", interactive=False)
            with gr.Column():
                output_audio = gr.Audio(AUDIO_RECORD_PATH, label="Recorded Audio", interactive=False)
                output_audio_speech = gr.Audio(RESULT_AUDIO_FINAL_PATH, label="TTS Audio", interactive=False)
                video_resultado = gr.Video(RESULT_VIDEO_PATH, label="Processed Video", interactive=False)
        texto_transcripcion = gr.Textbox(label="Transcribed Text")
        progreso_transcripcion = gr.Textbox(label="Transcription Status", interactive=False)
        # Full flow: recording, transcription, OpenAI response, text-to-speech, and video processing
"""
def flujo_completo():
_, mensaje_grabacion = grabar_audio()
transcripcion = transcribir_con_progreso(AUDIO_RECORD_PATH)
audio_generado = generar_audio_desde_texto()
video_path = procesar_video_audio()
# Ensure function always returns 5 outputs for Gradio, even in error cases
if video_path and audio_generado:
return mensaje_grabacion, AUDIO_RECORD_PATH, transcripcion, audio_generado, video_path
else:
return mensaje_grabacion, AUDIO_RECORD_PATH, transcripcion, audio_generado or "Audio generation failed", video_path or "Video generation failed"
"""
        def flujo_completo():
            try:
                print("Starting the full pipeline...")
                # Record audio
                audio_path, mensaje_grabacion = grabar_audio()
                print("Audio recorded at:", audio_path)
                # Transcribe audio
                transcripcion = transcribir_con_progreso(audio_path)
                print("Transcription completed:", transcripcion)
                # rtff (moni) takes the path to the transcription file, not the
                # raw text; it is expected to save the response where
                # text_to_speech.py will read it
                respuesta_openai = rtff(TRANSCRIPTION_TEXT_PATH)
                print("Response generated by OpenAI")
                # Generate audio from the response text
                audio_generado = generar_audio_desde_texto()
                print("Audio generated:", audio_generado)
                # Lip-sync the video to the generated audio
                video_path = procesar_video_audio()
                print("Video processed at:", video_path)
                # Return all results if every step succeeded
                return mensaje_grabacion, audio_path, transcripcion, audio_generado, video_path
            except Exception as e:
                # Print the error to the terminal and surface it in the interface
                print("Error detected in the full pipeline:", str(e))
                return (
                    "Error during the full pipeline",
                    None,                # recorded audio
                    f"Error: {str(e)}",  # transcription
                    None,                # generated audio
                    None,                # processed video
                )
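        # One click runs the whole pipeline; the five returned values map,
        # in order, to the five output components below.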
        grabar_button.click(
            flujo_completo,
            outputs=[estado_grabacion, output_audio, texto_transcripcion, output_audio_speech, video_resultado],
        )
    return demo
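
# allowed_paths lets Gradio serve files that live outside its default
# directories (the assets and results folders used above)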
if __name__ == "__main__":
demo = interfaz()
demo.launch(allowed_paths=["C:/programacionEjercicios/miwav2lipv6/assets", "C:/programacionEjercicios/miwav2lipv6/results"])