"""Streamlit voice-to-voice demo.

Records audio in the browser, transcribes it, generates a text reply and
plays the reply back as synthesized speech.
"""

import streamlit as st
from streamlit_mic_recorder import mic_recorder

from src.for_streamlit.spt import SpeechToText
from src.for_streamlit.texttospeech import TextToSpeech
from src.for_streamlit.texttotext import ConversationHandler

st.title("🎙️ Voice to Voice ")
st.write("Click the button below to start recording.")


# Cache the models to prevent reloading
@st.cache_resource
def load_speech_to_text():
    """Return a ``SpeechToText`` instance, cached via ``st.cache_resource``."""
    return SpeechToText()


@st.cache_resource
def load_conversation_handler():
    """Return a ``ConversationHandler`` instance, cached via ``st.cache_resource``."""
    return ConversationHandler()


@st.cache_resource
def load_text_to_speech():
    """Return a ``TextToSpeech`` instance, cached via ``st.cache_resource``."""
    return TextToSpeech()


# Load models once
speech_to_text = load_speech_to_text()
conversation_handler = load_conversation_handler()
text_to_speech = load_text_to_speech()

# Capture microphone input
audio_data = mic_recorder()


def main():
    """Run one pass of the record -> transcribe -> respond -> speak pipeline.

    Reads the module-level ``audio_data`` produced by ``mic_recorder()`` and
    writes the outcome of each stage to the Streamlit page. A stage that
    yields a falsy result ends the pass with the matching error message.
    """
    if not audio_data or "bytes" not in audio_data:
        st.warning("Please record some audio.")
        return

    audio_bytes = audio_data["bytes"]

    # Play recorded audio. The recorder is called with its defaults, so the
    # container is not necessarily WAV; use the format the recorder reports
    # (NOTE(review): expected to be a "format" entry such as "webm" -- confirm
    # against the installed streamlit_mic_recorder) and fall back to the
    # previous hard-coded WAV label when it is absent.
    recorded_format = audio_data.get("format") or "wav"
    st.audio(audio_bytes, format=f"audio/{recorded_format}")
    st.write("Transcribing...")

    # Transcribe the audio
    transcription = speech_to_text.record_and_transcribe(audio_bytes)
    if not transcription:
        st.error("No transcription available.")
        return

    st.success("Transcription:")
    st.write(transcription)
    st.write("Generating response...")

    response = conversation_handler.give_response(transcription)
    if not response:
        st.error("No response available.")
        return

    st.success("Response:")
    st.write(response.content)

    # Convert response text to speech
    audio_buffer = text_to_speech.synthesize(response.content)
    if not audio_buffer:
        st.error("No audio available.")
        return

    st.success("Generated audio:")
    st.audio(audio_buffer, format="audio/wav")


if __name__ == "__main__":
    main()