# mohi / app.py
# puzan789's picture
# updated
# 2d31940
import streamlit as st
from src.for_streamlit.spt import SpeechToText
from src.for_streamlit.texttotext import ConversationHandler
from src.for_streamlit.texttospeech import TextToSpeech
from streamlit_mic_recorder import mic_recorder
# Page header: the app title followed by a one-line usage hint.
# The title's leading emoji had been stored as mis-decoded UTF-8 bytes and
# rendered as garbage; it is the studio-microphone glyph (U+1F399 + U+FE0F),
# written here as escapes so the literal survives any re-encoding of the file.
st.title("\U0001F399\uFE0F Voice to Voice ")
st.write("Click the button below to start recording.")
# Each loader is wrapped in st.cache_resource so its object is created once
# and then reused instead of being rebuilt.
@st.cache_resource
def load_speech_to_text() -> SpeechToText:
    """Create the ``SpeechToText`` instance used for transcription.

    Returns:
        A ``SpeechToText`` object constructed with no arguments; the
        ``st.cache_resource`` decorator hands back the same object on
        later calls.
    """
    transcriber = SpeechToText()
    return transcriber
@st.cache_resource
def load_conversation_handler() -> ConversationHandler:
    """Create the ``ConversationHandler`` instance used to answer the user.

    Returns:
        A ``ConversationHandler`` object constructed with no arguments; the
        ``st.cache_resource`` decorator hands back the same object on
        later calls.
    """
    handler = ConversationHandler()
    return handler
@st.cache_resource
def load_text_to_speech() -> TextToSpeech:
    """Create the ``TextToSpeech`` instance used to voice the reply.

    Returns:
        A ``TextToSpeech`` object constructed with no arguments; the
        ``st.cache_resource`` decorator hands back the same object on
        later calls.
    """
    synthesizer = TextToSpeech()
    return synthesizer
# Load models once: each call goes through its st.cache_resource-wrapped
# loader, and the results are bound to module-level names that main() reads.
speech_to_text = load_speech_to_text()
conversation_handler = load_conversation_handler()
text_to_speech = load_text_to_speech()
# Capture microphone input. main() treats the result as possibly falsy and,
# when present, as a mapping holding the recording under the 'bytes' key.
# NOTE(review): mic_recorder() is called with its defaults, while main() labels
# the recorded bytes as "audio/wav" — confirm the recorder's default output
# format really is WAV (or pass the format explicitly).
audio_data = mic_recorder()
def main():
    """Run one record -> transcribe -> respond -> speak pass and render it.

    Reads the module-level ``audio_data`` recorder result. Each stage only
    runs if the previous one produced a truthy result; otherwise a single
    message is shown for the stage that came up empty and the function
    returns:

    * no recording (or no ``'bytes'`` entry) -> warning
    * empty transcription, response or synthesized audio -> error

    Returns:
        None. All output is rendered through Streamlit calls.
    """
    # Nothing recorded yet: prompt the user and stop.
    if not audio_data or 'bytes' not in audio_data:
        st.warning("Please record some audio.")
        return

    # Echo the recording back so the user can hear what was captured.
    recording = audio_data['bytes']
    st.audio(recording, format="audio/wav")
    st.write("Transcribing...")

    # Stage 1: speech -> text.
    transcript = speech_to_text.record_and_transcribe(recording)
    if not transcript:
        st.error("No transcription available.")
        return
    st.success("Transcription:")
    st.write(transcript)
    st.write("Generating response...")

    # Stage 2: text -> reply. The reply text is read from its ``content``
    # attribute.
    reply = conversation_handler.give_response(transcript)
    if not reply:
        st.error("No response available.")
        return
    st.success("Response:")
    st.write(reply.content)

    # Stage 3: reply text -> audio.
    spoken_reply = text_to_speech.synthesize(reply.content)
    if not spoken_reply:
        st.error("No audio available.")
        return
    st.success("Generated audio:")
    st.audio(spoken_reply, format="audio/wav")
# Script entry point: run the pipeline only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()