|
import streamlit as st |
|
from src.for_streamlit.spt import SpeechToText |
|
from src.for_streamlit.texttotext import ConversationHandler |
|
from src.for_streamlit.texttospeech import TextToSpeech |
|
from streamlit_mic_recorder import mic_recorder |
|
|
|
# Page header. The microphone emoji is written with escapes (U+1F399 followed
# by the U+FE0F variation selector) so the title survives the file being
# re-saved or transported in a non-UTF-8 encoding.
st.title("\U0001F399\uFE0F Voice to Voice ")
st.write("Click the button below to start recording.")
|
|
|
|
|
@st.cache_resource
def load_speech_to_text():
    """Create the app's ``SpeechToText`` engine.

    Wrapped in ``st.cache_resource`` so Streamlit can hand back the same
    instance on later script reruns instead of constructing a new one.
    """
    recognizer = SpeechToText()
    return recognizer
|
|
|
@st.cache_resource
def load_conversation_handler():
    """Create the app's ``ConversationHandler``.

    Wrapped in ``st.cache_resource`` so Streamlit can hand back the same
    instance on later script reruns instead of constructing a new one.
    """
    handler = ConversationHandler()
    return handler
|
|
|
@st.cache_resource
def load_text_to_speech():
    """Create the app's ``TextToSpeech`` synthesizer.

    Wrapped in ``st.cache_resource`` so Streamlit can hand back the same
    instance on later script reruns instead of constructing a new one.
    """
    synthesizer = TextToSpeech()
    return synthesizer
|
|
|
|
|
# Pipeline stages, obtained through the cached loader functions so each is
# built once and then shared by every call to main().
speech_to_text = load_speech_to_text()
conversation_handler = load_conversation_handler()
text_to_speech = load_text_to_speech()

# Render the recorder widget. main() treats a falsy result, or one without a
# 'bytes' entry, as "nothing recorded yet".
audio_data = mic_recorder()
|
|
|
def main():
    """Run one voice-to-voice round trip for the current recording.

    Reads the module-level ``audio_data`` produced by the recorder widget,
    plays the recording back, transcribes it, asks the conversation handler
    for a reply, and plays the synthesized reply. Any stage that yields a
    falsy result reports a message in the UI and ends the run early.
    """
    if not audio_data or 'bytes' not in audio_data:
        st.warning("Please record some audio.")
        return

    audio_bytes = audio_data['bytes']

    # Label the playback with the container format the recorder reports
    # rather than assuming WAV; fall back to "wav" when the recorder result
    # carries no usable 'format' entry.
    recorded_format = audio_data.get('format') or 'wav'
    st.audio(audio_bytes, format=f"audio/{recorded_format}")
    st.write("Transcribing...")

    transcription = speech_to_text.record_and_transcribe(audio_bytes)
    if not transcription:
        st.error("No transcription available.")
        return

    st.success("Transcription:")
    st.write(transcription)

    st.write("Generating response...")
    response = conversation_handler.give_response(transcription)
    if not response:
        st.error("No response available.")
        return

    # The reply text lives on the response object's ``content`` attribute.
    reply_text = response.content
    st.success("Response:")
    st.write(reply_text)

    audio_buffer = text_to_speech.synthesize(reply_text)
    if not audio_buffer:
        st.error("No audio available.")
        return

    st.success("Generated audio:")
    # NOTE(review): assumes the synthesizer emits WAV data, as the original
    # code did -- confirm against TextToSpeech.synthesize.
    st.audio(audio_buffer, format="audio/wav")
|
|
|
# Run the pipeline only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|