import streamlit as st
from src.for_streamlit.spt import SpeechToText
from src.for_streamlit.texttotext import ConversationHandler
from src.for_streamlit.texttospeech import TextToSpeech
from streamlit_mic_recorder import mic_recorder

# Page header: the title first, then a one-line usage hint shown beneath it.
st.title("🎙️ Voice to Voice ")
st.write("Click the button below to start recording.")

# Cache the models to prevent reloading
@st.cache_resource
def load_speech_to_text():
    """Create the speech-to-text component.

    Wrapped in ``st.cache_resource`` so the model is not reloaded each time
    the loader is called.

    Returns:
        A ``SpeechToText`` instance built with its default constructor.
    """
    transcriber = SpeechToText()
    return transcriber

@st.cache_resource
def load_conversation_handler():
    """Create the text-to-text conversation component.

    Wrapped in ``st.cache_resource`` so the model is not reloaded each time
    the loader is called.

    Returns:
        A ``ConversationHandler`` instance built with its default constructor.
    """
    handler = ConversationHandler()
    return handler

@st.cache_resource
def load_text_to_speech():
    """Create the text-to-speech component.

    Wrapped in ``st.cache_resource`` so the model is not reloaded each time
    the loader is called.

    Returns:
        A ``TextToSpeech`` instance built with its default constructor.
    """
    synthesizer = TextToSpeech()
    return synthesizer

# Obtain the three pipeline components from their cached loader functions.
speech_to_text = load_speech_to_text()
conversation_handler = load_conversation_handler()
text_to_speech = load_text_to_speech()

# Capture microphone input; main() reads this module-level value directly.
audio_data = mic_recorder()

def main():
    """Run one voice-to-voice round trip for the current recording.

    Reads the module-level ``audio_data`` and, when it holds a ``'bytes'``
    entry, walks through the pipeline in order: play back the recording,
    transcribe it, generate a reply, and synthesize the reply as audio.
    Each stage renders its result on the page; the first stage that yields
    a falsy result renders the corresponding message and ends the run.

    Returns:
        None. All output is rendered through Streamlit calls.
    """
    # Nothing recorded yet (or the recorder result carries no audio payload).
    if not audio_data or 'bytes' not in audio_data:
        st.warning("Please record some audio.")
        return

    recording = audio_data['bytes']

    # Echo the recording back before starting the transcription.
    st.audio(recording, format="audio/wav")
    st.write("Transcribing...")

    text = speech_to_text.record_and_transcribe(recording)
    if not text:
        st.error("No transcription available.")
        return

    st.success("Transcription:")
    st.write(text)

    st.write("Generating response...")
    reply = conversation_handler.give_response(text)
    if not reply:
        st.error("No response available.")
        return

    st.success("Response:")
    st.write(reply.content)

    # Turn the reply text into audio for playback.
    spoken = text_to_speech.synthesize(reply.content)
    if not spoken:
        st.error("No audio available.")
        return

    st.success("Generated audio:")
    st.audio(spoken, format="audio/wav")

# Run the page logic only when this file is executed as the main script.
if __name__ == "__main__":
    main()