"""Streamlit app: speech-to-text with AI analysis, and text-to-speech.

Section 1 transcribes an uploaded WAV file with Google's speech
recognition (via ``speech_recognition``) and sends the transcript to a
Groq-hosted LLaMA model for analysis. Section 2 synthesises speech from
typed text with gTTS and offers the result for playback and download.
"""

import base64
import os
import tempfile

import speech_recognition as sr
import streamlit as st
from dotenv import load_dotenv
from groq import Groq
from gtts import gTTS

# Load environment variables
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Initialize Groq Client
client = Groq(api_key=GROQ_API_KEY)

# Messages transcribe_audio() returns in place of a transcript. They are
# named so the UI can tell a failure apart from real text and avoid
# forwarding an error message to the LLM as if it were speech.
UNKNOWN_AUDIO_MESSAGE = "Could not understand the audio"
SERVICE_ERROR_MESSAGE = "Error with speech recognition service"
UNREADABLE_AUDIO_MESSAGE = "Could not read the audio file"
TRANSCRIPTION_ERRORS = frozenset(
    {
        UNKNOWN_AUDIO_MESSAGE,
        SERVICE_ERROR_MESSAGE,
        UNREADABLE_AUDIO_MESSAGE,
    }
)


# Function to transcribe voice to text
def transcribe_audio(audio_file):
    """Transcribe an audio file to text with Google's speech recognition.

    Args:
        audio_file: Path of the audio file to transcribe; it is passed
            straight to ``sr.AudioFile``.

    Returns:
        The recognised text, or one of the ``TRANSCRIPTION_ERRORS``
        messages when the file cannot be read, the speech cannot be
        understood, or the recognition service request fails.
    """
    recognizer = sr.Recognizer()

    try:
        with sr.AudioFile(audio_file) as source:
            audio_data = recognizer.record(source)
    except ValueError:
        # sr.AudioFile raises ValueError for corrupt/unsupported audio;
        # report it like the other failures instead of crashing the app.
        return UNREADABLE_AUDIO_MESSAGE

    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return UNKNOWN_AUDIO_MESSAGE
    except sr.RequestError:
        return SERVICE_ERROR_MESSAGE


# Function to generate speech from text
def text_to_speech(text, lang="pt"):
    """Synthesise ``text`` to an MP3 file with gTTS.

    Args:
        text: The text to speak.
        lang: Language code handed to gTTS. Defaults to ``"pt"``, the
            value this app has always used.

    Returns:
        Path of a newly created temporary ``.mp3`` file. The file is not
        removed automatically; the caller should delete it when done.
    """
    tts = gTTS(text=text, lang=lang)

    # Reserve a unique file name, then close our handle before gTTS writes
    # to that path so no open descriptor is leaked.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_mp3:
        temp_audio_path = tmp_mp3.name

    tts.save(temp_audio_path)
    return temp_audio_path


# Function to get AI insights from LLaMA 3 70B
def get_ai_response(text):
    """Ask the Groq-hosted LLaMA model to analyse ``text``.

    Args:
        text: Text to analyse; it is embedded in the user message.

    Returns:
        The message content of the first completion choice.
    """
    response = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[
            {
                "role": "system",
                "content": "You are an advanced AI that helps with speech processing.",
            },
            {"role": "user", "content": f"Analyze this text: {text}"},
        ],
    )
    return response.choices[0].message.content


# Streamlit UI
st.title("🎙️ AI Voice Converter: Speech-to-Text & Text-to-Speech")
st.write("Convert voice into text and generate AI-powered speech.")

# Voice-to-Text Section
st.subheader("🎤 Voice-to-Text")
audio_file = st.file_uploader("Upload an audio file (WAV format)", type=["wav"])

if audio_file:
    st.audio(audio_file, format="audio/wav")

    if st.button("Transcribe Audio"):
        # Use a unique temporary file rather than a fixed "temp.wav" so
        # concurrent sessions cannot overwrite each other's upload, and
        # remove it as soon as transcription has finished.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_wav:
            tmp_wav.write(audio_file.getbuffer())
            wav_path = tmp_wav.name
        try:
            transcribed_text = transcribe_audio(wav_path)
        finally:
            os.remove(wav_path)

        if transcribed_text in TRANSCRIPTION_ERRORS:
            # A failure message is not a transcript: show it as an error
            # and skip the AI analysis.
            st.error(transcribed_text)
        else:
            st.write("**Transcribed Text:**", transcribed_text)

            # AI insights
            ai_insights = get_ai_response(transcribed_text)
            st.write("**AI Analysis:**", ai_insights)

# Text-to-Voice Section
st.subheader("📝 Text-to-Speech")
input_text = st.text_area("Enter text to convert into speech")

if st.button("Generate Speech"):
    if input_text.strip():
        audio_path = text_to_speech(input_text)

        # Read the MP3 into memory, then delete the temporary file; both
        # the player and the download link are served from the bytes.
        try:
            with open(audio_path, "rb") as f:
                audio_bytes = f.read()
        finally:
            os.remove(audio_path)

        # Convert audio to base64 for the download link
        b64 = base64.b64encode(audio_bytes).decode()

        # Audio player and download link
        st.audio(audio_bytes, format="audio/mp3")
        st.markdown(
            f'<a href="data:audio/mp3;base64,{b64}" download="speech.mp3">'
            "Download Speech</a>",
            unsafe_allow_html=True,
        )
    else:
        st.error("Please enter text to generate speech.")