Spaces:
Running
Running
| import os | |
| import streamlit as st | |
| import speech_recognition as sr | |
| from gtts import gTTS | |
| from groq import Groq | |
| from dotenv import load_dotenv | |
| import tempfile | |
| import base64 | |
# Read settings from a .env file into the process environment.
load_dotenv()
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Module-level Groq client shared by the AI helper defined further down.
client = Groq(api_key=GROQ_API_KEY)
def transcribe_audio(audio_file):
    """Convert a spoken recording into text.

    Args:
        audio_file: Audio source passed directly to ``sr.AudioFile``.

    Returns:
        The text produced by ``recognize_google``. If the speech cannot be
        understood, or the recognition request fails, a fixed
        human-readable message is returned instead of raising.
    """
    engine = sr.Recognizer()

    # Capture the complete recording before handing it to the recognizer.
    with sr.AudioFile(audio_file) as wav_source:
        recording = engine.record(wav_source)

    try:
        return engine.recognize_google(recording)
    except sr.UnknownValueError:
        return "Could not understand the audio"
    except sr.RequestError:
        return "Error with speech recognition service"
def text_to_speech(text):
    """Synthesize English speech for *text* and store it as an MP3 file.

    Args:
        text: The text to be spoken.

    Returns:
        Path of a newly created temporary ``.mp3`` file. The file is created
        with ``delete=False``, so it persists until the caller removes it.
    """
    tts = gTTS(text=text, lang="en")

    # Reserve a unique path and close the handle immediately instead of
    # relying on garbage collection; tts.save() is given the path and writes
    # the file itself.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        temp_audio_path = tmp.name

    tts.save(temp_audio_path)
    return temp_audio_path
def get_ai_response(text):
    """Request an analysis of *text* from the ``llama-3.3-70b-versatile`` model.

    Args:
        text: The text to include in the user prompt.

    Returns:
        The message content of the first choice in the chat completion.
    """
    conversation = [
        {"role": "system", "content": "You are an advanced AI that helps with speech processing."},
        {"role": "user", "content": f"Analyze this text: {text}"},
    ]
    completion = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
# Streamlit UI
st.title("🎙️ AI Voice Converter: Speech-to-Text & Text-to-Speech")
st.write("Convert voice into text and generate AI-powered speech.")

# Voice-to-Text Section
st.subheader("🎤 Voice-to-Text")
audio_file = st.file_uploader("Upload an audio file (WAV format)", type=["wav"])
if audio_file:
    st.audio(audio_file, format="audio/wav")

    if st.button("Transcribe Audio"):
        # Stage the upload in a uniquely named temp file. A fixed "temp.wav"
        # in the working directory would be shared by every concurrent
        # session, so one user's upload could overwrite another's.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_tmp:
            wav_tmp.write(audio_file.getbuffer())
            wav_path = wav_tmp.name
        try:
            transcribed_text = transcribe_audio(wav_path)
        finally:
            # Always remove the staged copy, even if transcription raises.
            os.remove(wav_path)

        # transcribe_audio() reports failures by returning one of these
        # messages instead of raising; do not send them to the LLM as if
        # they were what the speaker said.
        transcription_failures = (
            "Could not understand the audio",
            "Error with speech recognition service",
        )
        if transcribed_text in transcription_failures:
            st.error(transcribed_text)
        else:
            st.write("**Transcribed Text:**", transcribed_text)

            # AI insights
            ai_insights = get_ai_response(transcribed_text)
            st.write("**AI Analysis:**", ai_insights)

# Text-to-Voice Section
st.subheader("🔊 Text-to-Speech")
input_text = st.text_area("Enter text to convert into speech")
if st.button("Generate Speech"):
    if input_text.strip():
        audio_path = text_to_speech(input_text)

        # Load the MP3 into memory, then delete the temp file so repeated
        # clicks do not accumulate files on disk.
        try:
            with open(audio_path, "rb") as f:
                audio_bytes = f.read()
        finally:
            os.remove(audio_path)

        # Base64-encode the audio so it can be embedded in a download link.
        b64 = base64.b64encode(audio_bytes).decode()

        # Audio player (fed from memory) and download link
        st.audio(audio_bytes, format="audio/mp3")
        st.markdown(
            f'<a href="data:audio/mp3;base64,{b64}" download="output.mp3">Download Speech</a>',
            unsafe_allow_html=True,
        )
    else:
        st.error("Please enter text to generate speech.")