Spaces:

Abbas0786
/

UrduVoiceToVoice

Build error

File size: 3,692 Bytes

import streamlit as st
import requests
from pydub import AudioSegment
from pydub.playback import play
from io import BytesIO
import os
from gtts import gTTS
import speech_recognition as sr
from groq import Groq
import tempfile

# Groq client configuration: the API key comes from the GROQ_API_KEY
# environment variable (None when the variable is not set).
groq_api_key = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=groq_api_key)

def process_text(text, model="llama3-8b-8192"):
    """Send *text* to Groq as a single user message and return the reply.

    Args:
        text: Prompt forwarded verbatim as the content of one "user" message.
        model: Groq model identifier to query. Defaults to the model this
            function previously hard-coded, so existing callers are
            unaffected.

    Returns:
        The content of the first completion choice, or a string starting
        with "Error fetching Groq data:" when the request or the response
        handling raises.
    """
    try:
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "user", "content": text},
            ],
            model=model,
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        # Deliberate best-effort: the failure is reported as text instead of
        # being raised, so the caller always receives a string.
        return f"Error fetching Groq data: {e}"

def text_to_speech(text, lang='ur'):
    """Synthesize *text* with gTTS and return the audio as an in-memory stream.

    Args:
        text: The text to speak.
        lang: Language code handed to gTTS (defaults to 'ur').

    Returns:
        A BytesIO holding the synthesized audio, rewound to position 0 so it
        can be read from the start.
    """
    buffer = BytesIO()
    gTTS(text=text, lang=lang).write_to_fp(buffer)
    # Rewind: write_to_fp leaves the cursor at the end of the data.
    buffer.seek(0)
    return buffer

def audio_to_text(audio_file):
    """Transcribe speech from a binary audio stream using Google recognition.

    The stream is spooled to a temporary file so that ``sr.AudioFile`` can
    open it by path. The temporary file is always removed before returning.

    Args:
        audio_file: A binary file-like object; its remaining content is
            consumed with ``read()``.

    Returns:
        The recognized text (recognition language 'ur'), or one of the
        fallback strings "Could not understand audio" /
        "Could not request results; <error>" when recognition fails.
    """
    recognizer = sr.Recognizer()

    # delete=False lets the file be reopened by name after this handle is
    # closed; closing first also guarantees the data is flushed to disk.
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        temp_file.write(audio_file.read())
        temp_path = temp_file.name

    try:
        with sr.AudioFile(temp_path) as source:
            audio_data = recognizer.record(source)
        try:
            return recognizer.recognize_google(audio_data, language='ur')
        except sr.UnknownValueError:
            return "Could not understand audio"
        except sr.RequestError as e:
            return f"Could not request results; {e}"
    finally:
        # The file was created with delete=False, so remove it explicitly;
        # otherwise every transcription leaves a file behind.
        os.remove(temp_path)

# Streamlit UI
#
# Two input modes are offered: live capture from a microphone, or an uploaded
# WAV/MP3 file. Both transcribe the speech, send the transcript to Groq via
# process_text(), and play the reply back through text_to_speech().


def _respond_and_speak(text):
    """Show the Groq reply for *text* and render it as playable audio."""
    response_text = process_text(text)
    st.write(f"Response: {response_text}")

    audio_file = text_to_speech(response_text)
    st.audio(audio_file, format='audio/mp3')


st.title("Urdu Voice Assistant")

mode = st.radio("Choose input method", ("Real-time Voice", "Upload Voice File"))

if mode == "Real-time Voice":
    st.write("Click the button and start speaking.")
    if st.button("Start Recording"):
        st.write("Recording... Please wait.")
        recognizer = sr.Recognizer()
        # Keep the microphone open only while capturing; recognition and the
        # Groq round-trip do not need the device.
        with sr.Microphone() as source:
            audio_data = recognizer.listen(source)
        st.write("Processing...")
        try:
            text = recognizer.recognize_google(audio_data, language='ur')
        except sr.UnknownValueError:
            st.write("Sorry, could not understand the audio.")
        except sr.RequestError as e:
            st.write(f"Sorry, there was an error with the request: {e}")
        else:
            st.write(f"You said: {text}")
            _respond_and_speak(text)

elif mode == "Upload Voice File":
    uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3"])
    if uploaded_file:
        st.write("Processing...")
        if uploaded_file.type == "audio/mpeg":
            # Convert MP3 to WAV entirely in memory. The previous version
            # exported to a NamedTemporaryFile(delete=False) that was never
            # removed, leaking one file per upload.
            wav_buffer = BytesIO()
            AudioSegment.from_mp3(uploaded_file).export(wav_buffer, format="wav")
            wav_buffer.seek(0)
            text = audio_to_text(wav_buffer)
        else:
            text = audio_to_text(uploaded_file)
        st.write(f"Transcribed Text: {text}")

        # NOTE(review): audio_to_text() reports failures as plain strings, so
        # a failed transcription is still forwarded to Groq below. Confirm
        # whether that is intended.
        _respond_and_speak(text)