File size: 3,692 Bytes
23edd2f
 
 
 
 
 
 
 
 
e41a473
23edd2f
 
e41a473
23edd2f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e41a473
 
 
 
 
 
 
 
 
 
 
 
23edd2f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e41a473
 
 
 
 
 
 
 
 
23edd2f
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import streamlit as st
import requests
from pydub import AudioSegment
from pydub.playback import play
from io import BytesIO
import os
from gtts import gTTS
import speech_recognition as sr
from groq import Groq
import tempfile

# Build the shared Groq client from the GROQ_API_KEY environment variable.
groq_api_key = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=groq_api_key)

def process_text(text, model="llama3-8b-8192"):
    """Ask a Groq chat model to reply to *text*.

    Args:
        text: The user's message; sent as a single ``user`` chat turn.
        model: Groq model identifier. The default is the model this function
            previously hard-coded, so existing callers are unaffected.

    Returns:
        The content of the first completion choice, or a string starting with
        ``"Error fetching Groq data: "`` when the request fails for any reason.
    """
    try:
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": text}],
            model=model,
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        # Deliberate catch-all: callers display the returned string directly,
        # so any failure is reported as text instead of being raised.
        return f"Error fetching Groq data: {e}"

def text_to_speech(text, lang='ur'):
    """Synthesize *text* with gTTS and return the audio as an in-memory stream.

    Args:
        text: The text to speak.
        lang: Language code passed to gTTS (``'ur'`` by default).

    Returns:
        A ``BytesIO`` holding the audio written by gTTS, rewound to position 0
        so it can be read or played from the start.
    """
    mp3_stream = BytesIO()
    gTTS(text=text, lang=lang).write_to_fp(mp3_stream)
    mp3_stream.seek(0)
    return mp3_stream

def audio_to_text(audio_file, language='ur'):
    """Transcribe an audio stream to text with ``recognize_google``.

    The stream is copied to a temporary file because ``sr.AudioFile`` is given
    a path here; the file is removed again before returning.

    Args:
        audio_file: A binary file-like object exposing ``read()``, holding
            audio in a format ``sr.AudioFile`` can open.
        language: Language code passed to ``recognize_google``
            (``'ur'`` by default, matching the previous hard-coded value).

    Returns:
        The recognized text, ``"Could not understand audio"`` when the speech
        is unintelligible, or ``"Could not request results; <error>"`` when the
        recognition request fails.
    """
    recognizer = sr.Recognizer()

    # Close the handle before re-opening the file by name: this guarantees the
    # bytes are on disk and avoids the double-open that fails on Windows.
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        temp_file.write(audio_file.read())
        temp_path = temp_file.name

    try:
        with sr.AudioFile(temp_path) as source:
            audio_data = recognizer.record(source)
        try:
            return recognizer.recognize_google(audio_data, language=language)
        except sr.UnknownValueError:
            return "Could not understand audio"
        except sr.RequestError as e:
            return f"Could not request results; {e}"
    finally:
        # delete=False means nothing else cleans this up; without the removal
        # every transcription would leave a file behind.
        try:
            os.remove(temp_path)
        except OSError:
            # Best-effort cleanup: a failed delete must not mask the result.
            pass

# Streamlit UI


def _respond(text):
    """Send *text* to Groq, then show the reply as text and as playable audio."""
    response_text = process_text(text)
    st.write(f"Response: {response_text}")

    audio_file = text_to_speech(response_text)
    st.audio(audio_file, format='audio/mp3')


st.title("Urdu Voice Assistant")

mode = st.radio("Choose input method", ("Real-time Voice", "Upload Voice File"))

if mode == "Real-time Voice":
    st.write("Click the button and start speaking.")
    if st.button("Start Recording"):
        st.write("Recording... Please wait.")
        recognizer = sr.Recognizer()

        # Capture first and release the microphone immediately, instead of
        # holding the device open during recognition, the Groq call and TTS.
        audio_data = None
        try:
            with sr.Microphone() as source:
                audio_data = recognizer.listen(source)
        except (OSError, AttributeError) as e:
            # Raised when no input device is available or PyAudio is missing
            # (e.g. when the app runs on a server without a microphone).
            st.write(f"Sorry, could not access the microphone: {e}")

        if audio_data is not None:
            st.write("Processing...")
            try:
                text = recognizer.recognize_google(audio_data, language='ur')
            except sr.UnknownValueError:
                st.write("Sorry, could not understand the audio.")
            except sr.RequestError as e:
                st.write(f"Sorry, there was an error with the request: {e}")
            else:
                st.write(f"You said: {text}")
                _respond(text)

elif mode == "Upload Voice File":
    uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3"])
    if uploaded_file:
        st.write("Processing...")
        if uploaded_file.type == "audio/mpeg":
            # Convert MP3 to WAV in memory; the previous delete=False temp file
            # was never removed and was written to while still open.
            audio = AudioSegment.from_mp3(uploaded_file)
            wav_buffer = BytesIO()
            audio.export(wav_buffer, format="wav")
            wav_buffer.seek(0)
            text = audio_to_text(wav_buffer)
        else:
            text = audio_to_text(uploaded_file)
        # NOTE(review): audio_to_text reports failures as plain strings, so a
        # failed transcription is still shown here and forwarded to Groq.
        st.write(f"Transcribed Text: {text}")
        _respond(text)