File size: 3,143 Bytes
23edd2f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import streamlit as st
import requests
from pydub import AudioSegment
from pydub.playback import play
from io import BytesIO
import os
from gtts import gTTS
import speech_recognition as sr
from groq import Groq

# Set up Groq API
# The key is read from the GROQ_API_KEY environment variable so that the
# secret is never committed to source control. Export it before launching
# the app (e.g. as a deployment secret).
# NOTE(review): any key that was previously committed in this file should be
# treated as leaked and revoked.
groq_api_key = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=groq_api_key)

def process_text(text, model="llama3-8b-8192"):
    """Ask the Groq chat API to answer *text* and return the reply.

    Args:
        text: The user's message, sent as a single ``user`` turn.
        model: Identifier of the Groq chat model to query. Defaults to the
            model this app has always used, so existing callers are
            unaffected.

    Returns:
        The content of the first completion choice, or a string starting
        with ``"Error fetching Groq data:"`` if anything goes wrong while
        requesting or reading the completion.
    """
    # Use Groq to generate a response
    try:
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "user", "content": text}
            ],
            model=model,
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and callers expect a
        # displayable string rather than an exception.
        return f"Error fetching Groq data: {e}"

def text_to_speech(text, lang='ur'):
    """Synthesize *text* with gTTS and return the audio as an in-memory stream.

    Args:
        text: The text to speak.
        lang: Language code handed to gTTS (Urdu by default).

    Returns:
        A ``BytesIO`` holding the synthesized audio, positioned at offset 0.
    """
    speech = gTTS(text=text, lang=lang)
    buffer = BytesIO()
    speech.write_to_fp(buffer)
    # Rewind so that consumers read the stream from its beginning.
    buffer.seek(0)
    return buffer

def _record_audio(recognizer, audio_file):
    """Read the whole of *audio_file* into a recognizer audio-data object."""
    with sr.AudioFile(audio_file) as source:
        return recognizer.record(source)


def audio_to_text(audio_file):
    """Transcribe an audio file to Urdu text via Google speech recognition.

    Args:
        audio_file: A path or binary file-like object containing audio.
            Formats that ``sr.AudioFile`` can open directly are used as-is;
            anything else (such as MP3) is first re-encoded to WAV with
            pydub.

    Returns:
        The recognized text, or one of the messages
        ``"Could not understand audio"`` /
        ``"Could not request results; <reason>"`` when recognition fails.
    """
    # Convert audio to text
    recognizer = sr.Recognizer()
    try:
        audio_data = _record_audio(recognizer, audio_file)
    except ValueError:
        # sr.AudioFile rejects formats it cannot decode (e.g. MP3) with
        # ValueError. Re-encode through pydub into an in-memory WAV and retry.
        if hasattr(audio_file, "seek"):
            audio_file.seek(0)  # the failed attempt may have consumed bytes
        wav_buffer = BytesIO()
        AudioSegment.from_file(audio_file).export(wav_buffer, format="wav")
        wav_buffer.seek(0)
        audio_data = _record_audio(recognizer, wav_buffer)

    # The audio is fully loaded by now, so recognition runs after the file
    # context has been closed.
    try:
        return recognizer.recognize_google(audio_data, language='ur')
    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results; {e}"

# Streamlit UI

# Prefixes of the messages audio_to_text() returns instead of a transcript
# when recognition fails. They are matched so that a failure message is shown
# to the user rather than being sent to the language model as if spoken.
_TRANSCRIPTION_ERROR_PREFIXES = (
    "Could not understand audio",
    "Could not request results;",
)


def _respond(text):
    """Send *text* to Groq, display the reply, and play it back as speech."""
    # Get response from Groq
    response_text = process_text(text)
    st.write(f"Response: {response_text}")

    # Convert response to audio
    audio_file = text_to_speech(response_text)
    st.audio(audio_file, format='audio/mp3')


st.title("Urdu Voice Assistant")

mode = st.radio("Choose input method", ("Real-time Voice", "Upload Voice File"))

if mode == "Real-time Voice":
    st.write("Click the button and start speaking.")
    if st.button("Start Recording"):
        st.write("Recording... Please wait.")
        recognizer = sr.Recognizer()
        # Hold the microphone only while capturing; the recognition, LLM and
        # text-to-speech calls below do not need the device open.
        with sr.Microphone() as source:
            audio_data = recognizer.listen(source)

        st.write("Processing...")
        try:
            text = recognizer.recognize_google(audio_data, language='ur')
        except sr.UnknownValueError:
            st.write("Sorry, could not understand the audio.")
        except sr.RequestError as e:
            st.write(f"Sorry, there was an error with the request: {e}")
        else:
            st.write(f"You said: {text}")
            _respond(text)

elif mode == "Upload Voice File":
    uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3"])
    if uploaded_file:
        st.write("Processing...")
        # Convert the uploaded file to text
        text = audio_to_text(uploaded_file)
        if text.startswith(_TRANSCRIPTION_ERROR_PREFIXES):
            # Transcription failed: report it and skip the LLM/TTS round trip.
            st.error(text)
        else:
            st.write(f"Transcribed Text: {text}")
            _respond(text)