zohaibterminator committed
Commit c492cb8 · verified · 1 Parent(s): 12c19a5

Upload app.py

Files changed (1)
  app.py +181 -90
app.py CHANGED
@@ -1,91 +1,182 @@
- import streamlit as st
- from dotenv import load_dotenv
- from audiorecorder import audiorecorder
- from langchain_core.messages import HumanMessage, AIMessage
- import requests
- from transformers import pipeline
- from gtts import gTTS
- import io
-
- # Load environment variables (if any)
- load_dotenv()
-
- user_id = "1" # example user id
-
- # Initialize the wav2vec2 model for Urdu speech-to-text
- pipe = pipeline("automatic-speech-recognition", model="kingabzpro/wav2vec2-large-xls-r-300m-Urdu")
-
- def get_response(user_input):
-     '''
-     Takes user_input in English and invokes the infer API for response.
-
-     Parameters:
-         user_input (string): User Query in English.
-     Returns:
-         res (string): Response from the LLM.
-     '''
-     url = f"http://127.0.0.1:8000/infer/{user_id}"
-     headers = {"Content-Type": "application/x-www-form-urlencoded"}
-     data = {"user_input": user_input}
-     response = requests.post(url, headers=headers, data=data)
-     res = response.json()
-     return res["data"]
-
-
- def text_to_speech(text, lang='ur'):
-     '''
-     Converts text to speech using gTTS.
-
-     Parameters:
-         text (string): Text to be converted to speech.
-         lang (string): Language for the speech synthesis. Default is 'ur' (Urdu).
-     Returns:
-         response_audio_io (BytesIO): BytesIO object containing the audio data.
-     '''
-     tts = gTTS(text, lang=lang)
-     response_audio_io = io.BytesIO()
-     tts.write_to_fp(response_audio_io)
-     response_audio_io.seek(0)
-     return response_audio_io
-
-
- st.set_page_config(page_title="Urdu Virtual Assistant", page_icon="🤖") # set the page title and icon
-
- col1, col2 = st.columns([1, 5]) # Adjust the ratio to control the logo and title sizes
-
- # Display the logo in the first column
- with col1:
-     st.image("bolo_logo-removebg-preview.png", width=100) # Adjust the width as needed
-
- # Display the title in the second column
- with col2:
-     st.title("Urdu Virtual Assistant") # set the main title of the application
-     st.write("This application is a comprehensive speech-to-speech model designed to understand and respond in Urdu. It not only handles natural conversations but also has the capability to access and provide real-time information by integrating with the Tavily search engine. Whether you're asking for the weather or engaging in everyday dialogue, this assistant delivers accurate and context-aware responses, all in Urdu.")
-
- # Add a text input box
- audio = audiorecorder()
-
- if len(audio) > 0:
-     # Save the audio to a file
-     audio.export("audio.wav", format="wav")
-
-     # Convert audio to text using the wav2vec2 model
-     with open("audio.wav", "rb") as f:
-         audio_bytes = f.read()
-
-     # Process the audio file
-     result = pipe("audio.wav")
-     user_query = result["text"]
-
-     with st.chat_message("Human"): # create the message box for human input
-         st.audio(audio.export().read()) # display the audio player
-         st.markdown(user_query)
-
-     # Get response from the LLM
-     response_text = get_response(user_input=user_query)
-     response_audio = text_to_speech(response_text, lang='ur')
-
-     # Play the generated speech in the app
-     with st.chat_message("AI"):
-         st.audio(response_audio.read(), format='audio/mp3')
+ import streamlit as st
+ from dotenv import load_dotenv
+ from audiorecorder import audiorecorder
+ from langchain_core.messages import HumanMessage, AIMessage
+ import requests
+ from transformers import pipeline
+ from gtts import gTTS
+ import io
+ from langchain_core.runnables.base import RunnableSequence
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_groq import ChatGroq
+ import os
+ import requests
+ from dotenv import load_dotenv
+ from langgraph.checkpoint.memory import MemorySaver
+ from langgraph.prebuilt import create_react_agent
+ from langchain_community.tools.tavily_search import TavilySearchResults
+
+ # Load environment variables (if any)
+ load_dotenv()
+
+ user_id = "1" # example user id
+
+ llm = ChatGroq(
+     model="llama-3.1-70b-versatile",
+     temperature=0,
+     max_tokens=None,
+     timeout=None,
+     max_retries=5,
+     groq_api_key=os.getenv("GROQ_API_KEY")
+ )
+
+ search = TavilySearchResults(
+     max_results=2,
+ )
+ tools = [search]
+ memory = MemorySaver()
+
+ agent_executor = create_react_agent(llm, tools, checkpointer=memory)
+
+ # Initialize the wav2vec2 model for Urdu speech-to-text
+ pipe = pipeline("automatic-speech-recognition", model="kingabzpro/wav2vec2-large-xls-r-300m-Urdu")
+
+ def translate(target, text):
+     '''
+     Translates given text into target language
+
+     Parameters:
+         target (string): 2 character code to specify the target language.
+         text (string): Text to be translated.
+
+     Returns:
+         res (string): Translated text.
+     '''
+     url = "https://microsoft-translator-text.p.rapidapi.com/translate"
+
+     querystring = {"api-version":"3.0","profanityAction":"NoAction","textType":"plain", "to":target}
+
+     payload = [{ "Text": text }]
+     headers = {
+         "x-rapidapi-key": os.getenv("RAPIDAPI_LANG_TRANS"),
+         "x-rapidapi-host": "microsoft-translator-text.p.rapidapi.com",
+         "Content-Type": "application/json"
+     }
+
+     response = requests.post(url, json=payload, headers=headers, params=querystring)
+     res = response.json()
+     return res[0]["translations"][0]["text"]
+
+ '''
+ def get_response(user_input):
+     Takes user_input in English and invokes the infer API for response.
+
+     Parameters:
+         user_input (string): User Query in English.
+     Returns:
+         res (string): Response from the LLM.
+     url = f"https://whole-icons-hammer.loca.lt/infer/{user_id}"
+     headers = {"Content-Type": "application/x-www-form-urlencoded"}
+     data = {"user_input": user_input}
+     response = requests.post(url, headers=headers, data=data)
+     print(response)
+     res = response.json()
+     return res["data"]
+ '''
+
+ def infer(user_input: str):
+     '''
+     Returns the translated response from the LLM in response to a user query.
+
+     Parameters:
+         user_id (string): User ID of a user.
+         user_input (string): User query.
+
+     Returns:
+         JSON Response (Dictionary): Returns a translated response from the LLM.
+     '''
+
+     user_input = translate("en", user_input) # translate user query to english
+
+     prompt = ChatPromptTemplate.from_messages( # define a prompt
+         [
+             (
+                 "system",
+                 "You're a compassionate AI virtual Assistant"
+             ),
+             ("human", "{user_input}")
+         ]
+     )
+
+     runnable = prompt | agent_executor # define a chain
+
+     conversation = RunnableSequence( # wrap the chain along with chat history and user input
+         runnable,
+     )
+
+     response = conversation.invoke( # invoke the chain by giving the user input and the chat history
+         {"user_input": user_input},
+         config={"configurable": {"thread_id":user_id}}
+     )
+
+     res = translate("ur", response["messages"][-1].content) # translate the response to Urdu
+     return res
+
+
+ def text_to_speech(text, lang='ur'):
+     '''
+     Converts text to speech using gTTS.
+
+     Parameters:
+         text (string): Text to be converted to speech.
+         lang (string): Language for the speech synthesis. Default is 'ur' (Urdu).
+     Returns:
+         response_audio_io (BytesIO): BytesIO object containing the audio data.
+     '''
+     tts = gTTS(text, lang=lang)
+     response_audio_io = io.BytesIO()
+     tts.write_to_fp(response_audio_io)
+     response_audio_io.seek(0)
+     return response_audio_io
+
+
+ st.set_page_config(page_title="Urdu Virtual Assistant", page_icon="🤖") # set the page title and icon
+
+ col1, col2 = st.columns([1, 5]) # Adjust the ratio to control the logo and title sizes
+
+ # Display the logo in the first column
+ with col1:
+     st.image("bolo_logo-removebg-preview.png", width=100) # Adjust the width as needed
+
+ # Display the title in the second column
+ with col2:
+     st.title("Urdu Virtual Assistant") # set the main title of the application
+     st.write("This application is a comprehensive speech-to-speech model designed to understand and respond in Urdu. It not only handles natural conversations but also has the capability to access and provide real-time information by integrating with the Tavily search engine. Whether you're asking for the weather or engaging in everyday dialogue, this assistant delivers accurate and context-aware responses, all in Urdu.")
+
+ # Add a text input box
+ audio = audiorecorder()
+
+ if len(audio) > 0:
+     # Save the audio to a file
+     audio.export("audio.wav", format="wav")
+
+     # Convert audio to text using the wav2vec2 model
+     with open("audio.wav", "rb") as f:
+         audio_bytes = f.read()
+
+     # Process the audio file
+     result = pipe("audio.wav")
+     user_query = result["text"]
+
+     with st.chat_message("Human"): # create the message box for human input
+         st.audio(audio.export().read()) # display the audio player
+         st.markdown(user_query)
+
+     # Get response from the LLM
+     response_text = infer(user_input=user_query)
+     response_audio = text_to_speech(response_text, lang='ur')
+
+     # Play the generated speech in the app
+     with st.chat_message("AI"):
+         st.audio(response_audio.read(), format='audio/mp3')
          st.markdown(response_text)
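
Note on the change: this commit replaces the old `get_response()` HTTP call to a local `/infer/{user_id}` endpoint with an in-process LangGraph ReAct agent (ChatGroq + Tavily search + `MemorySaver`), with RapidAPI translation on both sides, so `GROQ_API_KEY`, `TAVILY_API_KEY` and `RAPIDAPI_LANG_TRANS` now need to be available in the environment. One thing to flag: the new `infer()` pipes a `ChatPromptTemplate` into the compiled agent and wraps it in a `RunnableSequence`, whereas the input shape `create_react_agent` documents is a plain `{"messages": [...]}` state dict plus a `thread_id` in the config (which is what `MemorySaver` keys the per-user history on). The sketch below shows the same translate → agent → translate flow written against that interface; `infer_sketch` is hypothetical, not part of the commit, and reuses the `translate` and `agent_executor` objects defined above.

```python
from langchain_core.messages import HumanMessage, SystemMessage

def infer_sketch(user_input: str, user_id: str = "1") -> str:
    # Hypothetical sketch, not part of the commit: same flow as infer(),
    # but calling the compiled ReAct agent with its documented input shape.
    english_query = translate("en", user_input)  # Urdu -> English

    response = agent_executor.invoke(
        {
            "messages": [
                SystemMessage(content="You're a compassionate AI virtual Assistant"),
                HumanMessage(content=english_query),
            ]
        },
        # thread_id lets MemorySaver keep a separate conversation per user
        config={"configurable": {"thread_id": user_id}},
    )

    # The agent returns the accumulated message list; the last entry is the reply.
    return translate("ur", response["messages"][-1].content)  # English -> Urdu
```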