import io
import os

import requests
import streamlit as st
from audiorecorder import audiorecorder
from dotenv import load_dotenv
from gtts import gTTS
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from langgraph.prebuilt import create_react_agent
from transformers import pipeline

st.set_page_config(page_title="Urdu Virtual Assistant", page_icon="🤖")

load_dotenv()

user_id = "1"
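
# Groq-hosted Llama 3.1 model that powers the assistant's reasoning.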
llm = ChatGroq(
    model="llama-3.1-70b-versatile",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=5,
    groq_api_key=os.getenv("GROQ_API_KEY"),
)
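
# Tavily web search tool gives the agent access to real-time information
# (its API key is read from the environment).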
search = TavilySearchResults(
    max_results=2,
)
tools = [search]
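
# Prebuilt LangGraph ReAct agent: the LLM decides when to call the search tool.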
agent_executor = create_react_agent(llm, tools)
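
# Urdu speech-to-text: a wav2vec2 XLS-R model fine-tuned for Urdu, loaded through
# the Hugging Face "automatic-speech-recognition" pipeline.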
pipe = pipeline("automatic-speech-recognition", model="kingabzpro/wav2vec2-large-xls-r-300m-Urdu")


def translate(target, text):
    '''
    Translates the given text into the target language using the Microsoft
    Translator Text API (via RapidAPI).

    Parameters:
        target (string): Two-character code of the target language.
        text (string): Text to be translated.

    Returns:
        res (string): Translated text.
    '''
    url = "https://microsoft-translator-text.p.rapidapi.com/translate"

    querystring = {"api-version": "3.0", "profanityAction": "NoAction", "textType": "plain", "to": target}

    payload = [{"Text": text}]
    headers = {
        "x-rapidapi-key": os.getenv("RAPIDAPI_LANG_TRANS"),
        "x-rapidapi-host": "microsoft-translator-text.p.rapidapi.com",
        "Content-Type": "application/json"
    }

    response = requests.post(url, json=payload, headers=headers, params=querystring)
    res = response.json()
    return res[0]["translations"][0]["text"]
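
# Example (hypothetical values): translate("en", "آپ کیسے ہیں؟") should return
# something like "How are you?", and translate("ur", "Hello") its Urdu equivalent.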


def infer(user_input: str):
    '''
    Returns a translated response from the LLM agent for a user query.

    Parameters:
        user_input (string): User query in Urdu.

    Returns:
        res (string): Agent response translated back into Urdu.
    '''
    # Translate the Urdu query into English before sending it to the agent.
    user_input = translate("en", user_input)

    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "You are a compassionate and friendly AI virtual assistant. Provide helpful answers to user queries, using the provided search tool to ensure your responses are accurate and relevant. Verify information with the tool before answering rather than giving unverified answers."
            ),
            ("human", "{user_input}")
        ]
    )

    # Chain the prompt into the ReAct agent and run it on the translated query.
    conversation = prompt | agent_executor

    response = conversation.invoke(
        {"user_input": user_input},
    )

    # Translate the agent's final message back into Urdu.
    res = translate("ur", response["messages"][-1].content)
    return res
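
# Example (hypothetical): infer("آج موسم کیسا ہے؟") would let the agent run a web
# search and return the current weather summary in Urdu.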


def text_to_speech(text, lang='ur'):
    '''
    Converts text to speech using gTTS.

    Parameters:
        text (string): Text to be converted to speech.
        lang (string): Language for the speech synthesis. Default is 'ur' (Urdu).

    Returns:
        response_audio_io (BytesIO): BytesIO object containing the MP3 audio data.
    '''
    tts = gTTS(text, lang=lang)
    response_audio_io = io.BytesIO()
    tts.write_to_fp(response_audio_io)
    response_audio_io.seek(0)
    return response_audio_io
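
# Example (hypothetical): st.audio(text_to_speech("السلام علیکم").read(), format='audio/mp3')
# would play the synthesized greeting in the app.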


col1, col2 = st.columns([1, 5])

with col1:
    st.image("bolo_logo-removebg-preview.png", width=100)

with col2:
    st.title("Urdu Virtual Assistant")
    st.write("This application is a speech-to-speech assistant designed to understand and respond in Urdu. It handles natural conversation and can also retrieve real-time information through the Tavily search engine. Whether you're asking about the weather or having an everyday chat, the assistant delivers accurate, context-aware responses, all in Urdu.")
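
# Record the user's voice in the browser; audiorecorder returns a pydub AudioSegment.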
audio = audiorecorder()

if len(audio) > 0:
    # Save the recording to disk so the ASR pipeline can read it.
    audio.export("audio.wav", format="wav")

    with open("audio.wav", "rb") as f:
        audio_bytes = f.read()

    # Transcribe the Urdu recording into text.
    result = pipe("audio.wav")
    user_query = result["text"]

    with st.chat_message("Human"):
        st.audio(audio_bytes)
        st.markdown(user_query)

    # Generate the agent's answer and synthesize it back into Urdu speech.
    response_text = infer(user_input=user_query)
    response_audio = text_to_speech(response_text, lang='ur')

    with st.chat_message("AI"):
        st.audio(response_audio.read(), format='audio/mp3')
        st.markdown(response_text)