# importing dependencies
from dotenv import load_dotenv
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from htmlTemplates import css, bot_template, user_template
from langchain.embeddings.openai import OpenAIEmbeddings
import os
from openai import OpenAI

# load environment variables before reading the API key,
# otherwise os.getenv can return None
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)  # raw client; the LangChain wrappers below read OPENAI_API_KEY from the environment themselves
# creating custom template to guide llm model
custom_template = """You will start the conversation by greeting the user and introducing yourself as qanoon-bot, \
stating your availability for legal assistance. Your next step will depend on the user's response. \
If the user expresses a need for legal assistance in Pakistan, you will ask them to describe their case or problem. \
After receiving the case or problem details from the user, you will provide solutions and procedures according to the knowledge base, and also cite the related penal codes and procedures. \
However, if the user does not require legal assistance in Pakistan, you will immediately thank them and \
say goodbye, ending the conversation. Remember to base your responses on the user's needs, providing accurate and \
concise information regarding Pakistani law and legal rights where applicable. Your interactions should be professional and \
focused, ensuring the user's queries are addressed efficiently without deviating from the set flow.

CHAT HISTORY: {chat_history}
QUESTION: {question}
ANSWER:
"""
CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template)
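# Note: ConversationalRetrievalChain uses this prompt to condense the chat
# history and the follow-up question into a single standalone question, so the
# template must expose the {chat_history} and {question} variables used above.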
# extracting text from pdf
def get_pdf_text(docs):
    text = ""
    for pdf in docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() can return None for pages without a text layer
            text += page.extract_text() or ""
    return text
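# Each element of docs is a Streamlit UploadedFile, which is file-like, so
# PdfReader can read it directly without saving anything to disk.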
# converting text to chunks
def get_chunks(raw_text):
    text_splitter = CharacterTextSplitter(separator="\n",
                                          chunk_size=1000,
                                          chunk_overlap=200,
                                          length_function=len)
    chunks = text_splitter.split_text(raw_text)
    return chunks
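# With chunk_size=1000 and chunk_overlap=200, consecutive chunks share up to
# roughly 200 characters of context, so a passage cut at a chunk boundary
# still appears intact in at least one chunk the retriever can surface.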
# using OpenAI embeddings and FAISS to get vectorstore
def get_vectorstore(chunks):
    embeddings = OpenAIEmbeddings()
    vectorstore = FAISS.from_texts(texts=chunks, embedding=embeddings)
    return vectorstore
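# Alternative: to embed locally with the imported HuggingFaceEmbeddings
# instead of the OpenAI API (a sketch, assuming the sentence-transformers
# package is installed), swap in the all-MiniLM model:
#
#     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
#     vectorstore = FAISS.from_texts(texts=chunks, embedding=embeddings)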
# generating conversation chain
def get_conversationchain(vectorstore):
    llm = ChatOpenAI(temperature=0.2, model_name='gpt-3.5-turbo-0125')
    # using conversation buffer memory to hold past information
    memory = ConversationBufferMemory(memory_key='chat_history',
                                      return_messages=True,
                                      output_key='answer')
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        condense_question_prompt=CUSTOM_QUESTION_PROMPT,
        memory=memory)
    return conversation_chain
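# On each call the chain first condenses the history and the new question via
# CUSTOM_QUESTION_PROMPT, retrieves the most similar chunks from FAISS, and
# answers from those chunks; the memory keeps {chat_history} filled between turns.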
# generating response from user queries and displaying them accordingly
def handle_question(question):
    # guard against questions asked before any PDF has been processed
    if st.session_state.conversation is None:
        st.warning("Please upload and process a PDF first.")
        return
    response = st.session_state.conversation({'question': question})
    st.session_state.chat_history = response["chat_history"]
    # messages alternate: even indices are the user, odd indices are the bot
    for i, msg in enumerate(st.session_state.chat_history):
        if i % 2 == 0:
            st.write(user_template.replace("{{MSG}}", msg.content), unsafe_allow_html=True)
        else:
            st.write(bot_template.replace("{{MSG}}", msg.content), unsafe_allow_html=True)
def main():
    st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
    st.write(css, unsafe_allow_html=True)
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None
    st.header("Chat with multiple PDFs :books:")
    question = st.text_input("Ask a question about your documents:")
    if question:
        handle_question(question)
    with st.sidebar:
        st.subheader("Your documents")
        docs = st.file_uploader("Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
        if st.button("Process"):
            with st.spinner("Processing"):
                # extract the raw text from the uploaded pdfs
                raw_text = get_pdf_text(docs)
                # split the text into chunks
                text_chunks = get_chunks(raw_text)
                # create the vectorstore
                vectorstore = get_vectorstore(text_chunks)
                # create the conversation chain
                st.session_state.conversation = get_conversationchain(vectorstore)
if __name__ == '__main__':
    main()