# importing dependencies
from dotenv import load_dotenv
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from htmlTemplates import css, bot_template, user_template
from langchain.embeddings.openai import OpenAIEmbeddings
import os
from openai import OpenAI

# load environment variables before reading the API key,
# otherwise os.getenv can return None
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)  # raw client; the LangChain wrappers below read OPENAI_API_KEY from the environment themselves
# creating custom template to guide llm model
custom_template = """You will start the conversation by greeting the user and introducing yourself as qanoon-bot, \
stating your availability for legal assistance. Your next step will depend on the user's response. \
If the user expresses a need for legal assistance in Pakistan, you will ask them to describe their case or problem. \
After receiving the case or problem details from the user, you will provide solutions and procedures according to the knowledge base, and also cite the related penal codes and procedures. \
However, if the user does not require legal assistance in Pakistan, you will immediately thank them and \
say goodbye, ending the conversation. Remember to base your responses on the user's needs, providing accurate and \
concise information regarding Pakistani law and legal rights where applicable. Your interactions should be professional and \
focused, ensuring the user's queries are addressed efficiently without deviating from the set flow.

CHAT HISTORY: {chat_history}
QUESTION: {question}
ANSWER:
"""
CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template)
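# Note: ConversationalRetrievalChain uses this prompt to condense the chat
# history and the follow-up question into a single standalone question, so the
# template must expose the {chat_history} and {question} variables used above.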
# extracting text from pdf
def get_pdf_text(docs):
    text = ""
    for pdf in docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() can return None for pages without a text layer
            text += page.extract_text() or ""
    return text
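# Each element of docs is a Streamlit UploadedFile, which is file-like, so
# PdfReader can read it directly without saving anything to disk.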
# converting text to chunks
def get_chunks(raw_text):
    text_splitter = CharacterTextSplitter(separator="\n",
                                          chunk_size=1000,
                                          chunk_overlap=200,
                                          length_function=len)
    chunks = text_splitter.split_text(raw_text)
    return chunks
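# With chunk_size=1000 and chunk_overlap=200, consecutive chunks share up to
# roughly 200 characters of context, so a passage cut at a chunk boundary
# still appears intact in at least one chunk the retriever can surface.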
# using OpenAI embeddings and FAISS to get vectorstore
def get_vectorstore(chunks):
    embeddings = OpenAIEmbeddings()
    vectorstore = FAISS.from_texts(texts=chunks, embedding=embeddings)
    return vectorstore
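# Alternative: to embed locally with the imported HuggingFaceEmbeddings
# instead of the OpenAI API (a sketch, assuming the sentence-transformers
# package is installed), swap in the all-MiniLM model:
#
#     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
#     vectorstore = FAISS.from_texts(texts=chunks, embedding=embeddings)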
# generating conversation chain
def get_conversationchain(vectorstore):
    llm = ChatOpenAI(temperature=0.2, model_name='gpt-3.5-turbo-0125')
    # using conversation buffer memory to hold past information
    memory = ConversationBufferMemory(memory_key='chat_history',
                                      return_messages=True,
                                      output_key='answer')
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        condense_question_prompt=CUSTOM_QUESTION_PROMPT,
        memory=memory)
    return conversation_chain
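# On each call the chain first condenses the history and the new question via
# CUSTOM_QUESTION_PROMPT, retrieves the most similar chunks from FAISS, and
# answers from those chunks; the memory keeps {chat_history} filled between turns.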
# generating response from user queries and displaying them accordingly
def handle_question(question):
    # guard against questions asked before any PDF has been processed
    if st.session_state.conversation is None:
        st.warning("Please upload and process a PDF first.")
        return
    response = st.session_state.conversation({'question': question})
    st.session_state.chat_history = response["chat_history"]
    # messages alternate: even indices are the user, odd indices are the bot
    for i, msg in enumerate(st.session_state.chat_history):
        if i % 2 == 0:
            st.write(user_template.replace("{{MSG}}", msg.content), unsafe_allow_html=True)
        else:
            st.write(bot_template.replace("{{MSG}}", msg.content), unsafe_allow_html=True)
def main():
    st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
    st.write(css, unsafe_allow_html=True)
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None
    st.header("Chat with multiple PDFs :books:")
    question = st.text_input("Ask a question about your documents:")
    if question:
        handle_question(question)
    with st.sidebar:
        st.subheader("Your documents")
        docs = st.file_uploader("Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
        if st.button("Process"):
            with st.spinner("Processing"):
                # extract the raw text from the uploaded pdfs
                raw_text = get_pdf_text(docs)
                # split the text into chunks
                text_chunks = get_chunks(raw_text)
                # create the vectorstore
                vectorstore = get_vectorstore(text_chunks)
                # create the conversation chain
                st.session_state.conversation = get_conversationchain(vectorstore)
if __name__ == '__main__':
    main()