# importing dependencies
import html
import os

from dotenv import load_dotenv
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import faiss
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from htmlTemplates import css, bot_template, user_template
from langchain.embeddings import openai
from langchain.embeddings.openai import OpenAIEmbeddings
from openai import OpenAI

# Load variables from a local .env file BEFORE reading the API key; otherwise
# a key that is only defined in .env is not visible to os.getenv() below.
load_dotenv()

api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

# creating custom template to guide llm model
custom_template = """[INST]You will start the conversation by greeting the user and introducing yourself as qanoon-bot,\
 stating your availability for legal assistance. Your next step will depend on the user's response.\
 If the user expresses a need for legal assistance in Pakistan, you will ask them to describe their case or problem.\
 After receiving the case or problem details from the user, you will provide the solutions and procedures according to the knowledge base and also give related penal codes and procedures. \
However, if the user does not require legal assistance in Pakistan, you will immediately thank them and\
 say goodbye, ending the conversation. Remember to base your responses on the user's needs, providing accurate and\
 concise information regarding the Pakistan legal law and rights where applicable. Your interactions should be professional and\
 focused, ensuring the user's queries are addressed efficiently without deviating from the set flows.\
 CHAT HISTORY: {chat_history}
QUESTION: {question}
ANSWER:
[INST]
"""

CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template)


# extracting text from pdf
def get_pdf_text(docs):
    """Return the concatenated text of every page of every PDF in *docs*.

    Args:
        docs: Iterable of file-like objects (or paths) accepted by
            ``PdfReader``.

    Returns:
        A single string with the text of all pages, in order. Pages for
        which no text could be extracted contribute an empty string.
    """
    page_texts = []
    for pdf in docs:
        pdf_reader = PdfReader(pdf)
        # extract_text() can yield nothing for image-only pages; fall back to
        # "" so the concatenation never fails on a None value.
        page_texts.extend(page.extract_text() or "" for page in pdf_reader.pages)
    return "".join(page_texts)


# converting text to chunks
def get_chunks(raw_text):
    """Split *raw_text* on newlines into overlapping chunks.

    Chunks are at most 1000 characters long with a 200 character overlap.

    Returns:
        The list of chunk strings produced by ``CharacterTextSplitter``.
    """
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return text_splitter.split_text(raw_text)


# using OpenAI embeddings and faiss to get vectorstore
def get_vectorstore(chunks):
    """Embed *chunks* with ``OpenAIEmbeddings`` and index them in FAISS.

    Returns:
        The FAISS vector store built from the given texts.
    """
    embeddings = OpenAIEmbeddings()
    return faiss.FAISS.from_texts(texts=chunks, embedding=embeddings)


# generating conversation chain
def get_conversationchain(vectorstore):
    """Build a ``ConversationalRetrievalChain`` backed by *vectorstore*.

    The chain uses ``gpt-3.5-turbo-0125`` at temperature 0.2 and keeps the
    running dialogue in a ``ConversationBufferMemory``.
    """
    llm = ChatOpenAI(temperature=0.2, model_name='gpt-3.5-turbo-0125')
    # using conversation buffer memory to hold past information
    memory = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
        output_key='answer',
    )
    # NOTE(review): the persona prompt is passed as the *condense question*
    # prompt, which LangChain uses to rewrite follow-up questions - confirm
    # this is intended rather than the answer-generation prompt.
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        condense_question_prompt=CUSTOM_QUESTION_PROMPT,
        memory=memory,
    )
    return conversation_chain


# generating response from user queries and displaying them accordingly
def handle_question(question):
    """Send *question* to the conversation chain and render the chat history.

    Does nothing except show a warning when no conversation chain has been
    created yet (i.e. no documents have been processed).
    """
    conversation = st.session_state.conversation
    if conversation is None:
        # The text input can be submitted before any PDF was processed;
        # calling None would raise a TypeError.
        st.warning("Please upload and process your documents first.")
        return

    response = conversation({'question': question})
    st.session_state.chat_history = response["chat_history"]
    for i, msg in enumerate(st.session_state.chat_history):
        # Messages alternate user / bot, starting with the user.
        template = user_template if i % 2 == 0 else bot_template
        # The templates are rendered as raw HTML, so escape the message text
        # to keep user- or model-supplied markup from being injected.
        st.write(
            template.replace("{{MSG}}", html.escape(msg.content)),
            unsafe_allow_html=True,
        )


def main():
    """Run the Streamlit app: question box plus a sidebar PDF uploader."""
    st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
    st.write(css, unsafe_allow_html=True)

    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header("Chat with multiple PDFs :books:")
    question = st.text_input("Ask question from your document:")
    if question:
        handle_question(question)

    with st.sidebar:
        st.subheader("Your documents")
        docs = st.file_uploader(
            "Upload your PDF here and click on 'Process'",
            accept_multiple_files=True,
        )
        if st.button("Process"):
            if not docs:
                # Nothing uploaded: building an index from no text would fail.
                st.warning("Please upload at least one PDF before processing.")
            else:
                with st.spinner("Processing"):
                    # get the pdf
                    raw_text = get_pdf_text(docs)
                    # get the text chunks
                    text_chunks = get_chunks(raw_text)
                    if not text_chunks:
                        st.error("No text could be extracted from the uploaded PDFs.")
                    else:
                        # create vectorstore
                        vectorstore = get_vectorstore(text_chunks)
                        # create conversation chain
                        st.session_state.conversation = get_conversationchain(vectorstore)


if __name__ == '__main__':
    main()