annas4421 committed on
Commit
5df6164
·
verified ·
1 Parent(s): 65e5b05

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +122 -0
app.py CHANGED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # importing dependencies
2
+ from dotenv import load_dotenv
3
+ import streamlit as st
4
+ from PyPDF2 import PdfReader
5
+ from langchain.text_splitter import CharacterTextSplitter
6
+ from langchain.embeddings import HuggingFaceEmbeddings
7
+ from langchain.vectorstores import faiss
8
+ from langchain.prompts import PromptTemplate
9
+ from langchain.memory import ConversationBufferMemory
10
+ from langchain.chains import ConversationalRetrievalChain
11
+ from langchain.chat_models import ChatOpenAI
12
+ from htmlTemplates import css, bot_template, user_template
13
+ from langchain.embeddings import openai
14
+ from langchain.embeddings.openai import OpenAIEmbeddings
15
+ import os
16
+
17
+
18
# Direct OpenAI client (the langchain ChatOpenAI used below reads
# OPENAI_API_KEY from the environment on its own).
from openai import OpenAI

# BUG FIX: this module-level read runs at import time, before main() gets a
# chance to call load_dotenv(). If the key only exists in a .env file,
# os.getenv() would return None here. Load the .env first.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")  # may still be None if the key is unset anywhere
client = OpenAI(api_key=api_key)
21
+
22
+
23
# creating custom template to guide llm model
# This is used as the *condense question* prompt of the
# ConversationalRetrievalChain below: it receives the running chat history
# plus the latest user question and produces the standalone query sent to
# the retriever/LLM.
# NOTE(review): the <s>[INST] ... </s>[INST] markers are Mistral/Llama-style
# instruction tags; with ChatOpenAI they are passed through as plain text —
# presumably harmless, but confirm they are intentional.
custom_template ="""<s>[INST]You will start the conversation by greeting the user and introducing yourself as qanoon-bot,\
stating your availability for legal assistance. Your next step will depend on the user's response.\
If the user expresses a need for legal assistance in Pakistan, you will ask them to describe their case or problem.\
After receiving the case or problem details from the user, you will provide the solutions and procedures according to the knowledge base and also give related penal codes and procedures. \
However, if the user does not require legal assistance in Pakistan, you will immediately thank them and\
say goodbye, ending the conversation. Remember to base your responses on the user's needs, providing accurate and\
concise information regarding the Pakistan legal law and rights where applicable. Your interactions should be professional and\
focused, ensuring the user's queries are addressed efficiently without deviating from the set flows.\
CHAT HISTORY: {chat_history}
QUESTION: {question}
ANSWER:
</s>[INST]
"""

# PromptTemplate infers the input variables ({chat_history}, {question})
# from the template string.
CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template)
39
+
40
# extracting text from pdf
def get_pdf_text(docs):
    """Concatenate the extracted text of every page of every uploaded PDF.

    Args:
        docs: iterable of file-like objects (as produced by st.file_uploader).

    Returns:
        str: all page text joined together; "" when docs is empty.
    """
    text = ""
    for pdf in docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # BUG FIX: extract_text() returns None for image-only/empty
            # pages; the original "text += page.extract_text()" would then
            # raise TypeError. Fall back to "".
            text += page.extract_text() or ""
    return text
48
+
49
# converting text to chunks
def get_chunks(raw_text):
    """Split raw document text into overlapping chunks for embedding.

    Chunks are ~1000 characters, split on newlines, with a 200-character
    overlap so context carries across chunk boundaries.

    Args:
        raw_text: the full concatenated document text.

    Returns:
        list[str]: the text chunks.
    """
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return splitter.split_text(raw_text)
57
+
58
# building a FAISS vectorstore over the text chunks
def get_vectorstore(chunks):
    """Embed the chunks and index them in an in-memory FAISS store.

    NOTE(review): the original comment mentioned all-MiniLM embeddings, but
    the code actually uses OpenAIEmbeddings.

    Args:
        chunks: list[str] of text chunks.

    Returns:
        a FAISS vectorstore ready to be used as a retriever.
    """
    embedding_model = OpenAIEmbeddings()
    return faiss.FAISS.from_texts(texts=chunks, embedding=embedding_model)
63
+
64
# generating conversation chain
def get_conversationchain(vectorstore):
    """Assemble the conversational retrieval chain over a vectorstore.

    Args:
        vectorstore: a FAISS store built by get_vectorstore().

    Returns:
        ConversationalRetrievalChain wired with gpt-4o-mini, buffer memory
        and the custom question-condensing prompt.
    """
    chat_llm = ChatOpenAI(temperature=0.4, model_name='gpt-4o-mini')
    # Buffer memory keeps the full past conversation; output_key='answer'
    # tells it which chain output to record in history.
    buffer = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
        output_key='answer',
    )
    return ConversationalRetrievalChain.from_llm(
        llm=chat_llm,
        retriever=vectorstore.as_retriever(),
        condense_question_prompt=CUSTOM_QUESTION_PROMPT,
        memory=buffer,
    )
76
+
77
# generating response from user queries and displaying them accordingly
def handle_question(question):
    """Run the user's question through the conversation chain and render
    the updated chat history.

    Args:
        question: the raw question string typed by the user.
    """
    # BUG FIX: if the user asks before processing any PDF,
    # st.session_state.conversation is still None and calling it raised
    # TypeError. Warn instead of crashing.
    if st.session_state.conversation is None:
        st.warning("Please upload and process your documents first.")
        return
    response = st.session_state.conversation({'question': question})
    st.session_state.chat_history = response["chat_history"]
    # History alternates user (even index) / bot (odd index) messages.
    for i, msg in enumerate(st.session_state.chat_history):
        template = user_template if i % 2 == 0 else bot_template
        st.write(template.replace("{{MSG}}", msg.content), unsafe_allow_html=True)
86
+
87
+
88
def main():
    """Streamlit entry point: page setup, session state, chat box and the
    sidebar upload/process flow."""
    load_dotenv()
    st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
    st.write(css, unsafe_allow_html=True)

    # Initialise session state on first run so later reads never KeyError.
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header("Chat with multiple PDFs :books:")
    question = st.text_input("Ask question from your document:")
    if question:
        handle_question(question)

    with st.sidebar:
        st.subheader("Your documents")
        docs = st.file_uploader("Upload your PDF here and click on 'Process'", accept_multiple_files=True)
        if st.button("Process"):
            # BUG FIX: clicking Process with no files would pipe empty text
            # into the splitter/vectorstore and fail downstream.
            if not docs:
                st.warning("Please upload at least one PDF before processing.")
            else:
                with st.spinner("Processing"):
                    # get the pdf text
                    raw_text = get_pdf_text(docs)
                    # get the text chunks
                    text_chunks = get_chunks(raw_text)
                    # create vectorstore
                    vectorstore = get_vectorstore(text_chunks)
                    # create conversation chain
                    st.session_state.conversation = get_conversationchain(vectorstore)


if __name__ == '__main__':
    main()