annas4421 commited on
Commit
65e5b05
·
verified ·
1 Parent(s): 0d5cca5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -122
app.py CHANGED
@@ -1,122 +0,0 @@
1
- # importing dependencies
2
- from dotenv import load_dotenv
3
- import streamlit as st
4
- from PyPDF2 import PdfReader
5
- from langchain.text_splitter import CharacterTextSplitter
6
- from langchain.embeddings import HuggingFaceEmbeddings
7
- from langchain.vectorstores import faiss
8
- from langchain.prompts import PromptTemplate
9
- from langchain.memory import ConversationBufferMemory
10
- from langchain.chains import ConversationalRetrievalChain
11
- from langchain.chat_models import ChatOpenAI
12
- from htmlTemplates import css, bot_template, user_template
13
- from langchain.embeddings import openai
14
- from langchain.embeddings.openai import OpenAIEmbeddings
15
- import os
16
-
17
-
18
# OpenAI client built from the OPENAI_API_KEY environment variable.
# NOTE(review): `client` is never referenced elsewhere in this file — the
# langchain OpenAI classes below read OPENAI_API_KEY from the environment
# themselves; confirm before removing.
from openai import OpenAI
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)
21
-
22
-
23
# creating custom template to guide llm model
# System/condense prompt describing the qanoon-bot persona (Pakistani legal
# assistant flow). The trailing backslashes are line continuations *inside*
# the string literal, so the prompt is sent as one long line plus the
# CHAT HISTORY / QUESTION / ANSWER scaffold.
# NOTE(review): `<s>[INST]` / `</s>[INST]` are Llama-style instruction tags,
# but the chain below runs gpt-4o-mini, which does not use them — confirm
# they are intentional.
custom_template ="""<s>[INST]You will start the conversation by greeting the user and introducing yourself as qanoon-bot,\
stating your availability for legal assistance. Your next step will depend on the user's response.\
If the user expresses a need for legal assistance in Pakistan, you will ask them to describe their case or problem.\
After receiving the case or problem details from the user, you will provide the solutions and procedures according to the knowledge base and also give related penal codes and procedures. \
However, if the user does not require legal assistance in Pakistan, you will immediately thank them and\
say goodbye, ending the conversation. Remember to base your responses on the user's needs, providing accurate and\
concise information regarding the Pakistan legal law and rights where applicable. Your interactions should be professional and\
focused, ensuring the user's queries are addressed efficiently without deviating from the set flows.\
CHAT HISTORY: {chat_history}
QUESTION: {question}
ANSWER:
</s>[INST]
"""

# PromptTemplate with {chat_history} and {question} placeholders; used as the
# condense_question_prompt of the ConversationalRetrievalChain built below.
CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template)
39
-
40
# extracting text from pdf
def get_pdf_text(docs):
    """Concatenate the text of every page of every uploaded PDF.

    Args:
        docs: iterable of file-like PDF objects (e.g. Streamlit UploadedFile).

    Returns:
        str: extracted text of all pages, in order.
    """
    parts = []
    for pdf in docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # BUG FIX: extract_text() may return None for image-only/scanned
            # pages; the original `text += page.extract_text()` raised
            # TypeError in that case. Also use join instead of quadratic +=.
            parts.append(page.extract_text() or "")
    return "".join(parts)
48
-
49
# converting text to chunks
def get_chunks(raw_text):
    """Split raw document text into overlapping chunks for embedding.

    Splits on newlines into 1000-character chunks with a 200-character
    overlap so context is preserved across chunk boundaries.
    """
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return splitter.split_text(raw_text)
57
-
58
# building the FAISS vector store from text chunks
def get_vectorstore(chunks):
    """Embed chunks with OpenAIEmbeddings and index them in a FAISS store.

    Note: embeddings come from the OpenAI API (OPENAI_API_KEY must be set),
    not a local model.
    """
    return faiss.FAISS.from_texts(texts=chunks, embedding=OpenAIEmbeddings())
63
-
64
# generating conversation chain
def get_conversationchain(vectorstore):
    """Build a ConversationalRetrievalChain over the given vector store.

    gpt-4o-mini (temperature 0.4) answers; a ConversationBufferMemory keyed
    on 'chat_history' carries the dialogue between turns, and the custom
    prompt condenses follow-up questions.
    """
    chat_llm = ChatOpenAI(temperature=0.4, model_name='gpt-4o-mini')
    # buffer memory holds past messages so follow-ups have context
    history = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
        output_key='answer',
    )
    return ConversationalRetrievalChain.from_llm(
        llm=chat_llm,
        retriever=vectorstore.as_retriever(),
        condense_question_prompt=CUSTOM_QUESTION_PROMPT,
        memory=history,
    )
76
-
77
# generating response from user queries and displaying them accordingly
def handle_question(question):
    """Send `question` through the conversation chain and render the history.

    Even-indexed messages are user turns, odd-indexed are bot turns; each is
    injected into its HTML template and written via Streamlit.
    """
    result = st.session_state.conversation({'question': question})
    st.session_state.chat_history = result["chat_history"]
    for idx, message in enumerate(st.session_state.chat_history):
        template = user_template if idx % 2 == 0 else bot_template
        st.write(template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
86
-
87
-
88
def main():
    """Streamlit entry point: page setup, chat input, and sidebar PDF flow."""
    load_dotenv()
    st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
    st.write(css, unsafe_allow_html=True)

    # session-state defaults so reruns keep the chain and history
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header("Chat with multiple PDFs :books:")
    question = st.text_input("Ask question from your document:")
    if question:
        # BUG FIX: the original called handle_question unconditionally, which
        # raised TypeError ('NoneType' object is not callable) whenever the
        # user asked a question before clicking 'Process'.
        if st.session_state.conversation is None:
            st.warning("Please upload and process your documents first.")
        else:
            handle_question(question)

    with st.sidebar:
        st.subheader("Your documents")
        docs = st.file_uploader("Upload your PDF here and click on 'Process'", accept_multiple_files=True)
        if st.button("Process"):
            # guard: don't try to build a vector store from zero documents
            if not docs:
                st.warning("Please upload at least one PDF before processing.")
            else:
                with st.spinner("Processing"):
                    # get the pdf text
                    raw_text = get_pdf_text(docs)
                    # get the text chunks
                    text_chunks = get_chunks(raw_text)
                    # create vectorstore
                    vectorstore = get_vectorstore(text_chunks)
                    # create conversation chain
                    st.session_state.conversation = get_conversationchain(vectorstore)
119
-
120
-
121
# Script entry point (run with `streamlit run app.py`).
if __name__ == '__main__':
    main()