Manasa1 committed
Commit dcc13e8 · verified · 1 Parent(s): a9716a9

Upload 2 files

Files changed (2)
  1. app.py +100 -0
  2. requirements.txt +12 -0
app.py ADDED
@@ -0,0 +1,100 @@
+ from dotenv import load_dotenv
+ import streamlit as st
+ from langchain_community.document_loaders import UnstructuredPDFLoader
+ from langchain_text_splitters.character import CharacterTextSplitter
+ from langchain_community.vectorstores import FAISS
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_groq import ChatGroq
+ from langchain.memory import ConversationBufferMemory
+ from langchain.chains import ConversationalRetrievalChain
+ import os
+ import nltk
+ nltk.download('punkt')
+ load_dotenv()  # read variables from a local .env file, if present
+ # Load the secret API key for Groq
+ secret = os.getenv('Groq_api')
+ working_dir = os.path.dirname(os.path.abspath(__file__))
+ 
+ def load_documents(file_path):
+     loader = UnstructuredPDFLoader(file_path)
+     documents = loader.load()
+     return documents
+ 
+ def setup_vectorstore(documents):
+     embeddings = HuggingFaceEmbeddings()
+     text_splitter = CharacterTextSplitter(
+         separator="\n",
+         chunk_size=500,
+         chunk_overlap=100
+     )
+     doc_chunks = text_splitter.split_documents(documents)
+     vectorstores = FAISS.from_documents(doc_chunks, embeddings)
+     return vectorstores
+ 
+ def create_chain(vectorstores):
+     llm = ChatGroq(
+         api_key=secret,
+         model="llama-3.1-70b-versatile",
+         temperature=0.7  # Slightly higher for more natural, expressive language
+     )
+     retriever = vectorstores.as_retriever()
+     memory = ConversationBufferMemory(
+         llm=llm,
+         output_key="answer",
+         memory_key="chat_history",
+         return_messages=True
+     )
+     chain = ConversationalRetrievalChain.from_llm(
+         llm=llm,
+         retriever=retriever,
+         memory=memory,
+         verbose=True
+     )
+     return chain
+ 
+ # Streamlit setup
+ st.set_page_config(
+     page_title="Chat with your documents",
+     page_icon="📑",
+     layout="centered"
+ )
+ 
+ st.title("📝 Chat With Your Docs 😎")
+ 
+ if "chat_history" not in st.session_state:
+     st.session_state.chat_history = []
+ 
+ uploaded_file = st.file_uploader(label="Upload your PDF")
+ 
+ if uploaded_file:
+     file_path = f"{working_dir}/{uploaded_file.name}"
+     with open(file_path, "wb") as f:
+         f.write(uploaded_file.getbuffer())
+ 
+     # Load and process the PDF document
+     if "vectorstores" not in st.session_state:
+         st.session_state.vectorstores = setup_vectorstore(load_documents(file_path))
+ 
+     # Create the conversational chain with style adaptation
+     if "conversation_chain" not in st.session_state:
+         st.session_state.conversation_chain = create_chain(st.session_state.vectorstores)
+ 
+     for message in st.session_state.chat_history:
+         with st.chat_message(message["role"]):
+             st.markdown(message["content"])
+ 
+     user_input = st.chat_input("Ask any question relevant to the uploaded PDF")
+ 
+     if user_input:
+         st.session_state.chat_history.append({"role": "user", "content": user_input})
+         with st.chat_message("user"):
+             st.markdown(user_input)
+ 
+         # Add prompt for style adaptation
+         style_prompt = f"Answer in a similar style as the person in the document: {user_input}"
+         response = st.session_state.conversation_chain({"question": style_prompt})
+ 
+         assistant_response = response["answer"]
+         st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})
+         with st.chat_message("assistant"):
+             st.markdown(assistant_response)
requirements.txt ADDED
@@ -0,0 +1,12 @@
+ python-dotenv==1.0.1
+ streamlit==1.37.0
+ langchain-community==0.2.10
+ langchain-text-splitters==0.2.2
+ langchain-groq==0.1.9
+ unstructured
+ unstructured[pdf]
+ faiss-cpu
+ transformers==4.43.3
+ sentence-transformers==3.0.1
+ nltk==3.9.1
+ pi_heif