Manasa1 committed on
Commit
f747aa3
·
verified ·
1 Parent(s): 1ad6234

Upload 4 files

Browse files
Files changed (4) hide show
  1. .env +2 -0
  2. Dockerfile +39 -0
  3. app.py +99 -0
  4. requirements.txt +13 -0
.env ADDED
@@ -0,0 +1,2 @@
 
 
 
1
# SECURITY: never commit a real API key. Provide the value through the hosting
# platform's secret store or an untracked local .env file, and revoke any key
# that has already been published.
GROQ_API_KEY=<your-groq-api-key>
# NOTE(review): Windows-specific path from a developer machine; it will not
# resolve on other hosts -- confirm whether this override is needed at all.
NLTK_DATA=C:\Users\sanath\AppData\Roaming\nltk_data\tokenizers\punkt_tab
Dockerfile ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use the official Python image from the Docker Hub
FROM python:3.10

# Install system dependencies.
# poppler-utils: PDF command-line tools (presumably needed by the PDF loader -- confirm).
RUN apt-get update && \
    apt-get install -y poppler-utils && \
    rm -rf /var/lib/apt/lists/*

# Working directory used while installing Python dependencies
WORKDIR /code

# Copy only the requirements file first so the dependency layer can be cached
COPY requirements.txt /code/requirements.txt

# Install the requirements from the requirements.txt
RUN pip install --no-cache-dir -r /code/requirements.txt

# Create a new user and switch to it
RUN useradd -m user
USER user

# Set the environment variables for the user's home and path.
# The home directory is written literally in PATH: within a single ENV
# instruction, $HOME expands to the value it had *before* the instruction,
# so "$HOME/.local/bin" would not resolve to /home/user/.local/bin here.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set the working directory to the user's home directory
WORKDIR $HOME/app

# Copy the build context into $HOME/app, owned by the non-root user.
# This already includes .env when it is present in the context, so no separate
# COPY of .env is needed (and the build no longer fails when .env is absent).
# Prefer injecting secrets at runtime rather than baking them into the image.
COPY --chown=user . $HOME/app

# Make port 7860 available to the world outside this container
EXPOSE 7860

# Start the Streamlit app on the exposed port, listening on all interfaces
CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
app.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ import streamlit as st
3
+ from langchain_community.document_loaders import UnstructuredPDFLoader
4
+ from langchain_text_splitters.character import CharacterTextSplitter
5
+ from langchain_community.vectorstores import FAISS
6
+ from langchain_community.embeddings import HuggingFaceEmbeddings
7
+ from langchain_groq import ChatGroq
8
+ from langchain.memory import ConversationBufferMemory
9
+ from langchain.chains import ConversationalRetrievalChain
10
+ import os
11
+ import nltk
12
+
13
# Load environment variables (e.g. the Groq API key) before any client is built.
load_dotenv()

# Directory the process was started from; used below as the base location
# for saving the uploaded PDF.
working_dir = os.getcwd()

import nltk

# Download the NLTK 'punkt_tab' resource at startup
# (presumably required by the PDF loader's tokenization -- confirm).
nltk.download('punkt_tab')
21
+
22
def load_documents(file_path):
    """Load the PDF at ``file_path`` and return the loader's documents.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        Whatever ``UnstructuredPDFLoader.load()`` produces for that file.
    """
    return UnstructuredPDFLoader(file_path).load()
26
+
27
def setup_vectorstore(documents):
    """Split documents into overlapping chunks and index them in a FAISS store.

    Args:
        documents: Documents as returned by ``load_documents``.

    Returns:
        A FAISS vector store built from the chunked documents.
    """
    embeddings = HuggingFaceEmbeddings()
    text_splitter = CharacterTextSplitter(
        # Split on newline characters. A literal "/n" would almost never occur
        # in the text, leaving each document as one oversized chunk.
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
    )
    doc_chunks = text_splitter.split_documents(documents)
    vectorstores = FAISS.from_documents(doc_chunks, embeddings)
    return vectorstores
37
+
38
def create_chain(vectorstores):
    """Build a conversational retrieval chain on top of a vector store.

    Args:
        vectorstores: Vector store whose ``as_retriever()`` supplies context
            documents to the chain.

    Returns:
        A ``ConversationalRetrievalChain`` wired to a Groq chat model, the
        store's retriever, and a buffer memory keyed on ``"chat_history"``.
    """
    chat_model = ChatGroq(model="llama-3.1-70b-versatile", temperature=0)

    # The memory records the chain's "answer" output under "chat_history".
    history = ConversationBufferMemory(
        llm=chat_model,
        output_key="answer",
        memory_key="chat_history",
        return_messages=True,
    )

    return ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        retriever=vectorstores.as_retriever(),
        memory=history,
        verbose=True,
    )
57
+
58
# ---- Streamlit page: upload a PDF, then chat about its contents ----
st.set_page_config(
    page_title="Chat with your documents",
    page_icon="📑",
    layout="centered"
)

st.title("📝Chat With your docs 😎")

# Transcript shown in the UI; kept in session state so it survives reruns.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

uploaded_file = st.file_uploader(label="Upload your PDF")

if uploaded_file:
    # Join with a real path separator, and keep only the base name so a
    # client-supplied file name cannot point outside the working directory.
    file_path = os.path.join(working_dir, os.path.basename(uploaded_file.name))
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    # Built once per session: a later upload in the same session does not
    # rebuild the index or the chain.
    if "vectorstores" not in st.session_state:
        st.session_state.vectorstores = setup_vectorstore(load_documents(file_path))

    if "conversation_chain" not in st.session_state:
        st.session_state.conversation_chain = create_chain(st.session_state.vectorstores)

# Replay the stored transcript on every rerun.
for message in st.session_state.chat_history:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

user_input = st.chat_input("Ask any questions relevant to uploaded pdf")

if user_input:
    if "conversation_chain" not in st.session_state:
        # No PDF has been processed yet, so there is no chain to query.
        st.warning("Please upload a PDF before asking questions.")
    else:
        st.session_state.chat_history.append({"role": "user", "content": user_input})
        with st.chat_message("user"):
            st.markdown(user_input)

        with st.chat_message("assistant"):
            response = st.session_state.conversation_chain({"question": user_input})
            assistant_response = response["answer"]
            st.markdown(assistant_response)
            st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})
98
+
99
+
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
python-dotenv==1.0.1
streamlit==1.37.0
# app.py imports langchain.memory and langchain.chains directly, so declare
# langchain explicitly instead of relying on a transitive install.
langchain>=0.2,<0.3
langchain-community==0.2.10
langchain-text-splitters==0.2.2
langchain-groq==0.1.9
unstructured
unstructured[pdf]
faiss-cpu
transformers==4.43.3
sentence-transformers==3.0.1
nltk==3.9.1
pi_heif
13
+