Anirudh1993 committed on
Commit
a6d6e36
·
verified ·
1 Parent(s): 1539dfd

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +34 -0
  2. document_chat.py +48 -0
  3. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import streamlit as st
from document_chat import ingest_pdf, process_query_with_memory

# Configure the Streamlit app (must be the first st.* call).
st.set_page_config(page_title="AI Document Q&A Chatbot", layout="wide")
st.title("📄 AI-Powered Document Chatbot")
st.write("Upload a document and ask questions!")

# --- Document upload ---------------------------------------------------------
uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])
if uploaded_file:
    file_path = "uploaded_doc.pdf"
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    # FIX: Streamlit re-runs this whole script on every interaction. Without a
    # guard, the PDF was re-ingested into the vector store on every rerun,
    # duplicating its chunks. Ingest each uploaded file only once per session.
    if st.session_state.get("ingested_file") != uploaded_file.name:
        st.success("File uploaded! Processing...")
        ingest_pdf(file_path)
        st.session_state["ingested_file"] = uploaded_file.name

# --- Chat state --------------------------------------------------------------
if "chat_history" not in st.session_state:
    st.session_state["chat_history"] = []

# --- Query handling ----------------------------------------------------------
query = st.text_input("Ask a question:")
if query:
    # FIX: text_input keeps its value across reruns, so the same (query,
    # response) pair was appended to chat_history on every rerun. Only process
    # a query that has not just been answered.
    if st.session_state.get("last_query") != query:
        with st.spinner("Thinking..."):
            response = process_query_with_memory(query, st.session_state["chat_history"])
        st.session_state["chat_history"].append((query, response))
        st.session_state["last_query"] = query
        st.write(response)

# --- Chat history display ----------------------------------------------------
if st.session_state["chat_history"]:
    st.subheader("Chat History")
    for q, a in st.session_state["chat_history"]:
        st.write(f"**User:** {q}")
        st.write(f"**Bot:** {a}")
document_chat.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os

from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
# FIX: class is spelled PyMuPDFLoader (was PyMUPDFLoader -> ImportError).
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import ConversationalRetrievalChain
# FIX: class is ConversationBufferMemory (was ConversationalBufferMemory -> ImportError).
from langchain.memory import ConversationBufferMemory
from langchain.llms import HuggingFaceHub

# Constants
CHROMA_DB_PATH = "chroma_db"
# FIX: model id had two typos ("ransformers", "L6=v2") and would fail to load.
SENTENCE_TRANSFORMER_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
# NOTE(review): kept the original (non-PEP8) name so any external reference
# to document_chat.LLM_Model keeps working.
LLM_Model = "HuggingFaceH4/zephyr-7b-beta"


def initialize_vector_store():
    """Open (or create) the persistent Chroma vector store.

    Returns:
        Chroma: vector store backed by CHROMA_DB_PATH, using a local
        sentence-transformers model for embeddings.
    """
    embeddings = HuggingFaceEmbeddings(model_name=SENTENCE_TRANSFORMER_MODEL)
    # FIX: keyword was misspelled embedding_fnction, raising a TypeError.
    return Chroma(persist_directory=CHROMA_DB_PATH, embedding_function=embeddings)


# Single module-level store shared by ingestion and querying.
vector_store = initialize_vector_store()


def ingest_pdf(pdf_path):
    """Load a PDF, split it into overlapping chunks, and index it.

    Args:
        pdf_path: filesystem path to the PDF to ingest.
    """
    loader = PyMuPDFLoader(pdf_path)
    documents = loader.load()

    # Split text into smaller chunks; the overlap preserves context that
    # straddles a chunk boundary.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    split_docs = text_splitter.split_documents(documents)

    # Store in the vector DB and flush it to disk.
    vector_store.add_documents(split_docs)
    vector_store.persist()


def process_query_with_memory(query, chat_history=None):
    """Answer a question over the ingested documents, with chat memory.

    Args:
        query: the user's question.
        chat_history: optional list of prior (question, answer) pairs.

    Returns:
        str: the model's answer.
    """
    # FIX: the original used a mutable default argument (chat_history=[]),
    # which is shared across calls.
    chat_history = [] if chat_history is None else chat_history

    retriever = vector_store.as_retriever()

    # Initialize chat memory, pre-seeded with the caller's history so the
    # chain can resolve follow-up questions.
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    for question, answer in chat_history:
        memory.save_context({"question": question}, {"answer": answer})

    # Load a free Hugging Face hosted model.
    llm = HuggingFaceHub(repo_id=LLM_Model, model_kwargs={"max_new_tokens": 500})

    # FIX: ConversationalRetrievalChain(...) called the constructor directly,
    # which requires combine_docs_chain and question_generator and raised a
    # validation error. from_llm() builds those sub-chains from the LLM.
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        memory=memory,
    )
    # With memory attached the chain supplies chat_history itself, so only the
    # question is passed in.
    return qa_chain.run(question=query)
+
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ chromadb
3
+ pdfminer.six
4
+ pymupdf
5
+ sentence-transformers
6
+ transformers
7
+ torch
8
+ streamlit
9
+ huggingface_hub