Anirudh1993 committed on
Commit
dca455a
·
verified ·
1 Parent(s): bd02b78

Update document_chat.py

Browse files
Files changed (1) hide show
  1. document_chat.py +26 -20
document_chat.py CHANGED
@@ -7,42 +7,48 @@ from langchain.chains import ConversationalRetrievalChain
7
  from langchain.memory import ConversationBufferMemory
8
  from langchain.llms import HuggingFaceHub
9
 
10
# Constants
CHROMA_DB_PATH = "chroma_db"
SENTENCE_TRANSFORMER_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
LLM_Model = "HuggingFaceH4/zephyr-7b-beta"


def initialize_vector_store():
    """Initialize (or reopen) the persistent ChromaDB vector store.

    Returns:
        Chroma: a vector store persisted at ``CHROMA_DB_PATH`` that embeds
        documents with the sentence-transformer model above.
    """
    embeddings = HuggingFaceEmbeddings(model_name=SENTENCE_TRANSFORMER_MODEL)
    # BUG FIX: the keyword was misspelled "embedding_fnction", which raises
    # TypeError at import time; the Chroma constructor expects
    # "embedding_function".
    vector_store = Chroma(
        persist_directory=CHROMA_DB_PATH,
        embedding_function=embeddings,
    )
    return vector_store


# Module-level store shared by ingestion and querying below.
vector_store = initialize_vector_store()
 
21
def ingest_pdf(pdf_path):
    """Load a PDF, chunk its text, and store the chunks in the vector DB.

    Args:
        pdf_path: filesystem path of the PDF to ingest.
    """
    docs = PyMuPDFLoader(pdf_path).load()

    # Chunk the pages so each embedding covers a manageable span of text,
    # with a small overlap to avoid cutting context at chunk boundaries.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = splitter.split_documents(docs)

    # Embed the chunks and flush the Chroma database to disk.
    vector_store.add_documents(chunks)
    vector_store.persist()
32
 
33
def process_query_with_memory(query, chat_history=None):
    """Answer *query* using retrieved document chunks plus chat memory.

    Args:
        query: the user's question.
        chat_history: optional prior (question, answer) exchanges; defaults
            to an empty history.

    Returns:
        The chain's answer string.
    """
    # BUG FIX: the original used a mutable default argument (chat_history=[]),
    # which is shared across calls and accumulates state.
    if chat_history is None:
        chat_history = []

    retriever = vector_store.as_retriever()

    # BUG FIX: the class is ConversationBufferMemory (as imported at the top
    # of this file); "ConversationalBufferMemory" is a NameError at runtime.
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    # Load a free Hugging Face model.
    llm = HuggingFaceHub(repo_id=LLM_Model, model_kwargs={"max_new_tokens": 500})

    # BUG FIX: ConversationalRetrievalChain cannot be constructed directly
    # with an `llm` kwarg; the supported way to build it from an LLM is the
    # from_llm factory method.
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        memory=memory,
    )
    return qa_chain.run({"question": query, "chat_history": chat_history})
48
 
 
 
7
  from langchain.memory import ConversationBufferMemory
8
  from langchain.llms import HuggingFaceHub
9
 
10
# --- Configuration constants ---
CHROMA_DB_PATH = "chroma_db"  # on-disk location of the Chroma database
SENTENCE_TRANSFORMER_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # embedding model
LLM_MODEL = "HuggingFaceH4/zephyr-7b-beta"  # free chat LLM hosted on Hugging Face Hub
14
 
15
def initialize_vector_store():
    """Create (or reopen) the persistent ChromaDB vector store.

    Returns:
        Chroma: store persisted at ``CHROMA_DB_PATH`` using the
        sentence-transformer embedding model.
    """
    embedding_model = HuggingFaceEmbeddings(model_name=SENTENCE_TRANSFORMER_MODEL)
    return Chroma(
        persist_directory=CHROMA_DB_PATH,
        embedding_function=embedding_model,
    )


# Shared module-level store used by both ingestion and querying.
vector_store = initialize_vector_store()
23
+
24
def ingest_pdf(pdf_path):
    """Process a PDF: load it, split the text, and persist embeddings.

    Args:
        pdf_path: path to the PDF file to ingest into ChromaDB.
    """
    loader = PyMuPDFLoader(pdf_path)
    pages = loader.load()

    # Break the document into overlapping ~1000-character chunks so each
    # embedding captures a coherent slice of text.
    chunker = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    document_chunks = chunker.split_documents(pages)

    # Write the chunk embeddings into the on-disk Chroma store.
    vector_store.add_documents(document_chunks)
    vector_store.persist()
36
 
37
def process_query_with_memory(query, chat_history=None):
    """Retrieve relevant document chunks and generate a conversational answer.

    Args:
        query: the user's question.
        chat_history: optional prior exchanges; defaults to an empty history.

    Returns:
        The chain's answer string.
    """
    # BUG FIX: chat_history=[] was a mutable default argument, shared across
    # calls; use None as the sentinel instead.
    if chat_history is None:
        chat_history = []

    retriever = vector_store.as_retriever()

    # Initialize chat memory
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    # Load a free Hugging Face model
    llm = HuggingFaceHub(repo_id=LLM_MODEL, model_kwargs={"max_new_tokens": 500})

    # BUG FIX: ConversationalRetrievalChain does not accept an `llm` kwarg in
    # its constructor; building it from an LLM requires the from_llm factory.
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        memory=memory,
    )

    return qa_chain.run({"question": query, "chat_history": chat_history})