Spaces:

Anirudh1993
/

Pdf_mugger

Sleeping

App Files Files Community

Anirudh1993 committed on Mar 15

Commit

dd2b131

verified ·

1 Parent(s): 3e1a55c

Update document_chat.py

Browse files

Files changed (1) hide show

document_chat.py +13 -35

document_chat.py CHANGED Viewed

@@ -6,62 +6,40 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.chains import ConversationalRetrievalChain
 from langchain.memory import ConversationBufferMemory
 from langchain.llms import HuggingFaceHub
-from langchain.prompts import PromptTemplate
-from langchain.chains import LLMChain
-from langchain.chains.combine_documents import StuffDocumentsChain  # Corrected import
 # Constants
 CHROMA_DB_PATH = "chroma_db"
-SENTENCE_TRANSFORMER_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # Sentence Transformers model
-LLM_Model = "HuggingFaceH4/zephyr-7b-beta"  # Hugging Face model for conversation
-# Initialize vector store with Hugging Face embeddings
 def initialize_vector_store():
     embeddings = HuggingFaceEmbeddings(model_name=SENTENCE_TRANSFORMER_MODEL)
-    vector_store = Chroma(persist_directory=CHROMA_DB_PATH, embedding_function=embeddings)  # Fixed the typo here
-    return vector_store
 vector_store = initialize_vector_store()
-# Function to ingest and store the PDF content into the vector store
 def ingest_pdf(pdf_path):
     loader = PyMuPDFLoader(pdf_path)
     documents = loader.load()
-    # Split text into smaller chunks for processing
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
     split_docs = text_splitter.split_documents(documents)
-    # Store the split documents in the vector store
     vector_store.add_documents(split_docs)
     vector_store.persist()
-# Function to process queries with memory and a retrieval chain
-def process_query_with_memory(query, chat_history=[]):
     retriever = vector_store.as_retriever()
-    # Initialize conversation memory to keep track of the chat history
-    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
-    # Load the LLM model from Hugging Face
-    llm = HuggingFaceHub(repo_id=LLM_Model, model_kwargs={"max_new_tokens": 500})
-    # Create a PromptTemplate for the question generator
-    question_generator_template = "Generate a question based on the user's request: {query}"
-    question_generator = LLMChain(llm=llm, prompt=PromptTemplate(template=question_generator_template, input_variables=["query"]))
-    # Use StuffDocumentsChain to combine the retrieved documents
-    combine_docs_chain = StuffDocumentsChain(llm=llm)  # Corrected use of StuffDocumentsChain
-    # Create a ConversationalRetrievalChain with the loaded model and retriever
     qa_chain = ConversationalRetrievalChain(
-        llm=llm,
         retriever=retriever,
-        memory=memory,
-        question_generator=question_generator,
-        combine_docs_chain=combine_docs_chain
     )
-    # Run the query with the current chat history and return the response
-    response = qa_chain.run({"question": query, "chat_history": chat_history})
-    return response

 from langchain.chains import ConversationalRetrievalChain
 from langchain.memory import ConversationBufferMemory
 from langchain.llms import HuggingFaceHub
 # Constants
 CHROMA_DB_PATH = "chroma_db"
+SENTENCE_TRANSFORMER_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
+LLM_MODEL = "HuggingFaceH4/zephyr-7b-beta"
+# Initialize vector store
 def initialize_vector_store():
+    """Return a Chroma store rooted at CHROMA_DB_PATH that embeds with SENTENCE_TRANSFORMER_MODEL."""
     embeddings = HuggingFaceEmbeddings(model_name=SENTENCE_TRANSFORMER_MODEL)
+    return Chroma(persist_directory=CHROMA_DB_PATH, embedding_function=embeddings)
 vector_store = initialize_vector_store()
 def ingest_pdf(pdf_path):
+    """Load a PDF, split it into chunks, and store the chunks in the vector store.
+
+    Args:
+        pdf_path: Path of the PDF file handed to ``PyMuPDFLoader``.
+    """
     loader = PyMuPDFLoader(pdf_path)
     documents = loader.load()
+    # Split text into smaller, overlapping chunks (chunk_size=1000, chunk_overlap=100)
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
     split_docs = text_splitter.split_documents(documents)
+    # Add the chunks to the existing module-level vector store, then persist it
     vector_store.add_documents(split_docs)
     vector_store.persist()
+def process_query_with_memory(query, chat_history):
+    """Processes user queries while maintaining conversational memory."""
     retriever = vector_store.as_retriever()
+    # Use session memory (should be handled in Streamlit app)
     qa_chain = ConversationalRetrievalChain(
+        llm=HuggingFaceHub(repo_id=LLM_MODEL, model_kwargs={"max_new_tokens": 500}),
         retriever=retriever,
+        memory=chat_history
     )
+    return qa_chain.run({"question": query, "chat_history": chat_history.memory if chat_history else []})