import os  # HuggingFaceHub reads HUGGINGFACEHUB_API_TOKEN from the environment

from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.llms import HuggingFaceHub

# Constants
CHROMA_DB_PATH = "chroma_db"
SENTENCE_TRANSFORMER_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
LLM_MODEL = "HuggingFaceH4/zephyr-7b-beta"

# Initialize the vector store
def initialize_vector_store():
    embeddings = HuggingFaceEmbeddings(model_name=SENTENCE_TRANSFORMER_MODEL)
    vector_store = Chroma(persist_directory=CHROMA_DB_PATH, embedding_function=embeddings)
    return vector_store

vector_store = initialize_vector_store()

def ingest_pdf(pdf_path):
    loader = PyMuPDFLoader(pdf_path)
    documents = loader.load()
    # Split text into smaller chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    split_docs = text_splitter.split_documents(documents)
    # Store in the vector DB
    vector_store.add_documents(split_docs)
    vector_store.persist()

def process_query_with_memory(query, chat_history=None):
    retriever = vector_store.as_retriever()
    # Initialize chat memory; the chain reads prior turns under the "chat_history" key
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    # Seed the memory with any (user, assistant) pairs supplied by the caller
    for user_msg, ai_msg in chat_history or []:
        memory.save_context({"question": user_msg}, {"answer": ai_msg})
    # Load a free Hugging Face model
    llm = HuggingFaceHub(repo_id=LLM_MODEL, model_kwargs={"max_new_tokens": 500})
    # Create a conversational retrieval chain (built via the from_llm constructor)
    qa_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)
    # With memory attached, the chain only needs the new question
    return qa_chain.run({"question": query})
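
# A minimal usage sketch (an assumption, not part of the original script):
# ingest a PDF, ask a question, then a follow-up that passes the first
# exchange as history. "sample.pdf" is a hypothetical path, and HuggingFaceHub
# requires HUGGINGFACEHUB_API_TOKEN to be set in the environment.
if __name__ == "__main__":
    ingest_pdf("sample.pdf")  # hypothetical PDF path; replace with a real file
    answer = process_query_with_memory("What is this document about?")
    print(answer)
    # Follow-up turn: prior (user, assistant) pairs seed the chain's memory
    follow_up = process_query_with_memory(
        "Summarize its key points.",
        chat_history=[("What is this document about?", answer)],
    )
    print(follow_up)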