pdf-rag-chatbot

Running

farmax committed on Oct 12, 2024

Commit

cd982b5

verified ·

1 Parent(s): 6739a2f

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -38,7 +38,6 @@ class PDFDocument(Document):
 def initialize_database(document, chunk_size, chunk_overlap, progress=gr.Progress()):
     logger.info("Initializing database...")
-    embedding_function = Chroma.from_pretrained("chroma-rt")
     documents = []
     for file in document:
         loader = UnstructuredPDFLoader(file.name)
@@ -48,11 +47,12 @@ def initialize_database(document, chunk_size, chunk_overlap, progress=gr.Progres
             pages = splitter.split_document(doc)
             for page in pages:
                 documents.append(PDFDocument(page_content=page.page_content, metadata={"filename": file.name}))
-    vectorstore = Chroma.create_index(embedding_function, documents)
     progress.update(0.5)
     logger.info("Database initialized successfully.")
     return vectorstore, "Initialized"
 def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db, progress=gr.Progress(), language="italian"):
     logger.info("Initializing LLM chain...")
     llm_name = list_llm[llm_option]

 def initialize_database(document, chunk_size, chunk_overlap, progress=gr.Progress()):
     logger.info("Initializing database...")
     documents = []
     for file in document:
         loader = UnstructuredPDFLoader(file.name)
             pages = splitter.split_document(doc)
             for page in pages:
                 documents.append(PDFDocument(page_content=page.page_content, metadata={"filename": file.name}))
+    vectorstore = Chroma.from_documents(documents, embedding_function)
     progress.update(0.5)
     logger.info("Database initialized successfully.")
     return vectorstore, "Initialized"
 def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db, progress=gr.Progress(), language="italian"):
     logger.info("Initializing LLM chain...")
     llm_name = list_llm[llm_option]