Spaces:
Build error
Build error
Update ingest.py
Browse files
ingest.py
CHANGED
|
@@ -1,16 +1,15 @@
|
|
| 1 |
import os
|
| 2 |
import logging
|
| 3 |
-
import faiss
|
| 4 |
from langchain.document_loaders import PDFMinerLoader
|
| 5 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 6 |
from langchain.embeddings import HuggingFaceEmbeddings
|
| 7 |
-
from langchain.vectorstores import FAISS
|
| 8 |
|
| 9 |
# Configure logging
|
| 10 |
logging.basicConfig(level=logging.INFO)
|
| 11 |
logger = logging.getLogger(__name__)
|
| 12 |
|
| 13 |
-
def create_faiss_index():
|
| 14 |
documents = []
|
| 15 |
docs_dir = "docs"
|
| 16 |
if not os.path.exists(docs_dir):
|
|
@@ -55,11 +54,11 @@ def create_faiss_index():
|
|
| 55 |
return
|
| 56 |
|
| 57 |
try:
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
logger.info(f"Created
|
| 61 |
except Exception as e:
|
| 62 |
-
logger.error(f"Failed to create
|
| 63 |
|
| 64 |
if __name__ == "__main__":
|
| 65 |
-
|
|
|
|
| 1 |
import os
|
| 2 |
import logging
|
|
|
|
| 3 |
from langchain.document_loaders import PDFMinerLoader
|
| 4 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 5 |
from langchain.embeddings import HuggingFaceEmbeddings
|
| 6 |
+
from langchain.vectorstores import Chroma
|
| 7 |
|
| 8 |
# Configure logging
|
| 9 |
logging.basicConfig(level=logging.INFO)
|
| 10 |
logger = logging.getLogger(__name__)
|
| 11 |
|
| 12 |
+
def create_vector_store():
|
| 13 |
documents = []
|
| 14 |
docs_dir = "docs"
|
| 15 |
if not os.path.exists(docs_dir):
|
|
|
|
| 54 |
return
|
| 55 |
|
| 56 |
try:
|
| 57 |
+
vector_store = Chroma.from_documents(texts, embeddings, persist_directory="./chroma_db")
|
| 58 |
+
vector_store.persist()
|
| 59 |
+
logger.info(f"Created Chroma vector store with {len(texts)} vectors.")
|
| 60 |
except Exception as e:
|
| 61 |
+
logger.error(f"Failed to create Chroma vector store: {e}")
|
| 62 |
|
| 63 |
if __name__ == "__main__":
|
| 64 |
+
create_vector_store()
|