Update ingest.py
ingest.py (CHANGED)

@@ -1,16 +1,15 @@
 import os
 import logging
-import faiss
 from langchain.document_loaders import PDFMinerLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.vectorstores import FAISS
+from langchain.vectorstores import Chroma

 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)

-def create_faiss_index():
+def create_vector_store():
     documents = []
     docs_dir = "docs"
     if not os.path.exists(docs_dir):
@@ -55,11 +54,11 @@ def create_faiss_index():
         return

     try:
-
-
-        logger.info(f"Created
+        vector_store = Chroma.from_documents(texts, embeddings, persist_directory="./chroma_db")
+        vector_store.persist()
+        logger.info(f"Created Chroma vector store with {len(texts)} vectors.")
     except Exception as e:
-        logger.error(f"Failed to create
+        logger.error(f"Failed to create Chroma vector store: {e}")

 if __name__ == "__main__":
-    create_faiss_index()
+    create_vector_store()
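For context, a minimal sketch of how the store persisted by this change could be reopened for querying. It is not part of the commit: the script name, the query text, and the embedding model name are assumptions, and the model must match whatever create_vector_store() actually used when building ./chroma_db.

# query_example.py - hypothetical companion script, not part of this commit
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

# Assumed model name; it must match the embeddings used in ingest.py,
# otherwise query vectors will not be comparable to the stored ones.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Reopen the persisted store created by create_vector_store().
vector_store = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)

# Retrieve the three chunks most similar to a sample query and print a preview.
results = vector_store.similarity_search("What does the document say about X?", k=3)
for doc in results:
    print(doc.page_content[:200])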