jarif committed on
Commit
ada8299
·
verified ·
1 Parent(s): 49c6974

Update ingest.py

Browse files
Files changed (1) hide show
  1. ingest.py +7 -8
ingest.py CHANGED
@@ -1,16 +1,15 @@
1
  import os
2
  import logging
3
- import faiss
4
  from langchain.document_loaders import PDFMinerLoader
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
  from langchain.embeddings import HuggingFaceEmbeddings
7
- from langchain.vectorstores import FAISS
8
 
9
  # Configure logging
10
  logging.basicConfig(level=logging.INFO)
11
  logger = logging.getLogger(__name__)
12
 
13
- def create_faiss_index():
14
  documents = []
15
  docs_dir = "docs"
16
  if not os.path.exists(docs_dir):
@@ -55,11 +54,11 @@ def create_faiss_index():
55
  return
56
 
57
  try:
58
- faiss_index = FAISS.from_documents(texts, embeddings)
59
- faiss_index.save_local("faiss_index")
60
- logger.info(f"Created FAISS index with {len(texts)} vectors.")
61
  except Exception as e:
62
- logger.error(f"Failed to create FAISS index: {e}")
63
 
64
  if __name__ == "__main__":
65
- create_faiss_index()
 
1
  import os
2
  import logging
 
3
  from langchain.document_loaders import PDFMinerLoader
4
  from langchain.text_splitter import RecursiveCharacterTextSplitter
5
  from langchain.embeddings import HuggingFaceEmbeddings
6
+ from langchain.vectorstores import Chroma
7
 
8
  # Configure logging
9
  logging.basicConfig(level=logging.INFO)
10
  logger = logging.getLogger(__name__)
11
 
12
+ def create_vector_store():
13
  documents = []
14
  docs_dir = "docs"
15
  if not os.path.exists(docs_dir):
 
54
  return
55
 
56
  try:
57
+ vector_store = Chroma.from_documents(texts, embeddings, persist_directory="./chroma_db")
58
+ vector_store.persist()
59
+ logger.info(f"Created Chroma vector store with {len(texts)} vectors.")
60
  except Exception as e:
61
+ logger.error(f"Failed to create Chroma vector store: {e}")
62
 
63
  if __name__ == "__main__":
64
+ create_vector_store()