jarif committed on
Commit
6cdbf89
·
verified ·
1 Parent(s): a144f48

Update ingest.py

Browse files
Files changed (1) hide show
  1. ingest.py +12 -6
ingest.py CHANGED
@@ -1,14 +1,15 @@
1
  import os
2
  import logging
 
 
3
  from langchain_community.document_loaders import PDFMinerLoader
4
  from langchain.text_splitter import RecursiveCharacterTextSplitter
5
  from langchain_community.embeddings import HuggingFaceEmbeddings
6
- from langchain_community.vectorstores import Chroma
7
 
8
  logging.basicConfig(level=logging.INFO)
9
  logger = logging.getLogger(__name__)
10
 
11
- def create_chroma_db():
12
  documents = []
13
  docs_dir = "docs"
14
 
@@ -52,12 +53,17 @@ def create_chroma_db():
52
  logger.error(f"Failed to initialize embeddings: {e}")
53
  return
54
 
 
 
 
55
  try:
56
- db = Chroma.from_documents(texts, embeddings, persist_directory="chroma_db")
57
- logger.info(f"Created Chroma database with {len(texts)} vectors.")
 
 
58
  except Exception as e:
59
- logger.error(f"Failed to create Chroma database: {e}")
60
  return
61
 
62
  if __name__ == "__main__":
63
- create_chroma_db()
 
1
  import os
2
  import logging
3
+ import faiss
4
+ import numpy as np
5
  from langchain_community.document_loaders import PDFMinerLoader
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
  from langchain_community.embeddings import HuggingFaceEmbeddings
 
8
 
9
  logging.basicConfig(level=logging.INFO)
10
  logger = logging.getLogger(__name__)
11
 
12
+ def create_faiss_index():
13
  documents = []
14
  docs_dir = "docs"
15
 
 
53
  logger.error(f"Failed to initialize embeddings: {e}")
54
  return
55
 
56
+ embedding_vectors = np.array([embeddings.embed(text) for text in texts])
57
+ dimension = embedding_vectors.shape[1]
58
+
59
  try:
60
+ faiss_index = faiss.IndexFlatL2(dimension)
61
+ faiss_index.add(embedding_vectors)
62
+ faiss.write_index(faiss_index, "faiss_index.index")
63
+ logger.info(f"Created FAISS index with {len(texts)} vectors.")
64
  except Exception as e:
65
+ logger.error(f"Failed to create FAISS index: {e}")
66
  return
67
 
68
  if __name__ == "__main__":
69
+ create_faiss_index()