Update ingest.py
Browse files
ingest.py
CHANGED
@@ -16,11 +16,10 @@ def create_faiss_index():
|
|
16 |
"""
|
17 |
documents = []
|
18 |
docs_dir = "docs"
|
19 |
-
|
20 |
if not os.path.exists(docs_dir):
|
21 |
logger.error(f"The directory '{docs_dir}' does not exist.")
|
22 |
return
|
23 |
-
|
24 |
for root, dirs, files in os.walk(docs_dir):
|
25 |
for file in files:
|
26 |
if file.endswith(".pdf"):
|
@@ -46,6 +45,7 @@ def create_faiss_index():
|
|
46 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
|
47 |
texts = text_splitter.split_documents(documents)
|
48 |
logger.info(f"Created {len(texts)} text chunks.")
|
|
|
49 |
if not texts:
|
50 |
logger.error("No text chunks created. Check the text splitting process.")
|
51 |
return
|
@@ -64,4 +64,4 @@ def create_faiss_index():
|
|
64 |
logger.error(f"Failed to create FAISS index: {e}")
|
65 |
|
66 |
if __name__ == "__main__":
|
67 |
-
create_faiss_index()
|
|
|
16 |
"""
|
17 |
documents = []
|
18 |
docs_dir = "docs"
|
|
|
19 |
if not os.path.exists(docs_dir):
|
20 |
logger.error(f"The directory '{docs_dir}' does not exist.")
|
21 |
return
|
22 |
+
|
23 |
for root, dirs, files in os.walk(docs_dir):
|
24 |
for file in files:
|
25 |
if file.endswith(".pdf"):
|
|
|
45 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
|
46 |
texts = text_splitter.split_documents(documents)
|
47 |
logger.info(f"Created {len(texts)} text chunks.")
|
48 |
+
|
49 |
if not texts:
|
50 |
logger.error("No text chunks created. Check the text splitting process.")
|
51 |
return
|
|
|
64 |
logger.error(f"Failed to create FAISS index: {e}")
|
65 |
|
66 |
if __name__ == "__main__":
|
67 |
+
create_faiss_index()
|