Spaces:
Runtime error
Runtime error
Update prepare_vector_dp.py
Browse files
- prepare_vector_dp.py +5 -0
prepare_vector_dp.py
CHANGED
@@ -68,11 +68,16 @@ def create_db_from_text():
|
|
68 |
|
69 |
def create_dp_from_files():
|
70 |
|
|
|
71 |
|
72 |
# Khai bao loader de quet toan bo thu muc data
|
73 |
loader = DirectoryLoader(pdf_data_path, glob="*.pdf",loader_cls=PyPDFLoader)
|
74 |
documents = loader.load()
|
75 |
|
|
|
|
|
|
|
|
|
76 |
text_splitter = CharacterTextSplitter(chunk_size = 512, chunk_overlap = 50)
|
77 |
chunks = text_splitter.split_documents(documents)
|
78 |
|
|
|
68 |
|
69 |
def create_dp_from_files():
|
70 |
|
71 |
+
print("Files in directory:", os.listdir(pdf_data_path))
|
72 |
|
73 |
# Khai bao loader de quet toan bo thu muc data
|
74 |
loader = DirectoryLoader(pdf_data_path, glob="*.pdf",loader_cls=PyPDFLoader)
|
75 |
documents = loader.load()
|
76 |
|
77 |
+
print(f"Loaded {len(documents)} documents")
|
78 |
+
for doc in documents:
|
79 |
+
print("Preview:", doc.page_content[:300])
|
80 |
+
|
81 |
text_splitter = CharacterTextSplitter(chunk_size = 512, chunk_overlap = 50)
|
82 |
chunks = text_splitter.split_documents(documents)
|
83 |
|