Spaces:
Runtime error
Runtime error
Update prepare_vector_dp.py
Browse files- prepare_vector_dp.py +5 -0
prepare_vector_dp.py
CHANGED
|
@@ -68,11 +68,16 @@ def create_db_from_text():
|
|
| 68 |
|
| 69 |
def create_dp_from_files():
|
| 70 |
|
|
|
|
| 71 |
|
| 72 |
# Khai bao loader de quet toan bo thu muc data
|
| 73 |
loader = DirectoryLoader(pdf_data_path, glob="*.pdf",loader_cls=PyPDFLoader)
|
| 74 |
documents = loader.load()
|
| 75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
text_splitter = CharacterTextSplitter(chunk_size = 512, chunk_overlap = 50)
|
| 77 |
chunks = text_splitter.split_documents(documents)
|
| 78 |
|
|
|
|
| 68 |
|
| 69 |
def create_dp_from_files():
|
| 70 |
|
| 71 |
+
print("Files in directory:", os.listdir(pdf_data_path))
|
| 72 |
|
| 73 |
# Khai bao loader de quet toan bo thu muc data
|
| 74 |
loader = DirectoryLoader(pdf_data_path, glob="*.pdf",loader_cls=PyPDFLoader)
|
| 75 |
documents = loader.load()
|
| 76 |
|
| 77 |
+
print(f"Loaded {len(documents)} documents")
|
| 78 |
+
for doc in documents:
|
| 79 |
+
print("Preview:", doc.page_content[:300])
|
| 80 |
+
|
| 81 |
text_splitter = CharacterTextSplitter(chunk_size = 512, chunk_overlap = 50)
|
| 82 |
chunks = text_splitter.split_documents(documents)
|
| 83 |
|