Pudding48 committed on
Commit
c8baa8d
·
verified ·
1 Parent(s): 0489f33

Update prepare_vector_dp.py

Browse files
Files changed (1) hide show
  1. prepare_vector_dp.py +5 -0
prepare_vector_dp.py CHANGED
@@ -68,11 +68,16 @@ def create_db_from_text():
68
 
69
  def create_dp_from_files():
70
 
 
71
 
72
  # Khai bao loader de quet toan bo thu muc data
73
  loader = DirectoryLoader(pdf_data_path, glob="*.pdf",loader_cls=PyPDFLoader)
74
  documents = loader.load()
75
 
 
 
 
 
76
  text_splitter = CharacterTextSplitter(chunk_size = 512, chunk_overlap = 50)
77
  chunks = text_splitter.split_documents(documents)
78
 
 
68
 
69
  def create_dp_from_files():
70
 
71
+ print("Files in directory:", os.listdir(pdf_data_path))
72
 
73
  # Khai bao loader de quet toan bo thu muc data
74
  loader = DirectoryLoader(pdf_data_path, glob="*.pdf",loader_cls=PyPDFLoader)
75
  documents = loader.load()
76
 
77
+ print(f"Loaded {len(documents)} documents")
78
+ for doc in documents:
79
+ print("Preview:", doc.page_content[:300])
80
+
81
  text_splitter = CharacterTextSplitter(chunk_size = 512, chunk_overlap = 50)
82
  chunks = text_splitter.split_documents(documents)
83