Spaces:
Sleeping
Sleeping
Update file_loader.py
Browse files- file_loader.py +10 -2
file_loader.py
CHANGED
@@ -60,7 +60,7 @@ def get_vectorstore():
|
|
60 |
# print('Vectorstore ready!')
|
61 |
# return vectorstore
|
62 |
|
63 |
-
folder_path = "
|
64 |
docx_files = list_docx_files(folder_path)
|
65 |
all_splits = [] # Khởi tạo danh sách lưu kết quả
|
66 |
for i, file_path in enumerate(tqdm(docx_files, desc="Đang xử lý", unit="file")):
|
@@ -69,7 +69,15 @@ def get_vectorstore():
|
|
69 |
splits = get_splits(file_path, output_json_path)
|
70 |
splits_with_metadata = update_documents_metadata(splits, metadata)
|
71 |
all_splits += splits_with_metadata
|
72 |
-
if i == 1: break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
|
74 |
print('Get embedding model /paraphrase-multilingual-MiniLM-L12-v2')
|
75 |
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2") #"VoVanPhuc/sup-SimCSE-VietNamese-phobert-base")
|
|
|
60 |
# print('Vectorstore ready!')
|
61 |
# return vectorstore
|
62 |
|
63 |
+
folder_path = "syllabus_nct_pdf_format/" #'/content/chatbot4nct_test2/syllabus_nct_word_format'
|
64 |
docx_files = list_docx_files(folder_path)
|
65 |
all_splits = [] # Khởi tạo danh sách lưu kết quả
|
66 |
for i, file_path in enumerate(tqdm(docx_files, desc="Đang xử lý", unit="file")):
|
|
|
69 |
splits = get_splits(file_path, output_json_path)
|
70 |
splits_with_metadata = update_documents_metadata(splits, metadata)
|
71 |
all_splits += splits_with_metadata
|
72 |
+
# if i == 1: break
|
73 |
+
|
74 |
+
FAQ_path = "syllabus_nct_word_format/FAQ.json"
|
75 |
+
FAQ_splits = get_json_splits_only(FAQ_path)
|
76 |
+
all_splits += FAQ_splits
|
77 |
+
|
78 |
+
FAQ_path = "syllabus_nct_word_format/FAQ2.json"
|
79 |
+
FAQ_splits = get_json_splits_only(FAQ_path)
|
80 |
+
all_splits += FAQ_splits
|
81 |
|
82 |
print('Get embedding model /paraphrase-multilingual-MiniLM-L12-v2')
|
83 |
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2") #"VoVanPhuc/sup-SimCSE-VietNamese-phobert-base")
|