quoc-khanh committed on
Commit
14d88e9
·
verified ·
1 Parent(s): 4c0ef43

Update file_loader.py

Browse files
Files changed (1) hide show
  1. file_loader.py +6 -1
file_loader.py CHANGED
@@ -28,9 +28,14 @@ def get_vectorstore():
28
  FAQ_path = "syllabus_nct_word_format/FAQ.json"
29
  FAQ_splits = get_json_splits_only(FAQ_path)
30
  all_splits += FAQ_splits
31
-
 
32
  website_content = get_web_documents(base_url='https://nct.neu.edu.vn/')
33
  all_splits += website_content
 
 
 
 
34
 
35
  # Lưu vào vectorstore với nhúng từ Google GenAI
36
  # embedding = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
 
28
  FAQ_path = "syllabus_nct_word_format/FAQ.json"
29
  FAQ_splits = get_json_splits_only(FAQ_path)
30
  all_splits += FAQ_splits
31
+
32
+ print('Crawing from https://nct.neu.edu.vn/')
33
  website_content = get_web_documents(base_url='https://nct.neu.edu.vn/')
34
  all_splits += website_content
35
+
36
+ print('Crawing from https://neu.edu.vn/')
37
+ website_content = get_web_documents(base_url='https://neu.edu.vn/')
38
+ all_splits += website_content
39
 
40
  # Lưu vào vectorstore với nhúng từ Google GenAI
41
  # embedding = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")