hsuwill000 committed on
Commit
be9030e
·
verified ·
1 Parent(s): d615446

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -1
app.py CHANGED
@@ -49,7 +49,9 @@ pipe.get_tokenizer().set_chat_template(pipe.get_tokenizer().chat_template)
49
  # 讀取剛剛存的 txt 檔案
50
  documents = SimpleDirectoryReader("./data").load_data()
51
  texts = [doc.get_content() for doc in documents]
52
-
 
 
53
  # 使用 jieba 斷詞做 BM25
54
  tokenized_corpus = [list(jieba.cut(text)) for text in texts]
55
  bm25 = BM25Okapi(tokenized_corpus)
 
49
  # 讀取剛剛存的 txt 檔案
50
  documents = SimpleDirectoryReader("./data").load_data()
51
  texts = [doc.get_content() for doc in documents]
52
+ print("==================")
53
+ print(texts[0][:500])
54
+ print("==================")
55
  # 使用 jieba 斷詞做 BM25
56
  tokenized_corpus = [list(jieba.cut(text)) for text in texts]
57
  bm25 = BM25Okapi(tokenized_corpus)