Spaces: Running
Update app.py
Browse files
app.py
CHANGED
@@ -49,7 +49,9 @@ pipe.get_tokenizer().set_chat_template(pipe.get_tokenizer().chat_template)
|
|
49 |
# 讀取剛剛存的 txt 檔案
|
50 |
documents = SimpleDirectoryReader("./data").load_data()
|
51 |
texts = [doc.get_content() for doc in documents]
|
52 |
-
|
|
|
|
|
53 |
# 使用 jieba 斷詞做 BM25
|
54 |
tokenized_corpus = [list(jieba.cut(text)) for text in texts]
|
55 |
bm25 = BM25Okapi(tokenized_corpus)
|
|
|
49 |
# 讀取剛剛存的 txt 檔案
|
50 |
documents = SimpleDirectoryReader("./data").load_data()
|
51 |
texts = [doc.get_content() for doc in documents]
|
52 |
+
print("==================")
|
53 |
+
print(texts[0][:500])
|
54 |
+
print("==================")
|
55 |
# 使用 jieba 斷詞做 BM25
|
56 |
tokenized_corpus = [list(jieba.cut(text)) for text in texts]
|
57 |
bm25 = BM25Okapi(tokenized_corpus)
|