Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -6,22 +6,10 @@ import time
|
|
6 |
from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig
|
7 |
import nncf
|
8 |
from llama_index.core import SimpleDirectoryReader
|
|
|
9 |
from rank_bm25 import BM25Okapi
|
10 |
import jieba
|
11 |
|
12 |
-
import subprocess
|
13 |
-
import os
|
14 |
-
os.makedirs("./data/", exist_ok=True)
|
15 |
-
url = "https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000311.html"
|
16 |
-
output_dir = "./data/"
|
17 |
-
cmd = ["wget", "-P", output_dir, url]
|
18 |
-
|
19 |
-
try:
|
20 |
-
subprocess.run(cmd, check=True)
|
21 |
-
print("下載成功")
|
22 |
-
except subprocess.CalledProcessError as e:
|
23 |
-
print("下載失敗:", e)
|
24 |
-
|
25 |
import huggingface_hub as hf_hub
|
26 |
# 初始化 OpenVINO 模型
|
27 |
#model_id = "hsuwill000/BitCPM4-1B_int4_ov"
|
@@ -34,12 +22,18 @@ config.max_new_tokens = 4096
|
|
34 |
config.top_p = 0.9;
|
35 |
config.top_k = 30;
|
36 |
|
37 |
-
|
38 |
pipe = ov_genai.LLMPipeline(model_path, "CPU")
|
39 |
pipe.get_tokenizer().set_chat_template(pipe.get_tokenizer().chat_template)
|
40 |
|
41 |
-
|
42 |
-
documents =
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
texts = [doc.get_content() for doc in documents]
|
44 |
|
45 |
# 使用 jieba 斷詞做 BM25
|
@@ -75,7 +69,7 @@ def generate_stream(prompt):
|
|
75 |
|
76 |
# 拼接 prompt,避免全文貼上,只用 top3 段落
|
77 |
context = "\n\n".join(retrieved_texts)
|
78 |
-
final_prompt = f"根據以下資訊,請簡潔回答問題:\n{context}\n\n問題:{
|
79 |
|
80 |
print("=== 最終 prompt ===")
|
81 |
print(final_prompt)
|
|
|
6 |
from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig
|
7 |
import nncf
|
8 |
from llama_index.core import SimpleDirectoryReader
|
9 |
+
from llama_index.readers.web import BeautifulSoupWebReader
|
10 |
from rank_bm25 import BM25Okapi
|
11 |
import jieba
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
import huggingface_hub as hf_hub
|
14 |
# 初始化 OpenVINO 模型
|
15 |
#model_id = "hsuwill000/BitCPM4-1B_int4_ov"
|
|
|
22 |
config.top_p = 0.9;
|
23 |
config.top_k = 30;
|
24 |
|
25 |
+
reader = BeautifulSoupWebReader()
|
26 |
pipe = ov_genai.LLMPipeline(model_path, "CPU")
|
27 |
pipe.get_tokenizer().set_chat_template(pipe.get_tokenizer().chat_template)
|
28 |
|
29 |
+
|
30 |
+
documents = reader.load_data([
|
31 |
+
"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000311.html",
|
32 |
+
"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000305.html",
|
33 |
+
"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000306.html",
|
34 |
+
"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000312.html",
|
35 |
+
])
|
36 |
+
|
37 |
texts = [doc.get_content() for doc in documents]
|
38 |
|
39 |
# 使用 jieba 斷詞做 BM25
|
|
|
69 |
|
70 |
# 拼接 prompt,避免全文貼上,只用 top3 段落
|
71 |
context = "\n\n".join(retrieved_texts)
|
72 |
+
final_prompt = f"根據以下資訊,請簡潔回答問題:\n{context}\n\n問題:{prompt}\n回答:"
|
73 |
|
74 |
print("=== 最終 prompt ===")
|
75 |
print(final_prompt)
|