Update app.py
app.py CHANGED
@@ -5,6 +5,22 @@ import threading
 import time
 from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig
 import nncf
+from llama_index.core import SimpleDirectoryReader
+from rank_bm25 import BM25Okapi
+import jieba
+
+import subprocess
+import os
+os.makedirs("./data/", exist_ok=True)
+url = "https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000311.html"
+output_dir = "./data/"
+cmd = ["wget", "-P", output_dir, url]
+
+try:
+    subprocess.run(cmd, check=True)
+    print("下載成功")
+except subprocess.CalledProcessError as e:
+    print("下載失敗:", e)

 import huggingface_hub as hf_hub
 # Initialize the OpenVINO model
@@ -20,8 +36,16 @@ config.top_k = 30;


 pipe = ov_genai.LLMPipeline(model_path, "CPU")
+pipe.get_tokenizer().set_chat_template(pipe.get_tokenizer().chat_template)
+
+# Load the documents (place them in the ./data folder; multiple files are supported)
+documents = SimpleDirectoryReader("./data").load_data()
+texts = [doc.get_content() for doc in documents]
+
+# Segment the text with jieba and build the BM25 index
+tokenized_corpus = [list(jieba.cut(text)) for text in texts]
+bm25 = BM25Okapi(tokenized_corpus)

-#pipe.start_chat()

 def start_chat():
     pipe.start_chat()
@@ -36,7 +60,26 @@ def finish_chat():
 # Inference function: use a streamer and return results as a generator
 def generate_stream(prompt):
     prompt = prompt #+ " /no_think" + " 答案短且明瞭"
-
+    tokenized_query = list(jieba.cut(prompt))
+
+    # Use BM25 to retrieve the top-k most relevant document passages
+    top_k = 1
+    doc_scores = bm25.get_scores(tokenized_query)
+    top_k_indices = sorted(range(len(doc_scores)), key=lambda i: doc_scores[i], reverse=True)[:top_k]
+
+    retrieved_texts = [texts[i] for i in top_k_indices]
+
+    print("=== 檢索到的相關段落 ===")
+    for i, txt in enumerate(retrieved_texts, 1):
+        print(f"--- 段落 {i} ---\n{txt}\n")
+
+    # Assemble the prompt from the retrieved passages only, instead of pasting the full text
+    context = "\n\n".join(retrieved_texts)
+    final_prompt = f"根據以下資訊,請簡潔回答問題:\n{context}\n\n問題:{prompt}\n回答:"
+
+    print("=== 最終 prompt ===")
+    print(final_prompt)
+
     q = queue.Queue()
     tps_result = ""
     def streamer(subword):
@@ -47,7 +90,7 @@ def generate_stream(prompt):
     def worker():
         # Run inference in a background thread
         nonlocal tps_result
-        gen_result = pipe.generate([
+        gen_result = pipe.generate([final_prompt], streamer=streamer, config=config)
         tps = gen_result.perf_metrics.get_throughput().mean
         tps_result = f"{tps:.2f} tokens/s"
         q.put(None)  # end-of-stream marker
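Note: the first hunk shells out to wget to fetch the CWA tide page. A minimal sketch of a standard-library alternative, in case the wget binary is not available in the runtime image (same URL and ./data/ target as above; the target file name is only an assumption matching what wget would pick):

import os
import urllib.request

url = "https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000311.html"
output_dir = "./data/"
os.makedirs(output_dir, exist_ok=True)

# Save under the last path segment of the URL, mirroring wget's default naming.
target = os.path.join(output_dir, os.path.basename(url))
try:
    urllib.request.urlretrieve(url, target)
    print("Download succeeded:", target)
except OSError as e:  # urllib.error.URLError is a subclass of OSError
    print("Download failed:", e)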
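The retrieval added in the second and third hunks can be exercised in isolation. A minimal sketch, assuming rank_bm25 and jieba are installed; the two corpus strings below are invented stand-ins for the downloaded tide page, not data from the app:

import jieba
from rank_bm25 import BM25Okapi

# Invented stand-in corpus (the real app loads it via SimpleDirectoryReader("./data")).
texts = [
    "今日滿潮時間為上午六時,乾潮時間為中午十二時。",  # invented: tide times
    "明日天氣晴,氣溫攝氏二十八度。",                  # invented: weather
]

# Indexing step, as in the diff: segment with jieba, then build BM25.
tokenized_corpus = [list(jieba.cut(text)) for text in texts]
bm25 = BM25Okapi(tokenized_corpus)

# Query-time step, as in generate_stream.
prompt = "今天的滿潮時間是幾點?"  # invented query: "what time is high tide today?"
tokenized_query = list(jieba.cut(prompt))

top_k = 1
doc_scores = bm25.get_scores(tokenized_query)
top_k_indices = sorted(range(len(doc_scores)), key=lambda i: doc_scores[i], reverse=True)[:top_k]
retrieved_texts = [texts[i] for i in top_k_indices]

context = "\n\n".join(retrieved_texts)
final_prompt = f"根據以下資訊,請簡潔回答問題:\n{context}\n\n問題:{prompt}\n回答:"
print(final_prompt)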
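For context, generate_stream hands subwords from the background worker to the caller through a queue; only part of that plumbing is visible in the hunks above. A self-contained sketch of the producer/consumer pattern it relies on, with a dummy token source standing in for pipe.generate (all names here are illustrative, not the app's exact code):

import queue
import threading

def generate_stream_sketch(prompt):
    q = queue.Queue()

    def streamer(subword):
        # Called once per generated subword; forward it to the consumer side.
        q.put(subword)

    def worker():
        # Dummy producer standing in for pipe.generate(..., streamer=streamer, config=config).
        for token in ["Hello", ", ", "world"]:
            streamer(token)
        q.put(None)  # end-of-stream marker, as in the real app

    threading.Thread(target=worker, daemon=True).start()

    # Consumer: yield subwords as they arrive until the end marker appears.
    while True:
        item = q.get()
        if item is None:
            break
        yield item

print("".join(generate_stream_sketch("test")))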