hsuwill000 committed on
Commit 0bc0417 · verified · 1 Parent(s): 19fcbc7

Update app.py

Files changed (1)
app.py +33 -33
app.py CHANGED
@@ -6,41 +6,50 @@ import time
  from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig
  import nncf
  from llama_index.core import SimpleDirectoryReader
- from llama_index.readers.web import BeautifulSoupWebReader
  from rank_bm25 import BM25Okapi
  import jieba
+ import requests
+ from bs4 import BeautifulSoup
+ import os
 
  import huggingface_hub as hf_hub
+
+ # First download the web pages and save them as .txt files
+ os.makedirs("./data", exist_ok=True)
+ urls = [
+     "https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000311.html",
+     "https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000305.html",
+     "https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000306.html",
+     "https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000312.html",
+ ]
+ for i, url in enumerate(urls):
+     resp = requests.get(url)
+     soup = BeautifulSoup(resp.text, "html.parser")
+     text = soup.get_text(separator="\n", strip=True)
+     with open(f"./data/doc_{i}.txt", "w", encoding="utf-8") as f:
+         f.write(text)
+
  # Initialize the OpenVINO model
- #model_id = "hsuwill000/BitCPM4-1B_int4_ov"
  model_id = "hsuwill000/MiniCPM3-4B_int4_ov"
  model_path = "ov"
  hf_hub.snapshot_download(model_id, local_dir=model_path)
 
  config = ov_genai.GenerationConfig()
  config.max_new_tokens = 4096
- config.top_p = 0.9;
- config.top_k = 30;
+ config.top_p = 0.9
+ config.top_k = 30
 
- reader = BeautifulSoupWebReader()
  pipe = ov_genai.LLMPipeline(model_path, "CPU")
  pipe.get_tokenizer().set_chat_template(pipe.get_tokenizer().chat_template)
 
-
- documents = reader.load_data([
-     "https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000311.html",
-     "https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000305.html",
-     "https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000306.html",
-     "https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000312.html",
- ])
-
+ # Read the .txt files we just saved
+ documents = SimpleDirectoryReader("./data").load_data()
  texts = [doc.get_content() for doc in documents]
 
  # Segment with jieba and build the BM25 index
  tokenized_corpus = [list(jieba.cut(text)) for text in texts]
  bm25 = BM25Okapi(tokenized_corpus)
 
-
  def start_chat():
      pipe.start_chat()
      return "✅ 開始對話!"
@@ -49,44 +58,36 @@ def finish_chat():
      pipe.finish_chat()
      return "🛑 結束對話!"
 
-
-
- # Build the inference function: use a streamer and return results as a generator
  def generate_stream(prompt):
-     prompt = prompt  #+ " /no_think" + " 答案短且明瞭"
      tokenized_query = list(jieba.cut(prompt))
-
-     # Use BM25 to fetch the top relevant document passages
      top_k = 1
      doc_scores = bm25.get_scores(tokenized_query)
      top_k_indices = sorted(range(len(doc_scores)), key=lambda i: doc_scores[i], reverse=True)[:top_k]
      retrieved_texts = [texts[i] for i in top_k_indices]
 
      print("=== 檢索到的相關段落 ===")
      for i, txt in enumerate(retrieved_texts, 1):
          print(f"--- 段落 {i} ---\n{txt}\n")
 
-     # Stitch the prompt from the top passages only, instead of pasting the full pages
      context = "\n\n".join(retrieved_texts)
      final_prompt = f"根據以下資訊,請簡潔回答問題:\n{context}\n\n問題:{prompt}\n回答:"
 
      print("=== 最終 prompt ===")
      print(final_prompt)
 
      q = queue.Queue()
      tps_result = ""
 
      def streamer(subword):
          print(subword, end='', flush=True)
          q.put(subword)
          return ov_genai.StreamingStatus.RUNNING
 
      def worker():
-         # Run inference in a background thread
          nonlocal tps_result
          gen_result = pipe.generate([final_prompt], streamer=streamer, config=config)
          tps = gen_result.perf_metrics.get_throughput().mean
          tps_result = f"{tps:.2f} tokens/s"
          q.put(None)  # end-of-generation marker
 
      threading.Thread(target=worker).start()
@@ -97,7 +98,7 @@ def generate_stream(prompt):
          if token is None:
              break
          result += token
-         yield result,""  # push each partial result to the output textbox
+         yield result, ""
      yield result, tps_result
 
  with gr.Blocks() as demo:
@@ -111,13 +112,12 @@ with gr.Blocks() as demo:
      with gr.Row():
          textbox_input = gr.Textbox(label="Prompt", lines=1, placeholder="Enter prompt here...")
          button = gr.Button("Submit")
 
      textbox_output = gr.Markdown(label="robot answer:", elem_id="scroll_output")
 
      start_btn.click(fn=start_chat, outputs=status_box)
      end_btn.click(fn=finish_chat, outputs=status_box)
 
-     # When the button is clicked, call generate_stream and update textbox_output
      button.click(fn=generate_stream, inputs=textbox_input, outputs=[textbox_output, TPS_box])
 
  demo.launch()
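
The committed download loop calls requests.get with no error handling, so a failed request would silently save an error page into the corpus. A slightly more defensive variant might look like the sketch below; the fetch_page helper, the timeout, and the encoding fallback are assumptions for illustration, not part of the commit.

import requests
from bs4 import BeautifulSoup

def fetch_page(url, timeout=10.0):
    # Download one page and return its visible text (hypothetical helper, not in the commit).
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()                  # surface HTTP errors instead of saving an error page
    resp.encoding = resp.apparent_encoding   # guard against misdetected encoding on zh-TW pages
    soup = BeautifulSoup(resp.text, "html.parser")
    return soup.get_text(separator="\n", strip=True)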
 
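
Corpus loading now goes through SimpleDirectoryReader over ./data instead of BeautifulSoupWebReader. The reader ingests every file in that directory, so if anything besides the saved snapshots lands there, filtering by extension keeps the BM25 index clean. A minimal sketch, assuming llama_index's required_exts parameter behaves as documented in the installed version:

from llama_index.core import SimpleDirectoryReader

# Load only the .txt snapshots written by the download loop.
documents = SimpleDirectoryReader("./data", required_exts=[".txt"]).load_data()
texts = [doc.get_content() for doc in documents]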
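
The retrieval step itself is unchanged: jieba segments the Chinese text and BM25Okapi ranks whole documents against the segmented query (top_k = 1, so only the best page reaches the prompt). A self-contained illustration of the same ranking logic; the two sample sentences are invented for demonstration:

import jieba
from rank_bm25 import BM25Okapi

# Toy corpus standing in for the downloaded tide pages.
texts = ["基隆今日滿潮時間為早上六點", "高雄今日天氣晴朗"]
bm25 = BM25Okapi([list(jieba.cut(t)) for t in texts])

scores = bm25.get_scores(list(jieba.cut("基隆滿潮時間")))
best = max(range(len(scores)), key=scores.__getitem__)
print(texts[best])  # the 基隆 tide sentence should score highest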
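
generate_stream bridges the blocking pipe.generate call and Gradio's generator protocol with a queue.Queue, a background thread, and a None sentinel. Stripped of the OpenVINO specifics, the pattern reduces to the following sketch, where the hard-coded token list stands in for the real streamer callback:

import queue
import threading

def stream_tokens():
    q = queue.Queue()

    def worker():
        for token in ["台中港", "滿潮", "時間"]:  # stand-in for pipe.generate(..., streamer=...)
            q.put(token)
        q.put(None)  # sentinel: generation finished

    threading.Thread(target=worker).start()
    result = ""
    while True:
        token = q.get()
        if token is None:
            break
        result += token
        yield result  # Gradio re-renders the output on each yield

for partial in stream_tokens():
    print(partial)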
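
One follow-up worth flagging on the config cleanup: in openvino_genai, top_p and top_k generally only take effect once sampling is enabled, so the pipeline may still decode greedily. If sampling is actually intended, something like the following is likely needed; the do_sample line is an assumption about the installed ov_genai version, not something this commit sets.

import openvino_genai as ov_genai

config = ov_genai.GenerationConfig()
config.max_new_tokens = 4096
config.do_sample = True  # assumption: required before top_p/top_k apply
config.top_p = 0.9
config.top_k = 30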