hsuwill000 committed on
Commit
19fcbc7
·
verified ·
1 Parent(s): 687cb99

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -17
app.py CHANGED
@@ -6,22 +6,10 @@ import time
6
  from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig
7
  import nncf
8
  from llama_index.core import SimpleDirectoryReader
 
9
  from rank_bm25 import BM25Okapi
10
  import jieba
11
 
12
- import subprocess
13
- import os
14
- os.makedirs("./data/", exist_ok=True)
15
- url = "https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000311.html"
16
- output_dir = "./data/"
17
- cmd = ["wget", "-P", output_dir, url]
18
-
19
- try:
20
- subprocess.run(cmd, check=True)
21
- print("下載成功")
22
- except subprocess.CalledProcessError as e:
23
- print("下載失敗:", e)
24
-
25
  import huggingface_hub as hf_hub
26
  # 初始化 OpenVINO 模型
27
  #model_id = "hsuwill000/BitCPM4-1B_int4_ov"
@@ -34,12 +22,18 @@ config.max_new_tokens = 4096
34
  config.top_p = 0.9;
35
  config.top_k = 30;
36
 
37
-
38
  pipe = ov_genai.LLMPipeline(model_path, "CPU")
39
  pipe.get_tokenizer().set_chat_template(pipe.get_tokenizer().chat_template)
40
 
41
- # 載入文件(放 ./data 資料夾,支持多檔案)
42
- documents = SimpleDirectoryReader("./data").load_data()
 
 
 
 
 
 
43
  texts = [doc.get_content() for doc in documents]
44
 
45
  # 使用 jieba 斷詞做 BM25
@@ -75,7 +69,7 @@ def generate_stream(prompt):
75
 
76
  # 拼接 prompt,避免全文貼上,只用 top3 段落
77
  context = "\n\n".join(retrieved_texts)
78
- final_prompt = f"根據以下資訊,請簡潔回答問題:\n{context}\n\n問題:{query}\n回答:"
79
 
80
  print("=== 最終 prompt ===")
81
  print(final_prompt)
 
6
  from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig
7
  import nncf
8
  from llama_index.core import SimpleDirectoryReader
9
+ from llama_index.readers.web import BeautifulSoupWebReader
10
  from rank_bm25 import BM25Okapi
11
  import jieba
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  import huggingface_hub as hf_hub
14
  # 初始化 OpenVINO 模型
15
  #model_id = "hsuwill000/BitCPM4-1B_int4_ov"
 
22
  config.top_p = 0.9;
23
  config.top_k = 30;
24
 
25
+ reader = BeautifulSoupWebReader()
26
  pipe = ov_genai.LLMPipeline(model_path, "CPU")
27
  pipe.get_tokenizer().set_chat_template(pipe.get_tokenizer().chat_template)
28
 
29
+
30
+ documents = reader.load_data([
31
+ "https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000311.html",
32
+ "https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000305.html",
33
+ "https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000306.html",
34
+ "https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000312.html",
35
+ ])
36
+
37
  texts = [doc.get_content() for doc in documents]
38
 
39
  # 使用 jieba 斷詞做 BM25
 
69
 
70
  # 拼接 prompt,避免全文貼上,只用 top3 段落
71
  context = "\n\n".join(retrieved_texts)
72
+ final_prompt = f"根據以下資訊,請簡潔回答問題:\n{context}\n\n問題:{prompt}\n回答:"
73
 
74
  print("=== 最終 prompt ===")
75
  print(final_prompt)