hsuwill000 committed
Commit ecb186b · verified · 1 Parent(s): cd67085

Update app.py

Files changed (1)
  1. app.py +3 -11
app.py CHANGED
@@ -3,7 +3,6 @@ import openvino_genai as ov_genai
 import queue
 import threading
 import time
-from transformers import AutoTokenizer
 from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig
 import nncf
 from llama_index.core import SimpleDirectoryReader
@@ -41,7 +40,7 @@ for i, url in enumerate(urls):
 story_default_text = response.text.strip()
 
 # Initialize the OpenVINO model
-model_id = "hsuwill000/BitCPM4-1B_int4_ov"
+model_id = "hsuwill000/TinyMixtral-4x248M-MoE_int4_ov"
 #model_id = "OpenVINO/Qwen3-0.6B-int4-ov"
 model_path = "ov"
 hf_hub.snapshot_download(model_id, local_dir=model_path)
@@ -52,9 +51,7 @@ config.top_p = 0.9
 config.top_k = 30
 
 pipe = ov_genai.LLMPipeline(model_path, "CPU")
-tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
-
-#pipe.get_tokenizer().set_chat_template(pipe.get_tokenizer().chat_template)
+pipe.get_tokenizer().set_chat_template(pipe.get_tokenizer().chat_template)
 
 # Read the txt files saved above
 documents = SimpleDirectoryReader("./data").load_data()
@@ -101,12 +98,7 @@ def generate_stream(prompt):
 
     def worker():
         nonlocal tps_result
-        text = tokenizer.apply_chat_template(
-            final_prompt,
-            tokenize=False,
-            add_generation_prompt=True
-        )
-        gen_result = pipe.generate([text], streamer=streamer, config=config)
+        gen_result = pipe.generate([final_prompt], streamer=streamer, config=config)
         tps = gen_result.perf_metrics.get_throughput().mean
         tps_result = f"{tps:.2f} tokens/s"
         q.put(None)  # end-of-stream sentinel
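The first three hunks replace manual prompt formatting through transformers.AutoTokenizer with the chat template that ships inside the OpenVINO tokenizer. A minimal sketch of the resulting setup, assuming the model snapshot has already been downloaded into "ov" as app.py does above (the prompt string here is a placeholder):

import openvino_genai as ov_genai

# Load the converted model on CPU, as app.py does.
pipe = ov_genai.LLMPipeline("ov", "CPU")

# Re-register the template bundled with the model's tokenizer; after this,
# the pipeline formats chat prompts itself and transformers is not needed.
tok = pipe.get_tokenizer()
tok.set_chat_template(tok.chat_template)

config = ov_genai.GenerationConfig()
config.top_p = 0.9
config.top_k = 30

# Raw prompts go straight to generate(); there is no apply_chat_template step.
result = pipe.generate(["Tell me a short story."], config=config)
print(result)

Besides dropping a dependency, this keeps a single tokenizer instance instead of loading a second copy via AutoTokenizer.from_pretrained.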
 
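The last hunk shows only fragments of generate_stream. For context, a hypothetical reconstruction of the producer/consumer pattern those fragments imply: the queue and threading imports, the streamer callback, and the q.put(None) sentinel are all visible in the diff, while the function signature and the surrounding loop here are assumptions.

import queue
import threading

def generate_stream(final_prompt):
    # pipe and config are the module-level objects created earlier in app.py.
    q = queue.Queue()
    tps_result = ""

    def streamer(subword):
        q.put(subword)   # hand each decoded piece to the consumer loop
        return False     # False tells openvino_genai to keep generating

    def worker():
        nonlocal tps_result
        gen_result = pipe.generate([final_prompt], streamer=streamer, config=config)
        tps = gen_result.perf_metrics.get_throughput().mean
        tps_result = f"{tps:.2f} tokens/s"
        q.put(None)      # end-of-stream sentinel

    threading.Thread(target=worker, daemon=True).start()
    while True:
        piece = q.get()
        if piece is None:
            break
        yield piece
    yield f"\n\n({tps_result})"  # assumed: append throughput for display

Because generation runs in a background thread, the caller can consume tokens as they arrive instead of waiting for the full completion.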