hsuwill000 committed
Commit ecb186b · verified · 1 Parent(s): cd67085

Update app.py

Files changed (1)
  1. app.py +3 -11
app.py CHANGED
@@ -3,7 +3,6 @@ import openvino_genai as ov_genai
 import queue
 import threading
 import time
-from transformers import AutoTokenizer
 from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig
 import nncf
 from llama_index.core import SimpleDirectoryReader
@@ -41,7 +40,7 @@ for i, url in enumerate(urls):
 story_default_text = response.text.strip()
 
 # Initialize the OpenVINO model
-model_id = "hsuwill000/BitCPM4-1B_int4_ov"
+model_id = "hsuwill000/TinyMixtral-4x248M-MoE_int4_ov"
 #model_id = "OpenVINO/Qwen3-0.6B-int4-ov"
 model_path = "ov"
 hf_hub.snapshot_download(model_id, local_dir=model_path)
@@ -52,9 +51,7 @@ config.top_p = 0.9
 config.top_k = 30
 
 pipe = ov_genai.LLMPipeline(model_path, "CPU")
-tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
-
-#pipe.get_tokenizer().set_chat_template(pipe.get_tokenizer().chat_template)
+pipe.get_tokenizer().set_chat_template(pipe.get_tokenizer().chat_template)
 
 # Read the txt files saved above
 documents = SimpleDirectoryReader("./data").load_data()
@@ -101,12 +98,7 @@ def generate_stream(prompt):
 
     def worker():
         nonlocal tps_result
-        text = tokenizer.apply_chat_template(
-            final_prompt,
-            tokenize=False,
-            add_generation_prompt=True
-        )
-        gen_result = pipe.generate([text], streamer=streamer, config=config)
+        gen_result = pipe.generate([final_prompt], streamer=streamer, config=config)
         tps = gen_result.perf_metrics.get_throughput().mean
         tps_result = f"{tps:.2f} tokens/s"
         q.put(None)  # end-of-stream sentinel
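The first three hunks replace manual prompt formatting through transformers.AutoTokenizer with the chat template that ships inside the OpenVINO tokenizer. A minimal sketch of the resulting setup, assuming the model snapshot has already been downloaded into "ov" as app.py does above (the prompt string here is a placeholder):

import openvino_genai as ov_genai

# Load the converted model on CPU, as app.py does.
pipe = ov_genai.LLMPipeline("ov", "CPU")

# Re-register the template bundled with the model's tokenizer; after this,
# the pipeline formats chat prompts itself and transformers is not needed.
tok = pipe.get_tokenizer()
tok.set_chat_template(tok.chat_template)

config = ov_genai.GenerationConfig()
config.top_p = 0.9
config.top_k = 30

# Raw prompts go straight to generate(); there is no apply_chat_template step.
result = pipe.generate(["Tell me a short story."], config=config)
print(result)

Besides dropping a dependency, this keeps a single tokenizer instance instead of loading a second copy via AutoTokenizer.from_pretrained.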
 
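The last hunk shows only fragments of generate_stream. For context, a hypothetical reconstruction of the producer/consumer pattern those fragments imply: the queue and threading imports, the streamer callback, and the q.put(None) sentinel are all visible in the diff, while the function signature and the surrounding loop here are assumptions.

import queue
import threading

def generate_stream(final_prompt):
    # pipe and config are the module-level objects created earlier in app.py.
    q = queue.Queue()
    tps_result = ""

    def streamer(subword):
        q.put(subword)   # hand each decoded piece to the consumer loop
        return False     # False tells openvino_genai to keep generating

    def worker():
        nonlocal tps_result
        gen_result = pipe.generate([final_prompt], streamer=streamer, config=config)
        tps = gen_result.perf_metrics.get_throughput().mean
        tps_result = f"{tps:.2f} tokens/s"
        q.put(None)      # end-of-stream sentinel

    threading.Thread(target=worker, daemon=True).start()
    while True:
        piece = q.get()
        if piece is None:
            break
        yield piece
    yield f"\n\n({tps_result})"  # assumed: append throughput for display

Because generation runs in a background thread, the caller can consume tokens as they arrive instead of waiting for the full completion.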