Update app.py
app.py CHANGED
@@ -3,7 +3,6 @@ import openvino_genai as ov_genai
 import queue
 import threading
 import time
-from transformers import AutoTokenizer
 from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig
 import nncf
 from llama_index.core import SimpleDirectoryReader
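The only change in this hunk is dropping the transformers AutoTokenizer import. Presumably it is no longer needed because, as the later hunks show, prompt templating now goes through the openvino_genai pipeline's own tokenizer rather than a separate transformers tokenizer.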
@@ -41,7 +40,7 @@ for i, url in enumerate(urls):
 story_default_text = response.text.strip()
 
 # Initialize the OpenVINO model
-model_id = "hsuwill000/
+model_id = "hsuwill000/TinyMixtral-4x248M-MoE_int4_ov"
 #model_id = "OpenVINO/Qwen3-0.6B-int4-ov"
 model_path = "ov"
 hf_hub.snapshot_download(model_id, local_dir=model_path)
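For context, here is a minimal, self-contained sketch of the download-and-load flow these lines implement. It assumes huggingface_hub is imported as hf_hub, as in the full app.py; the final print call is only for illustration and is not in the diff.

import huggingface_hub as hf_hub
import openvino_genai as ov_genai

model_id = "hsuwill000/TinyMixtral-4x248M-MoE_int4_ov"
model_path = "ov"

# Fetch the pre-converted, int4-quantized OpenVINO IR into ./ov
hf_hub.snapshot_download(model_id, local_dir=model_path)

# Load the IR; the second argument selects the inference device
pipe = ov_genai.LLMPipeline(model_path, "CPU")
print(pipe.generate("Hello", max_new_tokens=16))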
@@ -52,9 +51,7 @@ config.top_p = 0.9
 config.top_k = 30
 
 pipe = ov_genai.LLMPipeline(model_path, "CPU")
-
-
-#pipe.get_tokenizer().set_chat_template(pipe.get_tokenizer().chat_template)
+pipe.get_tokenizer().set_chat_template(pipe.get_tokenizer().chat_template)
 
 # Read the txt files we just saved
 documents = SimpleDirectoryReader("./data").load_data()
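This hunk promotes the previously commented-out chat-template line to live code: it re-applies the template already bundled with the model so that generate() wraps plain strings in the model's chat format. A hedged sketch of what the setup amounts to; only top_p and top_k are visible in this hunk, so the max_new_tokens and do_sample values below are assumptions.

import openvino_genai as ov_genai

pipe = ov_genai.LLMPipeline("ov", "CPU")

config = ov_genai.GenerationConfig()
config.max_new_tokens = 512  # assumed; set elsewhere in app.py
config.do_sample = True      # assumed; top_p/top_k only matter when sampling
config.top_p = 0.9
config.top_k = 30

# Re-apply the template shipped with the model so generate()
# formats raw prompt strings with the model's chat template.
tok = pipe.get_tokenizer()
tok.set_chat_template(tok.chat_template)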
@@ -101,12 +98,7 @@ def generate_stream(prompt):
 
     def worker():
         nonlocal tps_result
-        text = tokenizer.apply_chat_template(
-            final_prompt,
-            tokenize=False,
-            add_generation_prompt=True
-        )
-        gen_result = pipe.generate([text], streamer=streamer, config=config)
+        gen_result = pipe.generate([final_prompt], streamer=streamer, config=config)
         tps = gen_result.perf_metrics.get_throughput().mean
         tps_result = f"{tps:.2f} tokens/s"
         q.put(None)  # end-of-stream sentinel
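Net effect of this hunk: the detour through transformers' apply_chat_template is gone, and final_prompt goes straight to the pipeline, whose tokenizer now owns the chat template (see the previous hunk). For readers unfamiliar with the pattern, here is a self-contained sketch of the queue-based streaming that worker() participates in; the generate_stream wrapper and the streamer callback are reconstructed from the visible lines and are assumptions, not the verbatim app.py.

import queue
import threading

def generate_stream(final_prompt):
    q = queue.Queue()
    tps_result = ""

    def streamer(subword: str) -> bool:
        q.put(subword)  # hand each decoded chunk to the consumer
        return False    # False tells the pipeline to keep generating

    def worker():
        nonlocal tps_result
        # Runs in a background thread; streamer feeds q as tokens arrive
        gen_result = pipe.generate([final_prompt], streamer=streamer, config=config)
        tps = gen_result.perf_metrics.get_throughput().mean
        tps_result = f"{tps:.2f} tokens/s"
        q.put(None)  # end-of-stream sentinel

    threading.Thread(target=worker, daemon=True).start()
    while (chunk := q.get()) is not None:
        yield chunk
    yield f"\n\n[{tps_result}]"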