Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -6,7 +6,7 @@ import time
|
|
6 |
|
7 |
import huggingface_hub as hf_hub
|
8 |
# 初始化 OpenVINO 模型
|
9 |
-
model_id = "OpenVINO/…"  # removed line; the old model name is truncated in this diff view
|
10 |
model_path = "ov"
|
11 |
|
12 |
|
@@ -27,7 +27,7 @@ def finish_chat():
|
|
27 |
|
28 |
# 建立推論函式:使用 streamer 並回傳 generator 結果
|
29 |
def generate_stream(prompt):
|
30 |
-
prompt = prompt
|
31 |
|
32 |
q = queue.Queue()
|
33 |
tps_result = ""
|
@@ -39,7 +39,7 @@ def generate_stream(prompt):
|
|
39 |
def worker():
|
40 |
# 在背景 thread 中做推論
|
41 |
nonlocal tps_result
|
42 |
-
gen_result = pipe.generate([prompt], streamer=streamer, max_new_tokens=…)  # removed line; the old token limit is truncated in this diff view
|
43 |
tps = gen_result.perf_metrics.get_throughput().mean
|
44 |
tps_result = f"{tps:.2f} tokens/s"
|
45 |
q.put(None) # 結束符號
|
|
|
6 |
|
7 |
import huggingface_hub as hf_hub
|
8 |
# 初始化 OpenVINO 模型
|
9 |
+
model_id = "OpenVINO/Qwen3-8B-int4-ov"
|
10 |
model_path = "ov"
|
11 |
|
12 |
|
|
|
27 |
|
28 |
# 建立推論函式:使用 streamer 並回傳 generator 結果
|
29 |
def generate_stream(prompt):
|
30 |
+
prompt = prompt + " /no_think" + " 答案短且明瞭"
|
31 |
|
32 |
q = queue.Queue()
|
33 |
tps_result = ""
|
|
|
39 |
def worker():
|
40 |
# 在背景 thread 中做推論
|
41 |
nonlocal tps_result
|
42 |
+
gen_result = pipe.generate([prompt], streamer=streamer, max_new_tokens=2048)
|
43 |
tps = gen_result.perf_metrics.get_throughput().mean
|
44 |
tps_result = f"{tps:.2f} tokens/s"
|
45 |
q.put(None) # 結束符號
|