Update app.py
app.py CHANGED
@@ -9,6 +9,10 @@ import huggingface_hub as hf_hub
 model_id = "OpenVINO/Qwen3-8B-int4-ov"
 model_path = "ov"
 
+config = openvino_genai.GenerationConfig()
+config.max_new_tokens = 2048
+config.top_p = 0.9;
+config.top_k = 10;
 
 hf_hub.snapshot_download(model_id, local_dir=model_path)
 
@@ -39,7 +43,7 @@ def generate_stream(prompt):
     def worker():
         # Run inference in a background thread
         nonlocal tps_result
-        gen_result = pipe.generate([prompt], streamer=streamer)
+        gen_result = pipe.generate([prompt], streamer=streamer, config)
         tps = gen_result.perf_metrics.get_throughput().mean
         tps_result = f"{tps:.2f} tokens/s"
         q.put(None)  # end-of-stream marker
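Below is a minimal sketch of how the GenerationConfig added in this commit is typically wired into an openvino_genai streaming loop. Only the pieces visible in the diff (model_id, model_path, generate_stream, worker, streamer, q, tps_result, the perf_metrics throughput read) come from the source; the "CPU" device, the queue-based streamer callback, do_sample = True, and the yield shape are assumptions filled in for illustration. Note two details: top_p/top_k only take effect when sampling is enabled, and the committed call pipe.generate([prompt], streamer=streamer, config) places a positional argument after a keyword argument, which Python rejects, so the sketch passes the config positionally before the streamer keyword (the trailing semicolons on the config lines are valid but unnecessary in Python).

# Sketch only: scaffolding around the diff is an assumption, not the full app.py.
import queue
import threading

import huggingface_hub as hf_hub
import openvino_genai

model_id = "OpenVINO/Qwen3-8B-int4-ov"
model_path = "ov"

hf_hub.snapshot_download(model_id, local_dir=model_path)
pipe = openvino_genai.LLMPipeline(model_path, "CPU")  # device choice is an assumption

# Sampling parameters added in this commit; do_sample is assumed so that
# top_p/top_k actually influence decoding.
config = openvino_genai.GenerationConfig()
config.max_new_tokens = 2048
config.do_sample = True
config.top_p = 0.9
config.top_k = 10

def generate_stream(prompt):
    q = queue.Queue()
    tps_result = ""

    def streamer(subword):
        q.put(subword)   # forward each newly decoded chunk to the consumer
        return False     # False = keep generating

    def worker():
        # Run inference in a background thread
        nonlocal tps_result
        # Config passed positionally, before the streamer keyword, so the call is valid Python.
        gen_result = pipe.generate([prompt], config, streamer=streamer)
        tps = gen_result.perf_metrics.get_throughput().mean
        tps_result = f"{tps:.2f} tokens/s"
        q.put(None)      # end-of-stream marker

    threading.Thread(target=worker).start()
    text = ""
    while True:
        token = q.get()
        if token is None:
            break
        text += token
        yield text, tps_result
    yield text, tps_result  # final yield carries the measured throughput

The background-thread-plus-queue pattern mirrors what the diff implies: the streamer callback feeds tokens to the UI as they arrive, while the worker thread records the pipeline's reported throughput once generation finishes.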