Update app.py
app.py CHANGED
@@ -9,6 +9,10 @@ import huggingface_hub as hf_hub
 model_id = "OpenVINO/Qwen3-8B-int4-ov"
 model_path = "ov"
 
+config = openvino_genai.GenerationConfig()
+config.max_new_tokens = 2048
+config.top_p = 0.9;
+config.top_k = 10;
 
 hf_hub.snapshot_download(model_id, local_dir=model_path)
 
@@ -39,7 +43,7 @@ def generate_stream(prompt):
     def worker():
         # Run inference in a background thread
         nonlocal tps_result
-        gen_result = pipe.generate([prompt], streamer=streamer)
+        gen_result = pipe.generate([prompt], streamer=streamer, config)
         tps = gen_result.perf_metrics.get_throughput().mean
         tps_result = f"{tps:.2f} tokens/s"
         q.put(None)  # end-of-stream marker
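Below is a minimal sketch of how the GenerationConfig added in this commit is typically wired into an openvino_genai streaming loop. Only the pieces visible in the diff (model_id, model_path, generate_stream, worker, streamer, q, tps_result, the perf_metrics throughput read) come from the source; the "CPU" device, the queue-based streamer callback, do_sample = True, and the yield shape are assumptions filled in for illustration. Note two details: top_p/top_k only take effect when sampling is enabled, and the committed call pipe.generate([prompt], streamer=streamer, config) places a positional argument after a keyword argument, which Python rejects, so the sketch passes the config positionally before the streamer keyword (the trailing semicolons on the config lines are valid but unnecessary in Python).

# Sketch only: scaffolding around the diff is an assumption, not the full app.py.
import queue
import threading

import huggingface_hub as hf_hub
import openvino_genai

model_id = "OpenVINO/Qwen3-8B-int4-ov"
model_path = "ov"

hf_hub.snapshot_download(model_id, local_dir=model_path)
pipe = openvino_genai.LLMPipeline(model_path, "CPU")  # device choice is an assumption

# Sampling parameters added in this commit; do_sample is assumed so that
# top_p/top_k actually influence decoding.
config = openvino_genai.GenerationConfig()
config.max_new_tokens = 2048
config.do_sample = True
config.top_p = 0.9
config.top_k = 10

def generate_stream(prompt):
    q = queue.Queue()
    tps_result = ""

    def streamer(subword):
        q.put(subword)   # forward each newly decoded chunk to the consumer
        return False     # False = keep generating

    def worker():
        # Run inference in a background thread
        nonlocal tps_result
        # Config passed positionally, before the streamer keyword, so the call is valid Python.
        gen_result = pipe.generate([prompt], config, streamer=streamer)
        tps = gen_result.perf_metrics.get_throughput().mean
        tps_result = f"{tps:.2f} tokens/s"
        q.put(None)      # end-of-stream marker

    threading.Thread(target=worker).start()
    text = ""
    while True:
        token = q.get()
        if token is None:
            break
        text += token
        yield text, tps_result
    yield text, tps_result  # final yield carries the measured throughput

The background-thread-plus-queue pattern mirrors what the diff implies: the streamer callback feeds tokens to the UI as they arrive, while the worker thread records the pipeline's reported throughput once generation finishes.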