hsuwill000 committed on
Commit
5adf214
·
verified ·
1 Parent(s): 30994f6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -1
app.py CHANGED
@@ -9,6 +9,10 @@ import huggingface_hub as hf_hub
9
  model_id = "OpenVINO/Qwen3-8B-int4-ov"
10
  model_path = "ov"
11
 
 
 
 
 
12
 
13
  hf_hub.snapshot_download(model_id, local_dir=model_path)
14
 
@@ -39,7 +43,7 @@ def generate_stream(prompt):
39
  def worker():
40
  # 在背景 thread 中做推論
41
  nonlocal tps_result
42
- gen_result = pipe.generate([prompt], streamer=streamer, max_new_tokens=2048)
43
  tps = gen_result.perf_metrics.get_throughput().mean
44
  tps_result = f"{tps:.2f} tokens/s"
45
  q.put(None) # 結束符號
 
9
  model_id = "OpenVINO/Qwen3-8B-int4-ov"
10
  model_path = "ov"
11
 
12
+ config = openvino_genai.GenerationConfig()
13
+ config.max_new_tokens = 2048
14
+ config.top_p = 0.9;
15
+ config.top_k = 10;
16
 
17
  hf_hub.snapshot_download(model_id, local_dir=model_path)
18
 
 
43
  def worker():
44
  # 在背景 thread 中做推論
45
  nonlocal tps_result
46
+ gen_result = pipe.generate([prompt], streamer=streamer, config)
47
  tps = gen_result.perf_metrics.get_throughput().mean
48
  tps_result = f"{tps:.2f} tokens/s"
49
  q.put(None) # 結束符號