hsuwill000 committed
Commit 6868d73 Β· verified Β· 1 Parent(s): 0dedb6c

Update app.py

Files changed (1)
  1. app.py  +6 βˆ’5
app.py CHANGED
@@ -6,10 +6,10 @@ import time
 
 import huggingface_hub as hf_hub
 # Initialize the OpenVINO model
-#model_id = "OpenVINO/Qwen3-4B-int8-ov"
-#model_path = "Qwen3-4B-int8-ov"
-model_id = "OpenVINO/Phi-4-mini-instruct-int4-ov"
-model_path = "Phi-4-mini-instruct-int4-ov"
+model_id = "OpenVINO/Qwen3-0.6B-int4-ov"
+model_path = "Qwen3-0.6B-int4-ov"
+#model_id = "OpenVINO/Phi-4-mini-instruct-int4-ov"
+#model_path = "Phi-4-mini-instruct-int4-ov"
 
 
 
@@ -40,7 +40,8 @@ def generate_stream(prompt):
     def worker():
         # Run inference in a background thread
         nonlocal tps_result
-        gen_result = pipe.generate([prompt], streamer=streamer, max_new_tokens=4096)
+        prompt = prompt + "/no_think"
+        gen_result = pipe.generate([prompt], streamer=streamer, max_new_tokens=32767)
         tps = gen_result.perf_metrics.get_throughput().mean
         tps_result = f"{tps:.2f} tokens/s"
         q.put(None)  # end-of-stream marker
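
For context, the sketch below shows how the changed lines plausibly fit together: downloading the int4 Qwen3 model, building an openvino_genai.LLMPipeline, and streaming tokens from a background worker through a queue. Everything not visible in the diff is an assumption (the snapshot_download call, the LLMPipeline and CPU device, the streamer callback, and the final yield format), and in the sketch the /no_think suffix is bound to a new local name so the nested worker does not rebind the enclosing prompt before reading it.

import queue
import threading

import huggingface_hub as hf_hub
import openvino_genai as ov_genai

# Initialize the OpenVINO model (first hunk of the diff)
model_id = "OpenVINO/Qwen3-0.6B-int4-ov"
model_path = "Qwen3-0.6B-int4-ov"

hf_hub.snapshot_download(model_id, local_dir=model_path)  # assumed download step
pipe = ov_genai.LLMPipeline(model_path, "CPU")             # assumed target device


def generate_stream(prompt):
    q = queue.Queue()
    tps_result = ""

    def streamer(subword):
        # Forward each decoded chunk to the queue; returning False lets generation continue.
        q.put(subword)
        return False

    def worker():
        # Run inference in a background thread (second hunk of the diff)
        nonlocal tps_result
        # Bind the suffixed prompt to a new name: assigning to `prompt` inside the
        # nested function would otherwise shadow the enclosing variable before it is read.
        full_prompt = prompt + "/no_think"
        gen_result = pipe.generate([full_prompt], streamer=streamer, max_new_tokens=32767)
        tps = gen_result.perf_metrics.get_throughput().mean
        tps_result = f"{tps:.2f} tokens/s"
        q.put(None)  # end-of-stream marker

    threading.Thread(target=worker, daemon=True).start()

    text = ""
    while True:
        chunk = q.get()
        if chunk is None:
            break
        text += chunk
        yield text
    yield text + f"\n\n[{tps_result}]"

The /no_think suffix matches Qwen3's documented soft switch for suppressing thinking output, and max_new_tokens is raised from 4096 to 32767; the consuming UI (not shown in this diff) can iterate over generate_stream directly as a streaming generator.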