hsuwill000 committed on
Commit
8b31668
·
verified ·
1 Parent(s): 7d7759a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -9
app.py CHANGED
@@ -18,25 +18,39 @@ tokenizer = pipe.get_tokenizer()
18
  tokenizer.set_chat_template(tokenizer.chat_template)
19
 
20
 
 
 
 
 
 
21
  def generate_response(prompt):
22
  try:
23
- generated = pipe.generate([prompt], max_length=1024)
24
- tokenpersec=f'{generated.perf_metrics.get_throughput().mean:.2f}'
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
- return tokenpersec, generated
27
  except Exception as e:
28
- return "發生錯誤", "發生錯誤", f"生成回應時發生錯誤:{e}"
29
-
30
 
31
  # 建立 Gradio 介面
32
  demo = gr.Interface(
33
  fn=generate_response,
34
  inputs=gr.Textbox(lines=5, label="輸入提示 (Prompt)"),
35
  outputs=[
36
- gr.Textbox(label="tokens/sec"),
37
- #gr.Textbox(label="思考過程"),
38
- #gr.Textbox(label="最終回應")
39
- gr.Textbox(label="回應")
40
  ],
41
  title="Qwen3-0.6B-int4-ov ",
42
  description="基於 Qwen3-0.6B-int4-ov 推理應用,支援思考過程分離與 GUI。"
 
18
  tokenizer.set_chat_template(tokenizer.chat_template)
19
 
20
 
21
def streamer(subword):
    """Streaming callback that tells the pipeline to keep generating.

    This must be a plain function, not a generator: a ``yield`` in the body
    would make every call return a generator object without running the body,
    so the status below would never reach the caller.

    Args:
        subword: The newly decoded text chunk (unused here).

    Returns:
        ``ov_genai.StreamingStatus.RUNNING`` so that generation continues.
    """
    return ov_genai.StreamingStatus.RUNNING
24
+
25
+
26
  def generate_response(prompt):
27
  try:
28
+ full_response = ""
29
+ token_count = 0
30
+ start_time = time.time()
31
+
32
+ for text in pipe.generate(prompt, streamer=streamer, max_new_tokens=1024):
33
+ full_response += text
34
+ token_count += 1
35
+ yield (None, full_response) # 每次 yield 都会刷新界面
36
+
37
+ end_time = time.time()
38
+ elapsed_time = end_time - start_time
39
+ tokens_per_sec = token_count / elapsed_time if elapsed_time > 0 else 0
40
+ tokenpersec=f'{tokens_per_sec:.2f}'
41
+
42
+ yield (tokenpersec, full_response) # 最终 yield, 保证输出完整.
43
 
 
44
  except Exception as e:
45
+ yield ("發生錯誤", f"生成回應時發生錯誤:{e}") # 使用 yield 错误信息
 
46
 
47
  # 建立 Gradio 介面
48
  demo = gr.Interface(
49
  fn=generate_response,
50
  inputs=gr.Textbox(lines=5, label="輸入提示 (Prompt)"),
51
  outputs=[
52
+ gr.Textbox(label="tokens/sec"),
53
+ gr.Textbox(label="回應", streaming=True)
 
 
54
  ],
55
  title="Qwen3-0.6B-int4-ov ",
56
  description="基於 Qwen3-0.6B-int4-ov 推理應用,支援思考過程分離與 GUI。"