hsuwill000 committed on
Commit
ac387f0
·
verified ·
1 Parent(s): f30bfa5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -10
app.py CHANGED
@@ -35,10 +35,13 @@ tokenizer.set_chat_template(tokenizer.chat_template)
35
  config = ov_genai.GenerationConfig()
36
  config.max_new_tokens = 1024
37
 
38
- def streamer(subword):
39
- print(subword, end='', flush=True)
40
- return False
 
 
41
 
 
42
  def generate_response(prompt, model_name):
43
  global pipe, tokenizer, InUsed_model_name
44
  if(InUsed_model_name!=model_name):
@@ -52,12 +55,17 @@ def generate_response(prompt, model_name):
52
  InUsed_model_name=model_name
53
 
54
  try:
55
- generated = pipe.generate([prompt], config, streamer)
56
- tokenpersec=f'{generated.perf_metrics.get_throughput().mean:.2f}'
 
 
 
 
 
 
57
 
58
- return tokenpersec, generated
59
  except Exception as e:
60
- return "發生錯誤", "發生錯誤", f"生成回應時發生錯誤:{e}"
61
 
62
 
63
  # 建立 Gradio 介面
@@ -71,11 +79,12 @@ demo = gr.Interface(
71
  ],
72
  outputs=[
73
  gr.Textbox(label="tokens/sec"),
74
- gr.Textbox(label="回應", streaming=True),
75
  ],
76
  title="Qwen3 Model Inference",
77
- description="基於 Qwen3 推理應用,支援思考過程分離與 GUI。"
 
78
  )
79
 
80
  if __name__ == "__main__":
81
- demo.launch()
 
35
  config = ov_genai.GenerationConfig()
36
  config.max_new_tokens = 1024
37
 
38
# Streamer factory: returns the per-token callback handed to pipe.generate().
# NOTE(review): the openvino_genai streamer contract is a plain callable that
# receives each decoded subword and returns False (or None) to keep generating.
# The committed version defined cb as a generator function (`yield subword`),
# so calling cb(subword) merely created a generator object and never ran its
# body — every subword was silently dropped and streaming broke. Restore a
# real callback with the same factory interface (streamer() -> callable).
def streamer():
    def cb(subword):
        # Echo the token as soon as it is produced; False => continue generating.
        print(subword, end='', flush=True)
        return False
    return cb
43
 
44
+ # 修改 generate_response 函數,使用 yield 傳回結果,並傳回tokenpersec
45
  def generate_response(prompt, model_name):
46
  global pipe, tokenizer, InUsed_model_name
47
  if(InUsed_model_name!=model_name):
 
55
  InUsed_model_name=model_name
56
 
57
  try:
58
+ response = ""
59
+ callback = streamer()
60
+ for token in pipe.generate([prompt], config, callback):
61
+ response += token
62
+ yield '', response
63
+
64
+ tokenpersec = f'{pipe.last_metrics.get_throughput().mean:.2f}'
65
+ yield tokenpersec, response # 傳回最終結果
66
 
 
67
  except Exception as e:
68
+ yield "發生錯誤", f"生成回應時發生錯誤:{e}"
69
 
70
 
71
  # 建立 Gradio 介面
 
79
  ],
80
  outputs=[
81
  gr.Textbox(label="tokens/sec"),
82
+ gr.Textbox(label="回應"),
83
  ],
84
  title="Qwen3 Model Inference",
85
+ description="基於 Qwen3 推理應用,支援思考過程分離與 GUI。",
86
+ live=True # 啟用即時更新
87
  )
88
 
89
  if __name__ == "__main__":
90
+ demo.queue().launch()