hsuwill000 commited on
Commit
1af7192
·
verified ·
1 Parent(s): ac387f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -19
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import huggingface_hub as hf_hub
2
  import time
3
  import openvino_genai as ov_genai
@@ -35,13 +36,10 @@ tokenizer.set_chat_template(tokenizer.chat_template)
35
  config = ov_genai.GenerationConfig()
36
  config.max_new_tokens = 1024
37
 
38
- # 修改 streamer 函數,使其 yield 結果
39
- def streamer():
40
- def cb(subword):
41
- yield subword
42
- return cb
43
 
44
- # 修改 generate_response 函數,使用 yield 傳回結果,並傳回tokenpersec
45
  def generate_response(prompt, model_name):
46
  global pipe, tokenizer, InUsed_model_name
47
  if(InUsed_model_name!=model_name):
@@ -55,17 +53,12 @@ def generate_response(prompt, model_name):
55
  InUsed_model_name=model_name
56
 
57
  try:
58
- response = ""
59
- callback = streamer()
60
- for token in pipe.generate([prompt], config, callback):
61
- response += token
62
- yield '', response
63
-
64
- tokenpersec = f'{pipe.last_metrics.get_throughput().mean:.2f}'
65
- yield tokenpersec, response # 傳回最終結果
66
-
67
  except Exception as e:
68
- yield "發生錯誤", f"生成回應時發生錯誤:{e}"
69
 
70
 
71
  # 建立 Gradio 介面
@@ -82,9 +75,8 @@ demo = gr.Interface(
82
  gr.Textbox(label="回應"),
83
  ],
84
  title="Qwen3 Model Inference",
85
- description="基於 Qwen3 推理應用,支援思考過程分離與 GUI。",
86
- live=True # 啟用即時更新
87
  )
88
 
89
  if __name__ == "__main__":
90
- demo.queue().launch()
 
1
+
2
  import huggingface_hub as hf_hub
3
  import time
4
  import openvino_genai as ov_genai
 
36
  config = ov_genai.GenerationConfig()
37
  config.max_new_tokens = 1024
38
 
39
def streamer(subword):
    """Token-streaming callback for the OpenVINO GenAI pipeline.

    Echoes each generated subword to stdout as soon as it arrives
    (no newline, flushed immediately so output appears in real time).
    Returning False signals the pipeline to keep generating
    (presumably True would stop generation — per ov_genai streamer
    convention; confirm against the openvino_genai docs).
    """
    keep_going = False  # do not interrupt generation
    print(subword, end='', flush=True)
    return keep_going
 
 
42
 
 
43
  def generate_response(prompt, model_name):
44
  global pipe, tokenizer, InUsed_model_name
45
  if(InUsed_model_name!=model_name):
 
53
  InUsed_model_name=model_name
54
 
55
  try:
56
+ generated = pipe.generate([prompt], config, streamer)
57
+ tokenpersec=f'{generated.perf_metrics.get_throughput().mean:.2f}'
58
+ print(f"tokenpersec:{tokenpersec}\n")
59
+ return tokenpersec, generated
 
 
 
 
 
60
  except Exception as e:
61
+ return "發生錯誤", "發生錯誤", f"生成回應時發生錯誤:{e}"
62
 
63
 
64
  # 建立 Gradio 介面
 
75
  gr.Textbox(label="回應"),
76
  ],
77
  title="Qwen3 Model Inference",
78
+ description="基於 Qwen3 推理應用,支援思考過程分離與 GUI。"
 
79
  )
80
 
81
  if __name__ == "__main__":
82
+ demo.launch()