hsuwill000 committed on
Commit
d140707
·
verified ·
1 Parent(s): f372999

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -26
app.py CHANGED
@@ -1,6 +1,10 @@
 
1
  import huggingface_hub as hf_hub
 
2
  import openvino_genai as ov_genai
 
3
  import gradio as gr
 
4
 
5
  # 下載模型
6
  model_id = "OpenVINO/Qwen3-0.6B-int4-ov"
@@ -14,41 +18,28 @@ pipe = ov_genai.LLMPipeline(model_path, device)
14
  tokenizer = pipe.get_tokenizer()
15
  tokenizer.set_chat_template(tokenizer.chat_template)
16
 
17
- # 完整流式處理的 generate_response 函數
18
  def generate_response(prompt):
19
- response = ""
20
  try:
21
- # 定義流式處理的回調函數
22
- def streamer(subword):
23
- nonlocal response
24
- response += subword # 拼接實時輸出的內容
25
- yield response # 每次生成一部分內容即時返回給 Gradio
26
- return ov_genai.StreamingStatus.RUNNING
27
-
28
- # 啟動流式生成
29
- pipe.start_chat()
30
- generated = pipe.generate([prompt], streamer=streamer, max_length=1024)
31
- pipe.finish_chat()
32
-
33
- # 最後返回完整輸出與性能數據
34
- token_per_sec = f'{generated.perf_metrics.get_throughput().mean:.2f}'
35
- yield f"生成完成:每秒生成 {token_per_sec} tokens。\n\n最終回應:{response}"
36
 
 
37
  except Exception as e:
38
- # 捕獲錯誤並即時返回錯誤訊息
39
- yield f"生成過程中發生錯誤:{e}"
40
 
41
- # 使用 Gradio 流式介面
42
  demo = gr.Interface(
43
- fn=generate_response, # 流式處理函數
44
  inputs=gr.Textbox(lines=5, label="輸入提示 (Prompt)"),
45
  outputs=[
46
- gr.Textbox(label="流式處理的回應") # 輸出將逐步更新,顯示即時生成內容
 
47
  ],
48
- title="Qwen3-0.6B-int4-ov 流式處理",
49
- description="基於 Qwen3-0.6B-int4-ov 推理應用,支援實時輸出到 Gradio 介面。"
50
  )
51
 
52
- # 啟動 Gradio 服務
53
  if __name__ == "__main__":
54
- demo.queue().launch()
 
1
+
2
  import huggingface_hub as hf_hub
3
+ import time
4
  import openvino_genai as ov_genai
5
+ import numpy as np
6
  import gradio as gr
7
+ import re
8
 
9
  # 下載模型
10
  model_id = "OpenVINO/Qwen3-0.6B-int4-ov"
 
18
  tokenizer = pipe.get_tokenizer()
19
  tokenizer.set_chat_template(tokenizer.chat_template)
20
 
21
+
22
def generate_response(prompt):
    """Generate a completion for *prompt* with the module-level OpenVINO pipeline.

    Returns a 2-tuple matching the two Gradio output textboxes:
      (tokens_per_sec, response) on success, or
      (error_marker, error_message) on failure.
    """
    try:
        # Non-streaming generation; max_length caps the total token count.
        generated = pipe.generate([prompt], max_length=1024)
        # Mean throughput reported by the pipeline's performance metrics.
        tokenpersec = f'{generated.perf_metrics.get_throughput().mean:.2f}'
        return tokenpersec, generated
    except Exception as e:
        # BUG FIX: the Gradio interface declares exactly two outputs, but the
        # original error path returned THREE values, which would itself raise
        # when Gradio tried to unpack the result. Return two values instead.
        return "發生錯誤", f"生成回應時發生錯誤:{e}"
30
+
31
 
32
# Build the Gradio UI: a single prompt textbox in, two textboxes out
# (generation throughput and the model's response).
demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=5, label="輸入提示 (Prompt)"),
    outputs=[
        gr.Textbox(label="tokens/sec"),
        gr.Textbox(label="回應"),
    ],
    title="Qwen3-0.6B-int4-ov ",
    description="基於 Qwen3-0.6B-int4-ov 推理應用,支援思考過程分離與 GUI。",
)

# Launch the web app only when executed as a script.
if __name__ == "__main__":
    demo.launch()