hsuwill000 committed on
Commit
2574109
·
verified ·
1 Parent(s): 6587c04

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -7
app.py CHANGED
@@ -19,26 +19,46 @@ tokenizer.set_chat_template(tokenizer.chat_template)
19
 
20
 
21
  def generate_response(prompt):
 
 
 
 
 
 
 
 
 
22
  try:
23
- generated = pipe.generate([prompt], max_length=1024)
 
 
 
 
 
 
 
 
 
24
  tokenpersec=f'{generated.perf_metrics.get_throughput().mean:.2f}'
25
 
26
- return tokenpersec, generated
 
27
  except Exception as e:
28
- return "發生錯誤", "發生錯誤", f"生成回應時發生錯誤:{e}"
29
-
30
 
31
  # 建立 Gradio 介面
32
  demo = gr.Interface(
33
  fn=generate_response,
34
- inputs=gr.Textbox(lines=1, label="輸入提示 (Prompt)"),
35
  outputs=[
36
  gr.Textbox(label="tokens/sec"),
37
- gr.Textbox(label="回應")
38
  ],
39
  title="Qwen3-0.6B-int4-ov ",
40
- description="基於 Qwen3-0.6B-int4-ov 推理應用,支援思考過程分離與 GUI。"
41
  )
42
 
43
  if __name__ == "__main__":
 
44
  demo.launch()
 
19
 
20
 
21
def generate_response(prompt):
    """Stream a chat completion for *prompt* as (tokens/sec, partial_text) pairs.

    Designed as a Gradio generator function: each ``yield`` pushes an
    incremental update to the two output widgets.  The final yield carries
    the measured throughput taken from the pipeline's own perf metrics.

    Parameters:
        prompt: user prompt string from the Gradio textbox.
    Yields:
        (tokenpersec, text) tuples; on failure a single
        ("發生錯誤", error-message) tuple.
    """
    import queue
    import threading

    token_queue = queue.Queue()

    def streamer(subword):
        # Plain callback — NOT a generator.  openvino_genai invokes this once
        # per decoded subword; returning False tells it to keep generating.
        # (The previous version used `yield` here, which turned the callback
        # into a generator factory whose body never ran.)
        token_queue.put(subword)
        return False

    full_response = ""
    tokenpersec = ""
    try:
        pipe.start_chat()

        # pipe.generate() blocks until completion, so run it in a worker
        # thread and drain the token queue here to stream to the frontend.
        result_holder = {}

        def _worker():
            try:
                result_holder["result"] = pipe.generate(
                    prompt, streamer=streamer, max_new_tokens=1024
                )
            except Exception as exc:  # surface the error in the consumer loop
                result_holder["error"] = exc
            finally:
                token_queue.put(None)  # sentinel: generation finished

        worker = threading.Thread(target=_worker, daemon=True)
        worker.start()

        while (subword := token_queue.get()) is not None:
            full_response += subword
            yield tokenpersec, full_response

        worker.join()
        pipe.finish_chat()

        if "error" in result_holder:
            raise result_holder["error"]

        # Reuse the perf metrics of the generation we just ran instead of
        # running a second, redundant pipe.generate() call.
        generated = result_holder["result"]
        tokenpersec = f'{generated.perf_metrics.get_throughput().mean:.2f}'
        yield tokenpersec, full_response

    except Exception as e:
        yield "發生錯誤", f"生成回應時發生錯誤:{e}"
49
 
50
# Build the Gradio UI: one prompt textbox in, throughput + Markdown answer out.
prompt_box = gr.Textbox(lines=5, label="輸入提示 (Prompt)")
speed_box = gr.Textbox(label="tokens/sec")
answer_view = gr.Markdown(label="回應")  # Markdown so the reply renders formatting

demo = gr.Interface(
    fn=generate_response,
    inputs=prompt_box,
    outputs=[speed_box, answer_view],
    title="Qwen3-0.6B-int4-ov ",
    description="基於 Qwen3-0.6B-int4-ov 推理應用,支援思考過程分離與 GUI。 使用Markdown顯示",
)

if __name__ == "__main__":
    # queue() is required so the generator fn can stream incremental updates.
    demo.queue()
    demo.launch()