hsuwill000 committed
Commit 1dd12ee · verified · 1 parent: 4b02b47

Update app.py

Files changed (1)
  app.py +49 -35
app.py CHANGED
@@ -1,45 +1,59 @@
-
-import huggingface_hub as hf_hub
-import time
-import openvino_genai as ov_genai
-import numpy as np
 import gradio as gr
-import re
+import openvino_genai as ov_genai
+import huggingface_hub as hf_hub
+import threading
+import queue
 
-# Download the model
 model_id = "OpenVINO/Qwen3-0.6B-int4-ov"
 model_path = "Qwen3-0.6B-int4-ov"
 
 hf_hub.snapshot_download(model_id, local_dir=model_path, local_dir_use_symlinks=False)
 
-# Build the inference pipeline
-device = "CPU"
-pipe = ov_genai.LLMPipeline(model_path, device)
-tokenizer = pipe.get_tokenizer()
-tokenizer.set_chat_template(tokenizer.chat_template)
-
-
-def generate_response(prompt):
-    try:
-        generated = pipe.generate([prompt], max_length=1024)
-        tokenpersec=f'{generated.perf_metrics.get_throughput().mean:.2f}'
-
-        return tokenpersec, generated
-    except Exception as e:
-        return "An error occurred", "An error occurred", f"Error while generating the response: {e}"
-
-
-# Build the Gradio interface
-demo = gr.Interface(
-    fn=generate_response,
-    inputs=gr.Textbox(lines=5, label="Prompt"),
-    outputs=[
-        gr.Textbox(label="tokens/sec"),
-        gr.Textbox(label="Response")
-    ],
-    title="Qwen3-0.6B-int4-ov",
-    description="Inference app based on Qwen3-0.6B-int4-ov, with separated thinking output and a GUI."
-)
+pipe = ov_genai.LLMPipeline(model_path, "CPU")
+pipe.start_chat()  # initialize the chat session state
+
+def generate(prompt, history):
+    """Interact with the LLM and stream the reply via `yield`."""
+    # The pipeline streamer is a plain callback, not a generator, so
+    # decoded subwords are handed off through a queue and yielded here.
+    q = queue.Queue()
+
+    def streamer(subword):
+        q.put(subword)  # called once per decoded subword
+        return ov_genai.StreamingStatus.RUNNING
+
+    # Gradio also passes in `history`; more elaborate prompt handling
+    # (e.g. a system prompt, or folding in past turns) could go here.
+    def worker():
+        pipe.generate(prompt, streamer=streamer, max_new_tokens=100)
+        q.put(None)  # sentinel: generation finished
+
+    threading.Thread(target=worker, daemon=True).start()
+
+    full_response = ""
+    while True:
+        subword = q.get()
+        if subword is None:
+            break
+        full_response += subword
+        yield full_response  # stream the accumulated reply to the UI
+
+    # After generation finishes, logic such as conversation logging
+    # or state updates could be added here.
+
+def on_close():
+    pipe.finish_chat()  # clean up the chat session on shutdown
+    print("Chat finished and pipeline closed.")
 
 if __name__ == "__main__":
+    demo = gr.ChatInterface(
+        generate,
+        chatbot=gr.Chatbot(height=300),
+        textbox=gr.Textbox(placeholder="Type your message...", container=False, scale=7),
+        title="LLM Streaming Demo (OpenVINO)",
+        description="Demonstrates streaming of OpenVINO GenAI responses through Gradio.",
+        theme="soft",
+        examples=["Hello", "Please introduce yourself", "How is the weather today?"],
+    )
     demo.launch()
+    on_close()  # gr.Blocks.close() takes no callback; clean up after launch() returns
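
For reference, the streaming contract the new app.py relies on can be exercised without Gradio. A minimal sketch, assuming the model snapshot was already downloaded to ./Qwen3-0.6B-int4-ov and an openvino_genai build whose streamers return StreamingStatus (older releases return a bool instead):

import openvino_genai as ov_genai

pipe = ov_genai.LLMPipeline("Qwen3-0.6B-int4-ov", "CPU")

def print_streamer(subword):
    # Invoked once per decoded subword during generation.
    print(subword, end="", flush=True)
    return ov_genai.StreamingStatus.RUNNING  # keep generating

# Blocks until generation completes; subwords print as they arrive.
pipe.generate("Hello", streamer=print_streamer, max_new_tokens=50)

The queue-plus-thread pattern in app.py builds on the same callback: a worker thread drives pipe.generate(), while the Gradio generator drains the queue and yields the accumulated reply.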