hsuwill000 committed
Commit 759aea4 · verified · 1 Parent(s): 07268e3

Update app.py

Files changed (1)
  1. app.py +62 -37
app.py CHANGED
@@ -5,53 +5,78 @@ import numpy as np
  import gradio as gr
  import re

- model_id = "OpenVINO/Qwen3-0.6B-int4-ov"
- model_path = "Qwen3-0.6B-int4-ov"

- hf_hub.snapshot_download(model_id, local_dir=model_path, local_dir_use_symlinks=False)

- pipe = ov_genai.LLMPipeline(model_path, "CPU")
- pipe.start_chat()  # initialize the chat state

- def generate(prompt, history):
-     """
-     Interact with the LLM and stream the response using `yield`.
-     """
-     global pipe  # allow modifying the global pipeline object
-     full_response = ""

-     def streamer(subword):
-         nonlocal full_response  # allow modifying full_response in the enclosing scope
-         full_response += subword
-         yield full_response  # yield the response incrementally
-         return ov_genai.StreamingStatus.RUNNING

-     # Gradio passes in history, so we use it to build the prompts.
-     # More elaborate prompt handling can be added here,
-     # e.g. building a system prompt, or assembling the full prompts from past turns.

-     for value in pipe.generate(prompt, streamer=streamer, max_new_tokens=100):
-         yield value  # the streamer already yields full_response; just re-yield its output here

-     # After generation finishes, extra logic can be added here, e.g. logging the conversation or updating state
-     # ...

- def on_close():
-     global pipe
-     pipe.finish_chat()  # clean up the pipeline when the app shuts down
-     print("Chat finished and pipeline closed.")

  if __name__ == "__main__":
-     demo = gr.ChatInterface(
-         generate,
-         chatbot=gr.Chatbot(height=300),
-         textbox=gr.Textbox(placeholder="Please enter your message...", container=False, scale=7),
-         title="LLM Streaming Output Example (OpenVINO)",
-         description="This example demonstrates how to stream OpenVINO GenAI responses with Gradio.",
-         theme="soft",
-         examples=["Hello", "Please introduce yourself", "What's the weather like today?"],
-     )
-     demo.close(on_close)  # register a cleanup function for when the app closes
      demo.launch()
 
  import gradio as gr
  import re

+ # Download the models
+ model_ids = [
+     "OpenVINO/Qwen3-0.6B-int4-ov",
+     "OpenVINO/Qwen3-0.6B-int8-ov",
+     "OpenVINO/Qwen3-1.7B-int4-ov",
+     "OpenVINO/Qwen3-1.7B-int8-ov"
+ ]

+ model_name_to_id = {
+     "Qwen3-0.6B-int4-ov": "OpenVINO/Qwen3-0.6B-int4-ov",
+     "Qwen3-0.6B-int8-ov": "OpenVINO/Qwen3-0.6B-int8-ov",
+     "Qwen3-1.7B-int4-ov": "OpenVINO/Qwen3-1.7B-int4-ov",
+     "Qwen3-1.7B-int8-ov": "OpenVINO/Qwen3-1.7B-int8-ov"
+ }

+ for model_id in model_ids:
+     model_path = model_id.split("/")[-1]  # use the model name as the local directory
+     try:
+         hf_hub.snapshot_download(model_id, local_dir=model_path, local_dir_use_symlinks=False)
+         print(f"Successfully downloaded {model_id} to {model_path}")
+     except Exception as e:
+         print(f"Error downloading {model_id}: {e}")  # handle download errors gracefully

+ # Build the inference pipeline (initialize with a default model first)
+ device = "CPU"
+ default_model_name = "Qwen3-0.6B-int4-ov"  # default model
+ model_path = model_name_to_id[default_model_name].split("/")[-1]

+ pipe = ov_genai.LLMPipeline(model_path, device)
+ tokenizer = pipe.get_tokenizer()
+ tokenizer.set_chat_template(tokenizer.chat_template)

+ def generate_response(prompt, model_name):
+     global pipe, tokenizer  # access the global pipeline and tokenizer

+     # Check whether the model needs to be switched
+     model_id = model_name_to_id[model_name]
+     new_model_path = model_id.split("/")[-1]

+     if pipe.model_name != new_model_path:  # assumes LLMPipeline exposes a model_name property
+         print(f"Switching to model: {model_name}")
+         pipe = ov_genai.LLMPipeline(new_model_path, device)
+         tokenizer = pipe.get_tokenizer()
+         tokenizer.set_chat_template(tokenizer.chat_template)

+     try:
+         generated = pipe.generate([prompt], max_length=1024)
+         tokenpersec = f'{generated.perf_metrics.get_throughput().mean:.2f}'
+         return tokenpersec, generated
+     except Exception as e:
+         return "An error occurred", f"Error while generating the response: {e}"

+ # Build the Gradio interface
+ model_choices = list(model_name_to_id.keys())

+ demo = gr.Interface(
+     fn=generate_response,
+     inputs=[
+         gr.Textbox(lines=5, label="Input prompt"),
+         gr.Dropdown(choices=model_choices, value=default_model_name, label="Select model")  # added dropdown
+     ],
+     outputs=[
+         gr.Textbox(label="tokens/sec"),
+         gr.Textbox(label="Response")
+     ],
+     title="Qwen3 Model Inference",
+     description="Qwen3 inference app with a GUI and support for separating the model's thinking output."
+ )

  if __name__ == "__main__":
      demo.launch()
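
Note on the model-switching check: the new code reads pipe.model_name, and its own comment flags this as an assumption about ov_genai.LLMPipeline. A minimal sketch of an alternative that relies only on APIs already used in this file is to remember the path of the currently loaded model in a module-level variable; the current_model_path name and switch_model_if_needed helper below are illustrative, not part of the commit.

# Sketch: switch models without relying on a model_name attribute on LLMPipeline.
# Assumes model_name_to_id, device, pipe, tokenizer and model_path from the code above.
current_model_path = model_path  # path of the model loaded at startup

def switch_model_if_needed(model_name):
    """Reload the pipeline only when a different model is selected in the dropdown."""
    global pipe, tokenizer, current_model_path
    new_model_path = model_name_to_id[model_name].split("/")[-1]
    if new_model_path != current_model_path:
        print(f"Switching to model: {model_name}")
        pipe = ov_genai.LLMPipeline(new_model_path, device)
        tokenizer = pipe.get_tokenizer()
        tokenizer.set_chat_template(tokenizer.chat_template)
        current_model_path = new_model_path

generate_response could then call switch_model_if_needed(model_name) before pipe.generate, which avoids an AttributeError if LLMPipeline does not expose model_name.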
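
The interface description mentions separating the thinking process, and re is imported but not used in this version of the file. If that separation is added later, one possible sketch is below; it assumes Qwen3 wraps its reasoning in <think>...</think> tags, and split_thinking is a hypothetical helper, not part of this commit.

# Sketch: split a Qwen3 completion into its <think> reasoning and the final answer.
import re

def split_thinking(text):
    """Return (thinking, answer); thinking is empty if no <think> block is present."""
    match = re.search(r"<think>(.*?)</think>", text, re.DOTALL)
    if not match:
        return "", text.strip()
    thinking = match.group(1).strip()
    answer = text[match.end():].strip()
    return thinking, answer

generate_response could apply this to the generated text before returning, so the Gradio outputs could show the answer and, optionally, the reasoning in separate boxes.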