Update app.py
app.py CHANGED
@@ -5,53 +5,78 @@ import numpy as np
 import gradio as gr
 import re
 
-    """
-    global pipe  # allow modifying the global pipeline object
-    full_response = ""
 
-    def streamer(subword):
-        nonlocal full_response  # allow modifying full_response from the enclosing scope
-        full_response += subword
-        yield full_response  # yield the response incrementally
-        return ov_genai.StreamingStatus.RUNNING
 
-    #
-    # e.g.: build a system prompt and assemble the full prompt from past turns
-    #
 
-def on_close():
-    global pipe
-    pipe.finish_chat()  # clean up the pipeline when the app exits
-    print("Chat finished and pipeline closed.")
 
 if __name__ == "__main__":
-    demo = gr.ChatInterface(
-        generate,
-        chatbot=gr.Chatbot(height=300),
-        textbox=gr.Textbox(placeholder="請輸入您的訊息...", container=False, scale=7),
-        title="LLM 串流輸出範例 (OpenVINO)",
-        description="這個範例示範如何使用 Gradio 串流輸出 OpenVINO GenAI 的回應。",
-        theme="soft",
-        examples=["你好", "請自我介紹一下", "今天的氣候如何?"],
-    )
-    demo.close(on_close)  # register cleanup for when the app closes
     demo.launch()
 import gradio as gr
 import re
 
+# Download the models
+model_ids = [
+    "OpenVINO/Qwen3-0.6B-int4-ov",
+    "OpenVINO/Qwen3-0.6B-int8-ov",
+    "OpenVINO/Qwen3-1.7B-int4-ov",
+    "OpenVINO/Qwen3-1.7B-int8-ov"
+]
 
+model_name_to_id = {
+    "Qwen3-0.6B-int4-ov": "OpenVINO/Qwen3-0.6B-int4-ov",
+    "Qwen3-0.6B-int8-ov": "OpenVINO/Qwen3-0.6B-int8-ov",
+    "Qwen3-1.7B-int4-ov": "OpenVINO/Qwen3-1.7B-int4-ov",
+    "Qwen3-1.7B-int8-ov": "OpenVINO/Qwen3-1.7B-int8-ov"
+}
 
+for model_id in model_ids:
+    model_path = model_id.split("/")[-1]  # use the model name as the local directory
+    try:
+        hf_hub.snapshot_download(model_id, local_dir=model_path, local_dir_use_symlinks=False)
+        print(f"Successfully downloaded {model_id} to {model_path}")
+    except Exception as e:
+        print(f"Error downloading {model_id}: {e}")  # keep going if one download fails
 
+# Build the inference pipeline (initialize with a default model first)
+device = "CPU"
+default_model_name = "Qwen3-0.6B-int4-ov"
+model_path = model_name_to_id[default_model_name].split("/")[-1]
 
+pipe = ov_genai.LLMPipeline(model_path, device)
+tokenizer = pipe.get_tokenizer()
+tokenizer.set_chat_template(tokenizer.chat_template)
+current_model_path = model_path  # track the loaded model; LLMPipeline has no model_name property
 
+def generate_response(prompt, model_name):
+    global pipe, tokenizer, current_model_path  # reuse the module-level pipeline state
 
+    # Reload the pipeline only when a different model was selected
+    model_id = model_name_to_id[model_name]
+    new_model_path = model_id.split("/")[-1]
 
+    if current_model_path != new_model_path:
+        print(f"Switching to model: {model_name}")
+        pipe = ov_genai.LLMPipeline(new_model_path, device)
+        tokenizer = pipe.get_tokenizer()
+        tokenizer.set_chat_template(tokenizer.chat_template)
+        current_model_path = new_model_path
 
+    try:
+        generated = pipe.generate([prompt], max_length=1024)
+        tokenpersec = f'{generated.perf_metrics.get_throughput().mean:.2f}'
+        return tokenpersec, generated.texts[0]  # one value per output component
+    except Exception as e:
+        return "發生錯誤", f"生成回應時發生錯誤:{e}"
 
+# Build the Gradio interface
+model_choices = list(model_name_to_id.keys())
 
+demo = gr.Interface(
+    fn=generate_response,
+    inputs=[
+        gr.Textbox(lines=5, label="輸入提示 (Prompt)"),
+        gr.Dropdown(choices=model_choices, value=default_model_name, label="選擇模型")  # model selector
+    ],
+    outputs=[
+        gr.Textbox(label="tokens/sec"),
+        gr.Textbox(label="回應")
+    ],
+    title="Qwen3 Model Inference",
+    description="基於 Qwen3 推理應用,支援思考過程分離與 GUI。"
+)
 
 if __name__ == "__main__":
     demo.launch()
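
Note that the hunk starts at line 5, so the first four lines of app.py are not part of the diff. Judging from the names the new code relies on (hf_hub.snapshot_download, ov_genai.LLMPipeline) and the import numpy as np context in the hunk header, the top of the file presumably looks like the sketch below; these exact lines are an inference, not part of the commit:

import numpy as np                 # appears as context in the hunk header
import huggingface_hub as hf_hub   # assumed alias: the code calls hf_hub.snapshot_download
import openvino_genai as ov_genai  # assumed alias: the code calls ov_genai.LLMPipeline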
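
As a quick sanity check outside the Gradio UI, generate_response can be exercised directly once the default model has downloaded. A minimal sketch, assuming the snapshot_download calls above succeeded and reusing a prompt from the old examples list:

# Hypothetical smoke test: call generate_response directly instead of demo.launch()
tps, reply = generate_response("請自我介紹一下", "Qwen3-0.6B-int4-ov")
print(f"throughput: {tps} tokens/sec")
print(reply)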