Update app.py
app.py
CHANGED
@@ -4,43 +4,29 @@ import psutil
 from optimum.intel import OVModelForCausalLM
 from transformers import AutoTokenizer, pipeline
 
-#
+# Load the model and tokenizer
 model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
-model = OVModelForCausalLM.from_pretrained(model_id, device="CPU")  #
+model = OVModelForCausalLM.from_pretrained(model_id, device="CPU")  # explicitly specify the device
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-#
+# Build the text-generation pipeline
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
 def respond(message):
-    #
+    # Get the CPU usage (averaged over 0.1 s)
     cpu_usage = psutil.cpu_percent(interval=0.1)
     if cpu_usage > 50:
-        # CPU
-        busy_modal = """
-        <div style="
-            position: fixed; top: 0; left: 0; width: 100%; height: 100%;
-            background-color: rgba(0, 0, 0, 0.5);
-            display: flex; justify-content: center; align-items: center;
-            z-index: 9999;">
-            <div style="
-                background-color: white; padding: 20px; border-radius: 8px;
-                font-size: 20px; font-weight: bold;">
-                系統目前忙碌中,請稍候...
-            </div>
-        </div>
-        """
-        # Also add the busy notice to the chat (alternatively, show only the modal without updating the chat history)
-        return ([(message, "系統目前忙碌中,請稍候...")], busy_modal)
+        # CPU usage above 50%: return the busy notice immediately
+        return [(message, "系統目前忙碌中,請稍候...")]
     else:
         start_time = time.time()
-        # Strengthen the prompt
+        # Strengthen the prompt: ask for concise, non-repetitive answers
         instruction = (
             "請用簡單、準確的語言回答問題,避免冗長和重複內容。\n"
             "User: " + message + "\n"
             "Assistant: "
         )
-        #
+        # Call the generation pipeline to produce the reply
         response = pipe(
             instruction,
             max_length=200,  # limit the maximum output length
@@ -51,35 +37,30 @@ def respond(message):
             repetition_penalty=1.5,
         )
         generated_text = response[0]['generated_text'].strip()
-
-        # Extract the part after "Assistant:" from the generated text
+        # Extract the part after "Assistant:"
         if "Assistant:" in generated_text:
             reply = generated_text.split("Assistant:")[-1].strip()
         else:
             reply = generated_text
-
+
         inference_time = time.time() - start_time
         print(f"Inference time: {inference_time:.4f} seconds")
-
-
-
-# Define a function that clears the textbox (kept here in case extra control is needed)
+        return [(message, reply)]
+
+# Define a function that clears the input box
 def clear_textbox():
     return gr.update(value="")
 
-#
+# Set up the Gradio chat interface
 with gr.Blocks() as demo:
     gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino Chat")
     gr.Markdown("Chat with DeepSeek-R1-Distill-Qwen-1.5B-openvino model.")
 
     chatbot = gr.Chatbot()
     msg = gr.Textbox(label="Your Message")
-
-
-
-    # When the user submits a message, update the chat history and the modal block together
-    # If the CPU is too busy, respond() returns the busy modal; otherwise the modal block stays an empty string
-    msg.submit(respond, inputs=msg, outputs=[chatbot, modal_html]).then(clear_textbox, None, msg)
+
+    # When the user submits a message, run respond() first, then clear the input box
+    msg.submit(respond, inputs=msg, outputs=chatbot).then(clear_textbox, None, msg)
 
 if __name__ == "__main__":
     demo.launch(share=True)
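
The net effect of the change: respond() now returns a single Chatbot value (a list of (user, bot) tuples) instead of a (chat history, busy-modal HTML) pair, so the submit event needs only one output and the busy-modal plumbing goes away. Below is a minimal sketch of that return contract with the model swapped for a stub, so the gating branch can be exercised without loading the OpenVINO weights; fake_pipe and busy_threshold are illustrative names, not part of the commit.

import time
import psutil

def fake_pipe(prompt, **kwargs):
    # Stub with the same return shape as a transformers text-generation
    # pipeline: a list of dicts carrying "generated_text".
    return [{"generated_text": prompt + "a short stub reply"}]

def respond(message, pipe=fake_pipe, busy_threshold=50):
    # cpu_percent(interval=0.1) blocks for 0.1 s and reports system-wide
    # CPU utilisation over that window.
    if psutil.cpu_percent(interval=0.1) > busy_threshold:
        # Busy branch: a single (user, bot) pair for gr.Chatbot.
        return [(message, "系統目前忙碌中,請稍候...")]  # "System busy, please wait..."
    start_time = time.time()
    instruction = "User: " + message + "\nAssistant: "
    generated_text = pipe(instruction, max_length=200)[0]["generated_text"].strip()
    # Keep only the text after the last "Assistant:" marker.
    if "Assistant:" in generated_text:
        generated_text = generated_text.split("Assistant:")[-1].strip()
    print(f"Inference time: {time.time() - start_time:.4f} seconds")
    return [(message, generated_text)]

print(respond("hello"))  # -> [('hello', 'a short stub reply')]

Note that returning [(message, reply)] replaces the Chatbot value wholesale, so each turn overwrites the previous one; a history-preserving variant is sketched at the end.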
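A side note on the gate itself: psutil.cpu_percent(interval=0.1) sleeps for the full interval, so every request pays roughly 100 ms before generation even starts. If that overhead matters, cpu_percent(interval=None) returns the utilisation accumulated since the previous call without blocking (the first such call has no reference point and should be discarded). A sketch, with cpu_busy as an illustrative helper that is not in the commit:

import psutil

# Prime the internal counters once at startup; the first interval=None
# call just returns a meaningless 0.0.
psutil.cpu_percent(interval=None)

def cpu_busy(threshold=50.0):
    # Non-blocking: measures CPU utilisation since the previous call.
    return psutil.cpu_percent(interval=None) > threshold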
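Finally, if the chat is meant to accumulate across turns, one option is to feed the Chatbot's current value back into the handler and append to it; respond_with_history below is a hypothetical variant, not part of the commit:

import gradio as gr

def respond_with_history(message, history):
    # The Chatbot value arrives as a list of (user, bot) tuples, or None on the first turn.
    history = history or []
    # A real handler would call the generation pipeline here; "..." stands in for the reply.
    history.append((message, "..."))
    return history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your Message")
    # Reading and writing the same component preserves earlier turns.
    msg.submit(respond_with_history, inputs=[msg, chatbot], outputs=chatbot)

demo.launch()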