Update app.py
Browse files
app.py
CHANGED
@@ -1,42 +1,39 @@
|
|
1 |
import gradio as gr
|
2 |
import time
|
3 |
-
import psutil
|
4 |
from optimum.intel import OVModelForCausalLM
|
5 |
from transformers import AutoTokenizer, pipeline
|
6 |
|
7 |
-
#
|
8 |
model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
|
9 |
-
model = OVModelForCausalLM.from_pretrained(model_id, device="CPU") #
|
10 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
11 |
|
12 |
-
#
|
13 |
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
|
14 |
|
15 |
def respond(message):
|
16 |
-
|
17 |
-
cpu_usage = psutil.cpu_percent(interval=0.1)
|
18 |
-
if cpu_usage > 80:
|
19 |
-
# CPU 使用率超過 50%,直接返回忙碌提示訊息
|
20 |
-
return [(message, "系統目前忙碌中,請稍候...")]
|
21 |
-
else:
|
22 |
start_time = time.time()
|
23 |
-
|
|
|
24 |
instruction = (
|
25 |
"請用簡單、準確的語言回答問題,避免冗長和重複內容。\n"
|
26 |
"User: " + message + "\n"
|
27 |
"Assistant: "
|
28 |
)
|
29 |
-
|
|
|
30 |
response = pipe(
|
31 |
instruction,
|
32 |
-
max_length=
|
33 |
truncation=True,
|
34 |
num_return_sequences=1,
|
35 |
-
temperature=0.3,
|
36 |
-
top_p=0.8,
|
37 |
-
repetition_penalty=1.5,
|
38 |
)
|
39 |
generated_text = response[0]['generated_text'].strip()
|
|
|
40 |
# 提取 "Assistant:" 之後的部分
|
41 |
if "Assistant:" in generated_text:
|
42 |
reply = generated_text.split("Assistant:")[-1].strip()
|
@@ -45,13 +42,14 @@ def respond(message):
|
|
45 |
|
46 |
inference_time = time.time() - start_time
|
47 |
print(f"Inference time: {inference_time:.4f} seconds")
|
48 |
-
return [(message, reply)]
|
49 |
|
50 |
-
|
51 |
-
|
52 |
-
|
|
|
|
|
53 |
|
54 |
-
#
|
55 |
with gr.Blocks() as demo:
|
56 |
gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino Chat")
|
57 |
gr.Markdown("Chat with DeepSeek-R1-Distill-Qwen-1.5B-openvino model.")
|
@@ -59,8 +57,7 @@ with gr.Blocks() as demo:
|
|
59 |
chatbot = gr.Chatbot()
|
60 |
msg = gr.Textbox(label="Your Message")
|
61 |
|
62 |
-
|
63 |
-
msg.submit(respond, inputs=msg, outputs=chatbot).then(clear_textbox, None, msg)
|
64 |
|
65 |
if __name__ == "__main__":
|
66 |
demo.launch(share=True)
|
|
|
1 |
import gradio as gr
import time

from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer, pipeline

# Model setup: load the OpenVINO-optimized DeepSeek distill checkpoint.
model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
model = OVModelForCausalLM.from_pretrained(model_id, device="CPU")  # pin execution to CPU explicitly
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Single text-generation pipeline shared by every request.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
def respond(message):
    """Generate a chat reply for *message* and return it as Gradio chat history.

    Parameters
    ----------
    message : str
        The user's input text from the Gradio textbox.

    Returns
    -------
    list[tuple[str, str]]
        A single ``(user_message, assistant_reply)`` pair; this replaces the
        Chatbot component's displayed history. On any failure a generic
        apology message is returned instead of propagating the exception.
    """
    try:
        start_time = time.time()

        # Prompt template asking the model for short, accurate answers.
        # (The instruction text is runtime model input — kept verbatim.)
        instruction = (
            "請用簡單、準確的語言回答問題,避免冗長和重複內容。\n"
            "User: " + message + "\n"
            "Assistant: "
        )

        # Generation settings tuned to curb runaway / repetitive output.
        response = pipe(
            instruction,
            max_length=1024,          # cap total length to stop endless repetition
            truncation=True,
            num_return_sequences=1,
            temperature=0.3,          # low temperature: focused, less rambling
            top_p=0.8,                # nucleus sampling for mild diversity
            repetition_penalty=1.5,   # penalize repeated tokens
        )
        generated_text = response[0]['generated_text'].strip()

        # Keep only the text after the last "Assistant:" marker. If the model
        # did not echo the marker, fall back to the full generation so that
        # `reply` is always bound (otherwise the return below would raise
        # NameError, silently swallowed by the except clause).
        if "Assistant:" in generated_text:
            reply = generated_text.split("Assistant:")[-1].strip()
        else:
            reply = generated_text

        inference_time = time.time() - start_time
        print(f"Inference time: {inference_time:.4f} seconds")

        return [(message, reply)]

    except Exception as e:
        # Boundary handler: never crash the UI — log and show a friendly error.
        print(f"Error: {e}")
        return [(message, "Sorry, something went wrong. Please try again.")]
51 |
|
52 |
+
# Assemble the Gradio chat interface.
with gr.Blocks() as demo:
    gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino Chat")
    gr.Markdown("Chat with DeepSeek-R1-Distill-Qwen-1.5B-openvino model.")

    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your Message")

    # Pressing Enter in the textbox routes the message through respond()
    # and renders the returned (message, reply) pair in the chatbot.
    msg.submit(respond, msg, chatbot)

if __name__ == "__main__":
    demo.launch(share=True)
|