hsuwill000 committed on
Commit
a9b4927
·
verified ·
1 Parent(s): 3ee81c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -36
app.py CHANGED
@@ -4,43 +4,29 @@ import psutil
4
  from optimum.intel import OVModelForCausalLM
5
  from transformers import AutoTokenizer, pipeline
6
 
7
- # Load the model and tokenizer
8
  model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
9
- model = OVModelForCausalLM.from_pretrained(model_id, device="CPU") # 明確指定设备
10
  tokenizer = AutoTokenizer.from_pretrained(model_id)
11
 
12
- # Create generation pipeline
13
  pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
14
 
15
  def respond(message):
16
- # 檢查 CPU 使用率,間隔設定 0.1 秒取得近期數值
17
  cpu_usage = psutil.cpu_percent(interval=0.1)
18
  if cpu_usage > 50:
19
- # CPU 使用率過高,返回 busy 訊息,不進行模型生成
20
- busy_modal = """
21
- <div style="
22
- position: fixed; top: 0; left: 0; width: 100%; height: 100%;
23
- background-color: rgba(0, 0, 0, 0.5);
24
- display: flex; justify-content: center; align-items: center;
25
- z-index: 9999;">
26
- <div style="
27
- background-color: white; padding: 20px; border-radius: 8px;
28
- font-size: 20px; font-weight: bold;">
29
- 系統目前忙碌中,請稍候...
30
- </div>
31
- </div>
32
- """
33
- # 此處同時將 busy 資訊加入對話中(也可只顯示 modal,不更新聊天記錄)
34
- return ([(message, "系統目前忙碌中,請稍候...")], busy_modal)
35
  else:
36
  start_time = time.time()
37
- # 強化 Prompt,要求回答簡明且不重複
38
  instruction = (
39
  "請用簡單、準確的語言回答問題,避免冗長和重複內容。\n"
40
  "User: " + message + "\n"
41
  "Assistant: "
42
  )
43
- # 生成回答
44
  response = pipe(
45
  instruction,
46
  max_length=200, # 限制最大輸出長度
@@ -51,35 +37,30 @@ def respond(message):
51
  repetition_penalty=1.5,
52
  )
53
  generated_text = response[0]['generated_text'].strip()
54
-
55
- # 從生成文本中提取 "Assistant:" 之後的部分
56
  if "Assistant:" in generated_text:
57
  reply = generated_text.split("Assistant:")[-1].strip()
58
  else:
59
  reply = generated_text
60
-
61
  inference_time = time.time() - start_time
62
  print(f"Inference time: {inference_time:.4f} seconds")
63
- # 隱藏 modal(傳回空字串代表不顯示)
64
- return ([(message, reply)], "")
65
-
66
- # 定義一個清空文字框的函數(如果需要額外控制,這裡可保留)
67
  def clear_textbox():
68
  return gr.update(value="")
69
 
70
- # Set up Gradio chat interface
71
  with gr.Blocks() as demo:
72
  gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino Chat")
73
  gr.Markdown("Chat with DeepSeek-R1-Distill-Qwen-1.5B-openvino model.")
74
 
75
  chatbot = gr.Chatbot()
76
  msg = gr.Textbox(label="Your Message")
77
- # 新增一個 gr.HTML 元件用來顯示 modal,目前預設隱藏(值為空字串)
78
- modal_html = gr.HTML(value="")
79
-
80
- # 當使用者送出訊息時,同時更新聊天記錄和 modal 區塊
81
- # 如果 CPU 過忙,respond() 會回傳 busy modal,否則 modal 區塊維持空字串
82
- msg.submit(respond, inputs=msg, outputs=[chatbot, modal_html]).then(clear_textbox, None, msg)
83
 
84
  if __name__ == "__main__":
85
  demo.launch(share=True)
 
4
  from optimum.intel import OVModelForCausalLM
5
  from transformers import AutoTokenizer, pipeline
6
 
7
+ # 載入模型和 tokenizer
8
  model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
9
+ model = OVModelForCausalLM.from_pretrained(model_id, device="CPU") # 明確指定設備
10
  tokenizer = AutoTokenizer.from_pretrained(model_id)
11
 
12
+ # 建立生成 pipeline
13
  pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
14
 
15
  def respond(message):
16
+ # 取得 CPU 使用率(0.1 秒內的平均值)
17
  cpu_usage = psutil.cpu_percent(interval=0.1)
18
  if cpu_usage > 50:
19
+ # CPU 使用率超過 50%,直接返回忙碌提示訊息
20
+ return [(message, "系統目前忙碌中,請稍候...")]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  else:
22
  start_time = time.time()
23
+ # 強化 prompt,要求回答簡明且不重複
24
  instruction = (
25
  "請用簡單、準確的語言回答問題,避免冗長和重複內容。\n"
26
  "User: " + message + "\n"
27
  "Assistant: "
28
  )
29
+ # 呼叫生成管道產生回答
30
  response = pipe(
31
  instruction,
32
  max_length=200, # 限制最大輸出長度
 
37
  repetition_penalty=1.5,
38
  )
39
  generated_text = response[0]['generated_text'].strip()
40
+ # 提取 "Assistant:" 之後的部分
 
41
  if "Assistant:" in generated_text:
42
  reply = generated_text.split("Assistant:")[-1].strip()
43
  else:
44
  reply = generated_text
45
+
46
  inference_time = time.time() - start_time
47
  print(f"Inference time: {inference_time:.4f} seconds")
48
+ return [(message, reply)]
49
+
50
+ # 定義清空輸入框的函數
 
51
  def clear_textbox():
52
  return gr.update(value="")
53
 
54
+ # 設定 Gradio 聊天介面
55
  with gr.Blocks() as demo:
56
  gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino Chat")
57
  gr.Markdown("Chat with DeepSeek-R1-Distill-Qwen-1.5B-openvino model.")
58
 
59
  chatbot = gr.Chatbot()
60
  msg = gr.Textbox(label="Your Message")
61
+
62
+ # 當使用者送出訊息時,先觸發 respond() 再清空輸入框
63
+ msg.submit(respond, inputs=msg, outputs=chatbot).then(clear_textbox, None, msg)
 
 
 
64
 
65
  if __name__ == "__main__":
66
  demo.launch(share=True)