hsuwill000 committed on
Commit
97e41b8
·
verified ·
1 Parent(s): 91a184a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -23
app.py CHANGED
@@ -1,42 +1,39 @@
1
  import gradio as gr
2
  import time
3
- import psutil
4
  from optimum.intel import OVModelForCausalLM
5
  from transformers import AutoTokenizer, pipeline
6
 
7
- # 載入模型和 tokenizer
8
  model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
9
- model = OVModelForCausalLM.from_pretrained(model_id, device="CPU") # 明確指定設備
10
  tokenizer = AutoTokenizer.from_pretrained(model_id)
11
 
12
- # 建立生成 pipeline
13
  pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
14
 
15
  def respond(message):
16
- # 取得 CPU 使用率(0.1 秒內的平均值)
17
- cpu_usage = psutil.cpu_percent(interval=0.1)
18
- if cpu_usage > 80:
19
- # CPU 使用率超過 50%,直接返回忙碌提示訊息
20
- return [(message, "系統目前忙碌中,請稍候...")]
21
- else:
22
  start_time = time.time()
23
- # 強化 prompt,要求回答簡明且不重複
 
24
  instruction = (
25
  "請用簡單、準確的語言回答問題,避免冗長和重複內容。\n"
26
  "User: " + message + "\n"
27
  "Assistant: "
28
  )
29
- # 呼叫生成管道產生回答
 
30
  response = pipe(
31
  instruction,
32
- max_length=200, # 限制最大輸出長度
33
  truncation=True,
34
  num_return_sequences=1,
35
- temperature=0.3,
36
- top_p=0.8,
37
- repetition_penalty=1.5,
38
  )
39
  generated_text = response[0]['generated_text'].strip()
 
40
  # 提取 "Assistant:" 之後的部分
41
  if "Assistant:" in generated_text:
42
  reply = generated_text.split("Assistant:")[-1].strip()
@@ -45,13 +42,14 @@ def respond(message):
45
 
46
  inference_time = time.time() - start_time
47
  print(f"Inference time: {inference_time:.4f} seconds")
48
- return [(message, reply)]
49
 
50
- # 定義清空輸入框的函數
51
- def clear_textbox():
52
- return gr.update(value="")
 
 
53
 
54
- # 設定 Gradio 聊天介面
55
  with gr.Blocks() as demo:
56
  gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino Chat")
57
  gr.Markdown("Chat with DeepSeek-R1-Distill-Qwen-1.5B-openvino model.")
@@ -59,8 +57,7 @@ with gr.Blocks() as demo:
59
  chatbot = gr.Chatbot()
60
  msg = gr.Textbox(label="Your Message")
61
 
62
- # 當使用者送出訊息時,先觸發 respond() 再清空輸入框
63
- msg.submit(respond, inputs=msg, outputs=chatbot).then(clear_textbox, None, msg)
64
 
65
  if __name__ == "__main__":
66
  demo.launch(share=True)
 
1
import gradio as gr
import time

from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer, pipeline

# Load the OpenVINO-optimized model and its tokenizer.
model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
# Explicitly target the CPU device for OpenVINO inference.
model = OVModelForCausalLM.from_pretrained(model_id, device="CPU")
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Text-generation pipeline shared by respond() below.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
14
def respond(message):
    """Generate a chat reply for *message* in Gradio chatbot history format.

    Returns a list containing a single ``(user_message, assistant_reply)``
    tuple. On any generation failure the error is logged and a generic
    apology string is returned instead of raising, so the UI stays alive.
    """
    try:
        start_time = time.time()

        # Prompt template asking the model for short, non-repetitive answers
        # (instruction text is in Chinese: "answer simply and accurately,
        # avoid verbosity and repetition").
        instruction = (
            "請用簡單、準確的語言回答問題,避免冗長和重複內容。\n"
            "User: " + message + "\n"
            "Assistant: "
        )

        response = pipe(
            instruction,
            max_length=1024,         # cap total length to stop runaway repetition
            truncation=True,
            num_return_sequences=1,
            temperature=0.3,         # low temperature keeps answers focused
            top_p=0.8,
            repetition_penalty=1.5,  # discourage repeated phrases
        )
        generated_text = response[0]['generated_text'].strip()

        # Fix: default to the full generation so `reply` is always bound,
        # even when the model never emits the "Assistant:" marker; otherwise
        # the return below raises UnboundLocalError (masked as the generic
        # error reply by the except handler).
        reply = generated_text
        if "Assistant:" in generated_text:
            # Keep only the text after the last "Assistant:" marker.
            reply = generated_text.split("Assistant:")[-1].strip()

        inference_time = time.time() - start_time
        print(f"Inference time: {inference_time:.4f} seconds")

        return [(message, reply)]

    except Exception as e:
        # Top-level UI boundary: log and return a friendly message rather
        # than letting the exception crash the Gradio callback.
        print(f"Error: {e}")
        return [(message, "Sorry, something went wrong. Please try again.")]
51
 
52
+ # Set up Gradio chat interface
53
# Assemble the Gradio chat interface around respond().
with gr.Blocks() as demo:
    gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino Chat")
    gr.Markdown("Chat with DeepSeek-R1-Distill-Qwen-1.5B-openvino model.")

    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your Message")

    # Submitting the textbox routes the message through respond() and
    # renders the returned (message, reply) pair in the chatbot.
    msg.submit(respond, msg, chatbot)

if __name__ == "__main__":
    demo.launch(share=True)