hsuwill000 committed on
Commit
a9b4927
·
verified ·
1 Parent(s): 3ee81c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -36
app.py CHANGED
@@ -4,43 +4,29 @@ import psutil
4
  from optimum.intel import OVModelForCausalLM
5
  from transformers import AutoTokenizer, pipeline
6
 
7
- # Load the model and tokenizer
8
  model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
9
- model = OVModelForCausalLM.from_pretrained(model_id, device="CPU") # 明確指定设备
10
  tokenizer = AutoTokenizer.from_pretrained(model_id)
11
 
12
- # Create generation pipeline
13
  pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
14
 
15
  def respond(message):
16
- # 檢查 CPU 使用率,間隔設定 0.1 秒取得近期數值
17
  cpu_usage = psutil.cpu_percent(interval=0.1)
18
  if cpu_usage > 50:
19
- # CPU 使用率過高,返回 busy 訊息,不進行模型生成
20
- busy_modal = """
21
- <div style="
22
- position: fixed; top: 0; left: 0; width: 100%; height: 100%;
23
- background-color: rgba(0, 0, 0, 0.5);
24
- display: flex; justify-content: center; align-items: center;
25
- z-index: 9999;">
26
- <div style="
27
- background-color: white; padding: 20px; border-radius: 8px;
28
- font-size: 20px; font-weight: bold;">
29
- 系統目前忙碌中,請稍候...
30
- </div>
31
- </div>
32
- """
33
- # 此處同時將 busy 資訊加入對話中(也可只顯示 modal,不更新聊天記錄)
34
- return ([(message, "系統目前忙碌中,請稍候...")], busy_modal)
35
  else:
36
  start_time = time.time()
37
- # 強化 Prompt,要求回答簡明且不重複
38
  instruction = (
39
  "請用簡單、準確的語言回答問題,避免冗長和重複內容。\n"
40
  "User: " + message + "\n"
41
  "Assistant: "
42
  )
43
- # 生成回答
44
  response = pipe(
45
  instruction,
46
  max_length=200, # 限制最大輸出長度
@@ -51,35 +37,30 @@ def respond(message):
51
  repetition_penalty=1.5,
52
  )
53
  generated_text = response[0]['generated_text'].strip()
54
-
55
- # 從生成文本中提取 "Assistant:" 之後的部分
56
  if "Assistant:" in generated_text:
57
  reply = generated_text.split("Assistant:")[-1].strip()
58
  else:
59
  reply = generated_text
60
-
61
  inference_time = time.time() - start_time
62
  print(f"Inference time: {inference_time:.4f} seconds")
63
- # 隱藏 modal(傳回空字串代表不顯示)
64
- return ([(message, reply)], "")
65
-
66
- # 定義一個清空文字框的函數(如果需要額外控制,這裡可保留)
67
  def clear_textbox():
68
  return gr.update(value="")
69
 
70
- # Set up Gradio chat interface
71
  with gr.Blocks() as demo:
72
  gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino Chat")
73
  gr.Markdown("Chat with DeepSeek-R1-Distill-Qwen-1.5B-openvino model.")
74
 
75
  chatbot = gr.Chatbot()
76
  msg = gr.Textbox(label="Your Message")
77
- # 新增一個 gr.HTML 元件用來顯示 modal,目前預設隱藏(值為空字串)
78
- modal_html = gr.HTML(value="")
79
-
80
- # 當使用者送出訊息時,同時更新聊天記錄和 modal 區塊
81
- # 如果 CPU 過忙,respond() 會回傳 busy modal,否則 modal 區塊維持空字串
82
- msg.submit(respond, inputs=msg, outputs=[chatbot, modal_html]).then(clear_textbox, None, msg)
83
 
84
  if __name__ == "__main__":
85
  demo.launch(share=True)
 
4
  from optimum.intel import OVModelForCausalLM
5
  from transformers import AutoTokenizer, pipeline
6
 
7
+ # 載入模型和 tokenizer
8
  model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
9
+ model = OVModelForCausalLM.from_pretrained(model_id, device="CPU") # 明確指定設備
10
  tokenizer = AutoTokenizer.from_pretrained(model_id)
11
 
12
+ # 建立生成 pipeline
13
  pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
14
 
15
  def respond(message):
16
+ # 取得 CPU 使用率(0.1 秒內的平均值)
17
  cpu_usage = psutil.cpu_percent(interval=0.1)
18
  if cpu_usage > 50:
19
+ # CPU 使用率超過 50%,直接返回忙碌提示訊息
20
+ return [(message, "系統目前忙碌中,請稍候...")]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  else:
22
  start_time = time.time()
23
+ # 強化 prompt,要求回答簡明且不重複
24
  instruction = (
25
  "請用簡單、準確的語言回答問題,避免冗長和重複內容。\n"
26
  "User: " + message + "\n"
27
  "Assistant: "
28
  )
29
+ # 呼叫生成管道產生回答
30
  response = pipe(
31
  instruction,
32
  max_length=200, # 限制最大輸出長度
 
37
  repetition_penalty=1.5,
38
  )
39
  generated_text = response[0]['generated_text'].strip()
40
+ # 提取 "Assistant:" 之後的部分
 
41
  if "Assistant:" in generated_text:
42
  reply = generated_text.split("Assistant:")[-1].strip()
43
  else:
44
  reply = generated_text
45
+
46
  inference_time = time.time() - start_time
47
  print(f"Inference time: {inference_time:.4f} seconds")
48
+ return [(message, reply)]
49
+
50
+ # 定義清空輸入框的函數
 
51
  def clear_textbox():
52
  return gr.update(value="")
53
 
54
+ # 設定 Gradio 聊天介面
55
  with gr.Blocks() as demo:
56
  gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino Chat")
57
  gr.Markdown("Chat with DeepSeek-R1-Distill-Qwen-1.5B-openvino model.")
58
 
59
  chatbot = gr.Chatbot()
60
  msg = gr.Textbox(label="Your Message")
61
+
62
+ # 當使用者送出訊息時,先觸發 respond() 再清空輸入框
63
+ msg.submit(respond, inputs=msg, outputs=chatbot).then(clear_textbox, None, msg)
 
 
 
64
 
65
  if __name__ == "__main__":
66
  demo.launch(share=True)