Update app.py
Browse files
app.py
CHANGED
@@ -5,7 +5,7 @@ from transformers import AutoTokenizer, pipeline
|
|
5 |
|
6 |
# Load the model and tokenizer
|
7 |
model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
|
8 |
-
model = OVModelForCausalLM.from_pretrained(model_id, device="CPU") #
|
9 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
10 |
|
11 |
# Create generation pipeline
|
@@ -25,12 +25,12 @@ def respond(message):
|
|
25 |
# Generate response with improved settings
|
26 |
response = pipe(
|
27 |
instruction,
|
28 |
-
max_length=200, #
|
29 |
truncation=True,
|
30 |
num_return_sequences=1,
|
31 |
-
temperature=0.3,
|
32 |
-
top_p=0.8,
|
33 |
-
repetition_penalty=1.5,
|
34 |
)
|
35 |
generated_text = response[0]['generated_text'].strip()
|
36 |
|
@@ -43,12 +43,17 @@ def respond(message):
|
|
43 |
inference_time = time.time() - start_time
|
44 |
print(f"Inference time: {inference_time:.4f} seconds")
|
45 |
|
|
|
46 |
return [(message, reply)]
|
47 |
|
48 |
except Exception as e:
|
49 |
print(f"Error: {e}")
|
50 |
return [(message, "Sorry, something went wrong. Please try again.")]
|
51 |
|
|
|
|
|
|
|
|
|
52 |
# Set up Gradio chat interface
|
53 |
with gr.Blocks() as demo:
|
54 |
gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino Chat")
|
@@ -57,7 +62,8 @@ with gr.Blocks() as demo:
|
|
57 |
chatbot = gr.Chatbot()
|
58 |
msg = gr.Textbox(label="Your Message")
|
59 |
|
60 |
-
|
|
|
61 |
|
62 |
if __name__ == "__main__":
|
63 |
demo.launch(share=True)
|
|
|
5 |
|
6 |
# Load the model and tokenizer
|
7 |
model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
|
8 |
+
model = OVModelForCausalLM.from_pretrained(model_id, device="CPU") # 明確指定設備
|
9 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
10 |
|
11 |
# Create generation pipeline
|
|
|
25 |
# Generate response with improved settings
|
26 |
response = pipe(
|
27 |
instruction,
|
28 |
+
max_length=200, # 限制最大輸出長度
|
29 |
truncation=True,
|
30 |
num_return_sequences=1,
|
31 |
+
temperature=0.3,
|
32 |
+
top_p=0.8,
|
33 |
+
repetition_penalty=1.5,
|
34 |
)
|
35 |
generated_text = response[0]['generated_text'].strip()
|
36 |
|
|
|
43 |
inference_time = time.time() - start_time
|
44 |
print(f"Inference time: {inference_time:.4f} seconds")
|
45 |
|
46 |
+
# 返回對話記錄更新結果 (以 tuple 的方式累加歷史訊息)
|
47 |
return [(message, reply)]
|
48 |
|
49 |
except Exception as e:
|
50 |
print(f"Error: {e}")
|
51 |
return [(message, "Sorry, something went wrong. Please try again.")]
|
52 |
|
53 |
+
# 定義一個清空文字框的函數
|
54 |
+
def clear_textbox():
|
55 |
+
return gr.update(value="")
|
56 |
+
|
57 |
# Set up Gradio chat interface
|
58 |
with gr.Blocks() as demo:
|
59 |
gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino Chat")
|
|
|
62 |
chatbot = gr.Chatbot()
|
63 |
msg = gr.Textbox(label="Your Message")
|
64 |
|
65 |
+
# submit 完成後,使用 .then() 來清空輸入框
|
66 |
+
msg.submit(respond, inputs=msg, outputs=chatbot).then(clear_textbox, None, msg)
|
67 |
|
68 |
if __name__ == "__main__":
|
69 |
demo.launch(share=True)
|