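"""Gradio chat demo for hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino.

Loads the OpenVINO-exported model on CPU via optimum-intel and serves a
minimal chat UI. Assumes the dependencies are installed, e.g.:
pip install gradio transformers "optimum[openvino]".
"""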
import gradio as gr
import time
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer, pipeline

# Load the model and tokenizer
model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
model = OVModelForCausalLM.from_pretrained(model_id, device="CPU")  # explicitly target the CPU device
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Create generation pipeline
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

def respond(message, history):
    try:
        start_time = time.time()
        
        # Strengthen the prompt so the model gives concise, sensible answers
        instruction = (
            "Please answer the question in simple, accurate language, "
            "avoiding verbosity and repetition.\n"
            "User: " + message + "\n"
            "Assistant: "
        )
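        # Note: for a chat-tuned model, tokenizer.apply_chat_template() would be
        # the more idiomatic way to build the prompt; the plain "User:/Assistant:"
        # format is kept here because the extraction logic below depends on it.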

        # Generate a response; sampling parameters only take effect with do_sample=True
        response = pipe(
            instruction,
            max_new_tokens=200,  # cap generated tokens (max_length would also count the prompt)
            truncation=True,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.3,  # keep some creativity while curbing rambling
            top_p=0.8,  # nucleus sampling for variety without nonsense
            repetition_penalty=1.5,  # discourage repeated tokens
        )
        generated_text = response[0]['generated_text'].strip()

        # Extract the part after "Assistant:"
        if "Assistant:" in generated_text:
            reply = generated_text.split("Assistant:")[-1].strip()
        else:
            reply = generated_text
        
        inference_time = time.time() - start_time
        print(f"Inference time: {inference_time:.4f} seconds")

        return history + [(message, reply)]
    
    except Exception as e:
        print(f"Error: {e}")
        return [(message, "Sorry, something went wrong. Please try again.")]

# Set up Gradio chat interface
with gr.Blocks() as demo:
    gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino Chat")
    gr.Markdown("Chat with DeepSeek-R1-Distill-Qwen-1.5B-openvino model.")
    
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your Message")
    
    # Pass the running history in so each turn appends instead of replacing it
    msg.submit(respond, [msg, chatbot], chatbot)

if __name__ == "__main__":
    demo.launch(share=True)