import gradio as gr
import time
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer, pipeline

# Load the model and tokenizer
model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
model = OVModelForCausalLM.from_pretrained(model_id, device="CPU")  # explicitly target the CPU device
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Create generation pipeline
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
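# The text-generation pipeline bundles tokenization, generation, and decoding into a
# single call; generation itself runs on the OpenVINO model loaded above.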

def respond(message):
    try:
        start_time = time.time()
        
        # Reinforce the prompt so the model produces a more focused answer
        instruction = (
            "Please answer in simple, accurate language and avoid verbose or repetitive content.\n"
            "User: " + message + "\n"
            "Assistant: "
        )

        # Generate response with improved settings
        response = pipe(
            instruction,
            max_length=4096,  # cap the total output length to prevent runaway repetition
            truncation=True,
            num_return_sequences=1,
            do_sample=True,  # sampling must be enabled for temperature/top_p to take effect
            temperature=0.3,  # keep some creativity while reducing rambling
            top_p=0.8,  # nucleus sampling for diversity without meaningless repetition
            repetition_penalty=1.5,  # lower the probability of repeated tokens
        )
        generated_text = response[0]['generated_text'].strip()
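        # By default the pipeline returns the prompt together with the completion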

        # Extract the portion that follows "Assistant:"
        if "Assistant:" in generated_text:
            reply = generated_text.split("Assistant:")[-1].strip()
        else:
            reply = generated_text
        
        inference_time = time.time() - start_time
        print(f"Inference time: {inference_time:.4f} seconds")

        # Return a single-turn history; earlier exchanges are not passed back to the Chatbot
        return [(message, reply)]
    
    except Exception as e:
        print(f"Error: {e}")
        return [(message, "Sorry, something went wrong. Please try again.")]

# Set up Gradio chat interface
with gr.Blocks() as demo:
    gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino Chat")
    gr.Markdown("Chat with DeepSeek-R1-Distill-Qwen-1.5B-openvino model.")
    
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your Message")
    
    msg.submit(respond, msg, chatbot)
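    # Submitting the textbox passes its value to respond() and shows the returned
    # (user, assistant) pairs in the Chatbot component.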

if __name__ == "__main__":
    demo.launch(share=True)  # share=True also requests a temporary public Gradio link