import gradio as gr
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer

# Load the model and tokenizer
model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
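# The repo id suggests a pre-exported OpenVINO IR of DeepSeek-R1-Distill-Qwen-1.5B, so no export=True flag should be needed here.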
print("Loading model...")
model = OVModelForCausalLM.from_pretrained(model_id)  # device_map is a transformers/accelerate argument; OpenVINO models target CPU by default
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)



def respond(prompt, history):
    # Build the chat messages (history is supplied by gr.ChatInterface but not used here)
    messages = [
        {"role": "system", "content": "使用中文。"},  # system prompt: "Use Chinese."
        {"role": "user", "content": prompt}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
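    # add_generation_prompt=True appends the assistant-turn marker so the model begins a fresh reply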
    print("Chat template text:", text)

    # Tokenize the templated text into model inputs
    model_inputs = tokenizer([text], return_tensors="pt")  # OpenVINO inference consumes CPU tensors, so no .to(device) move is needed
    print("Model inputs:", model_inputs)

    # Generate the response
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=4096,
        temperature=0.7,  # lower randomness
        top_p=0.9,        # nucleus sampling caps the candidate pool
        do_sample=True    # enable sampling (required for temperature/top_p to take effect)
    )
    print("Generated IDs:", generated_ids)

    # Slice off the prompt tokens, then decode only the newly generated IDs
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    print("Decoded response:", response)
    # DeepSeek-R1 distills emit their reasoning inside <think>...</think> tags;
    # surface them as visible **THINK** markers rather than dropping the content
    response = response.replace("<think>", "**THINK**").replace("</think>", "**THINK**").strip()
    # Return the final response
    return response

# Set up the Gradio chat interface
demo = gr.ChatInterface(
    fn=respond,
    title="DeepSeek-R1-Distill-Qwen-1.5B-openvino",
    description="DeepSeek-R1-Distill-Qwen-1.5B-openvino"
)
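# gr.ChatInterface invokes respond(message, history) on every user turn.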

if __name__ == "__main__":
    print("Launching Gradio app...")
    #demo.launch(server_name="0.0.0.0", server_port=7860)
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
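    # Note: share=True requests a temporary public gradio.live URL; omit it to serve only on the local network (see the commented-out launch above).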