File size: 2,114 Bytes
4d871c7
8b4afb4
4d871c7
 
 
8b4afb4
6453441
7160766
8b4afb4
7160766
7f4a0a3
 
 
4d871c7
7160766
 
8b4afb4
397bd8a
7160766
7764a77
8b4afb4
 
 
 
 
7160766
 
 
 
 
 
 
8b4afb4
 
6d5d9fc
 
 
 
7160766
 
 
 
8b4afb4
 
 
7160766
7764a77
49a4759
246dff9
7160766
76da388
7160766
8b4afb4
7160766
 
6d5d9fc
 
7160766
4d871c7
 
7160766
6d5d9fc
 
d4c56cf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import gradio as gr
from huggingface_hub import InferenceClient
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer, pipeline

# Load the OpenVINO-optimized causal LM and its tokenizer.
# NOTE: these run at import time and download/load weights — module import
# is intentionally side-effectful; `model` and `tokenizer` are read by respond().
model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
print("Loading model...")
model = OVModelForCausalLM.from_pretrained(model_id, device_map="auto")
print("Loading tokenizer...")
# use_fast=True selects the Rust-backed tokenizer implementation.
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True,)



def respond(prompt, history):
    """Generate a chat reply for *prompt*, conditioned on prior turns.

    Fix: the original ignored *history* entirely, making the chatbot
    stateless even though gr.ChatInterface supplies the conversation.
    Previous turns are now folded into the chat template.

    Parameters:
        prompt: latest user message (str).
        history: prior turns from gr.ChatInterface — either a list of
            (user, assistant) pairs or a list of {"role", "content"}
            dicts, depending on the Gradio version. May be None/empty.

    Returns:
        str: decoded model output with ``<think>``/``</think>`` markers
        rewritten to ``**THINK**``.
    """
    # Build the chat message list, replaying the conversation so far.
    messages = [
        {"role": "system", "content": "使用中文,直接回答用戶的問題,盡量簡潔在1024 token內。"},
    ]
    for turn in history or []:
        if isinstance(turn, dict):
            # messages-style history (gradio type="messages")
            if turn.get("role") in ("user", "assistant"):
                messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # tuple-style history: (user_message, assistant_message)
            user_msg, assistant_msg = turn
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": prompt})

    # Render the messages through the model's chat template (text only,
    # tokenization happens in the next step).
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    print("Chat template text:", text)

    # Tokenize and move the input tensors onto the model's device.
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    print("Model inputs:", model_inputs)

    # Sample a completion; temperature/top_p constrain randomness.
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=2048,
        temperature=0.7,
        top_p=0.9,
        do_sample=True
    )
    print("Generated IDs:", generated_ids)

    # Drop the prompt tokens so only the newly generated tail is decoded.
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    print("Decoded response:", response)

    # Surface DeepSeek's <think>…</think> reasoning markers as **THINK**
    # (they are rewritten, not removed, so the reasoning stays visible).
    response = response.replace("<think>", "**THINK**").replace("</think>", "**THINK**").strip()
    return response

# Wire the chat handler into a Gradio chat UI.
_APP_NAME = "DeepSeek-R1-Distill-Qwen-1.5B-openvino"
demo = gr.ChatInterface(
    fn=respond,
    title=_APP_NAME,
    description=_APP_NAME,
)

if __name__ == "__main__":
    print("Launching Gradio app...")
    demo.launch(server_name="0.0.0.0", server_port=7860)
    #demo.launch()