File size: 2,603 Bytes
4d871c7
76cb536
0801ebc
4d871c7
0801ebc
6453441
7160766
8b4afb4
7160766
155b74f
4d871c7
7160766
8b4afb4
a7464e5
7160766
155b74f
8b4afb4
 
 
 
 
7160766
8b4afb4
 
a7464e5
155b74f
 
 
7160766
8b4afb4
 
 
7160766
246dff9
76da388
7160766
0801ebc
 
 
155b74f
0801ebc
 
 
 
 
 
 
 
 
76cb536
 
 
 
 
 
 
0801ebc
76cb536
0801ebc
249ad4e
9871750
 
 
 
 
 
 
 
 
 
249ad4e
9871750
4d871c7
7160766
094f0d1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import gradio as gr
from transformers import AutoTokenizer
from optimum.intel import OVModelForCausalLM

# 模型與標記器載入(你的原始代碼)
model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
print("Loading model...")
model = OVModelForCausalLM.from_pretrained(model_id, device_map="auto")
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)

def respond(prompt, history):
    messages = [
        {"role": "system", "content": "使用中文。"},
        {"role": "user", "content": prompt}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=4096,
        temperature=0.7,
        top_p=0.9,
        do_sample=True
    )
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    response = response.replace("<think>", "**THINK**").replace("</think>", "**THINK**").strip()
    return response

def maxtest(prompt):
    return prompt

with gr.Blocks() as demo:
    gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino")
    with gr.Tabs():
        with gr.TabItem("聊天"):
            chat = gr.ChatInterface(
                fn=respond,
                title="聊天介面",
                description="DeepSeek-R1-Distill-Qwen-1.5B-openvino 聊天接口"
            )
    # 將隱藏的接口作為一個組件加入 Blocks,設定 visible=False
    hidden_api = gr.Interface(
        fn=maxtest,
        inputs=gr.Textbox(label="Prompt"),
        outputs="text",
        api_name="/maxtest",
        title="MaxTest API",
        description="回傳輸入內容的測試 API",
        visible=False
    )
    # 使用 .render() 將 hidden_api 組件加入佈局,雖然 UI 不會顯示,但 API 端點仍會註冊
    #hidden_api.render()
    # 將隱藏的接口作為一個組件加入 Blocks,設定 visible=False
    hidden_api2 = gr.Interface(
        fn=respond,
        inputs=gr.Textbox(label="Prompt"),
        outputs="text",
        api_name="/hchat",
        title="hidden chat",
        description="hidden chat",
        visible=False
    )
    #hidden_api2.render()
    
if __name__ == "__main__":
    print("Launching Gradio app...")
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)