File size: 2,254 Bytes
4d871c7
76cb536
0801ebc
4d871c7
0801ebc
6453441
7160766
8b4afb4
7160766
155b74f
4d871c7
7160766
8b4afb4
a7464e5
7160766
155b74f
8b4afb4
 
 
 
 
7160766
8b4afb4
 
a7464e5
155b74f
 
 
7160766
8b4afb4
 
 
7160766
246dff9
76da388
7160766
0801ebc
 
 
155b74f
0801ebc
 
 
 
 
 
 
 
e5d3a7a
 
 
 
 
 
 
 
 
 
 
 
0745678
4d871c7
e5d3a7a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import gradio as gr
from transformers import AutoTokenizer
from optimum.intel import OVModelForCausalLM

# Load the model and tokenizer (your original code).
# model_id is the repository identifier handed to both from_pretrained() calls below.
model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
print("Loading model...")
# NOTE(review): device_map="auto" is passed straight to OVModelForCausalLM.from_pretrained;
# confirm the OpenVINO loader actually honours this keyword.
model = OVModelForCausalLM.from_pretrained(model_id, device_map="auto")
print("Loading tokenizer...")
# use_fast=True requests the fast tokenizer implementation.
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)

def respond(prompt, history=None):
    """Generate a single-turn chat reply to ``prompt``.

    Builds a two-message conversation (a fixed system instruction plus the
    user prompt), renders it with the tokenizer's chat template, samples a
    completion from the module-level ``model`` and returns only the text
    decoded from the tokens that follow the prompt.

    Args:
        prompt: The user's message.
        history: Prior conversation as supplied by ``gr.ChatInterface``. It
            is accepted for signature compatibility but is not used, so each
            reply is generated from ``prompt`` alone. It defaults to ``None``
            so that callers supplying only a prompt (for example a
            ``gr.Interface`` with a single ``Textbox`` input) can call this
            function without raising ``TypeError``.

    Returns:
        The decoded reply with ``<think>`` and ``</think>`` both replaced by
        ``**THINK**`` and leading/trailing whitespace stripped.
    """
    messages = [
        {"role": "system", "content": "使用中文。"},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=4096,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )
    # Slice off the first len(input_ids) tokens of every output sequence so
    # that only the tokens after the prompt are decoded.
    completion_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(completion_ids, skip_special_tokens=True)[0]
    # The opening and closing reasoning tags are both rewritten to the same marker.
    response = response.replace("<think>", "**THINK**").replace("</think>", "**THINK**")
    return response.strip()

def maxtest(prompt):
    """Echo helper: return ``prompt`` exactly as it was received."""
    return prompt

# Build the Gradio UI: a page heading, a "聊天" tab holding the chat interface,
# and a second Interface object intended to serve as a hidden API endpoint.
with gr.Blocks() as demo:
    gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino")
    with gr.Tabs():
        with gr.TabItem("聊天"):
            # Chat tab: each submitted message is answered by respond().
            chat = gr.ChatInterface(
                fn=respond,
                title="聊天介面",
                description="DeepSeek-R1-Distill-Qwen-1.5B-openvino 聊天接口"
            )
    # Add the hidden interface to the Blocks as a component, with visible=False.
    # NOTE(review): the title/description below describe an echo-style test API,
    # but fn is respond (the model call), not maxtest — confirm which is intended.
    hidden_api = gr.Interface(
        fn=respond,
        inputs=gr.Textbox(label="Prompt"),
        outputs="text",
        api_name="/hchat",
        title="MaxTest API",
        description="回傳輸入內容的測試 API",
        visible=False
    )
    # Original intent: call .render() to add hidden_api to the layout so that the
    # API endpoint is registered even though the UI does not display it.
    # NOTE(review): the call below is commented out — verify the endpoint is still exposed.
    #hidden_api.render()

if __name__ == "__main__":
    # Script entry point: announce start-up, then launch the Blocks app.
    print("Launching Gradio app...")
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    demo.launch(**launch_options)