import gradio as gr
from transformers import AutoTokenizer
from optimum.intel import OVModelForCausalLM
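# OVModelForCausalLM wraps an OpenVINO-optimized model behind the familiar
# transformers generate() API, so the rest of this script reads like plain
# transformers code.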

# Load the OpenVINO-optimized model and its tokenizer.
model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
print("Loading model...")
# Note: `device_map="auto"` is a transformers/accelerate option and is not
# supported by OVModelForCausalLM; OpenVINO defaults to CPU execution.
model = OVModelForCausalLM.from_pretrained(model_id)
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)

def respond(prompt, history=None):
    # `history` is unused but kept optional so the function also works when
    # the single-input gr.Interface below calls it with the prompt alone.
    messages = [
        # System prompt instructing the model to answer in Chinese ("使用中文。").
        {"role": "system", "content": "使用中文。"},
        {"role": "user", "content": prompt}
    ]
    # Render the chat template into a single prompt string, appending the
    # assistant turn marker so generation starts at the reply.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    # Tensors stay on CPU; OpenVINO manages device placement internally, so
    # the usual `.to(model.device)` call is unnecessary here.
    model_inputs = tokenizer([text], return_tensors="pt")
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=4096,
        temperature=0.7,
        top_p=0.9,
        do_sample=True
    )
    # Drop the prompt tokens so only the newly generated tokens are decoded.
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    # Surface the model's <think>...</think> reasoning delimiters as visible
    # **THINK** markers instead of raw tags.
    response = response.replace("<think>", "**THINK**").replace("</think>", "**THINK**").strip()
    return response
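
# Quick sanity check (assumption: run manually, e.g. in a REPL, before
# launching the UI; the prompt string is illustrative):
#   print(respond("Hello"))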

def maxtest(prompt):
    # Echo helper kept around for API smoke tests; not wired into the UI.
    return prompt

with gr.Blocks() as demo:
    gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino")
    with gr.Tabs():
        with gr.TabItem("Chat"):
            chat_if = gr.Interface(
                fn=respond,
                inputs=gr.Textbox(label="Prompt", placeholder="Enter a message..."),
                outputs=gr.Textbox(label="Response", interactive=False),
                api_name="hchat",  # exposed to API clients as the /hchat endpoint
                title="DeepSeek-R1 Chat",
                description="Chat with the OpenVINO-optimized DeepSeek-R1 distill model."
            )


if __name__ == "__main__":
    print("Launching Gradio app...")
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
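
# --- Client usage sketch (assumption: the app is running locally on the
# port configured above; the URL and prompt are illustrative) ---
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860/")
#   reply = client.predict("Hello", api_name="/hchat")
#   print(reply)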