File size: 2,254 Bytes
4d871c7 76cb536 0801ebc 4d871c7 0801ebc 6453441 7160766 8b4afb4 7160766 155b74f 4d871c7 7160766 8b4afb4 a7464e5 7160766 155b74f 8b4afb4 7160766 8b4afb4 a7464e5 155b74f 7160766 8b4afb4 7160766 246dff9 76da388 7160766 0801ebc 155b74f 0801ebc e5d3a7a 0745678 4d871c7 e5d3a7a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
import gradio as gr
from transformers import AutoTokenizer
from optimum.intel import OVModelForCausalLM
# Load the OpenVINO-optimized model and its tokenizer once at import time.
model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
print("Loading model...")
model = OVModelForCausalLM.from_pretrained(model_id, device_map="auto")
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
def respond(prompt, history):
    """Generate a chat reply for *prompt* using the module-level model/tokenizer.

    Fix: the original ignored *history*, so every turn was answered without
    any conversation context.  Prior turns are now folded into the chat
    template so multi-turn conversations work.

    Args:
        prompt: The latest user message.
        history: Prior turns as supplied by gr.ChatInterface — either a list
            of ``[user, assistant]`` pairs or a list of
            ``{"role": ..., "content": ...}`` dicts depending on the Gradio
            version.

    Returns:
        The decoded model response with ``<think>``/``</think>`` markers
        rewritten to ``**THINK**``.
    """
    messages = [{"role": "system", "content": "使用中文。"}]
    # Replay previous turns; tolerate both Gradio history formats.
    for turn in history or []:
        if isinstance(turn, dict):
            messages.append({
                "role": turn.get("role", "user"),
                "content": turn.get("content", ""),
            })
        else:
            user_msg, bot_msg = turn
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if bot_msg:
                messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": prompt})

    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=4096,
        temperature=0.7,
        top_p=0.9,
        do_sample=True
    )
    # Strip the prompt tokens so only newly generated tokens are decoded.
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    # Both think tags map to the same marker, bolding the reasoning span in Markdown.
    response = response.replace("<think>", "**THINK**").replace("</think>", "**THINK**").strip()
    return response
def maxtest(prompt):
    """Identity echo used to sanity-check the API plumbing.

    Args:
        prompt: Arbitrary value to echo back.

    Returns:
        The *prompt* argument, unchanged.
    """
    echoed = prompt
    return echoed
# Build the Gradio UI: one visible chat tab plus a hidden Interface meant
# to expose `respond` as an extra API endpoint.
with gr.Blocks() as demo:
    gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino")
    with gr.Tabs():
        with gr.TabItem("聊天"):
            chat = gr.ChatInterface(
                fn=respond,
                title="聊天介面",
                description="DeepSeek-R1-Distill-Qwen-1.5B-openvino 聊天接口"
            )
    # Hidden interface added as a component inside the Blocks, with visible=False
    # so it is not shown in the UI.
    # NOTE(review): `api_name` and `visible` are not documented gr.Interface
    # constructor parameters in current Gradio releases — confirm against the
    # installed version; unexpected kwargs may be ignored or raise.
    hidden_api = gr.Interface(
        fn=respond,
        inputs=gr.Textbox(label="Prompt"),
        outputs="text",
        api_name="/hchat",
        title="MaxTest API",
        description="回傳輸入內容的測試 API",
        visible=False
    )
    # Calling .render() would attach hidden_api to the layout; the UI would stay
    # hidden but the API endpoint would still be registered.
    # NOTE(review): left commented out in the original — the endpoint is
    # therefore likely NOT registered; confirm intended behavior.
    #hidden_api.render()
# Script entry point: start the Gradio server on all interfaces, port 7860,
# with a public share link enabled.
if __name__ == "__main__":
    print("Launching Gradio app...")
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
|