File size: 1,479 Bytes
4d871c7 8b4afb4 4d871c7 8b4afb4 6453441 8b4afb4 4d871c7 8b4afb4 97e41b8 8b4afb4 3ee81c0 8b4afb4 a9b4927 8b4afb4 4d871c7 8b4afb4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
import gradio as gr
from huggingface_hub import InferenceClient
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer, pipeline
# 載入模型和標記器
model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
model = OVModelForCausalLM.from_pretrained(model_id, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)
def respond(prompt):
messages = [
{"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
{"role": "user", "content": prompt }
]
text = tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
generated_ids = model.generate(
**model_inputs,
max_new_tokens=512
)
generated_ids = [
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
# 返回新的消息格式
print(f"Messages: {messages}")
print(f"Reply: {response}")
return response
# 設定 Gradio 的聊天界面
demo = gr.ChatInterface(fn=respond, title="# DeepSeek-R1-Distill-Qwen-1.5B-openvino Chat", description="Chat with DeepSeek-R1-Distill-Qwen-1.5B-openvino model.", type='messages')
if __name__ == "__main__":
demo.launch() |