# NOTE: removed non-Python scrape artifacts that preceded this line
# (a "File size" header, git-blame commit hashes, and a line-number gutter).
import gradio as gr
from huggingface_hub import InferenceClient
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer, pipeline
# Load the model and tokenizer.
# This runs at import time: the model weights are downloaded/loaded before the
# Gradio app starts, so startup is slow but requests are served immediately.
model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
print("Loading model...")
# OpenVINO-exported causal LM; device_map="auto" lets optimum pick the device.
model = OVModelForCausalLM.from_pretrained(model_id, device_map="auto")
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True,)
def respond(prompt, history):
    """Generate one chat reply for the Gradio ChatInterface.

    Args:
        prompt: The latest user message (str).
        history: Prior turns supplied by gr.ChatInterface — depending on the
            Gradio version either a list of (user, assistant) pairs or a list
            of {"role": ..., "content": ...} dicts. The original code ignored
            this entirely, so the bot had no conversational memory; it is now
            replayed into the prompt.

    Returns:
        str: The decoded model reply, with DeepSeek-R1's <think>/</think>
        reasoning tags marked as **THINK** and surrounding whitespace stripped.
    """
    # System prompt kept byte-identical (it is model-facing runtime text):
    # "Answer in Chinese, directly and concisely, within ~1024 tokens."
    messages = [
        {"role": "system", "content": "使用中文,直接回答用戶的問題,盡量簡潔在1024 token內。"},
    ]
    # Replay prior turns so the model sees the conversation context.
    for turn in history or []:
        if isinstance(turn, dict):
            # Gradio "messages" format: {"role": ..., "content": ...}
            if turn.get("role") in ("user", "assistant") and turn.get("content"):
                messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Legacy tuple format: (user_message, assistant_message)
            user_msg, bot_msg = turn
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if bot_msg:
                messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": prompt})

    # Render the messages with the model's chat template, appending the
    # assistant generation prompt so the model continues as the assistant.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    print("Chat template text:", text)

    # Tokenize and move the tensors to the model's device.
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    print("Model inputs:", model_inputs)

    # Generate the reply (sampling enabled, bounded to 2048 new tokens).
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=2048,
        temperature=0.7,  # lower randomness
        top_p=0.9,        # nucleus sampling: restrict to top-90% mass
        do_sample=True,   # enable sampling
        pad_token_id=tokenizer.eos_token_id,  # silence the missing-pad warning
    )
    print("Generated IDs:", generated_ids)

    # Drop the prompt tokens, keeping only the newly generated continuation.
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    print("Decoded response:", response)

    # Mark (not remove) the <think> reasoning span so it is visible but styled.
    response = response.replace("<think>", "**THINK**").replace("</think>", "**THINK**").strip()
    return response
# Set up the Gradio chat interface, wiring respond() as the message handler.
demo = gr.ChatInterface(
    fn=respond,
    title="DeepSeek-R1-Distill-Qwen-1.5B-openvino",
    description="DeepSeek-R1-Distill-Qwen-1.5B-openvino"
)
if __name__ == "__main__":
    print("Launching Gradio app...")
    # Bind to all interfaces on port 7860 — the standard Hugging Face Spaces port.
    demo.launch(server_name="0.0.0.0", server_port=7860)
    #demo.launch()