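# Gradio chat app that serves DeepSeek-R1-Distill-Qwen-1.5B through the
# OpenVINO runtime via optimum-intel.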
import gradio as gr
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer
# Load the model and tokenizer (OpenVINO models take a `device` string such
# as "CPU"; the transformers-style `device_map="auto"` is not supported here)
model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
model = OVModelForCausalLM.from_pretrained(model_id, device="CPU")
tokenizer = AutoTokenizer.from_pretrained(model_id)
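# Note: the "-openvino" suffix suggests the repo already contains an exported
# OpenVINO IR, so no `export=True` flag should be needed when loading it.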
def respond(prompt, history):
    # Start with the system message
    messages = [
        {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."}
    ]
    # With type="messages", Gradio passes history as a list of
    # {"role": ..., "content": ...} dicts, so each entry maps straight
    # into the chat-template message format
    for msg in history:
        messages.append({"role": msg["role"], "content": msg["content"]})
    # Append the current user input
    messages.append({"role": "user", "content": prompt})
    # Build the model input with the chat template and generate a reply
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=512
    )
    # Strip the prompt tokens so only the newly generated reply is decoded
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    print(f"Messages: {messages}")
    print(f"Reply: {response}")
    return response
    """
    Alternative (disabled): return the full conversation instead of a single
    reply. This assumed the older {"user": ..., "assistant": ...} history
    format rather than role/content message dicts.

    # Update and return the complete chat history (as a list of dicts)
    new_history = history.copy()
    new_history.append({"user": prompt, "assistant": response})
    # The final return value is a list of messages, each a dict
    # (convertible to the ChatMessage format if needed)
    final_messages = []
    # To display the full conversation, split each history entry into two messages
    for item in new_history:
        final_messages.append({"role": "user", "content": item["user"]})
        final_messages.append({"role": "assistant", "content": item["assistant"]})
    return final_messages
    """
# Set up the Gradio chat interface
demo = gr.ChatInterface(
    fn=respond,
    title="DeepSeek-R1-Distill-Qwen-1.5B-openvino Chat",
    description="Chat with the DeepSeek-R1-Distill-Qwen-1.5B-openvino model.",
    type="messages"
)
if __name__ == "__main__":
    demo.launch()