# Gradio chat app for DeepSeek-R1-Distill-Qwen-1.5B (OpenVINO, CPU inference)
import gradio as gr
import time
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer, pipeline
# Load the model and tokenizer
# NOTE: these run at import time and download/compile the model — expect a slow start.
model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
model = OVModelForCausalLM.from_pretrained(model_id, device="CPU") # explicitly target the CPU device
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Create generation pipeline
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
def respond(message):
    """Generate a single-turn chat reply for *message*.

    Returns a list with one ``(user_message, bot_reply)`` tuple — the
    format Gradio's Chatbot component expects. On any failure, returns an
    apology message instead of raising, so the UI stays responsive.
    """
    try:
        # Record the start time for inference-latency logging
        start_time = time.time()
        # Prompt instruction: ask the model to output only the final answer,
        # without showing any intermediate reasoning.
        instruction = "請只輸出最終答案,不要展示任何中間推理過程。"
        input_text = f"User: {message}\nAssistant: {instruction}\nAssistant:"
        # Generate response
        response = pipe(
            input_text,
            max_new_tokens=512,      # bound the reply length; max_length would also count prompt tokens
            truncation=True,
            num_return_sequences=1,
            do_sample=True,          # required — temperature/top_p are ignored under greedy decoding
            temperature=0.2,         # low temperature: focused, less random output
            top_p=0.1,               # tight nucleus sampling for generation quality
            return_full_text=False,  # return only newly generated text, not the echoed prompt
        )
        generated_text = response[0]['generated_text'].strip()
        # Safety net: if the model still emits an "Assistant:" marker, keep
        # only the text after its last occurrence.
        if "Assistant:" in generated_text:
            reply = generated_text.split("Assistant:")[-1].strip()
        else:
            reply = generated_text
        # Calculate and log inference time
        inference_time = time.time() - start_time
        print(f"Inference time: {inference_time:.4f} seconds")
        # Return as a tuple (user message, bot reply)
        return [(message, reply)]
    except Exception as e:
        # Broad catch is deliberate: this is the UI boundary and must never crash.
        print(f"Error: {e}")
        return [(message, "Sorry, something went wrong. Please try again.")]
# --- Gradio chat interface --------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino Chat")
    gr.Markdown("Chat with DeepSeek-R1-Distill-Qwen-1.5B-openvino model.")
    chat_window = gr.Chatbot()
    user_box = gr.Textbox(label="Your Message")
    # Pressing Enter feeds the textbox contents to respond() and renders
    # the returned (user, bot) pair in the chat window.
    user_box.submit(respond, user_box, chat_window)

if __name__ == "__main__":
    # share=True exposes a temporary public Gradio link in addition to localhost.
    demo.launch(share=True)