import gradio as gr
import time
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer, pipeline
# Load the model and tokenizer
model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
model = OVModelForCausalLM.from_pretrained(model_id, device="CPU")  # Explicitly target the CPU device
tokenizer = AutoTokenizer.from_pretrained(model_id)
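
# Note: the repo id above presumably already hosts an exported OpenVINO IR.
# For a plain PyTorch checkpoint, optimum-intel can convert on the fly via
# export=True (hypothetical alternative, not used by this app):
#
#     model = OVModelForCausalLM.from_pretrained(model_id, export=True, device="CPU")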
# Create generation pipeline
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
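
# The pipeline can also be called directly, outside of Gradio. A minimal
# sketch (the prompt text and token budget here are illustrative only):
#
#     out = pipe("User: What is OpenVINO?\nAssistant: ", max_new_tokens=64)
#     print(out[0]["generated_text"])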
def respond(message):
    try:
        start_time = time.time()

        # Reinforce the prompt to make the model's output more sensible
        instruction = (
            "Please answer the question in simple, accurate language, avoiding verbosity and repetition.\n"
            "User: " + message + "\n"
            "Assistant: "
        )

        # Generate response with improved settings
        response = pipe(
            instruction,
            max_length=4096,         # Cap the total output length to prevent endless repetition
            truncation=True,
            num_return_sequences=1,
            do_sample=True,          # Enable sampling so temperature/top_p actually take effect
            temperature=0.3,         # Keep some creativity while reducing rambling
            top_p=0.8,               # Encourage answer diversity while cutting meaningless repetition
            repetition_penalty=1.5,  # Lower the probability of repeated tokens
        )
        generated_text = response[0]['generated_text'].strip()

        # Extract the part after "Assistant:"
        if "Assistant:" in generated_text:
            reply = generated_text.split("Assistant:")[-1].strip()
        else:
            reply = generated_text

        inference_time = time.time() - start_time
        print(f"Inference time: {inference_time:.4f} seconds")
        return [(message, reply)]  # single-turn history; each submit replaces the chat display
    except Exception as e:
        print(f"Error: {e}")
        return [(message, "Sorry, something went wrong. Please try again.")]
# Set up Gradio chat interface
with gr.Blocks() as demo:
    gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino Chat")
    gr.Markdown("Chat with the DeepSeek-R1-Distill-Qwen-1.5B-openvino model.")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your Message")
    msg.submit(respond, msg, chatbot)

if __name__ == "__main__":
    demo.launch(share=True)
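
# To run locally: `python app.py`. With share=True, Gradio prints a temporary
# public *.gradio.live URL alongside the local address.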