import gradio as gr
import time
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer, pipeline

# Load the model and tokenizer
model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
model = OVModelForCausalLM.from_pretrained(model_id, device="CPU")  # explicitly specify the device
tokenizer = AutoTokenizer.from_pretrained(model_id)
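
# Optional sketch (not required for this Space): optimum-intel's from_pretrained
# also accepts an `ov_config` dict of OpenVINO runtime properties, which can help
# CPU latency. The keys below are standard OpenVINO hints, but treat the exact
# spelling and support as an assumption about your installed optimum-intel version:
# model = OVModelForCausalLM.from_pretrained(
#     model_id,
#     device="CPU",
#     ov_config={"PERFORMANCE_HINT": "LATENCY", "CACHE_DIR": "./ov_cache"},
# )
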
# Create generation pipeline
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
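
# Quick sanity check (optional, a minimal sketch): run one prompt through the
# pipeline outside the UI to confirm the OpenVINO model loads and generates.
# print(pipe("User: Hello", max_new_tokens=32, return_full_text=False)[0]['generated_text'])
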
def respond(message):
    try:
        # Record the start time
        start_time = time.time()

        # Only use the current message as input (no history)
        input_text = f"User: {message}"

        # Generate response
        response = pipe(
            input_text,
            max_length=1024,
            truncation=True,
            num_return_sequences=1,
            do_sample=True,          # enable sampling so temperature/top_p take effect
            temperature=0.7,         # controls generation diversity
            top_p=0.9,               # controls generation quality
            return_full_text=False,  # return only the completion, not the echoed prompt
        )
        reply = response[0]['generated_text'].strip()

        # Calculate inference time
        inference_time = time.time() - start_time
        print(f"Inference time: {inference_time:.4f} seconds")

        # Return as a tuple (user message, bot reply)
        return [(message, reply)]
    except Exception as e:
        print(f"Error: {e}")
        return [(message, "Sorry, something went wrong. Please try again.")]
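
# Example: respond("Hello") returns a single-turn history such as
# [("Hello", "Hi! How can I help?")] (the reply text here is illustrative),
# matching the list-of-(user, bot)-tuples format that gr.Chatbot accepts.
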
# Set up Gradio chat interface
with gr.Blocks() as demo:
    gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino Chat")
    gr.Markdown("Chat with the DeepSeek-R1-Distill-Qwen-1.5B-openvino model.")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your Message")
    msg.submit(respond, msg, chatbot)

if __name__ == "__main__":
    demo.launch(share=True)