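# Gradio chat demo for the OpenVINO-optimized DeepSeek-R1-Distill-Qwen-1.5B model.
# Loads the model with optimum-intel, runs text generation on CPU, and serves a simple chat UI.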
import gradio as gr
import time
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer, pipeline
# Load the model and tokenizer
model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
model = OVModelForCausalLM.from_pretrained(model_id, device="CPU")  # explicitly specify the device
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Create generation pipeline
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
def respond(message):
    try:
        # Record the start time
        start_time = time.time()
        # Only use the current message as input (no history)
        input_text = f"User: {message}"
        # Generate response
        response = pipe(
            input_text,
            max_length=1024,
            truncation=True,
            num_return_sequences=1,
            do_sample=True,          # enable sampling so temperature/top_p take effect
            temperature=0.7,         # controls generation diversity
            top_p=0.9,               # nucleus sampling, controls generation quality
            return_full_text=False,  # return only the newly generated text, not the prompt
        )
        reply = response[0]['generated_text'].strip()
        # Calculate inference time
        inference_time = time.time() - start_time
        print(f"Inference time: {inference_time:.4f} seconds")
        # Return as a tuple (user message, bot reply)
        return [(message, reply)]
    except Exception as e:
        print(f"Error: {e}")
        return [(message, "Sorry, something went wrong. Please try again.")]
# Set up Gradio chat interface
with gr.Blocks() as demo:
    gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino Chat")
    gr.Markdown("Chat with the DeepSeek-R1-Distill-Qwen-1.5B-openvino model.")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your Message")
    msg.submit(respond, msg, chatbot)

if __name__ == "__main__":
    demo.launch(share=True)
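
# Assumed local setup (package names inferred from the imports above; versions not pinned here):
#   pip install gradio transformers optimum[openvino]
#   python app.py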