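"""Gradio chat demo for hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino,
a DeepSeek-R1 distilled Qwen 1.5B model exported to OpenVINO and run on CPU
via optimum-intel."""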
import time

import gradio as gr
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer, pipeline

# Load the OpenVINO-exported model and its tokenizer from the Hugging Face Hub.
model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
model = OVModelForCausalLM.from_pretrained(model_id, device="CPU")
tokenizer = AutoTokenizer.from_pretrained(model_id)

# OVModelForCausalLM drops into the standard transformers pipeline API.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
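# Note: depending on the optimum-intel version, the first generation call may
# be noticeably slower while OpenVINO finishes preparing the model for CPU.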


def respond(message, chat_history):
    chat_history = chat_history or []  # Gradio may pass None for an empty Chatbot
    try:
        start_time = time.time()

        input_text = f"User: {message}"

        response = pipe(
            input_text,
            max_length=1024,
            truncation=True,
            num_return_sequences=1,
            do_sample=True,          # sampling must be enabled for temperature/top_p to take effect
            temperature=0.7,
            top_p=0.9,
            return_full_text=False,  # return only the completion, not the echoed prompt
        )
        reply = response[0]["generated_text"].strip()

        inference_time = time.time() - start_time
        print(f"Inference time: {inference_time:.4f} seconds")

        # Append the new turn so earlier exchanges stay visible in the Chatbot.
        chat_history.append((message, reply))
        return chat_history

    except Exception as e:
        print(f"Error: {e}")
        chat_history.append((message, "Sorry, something went wrong. Please try again."))
        return chat_history
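
# Optional smoke test (hypothetical usage, outside the UI): call respond()
# directly with an empty history and print the model's reply, e.g.:
#     print(respond("Hello!", [])[-1][1])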


with gr.Blocks() as demo:
    gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino Chat")
    gr.Markdown("Chat with the DeepSeek-R1-Distill-Qwen-1.5B-openvino model.")

    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your Message")

    # The Chatbot serves as both input (current history) and output (updated history).
    msg.submit(respond, [msg, chatbot], chatbot)


if __name__ == "__main__":
    # share=True asks Gradio to create a temporary public link in addition to the local URL.
    demo.launch(share=True)