File size: 2,176 Bytes
4d871c7 0d30833 4d871c7 97e41b8 6453441 97e41b8 4d871c7 97e41b8 4d871c7 3c3f47e 97e41b8 0d30833 97e41b8 f906bd4 3920413 f906bd4 97e41b8 e69a5b4 f906bd4 b12dd94 e69a5b4 97e41b8 e69a5b4 c9ac48b 97e41b8 a9b4927 c9ac48b a9b4927 0d30833 a9b4927 97e41b8 f48f0af 97e41b8 f48f0af 6453441 3ee81c0 a9b4927 97e41b8 4d871c7 0e6fea8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import gradio as gr
import time
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer, pipeline
# Identifier handed to both ``from_pretrained`` calls below.
model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"

# Load the OpenVINO causal-LM wrapper; the device is specified explicitly.
model = OVModelForCausalLM.from_pretrained(
    model_id,
    device="CPU",
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Text-generation pipeline shared by every chat request.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
)
def respond(message):
    """Answer one user message with the module-level text-generation pipeline.

    The message is wrapped in a short instruction prompt that ends with an
    ``Assistant: `` marker and is passed to ``pipe``. Only the newly
    generated continuation is requested from the pipeline, so the reply no
    longer has to be recovered by splitting the full text on "Assistant:"
    (which dropped the real answer whenever the model itself emitted that
    marker).

    Args:
        message: Text submitted by the user.

    Returns:
        A one-element list ``[(message, reply)]``. If anything raises, the
        error is printed and the reply is a generic apology string instead.
    """
    try:
        start_time = time.time()

        # Strengthened prompt to steer the model toward more sensible output.
        instruction = (
            "請用簡單、準確的語言回答問題,避免冗長和重複內容。\n"
            "User: " + message + "\n"
            "Assistant: "
        )

        response = pipe(
            instruction,
            max_length=4096,  # cap the total length to prevent endless repetition
            truncation=True,
            num_return_sequences=1,
            # Sampling is enabled explicitly so temperature/top_p below are
            # guaranteed to apply rather than depending on the model's
            # default generation config.
            do_sample=True,
            temperature=0.3,  # keep some creativity but reduce rambling
            top_p=0.8,  # nucleus sampling threshold
            repetition_penalty=1.5,  # lower the chance of repeated tokens
            # Return only the completion, not prompt + completion.
            return_full_text=False,
        )
        reply = response[0]["generated_text"].strip()

        inference_time = time.time() - start_time
        print(f"Inference time: {inference_time:.4f} seconds")
        return [(message, reply)]
    except Exception as e:
        # UI boundary: report the failure and keep the chat usable.
        print(f"Error: {e}")
        return [(message, "Sorry, something went wrong. Please try again.")]
# Assemble the Gradio chat page: heading, description, chat window, input box.
with gr.Blocks() as demo:
    gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino Chat")
    gr.Markdown("Chat with DeepSeek-R1-Distill-Qwen-1.5B-openvino model.")

    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your Message")

    # Submitting the textbox runs respond(); its return value is sent to the chatbot.
    msg.submit(fn=respond, inputs=msg, outputs=chatbot)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(share=True)
|