hsuwill000 committed
Commit 8b4afb4 · verified · 1 Parent(s): 72cd188

Update app.py

Files changed (1):
  1. app.py +29 -50
app.py CHANGED
@@ -1,63 +1,42 @@
 import gradio as gr
-import time
+from huggingface_hub import InferenceClient
 from optimum.intel import OVModelForCausalLM
 from transformers import AutoTokenizer, pipeline
 
 # Load the model and tokenizer
 model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
-model = OVModelForCausalLM.from_pretrained(model_id, device="CPU")  # explicitly specify the device
+model = OVModelForCausalLM.from_pretrained(model_id, device_map="auto")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-# Create generation pipeline
-pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
-def respond(message):
-    try:
-        start_time = time.time()
-
-        # Strengthen the prompt so the model's output stays reasonable
-        instruction = (
-            "請用簡單、繁體中文、準確的語言回答問題,避免冗長和重複內容,重點摘要至512 tokens。\n"  # "Answer in simple, accurate Traditional Chinese; avoid verbosity and repetition; summarize within 512 tokens."
-            "User: " + message + "\n"
-            "Assistant: "
-        )
-
-        # Generate response with improved settings
-        response = pipe(
-            instruction,
-            max_length=1024,         # cap the maximum output length to prevent endless repetition
-            truncation=True,
-            num_return_sequences=1,
-            temperature=0.3,         # keep some creativity while reducing rambling
-            top_p=0.8,               # increase answer diversity, reduce meaningless repetition
-            repetition_penalty=1.5,  # lower the probability of repeated words
-        )
-        generated_text = response[0]['generated_text'].strip()
+def respond(prompt, history):  # history is supplied by gr.ChatInterface and unused here
+    messages = [
+        {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
+        {"role": "user", "content": prompt}
+    ]
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+    generated_ids = model.generate(
+        **model_inputs,
+        max_new_tokens=512
+    )
 
-        # Extract the part after "Assistant:"
-        if "Assistant:" in generated_text:
-            reply = generated_text.split("Assistant:")[-1].strip()
-        else:
-            reply = generated_text
+    generated_ids = [  # keep only the newly generated tokens, dropping the echoed prompt
+        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+    ]
+    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
 
-        inference_time = time.time() - start_time
-        print(f"Inference time: {inference_time:.4f} seconds")
-
-        return [(message, reply)]
-
-    except Exception as e:
-        print(f"Error: {e}")
-        return [(message, "Sorry, something went wrong. Please try again.")]
-
-# Set up Gradio chat interface
-with gr.Blocks() as demo:
-    gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino Chat")
-    gr.Markdown("Chat with DeepSeek-R1-Distill-Qwen-1.5B-openvino model.")
-
-    chatbot = gr.Chatbot()
-    msg = gr.Textbox(label="Your Message")
-
-    msg.submit(respond, msg, chatbot)
+    # Return the reply in the new message format
+    print(f"Messages: {messages}")
+    print(f"Reply: {response}")
+    return response
+
+# Set up the Gradio chat interface
+demo = gr.ChatInterface(fn=respond, title="DeepSeek-R1-Distill-Qwen-1.5B-openvino Chat", description="Chat with DeepSeek-R1-Distill-Qwen-1.5B-openvino model.", type='messages')
 
 if __name__ == "__main__":
-    demo.launch(share=True)
+    demo.launch()
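
A quick way to sanity-check the new code path before launching the UI is to call respond() directly. The sketch below is illustrative and not part of the commit; it assumes the updated file is saved as app.py and that the model snapshot can be downloaded, and it passes an empty list for the history argument that gr.ChatInterface would normally supply.

# Minimal smoke test for the updated respond() (illustrative, not part of the commit).
# Importing app runs the module-level model load, so the first run is slow.
from app import respond

if __name__ == "__main__":
    # Empty history, as gr.ChatInterface with type='messages' passes on the first turn
    reply = respond("Introduce OpenVINO in one sentence.", [])
    assert isinstance(reply, str) and reply, "expected a non-empty string reply"
    print(reply)

With type='messages', Gradio passes the history as a list of {"role": ..., "content": ...} dicts, so an empty list is what the function sees on the first turn.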