hsuwill000 committed on
Commit
6d5d9fc
·
verified ·
1 Parent(s): bfe0877

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -6
app.py CHANGED
@@ -15,7 +15,7 @@ tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True,)
15
  def respond(prompt, history):
16
  # 構建聊天模板
17
  messages = [
18
- {"role": "system", "content": "用戶是繁體中文使用者. 包括think 回答限縮在1024token"},
19
  {"role": "user", "content": prompt}
20
  ]
21
  text = tokenizer.apply_chat_template(
@@ -32,7 +32,10 @@ def respond(prompt, history):
32
  # 生成回應
33
  generated_ids = model.generate(
34
  **model_inputs,
35
- max_new_tokens=2048
 
 
 
36
  )
37
  print("Generated IDs:", generated_ids)
38
 
@@ -50,12 +53,12 @@ def respond(prompt, history):
50
  # 設定 Gradio 的聊天界面
51
  demo = gr.ChatInterface(
52
  fn=respond,
53
- title="Qwen2.5-0.5B-Instruct-openvino-4bit",
54
- description="Qwen2.5-0.5B-Instruct-openvino-4bit"
55
  )
56
 
57
  if __name__ == "__main__":
58
  print("Launching Gradio app...")
59
- #demo.launch(server_name="0.0.0.0", server_port=7860)
60
- demo.launch()
61
 
 
15
  def respond(prompt, history):
16
  # 構建聊天模板
17
  messages = [
18
+ {"role": "system", "content": "使用中文,直接回答用戶的問題,不要生成 <think> 區段。"},
19
  {"role": "user", "content": prompt}
20
  ]
21
  text = tokenizer.apply_chat_template(
 
32
  # 生成回應
33
  generated_ids = model.generate(
34
  **model_inputs,
35
+ max_new_tokens=2048,
36
+ temperature=0.7, # 降低隨機性
37
+ top_p=0.9, # 限制生成的多樣性
38
+ do_sample=True # 啟用採樣
39
  )
40
  print("Generated IDs:", generated_ids)
41
 
 
53
  # 設定 Gradio 的聊天界面
54
  demo = gr.ChatInterface(
55
  fn=respond,
56
+ title="DeepSeek-R1-Distill-Qwen-1.5B-openvino",
57
+ description="DeepSeek-R1-Distill-Qwen-1.5B-openvino"
58
  )
59
 
60
  if __name__ == "__main__":
61
  print("Launching Gradio app...")
62
+ demo.launch(server_name="0.0.0.0", server_port=7860)
63
+ #demo.launch()
64