hsuwill000 committed on
Commit
155b74f
·
verified ·
1 Parent(s): 094f0d1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -33
app.py CHANGED
@@ -1,66 +1,65 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
  from optimum.intel import OVModelForCausalLM
4
- from transformers import AutoTokenizer, pipeline
5
 
6
- # 載入模型和標記器
7
  model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
8
  print("Loading model...")
9
  model = OVModelForCausalLM.from_pretrained(model_id, device_map="auto")
10
  print("Loading tokenizer...")
11
- tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True,)
12
-
13
- def maxtest(prompt):
14
- return prompt
15
-
16
 
17
  def respond(prompt, history):
18
- # 構建聊天模板
19
  messages = [
20
  {"role": "system", "content": "使用中文。"},
21
  {"role": "user", "content": prompt}
22
- ]
23
  text = tokenizer.apply_chat_template(
24
  messages,
25
  tokenize=False,
26
  add_generation_prompt=True
27
  )
28
- print("Chat template text:", text)
29
-
30
- # 將文本轉換為模型輸入
31
  model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
32
- print("Model inputs:", model_inputs)
33
-
34
- # 生成回應
35
  generated_ids = model.generate(
36
  **model_inputs,
37
  max_new_tokens=4096,
38
- temperature=0.7, # 降低隨機性
39
- top_p=0.9, # 限制生成的多樣性
40
- do_sample=True # 啟用採樣
41
  )
42
- print("Generated IDs:", generated_ids)
43
-
44
- # 解碼生成的 token IDs
45
  generated_ids = [
46
  output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
47
  ]
48
  response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
49
- print("Decoded response:", response)
50
- # **去除 `<think>` 及其他無用內容**
51
  response = response.replace("<think>", "**THINK**").replace("</think>", "**THINK**").strip()
52
- # 返回回應
53
  return response
54
 
55
- # 設定 Gradio 的聊天界面
56
- demo = gr.ChatInterface(
57
- fn=respond,
58
- title="DeepSeek-R1-Distill-Qwen-1.5B-openvino",
59
- description="DeepSeek-R1-Distill-Qwen-1.5B-openvino"
60
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
  if __name__ == "__main__":
63
  print("Launching Gradio app...")
64
- #demo.launch(server_name="0.0.0.0", server_port=7860)
65
  demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
66
-
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer
3
  from optimum.intel import OVModelForCausalLM
 
4
 
5
+ # 模型與標記器載入(你的原始代碼)
6
  model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
7
  print("Loading model...")
8
  model = OVModelForCausalLM.from_pretrained(model_id, device_map="auto")
9
  print("Loading tokenizer...")
10
+ tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
 
 
 
 
11
 
12
  def respond(prompt, history):
 
13
  messages = [
14
  {"role": "system", "content": "使用中文。"},
15
  {"role": "user", "content": prompt}
16
+ ]
17
  text = tokenizer.apply_chat_template(
18
  messages,
19
  tokenize=False,
20
  add_generation_prompt=True
21
  )
 
 
 
22
  model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
 
 
 
23
  generated_ids = model.generate(
24
  **model_inputs,
25
  max_new_tokens=4096,
26
+ temperature=0.7,
27
+ top_p=0.9,
28
+ do_sample=True
29
  )
 
 
 
30
  generated_ids = [
31
  output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
32
  ]
33
  response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
 
 
34
  response = response.replace("<think>", "**THINK**").replace("</think>", "**THINK**").strip()
 
35
  return response
36
 
37
+ def maxtest(prompt):
38
+ return prompt
39
+
40
+ # 使用 Blocks 同時建立聊天接口和 API 接口
41
+ with gr.Blocks() as demo:
42
+ gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino")
43
+ with gr.Tabs():
44
+ with gr.TabItem("聊天"):
45
+ chat = gr.ChatInterface(
46
+ fn=respond,
47
+ title="聊天介面",
48
+ description="DeepSeek-R1-Distill-Qwen-1.5B-openvino 聊天接口"
49
+ )
50
+ with gr.TabItem("MaxTest API"):
51
+ # 這個接口會被暴露為 /run/maxtest
52
+ api = gr.Interface(
53
+ fn=maxtest,
54
+ inputs=gr.Textbox(label="Prompt"),
55
+ outputs="text",
56
+ api_name="/maxtest",
57
+ title="MaxTest API",
58
+ description="回傳輸入內容的測試 API"
59
+ )
60
+ # 可以選擇將該接口放在單獨的 tab 內,也可以直接顯示
61
 
62
  if __name__ == "__main__":
63
  print("Launching Gradio app...")
64
+ # 啟動應用(如果你想使用 share=True 讓外網訪問也可加上該參數)
65
  demo.launch(server_name="0.0.0.0", server_port=7860, share=True)