hsuwill000 committed on
Commit
76cb536
·
verified ·
1 Parent(s): 155b74f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -26
app.py CHANGED
@@ -1,15 +1,20 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer
3
  from optimum.intel import OVModelForCausalLM
 
4
 
5
- # 模型與標記器載入(你的原始代碼)
6
  model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
7
  print("Loading model...")
8
  model = OVModelForCausalLM.from_pretrained(model_id, device_map="auto")
9
  print("Loading tokenizer...")
10
  tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
11
 
 
 
 
12
  def respond(prompt, history):
 
13
  messages = [
14
  {"role": "system", "content": "使用中文。"},
15
  {"role": "user", "content": prompt}
@@ -19,7 +24,9 @@ def respond(prompt, history):
19
  tokenize=False,
20
  add_generation_prompt=True
21
  )
 
22
  model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
 
23
  generated_ids = model.generate(
24
  **model_inputs,
25
  max_new_tokens=4096,
@@ -27,39 +34,35 @@ def respond(prompt, history):
27
  top_p=0.9,
28
  do_sample=True
29
  )
 
30
  generated_ids = [
31
  output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
32
  ]
33
  response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
 
34
  response = response.replace("<think>", "**THINK**").replace("</think>", "**THINK**").strip()
35
  return response
36
 
37
- def maxtest(prompt):
38
- return prompt
39
-
40
- # 使用 Blocks 同時建立聊天接口和 API 接口
41
  with gr.Blocks() as demo:
42
- gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino")
43
- with gr.Tabs():
44
- with gr.TabItem("聊天"):
45
- chat = gr.ChatInterface(
46
- fn=respond,
47
- title="聊天介面",
48
- description="DeepSeek-R1-Distill-Qwen-1.5B-openvino 聊天接口"
49
- )
50
- with gr.TabItem("MaxTest API"):
51
- # 這個接口會被暴露為 /run/maxtest
52
- api = gr.Interface(
53
- fn=maxtest,
54
- inputs=gr.Textbox(label="Prompt"),
55
- outputs="text",
56
- api_name="/maxtest",
57
- title="MaxTest API",
58
- description="回傳輸入內容的測試 API"
59
- )
60
- # 可以選擇將該接口放在單獨的 tab 內,也可以直接顯示
61
 
62
  if __name__ == "__main__":
63
  print("Launching Gradio app...")
64
- # 啟動應用(如果你想使用 share=True 讓外網訪問也可加上該參數)
65
  demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
 
1
  import gradio as gr
2
+ from huggingface_hub import InferenceClient
3
  from optimum.intel import OVModelForCausalLM
4
+ from transformers import AutoTokenizer
5
 
6
+ # 載入模型和標記器
7
  model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
8
  print("Loading model...")
9
  model = OVModelForCausalLM.from_pretrained(model_id, device_map="auto")
10
  print("Loading tokenizer...")
11
  tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
12
 
13
+ def maxtest(prompt):
14
+ return prompt
15
+
16
  def respond(prompt, history):
17
+ # 構建聊天模板
18
  messages = [
19
  {"role": "system", "content": "使用中文。"},
20
  {"role": "user", "content": prompt}
 
24
  tokenize=False,
25
  add_generation_prompt=True
26
  )
27
+ print("Chat template text:", text)
28
  model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
29
+ print("Model inputs:", model_inputs)
30
  generated_ids = model.generate(
31
  **model_inputs,
32
  max_new_tokens=4096,
 
34
  top_p=0.9,
35
  do_sample=True
36
  )
37
+ print("Generated IDs:", generated_ids)
38
  generated_ids = [
39
  output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
40
  ]
41
  response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
42
+ print("Decoded response:", response)
43
  response = response.replace("<think>", "**THINK**").replace("</think>", "**THINK**").strip()
44
  return response
45
 
 
 
 
 
46
  with gr.Blocks() as demo:
47
+ # 主要顯示的聊天界面
48
+ chat = gr.ChatInterface(
49
+ fn=respond,
50
+ title="DeepSeek-R1-Distill-Qwen-1.5B-openvino",
51
+ description="DeepSeek-R1-Distill-Qwen-1.5B-openvino 聊天接口"
52
+ )
53
+ # 隱藏的 API 接口(不在 UI 中呈現,但仍被註冊)
54
+ hidden_api = gr.Interface(
55
+ fn=maxtest,
56
+ inputs=gr.Textbox(label="Prompt"),
57
+ outputs="text",
58
+ api_name="/maxtest",
59
+ title="MaxTest API",
60
+ description="回傳輸入內容的測試 API",
61
+ visible=False # 設為 False 使其不顯示在 UI 上
62
+ )
63
+ # 將隱藏 API 加入到 Blocks 中,但不作為 UI 呈現
64
+ demo.append(hidden_api)
 
65
 
66
  if __name__ == "__main__":
67
  print("Launching Gradio app...")
 
68
  demo.launch(server_name="0.0.0.0", server_port=7860, share=True)