hsuwill000 committed on
Commit
0801ebc
·
verified ·
1 Parent(s): 76cb536

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -21
app.py CHANGED
@@ -1,20 +1,15 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
- from optimum.intel import OVModelForCausalLM
4
  from transformers import AutoTokenizer
 
5
 
6
- # 載入模型和標記器
7
  model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
8
  print("Loading model...")
9
  model = OVModelForCausalLM.from_pretrained(model_id, device_map="auto")
10
  print("Loading tokenizer...")
11
  tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
12
 
13
- def maxtest(prompt):
14
- return prompt
15
-
16
  def respond(prompt, history):
17
- # 構建聊天模板
18
  messages = [
19
  {"role": "system", "content": "使用中文。"},
20
  {"role": "user", "content": prompt}
@@ -24,9 +19,7 @@ def respond(prompt, history):
24
  tokenize=False,
25
  add_generation_prompt=True
26
  )
27
- print("Chat template text:", text)
28
  model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
29
- print("Model inputs:", model_inputs)
30
  generated_ids = model.generate(
31
  **model_inputs,
32
  max_new_tokens=4096,
@@ -34,23 +27,26 @@ def respond(prompt, history):
34
  top_p=0.9,
35
  do_sample=True
36
  )
37
- print("Generated IDs:", generated_ids)
38
  generated_ids = [
39
  output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
40
  ]
41
  response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
42
- print("Decoded response:", response)
43
  response = response.replace("<think>", "**THINK**").replace("</think>", "**THINK**").strip()
44
  return response
45
 
 
 
 
46
  with gr.Blocks() as demo:
47
- # 主要顯示的聊天界面
48
- chat = gr.ChatInterface(
49
- fn=respond,
50
- title="DeepSeek-R1-Distill-Qwen-1.5B-openvino",
51
- description="DeepSeek-R1-Distill-Qwen-1.5B-openvino 聊天接口"
52
- )
53
- # 隱藏的 API 接口(不在 UI 中呈現,但仍被註冊)
 
 
54
  hidden_api = gr.Interface(
55
  fn=maxtest,
56
  inputs=gr.Textbox(label="Prompt"),
@@ -58,10 +54,10 @@ with gr.Blocks() as demo:
58
  api_name="/maxtest",
59
  title="MaxTest API",
60
  description="回傳輸入內容的測試 API",
61
- visible=False # 設為 False 使其不顯示在 UI 上
62
  )
63
- # 將隱藏 API 加入到 Blocks 中,但不作為 UI 呈現
64
- demo.append(hidden_api)
65
 
66
  if __name__ == "__main__":
67
  print("Launching Gradio app...")
 
1
  import gradio as gr
 
 
2
  from transformers import AutoTokenizer
3
+ from optimum.intel import OVModelForCausalLM
4
 
5
+ # 模型與標記器載入(你的原始代碼)
6
  model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
7
  print("Loading model...")
8
  model = OVModelForCausalLM.from_pretrained(model_id, device_map="auto")
9
  print("Loading tokenizer...")
10
  tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
11
 
 
 
 
12
  def respond(prompt, history):
 
13
  messages = [
14
  {"role": "system", "content": "使用中文。"},
15
  {"role": "user", "content": prompt}
 
19
  tokenize=False,
20
  add_generation_prompt=True
21
  )
 
22
  model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
 
23
  generated_ids = model.generate(
24
  **model_inputs,
25
  max_new_tokens=4096,
 
27
  top_p=0.9,
28
  do_sample=True
29
  )
 
30
  generated_ids = [
31
  output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
32
  ]
33
  response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
 
34
  response = response.replace("<think>", "**THINK**").replace("</think>", "**THINK**").strip()
35
  return response
36
 
37
+ def maxtest(prompt):
38
+ return prompt
39
+
40
  with gr.Blocks() as demo:
41
+ gr.Markdown("# DeepSeek-R1-Distill-Qwen-1.5B-openvino")
42
+ with gr.Tabs():
43
+ with gr.TabItem("聊天"):
44
+ chat = gr.ChatInterface(
45
+ fn=respond,
46
+ title="聊天介面",
47
+ description="DeepSeek-R1-Distill-Qwen-1.5B-openvino 聊天接口"
48
+ )
49
+ # 將隱藏的接口作為一個組件加入 Blocks,設定 visible=False
50
  hidden_api = gr.Interface(
51
  fn=maxtest,
52
  inputs=gr.Textbox(label="Prompt"),
 
54
  api_name="/maxtest",
55
  title="MaxTest API",
56
  description="回傳輸入內容的測試 API",
57
+ visible=False
58
  )
59
+ # 使用 .render() 將 hidden_api 組件加入佈局,雖然 UI 不會顯示,但 API 端點仍會註冊
60
+ hidden_api.render()
61
 
62
  if __name__ == "__main__":
63
  print("Launching Gradio app...")