hsuwill000 committed
Commit 7160766 · verified · 1 Parent(s): d4c56cf

Update app.py

Files changed (1)
  1. app.py +29 -24
app.py CHANGED
@@ -5,48 +5,53 @@ from transformers import AutoTokenizer, pipeline
 
 # Load the model and tokenizer
 model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
+print("Loading model...")
 model = OVModelForCausalLM.from_pretrained(model_id, device_map="auto")
+print("Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-# Build the generation pipeline
-#pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
-
-def respond(prompt , history):
-    # Merge the current message with the conversation history
-    #input_text = message if not history else history[-1]["content"] + " " + message
-    #input_text = message+",(450字內回覆)"
-
+def respond(prompt, history):
+    # Build the chat template
     messages = [
-        {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
-        {"role": "user", "content": prompt }
+        {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
+        {"role": "user", "content": prompt}
     ]
     text = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
         add_generation_prompt=True
     )
-    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+    print("Chat template text:", text)
+
+    # Convert the text into model inputs
+    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+    print("Model inputs:", model_inputs)
+
+    # Generate the response
     generated_ids = model.generate(
         **model_inputs,
         max_new_tokens=512
-    )
-
-    # Get the model's response
-    #response = pipe(input_text, max_length=512, truncation=True, num_return_sequences=1)
-    #reply = response[0]['generated_text']
+    )
+    print("Generated IDs:", generated_ids)
+
+    # Decode the generated token IDs
     generated_ids = [
         output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
     ]
-    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
-
-    # Return the new message format
-    print(f"Messages: {messages}")
-    print(f"Reply: {response}")
+    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    print("Decoded response:", response)
+
+    # Return the response
     return response
-
+
 # Set up the Gradio chat interface
-demo = gr.ChatInterface(fn=respond, title="Qwen2.5-0.5B-Instruct-openvino-4bit", description="Qwen2.5-0.5B-Instruct-openvino-4bit", type='messages')
+demo = gr.ChatInterface(
+    fn=respond,
+    title="Qwen2.5-0.5B-Instruct-openvino-4bit",
+    description="Qwen2.5-0.5B-Instruct-openvino-4bit"
+)
 
 if __name__ == "__main__":
-    demo.launch()
+    print("Launching Gradio app...")
+    demo.launch(server_name="0.0.0.0", server_port=7860)
 
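
For reviewers who want to sanity-check the new generate-and-decode path outside the Space, the core of the updated respond() can be reproduced as a standalone script. Below is a minimal sketch, not the app itself: it assumes optimum-intel and transformers are installed, reuses the model_id from the diff, and the "Hello!" user prompt is made up for illustration.

# Minimal sketch of the commit's generate-and-decode path, run without Gradio.
# Assumptions: optimum-intel and transformers are installed; the user prompt
# below is illustrative and not part of the commit.
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer

model_id = "hsuwill000/DeepSeek-R1-Distill-Qwen-1.5B-openvino"
model = OVModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

messages = [
    {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]
# Render the chat template to a string, then tokenize it for generation.
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt")

generated_ids = model.generate(**model_inputs, max_new_tokens=512)
# Slice off the echoed prompt tokens so only the new completion is decoded,
# mirroring the zip/len slicing in app.py.
trimmed = [out[len(inp):] for inp, out in zip(model_inputs.input_ids, generated_ids)]
print(tokenizer.batch_decode(trimmed, skip_special_tokens=True)[0])

One note on the launch change: server_name="0.0.0.0" and server_port=7860 match what a Hugging Face Space expects from a Gradio app, so the explicit arguments should be a no-op on Spaces while making local runs reachable from other machines on the network.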