hsuwill000 committed on
Commit
da55442
·
verified ·
1 Parent(s): 8c67e00

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -27
app.py CHANGED
@@ -4,6 +4,7 @@ import openvino_genai as ov_genai
4
  import numpy as np
5
  import gradio as gr
6
  import re
 
7
 
8
  # 下載模型
9
  model_ids = [
@@ -12,7 +13,6 @@ model_ids = [
12
  #"OpenVINO/Qwen3-4B-int4-ov",#不可用
13
  "OpenVINO/Qwen3-8B-int4-ov",
14
  "OpenVINO/Qwen3-14B-int4-ov",
15
-
16
  ]
17
 
18
 
@@ -30,41 +30,66 @@ for model_id in model_ids:
30
  device = "CPU"
31
  default_model_name = "Qwen3-0.6B-int4-ov" # Choose a default model
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  def generate_response(prompt, model_name):
34
- global pipe, tokenizer # Access the global variables
35
 
36
  model_path = model_name
37
 
38
  print(f"Switching to model: {model_name}")
39
- pipe = ov_genai.LLMPipeline(model_path, device)
40
- tokenizer = pipe.get_tokenizer()
41
- tokenizer.set_chat_template(tokenizer.chat_template)
42
-
43
  try:
44
- generated = pipe.generate([prompt], max_length=1024)
45
- tokenpersec=f'{generated.perf_metrics.get_throughput().mean:.2f}'
 
 
 
 
 
 
 
 
46
 
47
- return tokenpersec, generated
 
 
 
 
 
 
48
  except Exception as e:
49
- return "發生錯誤", "發生錯誤", f"生成回應時發生錯誤:{e}"
 
50
 
51
-
52
  # 建立 Gradio 介面
53
  model_choices = list(model_name_to_full_id.keys())
54
 
55
- demo = gr.Interface(
56
- fn=generate_response,
57
- inputs=[
58
- gr.Textbox(lines=5, label="輸入提示 (Prompt)"),
59
- gr.Dropdown(choices=model_choices, value=default_model_name, label="選擇模型") # Added dropdown
60
- ],
61
- outputs=[
62
- gr.Textbox(label="tokens/sec"),
63
- gr.Textbox(label="回應"),
64
- ],
65
- title="Qwen3 Model Inference",
66
- description="基於 Qwen3 推理應用,支援思考過程分離與 GUI。"
67
- )
68
-
69
- if __name__ == "__main__":
70
- demo.launch()
 
4
  import numpy as np
5
  import gradio as gr
6
  import re
7
+ import threading
8
 
9
# Models to download (OpenVINO int4-quantized Qwen3 checkpoints on the HF Hub).
model_ids = [
    #"OpenVINO/Qwen3-4B-int4-ov",  # marked as not usable in the original ("不可用")
    "OpenVINO/Qwen3-8B-int4-ov",
    "OpenVINO/Qwen3-14B-int4-ov",
]
17
 
18
 
 
30
# Inference device string passed to ov_genai.LLMPipeline.
device = "CPU"
default_model_name = "Qwen3-0.6B-int4-ov"  # Choose a default model for the dropdown
32
 
33
# Module-level state shared between the Gradio callback and the streaming callback.
pipe = None                # ov_genai.LLMPipeline; (re)created on every generate_response call
tokenizer = None           # tokenizer belonging to the current pipe
markdown_component = None  # gr.Markdown output; assigned when the UI is built below
accumulated_text = ""      # text streamed so far for the current generation
38
+
39
+
40
# Push `text` into the live Markdown output component (called from streamer threads).
def update_markdown(text):
    global markdown_component
    if markdown_component:
        # NOTE(review): in Gradio 3.x Component.update() returns an update dict
        # rather than pushing to a running UI, and in Gradio 4.x the method was
        # removed entirely — called from a background thread like this, it most
        # likely has no visible effect. Confirm against the installed Gradio
        # version; streaming via a generator in generate_response is the
        # supported mechanism.
        markdown_component.update(value=text)
45
+
46
# Token-streaming callback handed to ov_genai generate() (original structure kept).
# Receives each decoded subword; returning StreamingStatus.RUNNING continues generation.
def streamer(subword):
    global accumulated_text
    accumulated_text += subword
    print(subword, end='', flush=True)  # mirror tokens to the console as they arrive
    # NOTE(review): this spawns one unjoined thread per token; thread scheduling
    # gives no ordering guarantee between updates, and update_markdown likely
    # cannot update a live Gradio UI anyway (see note there) — consider yielding
    # partial text from generate_response instead.
    threading.Thread(target=update_markdown, args=(accumulated_text,)).start()
    return ov_genai.StreamingStatus.RUNNING
54
+
55
def generate_response(prompt, model_name):
    """Load the selected OpenVINO model and generate a streamed reply.

    The pipeline is rebuilt on every call, which keeps model switching simple
    at the cost of reload time.

    Parameters
    ----------
    prompt : str
        User prompt from the textbox.
    model_name : str
        Model directory name chosen in the dropdown (used directly as the path).

    Returns
    -------
    tuple[str, str]
        (throughput markdown, generated text) on success, or two error strings
        on failure — always exactly two values, matching the two Gradio output
        components wired to this callback.
    """
    global pipe, tokenizer, markdown_component, accumulated_text

    model_path = model_name  # the model was downloaded under this folder name

    print(f"Switching to model: {model_name}")
    try:
        pipe = ov_genai.LLMPipeline(model_path, device)
        tokenizer = pipe.get_tokenizer()
        tokenizer.set_chat_template(tokenizer.chat_template)
    except Exception as e:
        print(f"Error initializing pipeline: {e}")
        # BUG FIX: previously returned a 3-tuple here, but the UI has exactly
        # two output components — the mismatch broke the error display.
        return "初始化推理管線錯誤", f"生成回應時發生錯誤:{e}"

    accumulated_text = ""  # reset the stream buffer for this request
    if markdown_component:
        markdown_component.update(value="")  # best-effort clear of previous output

    try:
        pipe.start_chat()
        try:
            # BUG FIX: keep the generate() result — performance metrics live on
            # the returned DecodedResults object, not on the pipeline itself
            # (pipe.perf_metrics raised AttributeError and sent every request
            # into the except branch).
            result = pipe.generate(prompt, streamer=streamer, max_new_tokens=100)
        finally:
            # Always leave chat mode, even if generation raised mid-stream.
            pipe.finish_chat()
        tokenpersec = f'{result.perf_metrics.get_throughput().mean:.2f}'
        return f"**{tokenpersec} tokens/sec**", accumulated_text
    except Exception as e:
        print(f"Error during generation: {e}")
        return "發生錯誤", f"生成回應時發生錯誤:{e}"
83
 
 
84
# Build the Gradio interface.
model_choices = list(model_name_to_full_id.keys())

with gr.Blocks() as demo:
    input_text = gr.Textbox(lines=5, label="輸入提示 (Prompt)")
    model_dropdown = gr.Dropdown(choices=model_choices, value=default_model_name, label="選擇模型")
    tokens_per_sec = gr.Markdown(label="tokens/sec")  # throughput readout
    # Assign the module-level handle that update_markdown()/streamer() write to.
    markdown_component = gr.Markdown(label="回应")
    # Pressing Enter in the textbox triggers generation.
    input_text.submit(
        fn=generate_response,
        inputs=[input_text, model_dropdown],
        outputs=[tokens_per_sec, markdown_component],
    )

# BUG FIX: guard the launch so importing this module doesn't start a server
# (the previous version of this file carried the same guard).
if __name__ == "__main__":
    demo.launch()