hsuwill000 committed
Commit a529429 · verified · 1 Parent(s): d32e032

Update app.py

Files changed (1):
  1. app.py (+25 -1)

app.py CHANGED
@@ -4,6 +4,7 @@ import openvino_genai as ov_genai
 import numpy as np
 import gradio as gr
 import re
+import threading
 
 # Download the models
 model_ids = [
@@ -28,6 +29,28 @@
 # Build the inference pipeline (Initialize with a default model first)
 device = "CPU"
 default_model_name = "Qwen3-0.6B-int4-ov" # Choose a default model
+# Global state: inference pipeline, tokenizer, Markdown component, and accumulated text
+pipe = None
+tokenizer = None
+markdown_component = None # initialized when the UI is built
+accumulated_text = ""
+
+
+# Synchronously update the Markdown component
+def update_markdown(text):
+    global markdown_component
+    if markdown_component:
+        markdown_component.update(value=text)
+
+# Streamer callback (keeps the original architecture)
+def streamer(subword):
+    global accumulated_text
+    accumulated_text += subword
+    print(subword, end='', flush=True) # keep echoing to the console
+    # Use a thread to update the Markdown component asynchronously
+    threading.Thread(target=update_markdown, args=(accumulated_text,)).start() # async UI update
+    return ov_genai.StreamingStatus.RUNNING
+
 
 def generate_response(prompt, model_name):
     global pipe, tokenizer # Access the global variables
@@ -40,7 +63,8 @@ def generate_response(prompt, model_name):
     tokenizer.set_chat_template(tokenizer.chat_template)
 
     try:
-        generated = pipe.generate([prompt], max_length=1024)
+        #generated = pipe.generate([prompt], max_length=1024)
+        generated = pipe.generate(prompt, streamer=streamer, max_new_tokens=100)
         tokenpersec=f'{generated.perf_metrics.get_throughput().mean:.2f}'
 
         return tokenpersec, generated
 
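A note on the streaming approach above: markdown_component.update(value=...) called from a background thread builds an update payload but does not re-render a running Gradio app, since Gradio only refreshes a component when an event handler returns or yields a value for it. Below is a minimal generator-based sketch of the same token-streaming idea, assuming the module-level pipe from this commit; prompt_box and output_md are hypothetical component names:

import queue
import threading

import openvino_genai as ov_genai

def generate_stream(prompt):
    # Bridge the blocking pipe.generate() call to Gradio's generator protocol.
    q = queue.Queue()

    def streamer(subword):
        q.put(subword)  # hand each new token to the generator loop below
        return ov_genai.StreamingStatus.RUNNING

    def worker():
        # `pipe` is the module-level ov_genai.LLMPipeline assumed from this commit
        pipe.generate(prompt, streamer=streamer, max_new_tokens=100)
        q.put(None)  # sentinel: generation finished

    threading.Thread(target=worker, daemon=True).start()

    text = ""
    while (subword := q.get()) is not None:
        text += subword
        yield text  # each yield re-renders the bound output component

# Hypothetical wiring:
# prompt_box.submit(generate_stream, inputs=prompt_box, outputs=output_md)

This uses one worker thread per request instead of one thread per subword and removes the need for the markdown_component and accumulated_text globals. Note also that max_new_tokens=100 caps replies at 100 new tokens, far below the previous max_length=1024.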