Update app.py
app.py CHANGED
```diff
@@ -4,6 +4,7 @@ import openvino_genai as ov_genai
 import numpy as np
 import gradio as gr
 import re
+import threading
 
 # Download the models
 model_ids = [
@@ -12,7 +13,6 @@ model_ids = [
     #"OpenVINO/Qwen3-4B-int4-ov",  # not usable
     "OpenVINO/Qwen3-8B-int4-ov",
     "OpenVINO/Qwen3-14B-int4-ov",
-
 ]
 
 
@@ -30,41 +30,66 @@ for model_id in model_ids:
 device = "CPU"
 default_model_name = "Qwen3-0.6B-int4-ov"  # Choose a default model
 
+# Global variables holding the inference pipeline, tokenizer, Markdown component and accumulated text
+pipe = None
+tokenizer = None
+markdown_component = None  # initialised when the UI is built
+accumulated_text = ""
+
+
+# Synchronously update the Markdown component
+def update_markdown(text):
+    global markdown_component
+    if markdown_component:
+        markdown_component.update(value=text)
+
+# Streamer callback (keeps the original structure)
+def streamer(subword):
+    global accumulated_text
+    accumulated_text += subword
+    print(subword, end='', flush=True)  # keep printing to the console
+    # Update the Markdown component asynchronously from a worker thread
+    threading.Thread(target=update_markdown, args=(accumulated_text,)).start()
+    return ov_genai.StreamingStatus.RUNNING
+
 def generate_response(prompt, model_name):
-    global pipe, tokenizer  # Access the global variables
+    global pipe, tokenizer, markdown_component, accumulated_text  # Access the global variables
 
     model_path = model_name
 
     print(f"Switching to model: {model_name}")
-    pipe = ov_genai.LLMPipeline(model_path, device)
-    tokenizer = pipe.get_tokenizer()
-    tokenizer.set_chat_template(tokenizer.chat_template)
-
     try:
-
-
+        pipe = ov_genai.LLMPipeline(model_path, device)
+        tokenizer = pipe.get_tokenizer()
+        tokenizer.set_chat_template(tokenizer.chat_template)
+    except Exception as e:
+        print(f"Error initializing pipeline: {e}")
+        return "Error initializing the inference pipeline", "An error occurred while generating the response"
+
+    accumulated_text = ""  # reset the accumulated output
+    if markdown_component:
+        markdown_component.update(value="")  # clear the previous output
 
-
+    try:
+        #generated = pipe.generate([prompt], max_length=1024)
+        pipe.start_chat()
+        pipe.generate(prompt, streamer=streamer, max_new_tokens=100)
+        pipe.finish_chat()
+        tokenpersec = f'{pipe.perf_metrics.get_throughput().mean:.2f}'
+        return f"**{tokenpersec} tokens/sec**", accumulated_text
     except Exception as e:
-
+        print(f"Error during generation: {e}")
+        return "An error occurred", f"Error while generating the response: {e}"
 
-
 # Build the Gradio interface
 model_choices = list(model_name_to_full_id.keys())
 
-
-
-
-
-
-
-    outputs=[
-
-
-    ],
-    title="Qwen3 Model Inference",
-    description="A Qwen3 inference app that separates the model's reasoning from its answer and provides a GUI."
-    )
-
-if __name__ == "__main__":
-    demo.launch()
+with gr.Blocks() as demo:
+    input_text = gr.Textbox(lines=5, label="Prompt")
+    model_dropdown = gr.Dropdown(choices=model_choices, value=default_model_name, label="Select model")
+    tokens_per_sec = gr.Markdown(label="tokens/sec")
+    markdown_component = gr.Markdown(label="Response")
+
+    input_text.submit(fn=generate_response, inputs=[input_text, model_dropdown], outputs=[tokens_per_sec, markdown_component])
+
+demo.launch()
```
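The hunks above reference a `for model_id in model_ids:` loop and a `model_name_to_full_id` mapping that are defined outside the changed region and not shown in this diff. For orientation only, here is a minimal sketch of what that setup plausibly looks like; it assumes `huggingface_hub.snapshot_download` fetches each model into a local directory whose short name is what `generate_response` later passes to `LLMPipeline`. This is an illustration, not the committed code.

```python
# Illustrative sketch only: resolve model_ids into local directories and build
# the model_name_to_full_id map used by the Gradio dropdown.
from huggingface_hub import snapshot_download

model_ids = [
    "OpenVINO/Qwen3-0.6B-int4-ov",
    "OpenVINO/Qwen3-8B-int4-ov",
    "OpenVINO/Qwen3-14B-int4-ov",
]

model_name_to_full_id = {}
for model_id in model_ids:
    short_name = model_id.split("/")[-1]                 # e.g. "Qwen3-0.6B-int4-ov"
    snapshot_download(model_id, local_dir=short_name)    # download into ./<short_name>
    model_name_to_full_id[short_name] = model_id         # dropdown label -> full repo id
```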
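One caveat about the streaming approach in this change: calling `markdown_component.update(value=...)` from a worker thread mutates the component object but does not, by itself, push anything to the browser, and that `update()` call pattern has been removed in recent Gradio releases. A common alternative is to make the event handler a generator and let Gradio re-render the outputs on every `yield`. The sketch below is illustrative only; the queue/thread wiring, the name `generate_response_streaming`, and the placeholder model directory are assumptions, not part of the commit.

```python
import queue
import threading

import gradio as gr
import openvino_genai as ov_genai

def generate_response_streaming(prompt, model_name):
    # model_name is assumed to be a local model directory, as in the app
    pipe = ov_genai.LLMPipeline(model_name, "CPU")
    chunks = queue.Queue()  # decoded text pieces handed over by the streamer

    def on_chunk(subword):
        chunks.put(subword)
        return ov_genai.StreamingStatus.RUNNING  # return STOP to cancel early

    def run():
        pipe.start_chat()
        pipe.generate(prompt, streamer=on_chunk, max_new_tokens=100)
        pipe.finish_chat()
        chunks.put(None)  # sentinel: generation finished

    threading.Thread(target=run, daemon=True).start()

    text = ""
    while (piece := chunks.get()) is not None:
        text += piece
        yield "generating...", text   # Gradio re-renders both outputs on each yield
    yield "done", text

with gr.Blocks() as demo:
    prompt_box = gr.Textbox(lines=5, label="Prompt")
    model_box = gr.Textbox(value="Qwen3-0.6B-int4-ov", label="Model directory")
    speed_md = gr.Markdown()
    reply_md = gr.Markdown()
    prompt_box.submit(generate_response_streaming, [prompt_box, model_box], [speed_md, reply_md])

demo.launch()
```

With this shape the handler still drives the same two outputs as in the commit, but no global mutable state or extra UI-update thread is needed.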