Spaces:

hsuwill000
/

qwen3_test

Running

App Files Files Community

hsuwill000 committed on Jun 16

Commit

d32e032

verified ·

1 Parent(s): da55442

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -53

app.py CHANGED Viewed

@@ -4,7 +4,6 @@ import openvino_genai as ov_genai
 import numpy as np
 import gradio as gr
 import re
-import threading
 # 下載模型
 model_ids = [
@@ -13,9 +12,9 @@ model_ids = [
     #"OpenVINO/Qwen3-4B-int4-ov",#不可用
     "OpenVINO/Qwen3-8B-int4-ov",
     "OpenVINO/Qwen3-14B-int4-ov",
 ]
 model_name_to_full_id = {model_id.split("/")[-1]: model_id for model_id in model_ids}  #Create Dictionary
 for model_id in model_ids:
@@ -30,66 +29,41 @@ for model_id in model_ids:
 device = "CPU"
 default_model_name = "Qwen3-0.6B-int4-ov"  # Choose a default model
-# 全局变量，用于存储推理管线、分词器、Markdown 组件和累计文本
-pipe = None
-tokenizer = None
-markdown_component = None  # 初始化
-accumulated_text = ""
-#  定义同步更新 Markdown 组件的函数
-def update_markdown(text):
-    global markdown_component
-    if markdown_component:
-        markdown_component.update(value=text)
-# 创建 streamer 函数 (保持原有架构)
-def streamer(subword):
-    global accumulated_text
-    accumulated_text += subword
-    print(subword, end='', flush=True)  # 保留打印到控制台
-    #  使用线程来异步更新 Markdown 组件
-    threading.Thread(target=update_markdown, args=(accumulated_text,)).start() # 异步更新 UI
-    return ov_genai.StreamingStatus.RUNNING
 def generate_response(prompt, model_name):
-    global pipe, tokenizer, markdown_component, accumulated_text  # Access the global variables
     model_path = model_name
     print(f"Switching to model: {model_name}")
-    try:
-        pipe = ov_genai.LLMPipeline(model_path, device)
-        tokenizer = pipe.get_tokenizer()
-        tokenizer.set_chat_template(tokenizer.chat_template)
-    except Exception as e:
-        print(f"Error initializing pipeline: {e}")
-        return "初始化推理管線錯誤", "生成回應時發生錯誤", ""  # 初始化失败时返回
-    accumulated_text = "" # 重置
-    if markdown_component:
-        markdown_component.update(value="") # 清空上一次的输出
     try:
-        #generated = pipe.generate([prompt], max_length=1024)
-        pipe.start_chat()
-        pipe.generate(prompt, streamer=streamer, max_new_tokens=100)
-        pipe.finish_chat()
-        tokenpersec=f'{pipe.perf_metrics.get_throughput().mean:.2f}'
-        return f"**{tokenpersec} tokens/sec**", accumulated_text #tokenpersec, accumulated_text
     except Exception as e:
-        print(f"Error during generation: {e}")
-        return "發生錯誤", f"生成回應時發生錯誤：{e}" #""
 # 建立 Gradio 介面
 model_choices = list(model_name_to_full_id.keys())
-with gr.Blocks() as demo:
-    input_text = gr.Textbox(lines=5, label="輸入提示 (Prompt)")
-    model_dropdown = gr.Dropdown(choices=model_choices, value=default_model_name, label="選擇模型")
-    tokens_per_sec = gr.Markdown(label="tokens/sec") #gr.Textbox(label="tokens/sec") #md
-    markdown_component = gr.Markdown(label="回应") # 初始化 markdown_component
-    #
-    input_text.submit(fn=generate_response, inputs=[input_text, model_dropdown], outputs=[tokens_per_sec, markdown_component])
-demo.launch()

 import numpy as np
 import gradio as gr
 import re
 # 下載模型
 model_ids = [
     #"OpenVINO/Qwen3-4B-int4-ov",#不可用
     "OpenVINO/Qwen3-8B-int4-ov",
     "OpenVINO/Qwen3-14B-int4-ov",
 ]
 model_name_to_full_id = {model_id.split("/")[-1]: model_id for model_id in model_ids}  #Create Dictionary
 for model_id in model_ids:
 device = "CPU"
 default_model_name = "Qwen3-0.6B-int4-ov"  # Choose a default model
 def generate_response(prompt, model_name):
     """Load the selected model and generate a reply to ``prompt``.

     Args:
         prompt: Prompt text entered by the user.
         model_name: Model name selected in the UI; it is passed unchanged
             to ``ov_genai.LLMPipeline`` as the model path.

     Returns:
         A 2-tuple ``(tokens_per_sec, response)``. On success the first item
         is the mean throughput formatted with two decimals and the second is
         the object returned by ``pipe.generate``. On any failure the first
         item is a short error marker and the second is the error message.
     """
     # The pipeline and tokenizer are rebound at module level on every call.
     global pipe, tokenizer
     model_path = model_name
     print(f"Switching to model: {model_name}")
     try:
         # Building the pipeline is inside the ``try`` so a model that fails
         # to load is reported through the normal error return instead of
         # escaping as an unhandled exception.
         pipe = ov_genai.LLMPipeline(model_path, device)
         tokenizer = pipe.get_tokenizer()
         tokenizer.set_chat_template(tokenizer.chat_template)
         generated = pipe.generate([prompt], max_length=1024)
         tokenpersec = f'{generated.perf_metrics.get_throughput().mean:.2f}'
         return tokenpersec, generated
     except Exception as e:
         # Return exactly two values, like the success path; the previous
         # three-value return did not match the two output fields.
         return "發生錯誤", f"生成回應時發生錯誤：{e}"
 # Build the Gradio interface.
 model_choices = list(model_name_to_full_id)

 # Input widgets: the free-text prompt and the model selector.
 prompt_input = gr.Textbox(lines=5, label="輸入提示 (Prompt)")
 model_selector = gr.Dropdown(
     choices=model_choices,
     value=default_model_name,
     label="選擇模型",
 )

 # Output widgets, in the order generate_response returns its values.
 throughput_output = gr.Textbox(label="tokens/sec")
 response_output = gr.Textbox(label="回應")

 demo = gr.Interface(
     fn=generate_response,
     inputs=[prompt_input, model_selector],
     outputs=[throughput_output, response_output],
     title="Qwen3 Model Inference",
     description="基於 Qwen3 推理應用，支援思考過程分離與 GUI。",
 )

 # Launch the app only when this file is run as a script.
 if __name__ == "__main__":
     demo.launch()