"""Gradio demo that streams text generated by an OpenVINO GenAI LLM pipeline.

Importing/running this module downloads the model snapshot from the Hugging
Face Hub, builds a CPU ``LLMPipeline`` and launches the Gradio app.
"""

import queue
import threading
import time
from collections.abc import Iterator

import gradio as gr
import huggingface_hub as hf_hub
import openvino_genai as ov_genai

# Initialise the OpenVINO model: fetch the snapshot into a local directory
# and build the pipeline on the CPU device.
model_id = "OpenVINO/Qwen3-0.6B-int4-ov"
model_path = "Qwen3-0.6B-int4-ov"
# Alternative model (swap both values to use it):
#   model_id = "OpenVINO/Phi-4-mini-instruct-int4-ov"
#   model_path = "Phi-4-mini-instruct-int4-ov"

hf_hub.snapshot_download(model_id, local_dir=model_path)

pipe = ov_genai.LLMPipeline(model_path, "CPU")
# Chat mode is not started here; it is toggled from the UI buttons below.

# Custom CSS that gives the output textbox a fixed height and a scrollbar.
# Built by concatenation so the runtime value stays a single-line string.
SCROLLBOX_CSS = (
    " #scrollbox textarea {"
    " overflow-y: auto !important;"
    " height: 300px !important;"
    " resize: none;"
    " white-space: pre-wrap;"
    " } "
)


def start_chat() -> str:
    """Put the pipeline into chat mode and return a status message."""
    pipe.start_chat()
    return "✅ 開始對話!"


def finish_chat() -> str:
    """Leave the pipeline's chat mode and return a status message."""
    pipe.finish_chat()
    return "🛑 結束對話!"


def generate_stream(prompt: str) -> Iterator[tuple[str, str]]:
    """Run inference in a background thread and stream the growing output.

    Args:
        prompt: The user prompt. ``"/no_think"`` is appended before it is
            sent to the model (presumably Qwen3's switch for disabling
            "thinking" output -- confirm against the model card).

    Yields:
        ``(text_so_far, "")`` after every streamed chunk, then a final
        ``(full_text, "<tps> tokens/s")`` pair once generation has finished.

    Raises:
        gr.Error: If the background generation fails.
    """
    q = queue.Queue()
    tps_result = ""
    failure = None

    def streamer(subword):
        # Echo to the console and hand the chunk over to the generator.
        print(subword, end='', flush=True)
        q.put(subword)
        return ov_genai.StreamingStatus.RUNNING

    def worker():
        # Perform the inference in a background thread.
        nonlocal tps_result, failure
        try:
            # Use a new name: assigning to ``prompt`` here would make it a
            # local of ``worker`` and raise UnboundLocalError on the read.
            full_prompt = prompt + "/no_think"
            gen_result = pipe.generate(
                [full_prompt], streamer=streamer, max_new_tokens=32767
            )
            tps = gen_result.perf_metrics.get_throughput().mean
            tps_result = f"{tps:.2f} tokens/s"
        except Exception as exc:  # thread boundary: report via the generator
            failure = exc
        finally:
            # End-of-stream sentinel; always sent so the consumer below can
            # never block forever on ``q.get()``.
            q.put(None)

    threading.Thread(target=worker).start()

    result = ""
    while True:
        token = q.get()
        if token is None:
            break
        result += token
        yield result, ""  # push the partial result to the output textbox

    if failure is not None:
        raise gr.Error(f"Generation failed: {failure}") from failure
    yield result, tps_result


with gr.Blocks(css=SCROLLBOX_CSS) as demo:
    gr.Markdown("## 🧠 OpenVINO Streaming Demo with Gradio Textbox")
    with gr.Row():
        start_btn = gr.Button("開始對話")
        end_btn = gr.Button("結束對話")
        status_box = gr.Textbox(label="狀態", max_lines=1, max_length=1, interactive=False)
        TPS_box = gr.Textbox(label="TPS", interactive=False)

    start_btn.click(fn=start_chat, outputs=status_box)
    end_btn.click(fn=finish_chat, outputs=status_box)

    textbox_input = gr.Textbox(label="Prompt", lines=1, placeholder="Enter prompt here...")
    textbox_output = gr.Textbox(label="Output", elem_id="scrollbox", lines=10)

    # Button that triggers inference.
    button = gr.Button("Submit")

    # On click, call generate_stream and stream its results into
    # textbox_output and TPS_box.
    button.click(fn=generate_stream, inputs=textbox_input, outputs=[textbox_output, TPS_box])

demo.launch()