# OpenVINO + Gradio token-streaming demo (Hugging Face Space app script).
import queue
import threading
import time

import gradio as gr
import huggingface_hub as hf_hub
import openvino_genai as ov_genai
# --- Model setup -----------------------------------------------------------
# Download the INT4-quantized Qwen3 model from the Hugging Face Hub (no-op if
# the files are already present locally) and build an OpenVINO GenAI text
# generation pipeline running on the CPU.
model_id = "OpenVINO/Qwen3-0.6B-int4-ov"
model_path = "Qwen3-0.6B-int4-ov"
hf_hub.snapshot_download(model_id, local_dir=model_path)
pipe = ov_genai.LLMPipeline(model_path, "CPU")
def generate_stream(prompt):
    """Stream a completion for *prompt*, yielding the accumulated text.

    Inference runs in a background thread; decoded subwords are handed to
    this generator through a queue, and the growing result is yielded after
    each token so Gradio can re-render the output textbox incrementally.

    Args:
        prompt: The user prompt to complete.

    Yields:
        str: The full text generated so far (grows with every token).
    """
    token_queue = queue.Queue()

    def streamer(subword):
        # Invoked by the pipeline for each decoded subword: mirror it to
        # stdout, hand it to the consumer, and tell OpenVINO to keep going.
        print(subword, end='', flush=True)
        token_queue.put(subword)
        return ov_genai.StreamingStatus.RUNNING

    def worker():
        # Run inference off the main thread so the generator below can yield
        # while tokens are still being produced.
        try:
            pipe.generate([prompt], streamer=streamer, max_new_tokens=4096)
        finally:
            # Always enqueue the sentinel — even if generate() raises —
            # otherwise the consumer loop would block on the queue forever.
            token_queue.put(None)

    # daemon=True so a stuck generation cannot keep the process alive on exit.
    threading.Thread(target=worker, daemon=True).start()

    parts = []
    while True:
        token = token_queue.get()
        if token is None:  # sentinel: generation finished
            break
        parts.append(token)
        # join instead of repeated += : linear, and yields the cumulative text
        yield "".join(parts)
# --- UI --------------------------------------------------------------------
# Custom CSS pins the output textarea to a fixed, scrollable 300px box so the
# streaming text does not resize the layout on every update.
with gr.Blocks(css="""
#scrollbox textarea {
    overflow-y: auto !important;
    height: 300px !important;
    resize: none;
    white-space: pre-wrap;
}
""") as demo:
    gr.Markdown("## 🧠 OpenVINO Streaming Demo with Gradio Textbox")
    textbox_input = gr.Textbox(label="Prompt", lines=1, placeholder="Enter prompt here...")
    textbox_output = gr.Textbox(label="Output", elem_id="scrollbox", lines=10)
    # Submit button triggers inference.
    button = gr.Button("Submit")
    # generate_stream is a generator, so each yield updates textbox_output.
    button.click(fn=generate_stream, inputs=textbox_input, outputs=textbox_output)

# Guard the launch so importing this module does not start the server.
if __name__ == "__main__":
    demo.launch()