Spaces:
Running
Running
File size: 1,516 Bytes
259675b 9424b30 5ae289b 6bda22b d45acbf 9424b30 259675b 6bda22b 1353c4b d45acbf 9424b30 259675b d45acbf 6bda22b 259675b 6bda22b afcd897 6bda22b 259675b 6bda22b d45acbf f123151 d45acbf 9424b30 6bda22b 259675b d45acbf 6bda22b d45acbf 3818ee6 a781bb8 d45acbf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
import gradio as gr
import openvino_genai as ov_genai
import huggingface_hub as hf_hub
import queue
import threading
import time
# Initialize the OpenVINO model: download the int4-quantized Qwen3-0.6B
# weights from the Hugging Face Hub and build a CPU inference pipeline.
model_id = "OpenVINO/Qwen3-0.6B-int4-ov"
model_path = "Qwen3-0.6B-int4-ov"
# Download (or reuse a cached copy of) the model files into ./Qwen3-0.6B-int4-ov
hf_hub.snapshot_download(model_id, local_dir=model_path)
# Single process-wide pipeline; generate_stream() below reuses it for every request.
pipe = ov_genai.LLMPipeline(model_path, "CPU")
# Enter chat mode so the pipeline keeps conversation state across generate() calls.
pipe.start_chat()
# 建立推論函式:使用 streamer 並回傳 generator 結果
def generate_stream(prompt):
    """Run inference for *prompt* and yield the progressively growing output text.

    Tokens produced by ``pipe.generate`` are pushed onto a queue from a
    background thread and drained here, so each ``yield`` hands Gradio the
    full text accumulated so far (suitable for live Textbox updates).

    Args:
        prompt: User prompt string passed to the LLM pipeline.

    Yields:
        str: The concatenation of all subwords received so far.
    """
    q = queue.Queue()

    def streamer(subword):
        # Mirror each subword to stdout for debugging, then hand it to the
        # consumer loop via the queue.
        print(subword, end='', flush=True)
        q.put(subword)
        return ov_genai.StreamingStatus.RUNNING

    def worker():
        # Run inference in a background thread. The sentinel MUST be enqueued
        # even if generate() raises, otherwise the consumer loop below would
        # block on q.get() forever.
        try:
            pipe.generate([prompt], streamer=streamer, max_new_tokens=4096)
        finally:
            q.put(None)  # sentinel: generation finished (or failed)

    # daemon=True so a stuck/long generation cannot block interpreter shutdown.
    threading.Thread(target=worker, daemon=True).start()

    result = ""
    while True:
        token = q.get()
        if token is None:
            break
        result += token
        yield result  # stream the partial result to the output textbox
# Build the Gradio UI: one input textbox, one streaming output textbox,
# and a submit button wired to generate_stream (a generator, so the
# output textbox updates incrementally as tokens arrive).
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 OpenVINO Streaming Demo with Gradio Textbox")
    textbox_input = gr.Textbox(label="Prompt", lines=1, placeholder="Enter prompt here...")
    # NOTE(review): `live=True` is not a documented gr.Textbox constructor
    # parameter — verify against the installed Gradio version; streaming here
    # actually comes from generate_stream being a generator.
    textbox_output = gr.Textbox(label="Output", lines=10,live=True)
    # Button that triggers inference
    button = gr.Button("Submit")
    # On click, call generate_stream and stream its yields into textbox_output
    button.click(fn=generate_stream, inputs=textbox_input, outputs=textbox_output)
# Start the Gradio server (blocks until shut down).
demo.launch()
|