# maxtest01 / app.py
# (source: Hugging Face Space by hsuwill000, commit 1394e9d — 1.51 kB)
import gradio as gr
import openvino_genai as ov_genai
import huggingface_hub as hf_hub
import queue
import threading
import time
# Initialize the OpenVINO model:
# download the INT4-quantized Qwen3-0.6B snapshot from the Hugging Face Hub
# into a local directory, then build a CPU inference pipeline from it.
model_id = "OpenVINO/Qwen3-0.6B-int4-ov"
model_path = "Qwen3-0.6B-int4-ov"  # local directory the snapshot is saved to
hf_hub.snapshot_download(model_id, local_dir=model_path)
pipe = ov_genai.LLMPipeline(model_path, "CPU")
# Chat-session mode is intentionally disabled; each prompt is stateless.
#pipe.start_chat()
# 建立推論函式:使用 streamer 並回傳 generator 結果
# Inference function: stream tokens through a queue and yield accumulated text.
def generate_stream(prompt):
    """Run LLM inference on *prompt*, yielding the accumulated output text.

    A background thread drives ``pipe.generate`` with a streaming callback
    that pushes each decoded subword onto a queue; the generator loop in the
    main thread consumes the queue and yields the growing result string so
    the Gradio textbox updates incrementally.
    """
    q = queue.Queue()

    def streamer(subword):
        # Called by the pipeline for each decoded subword.
        print(subword, end='', flush=True)
        q.put(subword)
        return ov_genai.StreamingStatus.RUNNING

    def worker():
        # Run inference in a background thread. The sentinel MUST be
        # enqueued even if generate() raises, otherwise the consumer
        # loop below would block on q.get() forever.
        try:
            pipe.generate([prompt], streamer=streamer, max_new_tokens=4096)
        finally:
            q.put(None)  # end-of-stream sentinel

    # daemon=True so a stuck inference thread cannot block interpreter exit.
    threading.Thread(target=worker, daemon=True).start()

    result = ""
    while True:
        token = q.get()
        if token is None:
            break
        result += token
        yield result  # push the partial result to the output textbox
# Build the Gradio UI: a prompt input, a streaming output box, and a button.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 OpenVINO Streaming Demo with Gradio Textbox")
    prompt_box = gr.Textbox(label="Prompt", lines=1, placeholder="Enter prompt here...")
    output_box = gr.Textbox(label="Output", lines=5)
    submit_btn = gr.Button("Submit")
    # Clicking the button streams generate_stream's partial results
    # into the output textbox.
    submit_btn.click(fn=generate_stream, inputs=prompt_box, outputs=output_box)
demo.launch()