File size: 1,516 Bytes
259675b
9424b30
5ae289b
6bda22b
d45acbf
9424b30
259675b
6bda22b
1353c4b
 
 
d45acbf
9424b30
 
259675b
d45acbf
 
6bda22b
259675b
6bda22b
afcd897
6bda22b
 
259675b
6bda22b
d45acbf
f123151
d45acbf
9424b30
6bda22b
259675b
d45acbf
6bda22b
 
 
 
d45acbf
 
 
 
 
 
 
3818ee6
a781bb8
d45acbf
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import gradio as gr
import openvino_genai as ov_genai
import huggingface_hub as hf_hub
import queue
import threading
import time

# Initialize the OpenVINO model: download the int4-quantized Qwen3 weights
# from the Hugging Face Hub into a local directory, then build an LLM
# pipeline on CPU. `pipe` is a module-level global consumed by
# generate_stream() below.
model_id = "OpenVINO/Qwen3-0.6B-int4-ov"
model_path = "Qwen3-0.6B-int4-ov"
hf_hub.snapshot_download(model_id, local_dir=model_path)

pipe = ov_genai.LLMPipeline(model_path, "CPU")
# Enter chat mode so conversation history is kept across generate() calls.
pipe.start_chat()

# Inference function: streams tokens through a queue and yields the
# accumulated text so the Gradio output textbox updates incrementally.
def generate_stream(prompt):
    """Generate a response for *prompt*, yielding the growing result string.

    Runs `pipe.generate` in a background thread; each streamed subword is
    pushed onto a queue, and this generator yields the text accumulated so
    far after every token (suitable as a Gradio streaming callback).
    """
    q = queue.Queue()

    def streamer(subword):
        # Mirror each subword to stdout and hand it to the consumer loop.
        print(subword, end='', flush=True)
        q.put(subword)
        return ov_genai.StreamingStatus.RUNNING

    def worker():
        # Run inference in a background thread. The sentinel MUST be
        # enqueued even if generate() raises, otherwise the consumer
        # below deadlocks on q.get() forever.
        try:
            pipe.generate([prompt], streamer=streamer, max_new_tokens=4096)
        finally:
            q.put(None)  # end-of-stream sentinel

    # daemon=True so a stuck generation cannot keep the process alive
    # after the main thread exits.
    threading.Thread(target=worker, daemon=True).start()

    result = ""
    while True:
        token = q.get()
        if token is None:
            break
        result += token
        yield result  # push the partial result to the output textbox

# Build the Gradio UI: a prompt box, a streaming output box, and a
# submit button wired to generate_stream.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 OpenVINO Streaming Demo with Gradio Textbox")

    prompt_box = gr.Textbox(label="Prompt", lines=1, placeholder="Enter prompt here...")
    # NOTE(review): `live` is not a documented gr.Textbox parameter in
    # recent Gradio releases — kept as-is to preserve behavior; verify
    # against the installed Gradio version.
    output_box = gr.Textbox(label="Output", lines=10, live=True)

    submit_btn = gr.Button("Submit")

    # Clicking the button streams generate_stream's partial results
    # into the output textbox.
    submit_btn.click(fn=generate_stream, inputs=prompt_box, outputs=output_box)

demo.launch()