File size: 1,462 Bytes
259675b
9424b30
5ae289b
6bda22b
d45acbf
9424b30
259675b
6bda22b
1353c4b
 
 
d45acbf
9424b30
 
259675b
d45acbf
 
6bda22b
259675b
6bda22b
 
 
259675b
6bda22b
d45acbf
 
 
9424b30
6bda22b
259675b
d45acbf
6bda22b
 
 
 
d45acbf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import gradio as gr
import openvino_genai as ov_genai
import huggingface_hub as hf_hub
import queue
import threading
import time

# Initialize the OpenVINO model: fetch the int4-quantized Qwen3 snapshot
# from the Hugging Face Hub into a local directory (no-op if already cached).
model_id = "OpenVINO/Qwen3-0.6B-int4-ov"
model_path = "Qwen3-0.6B-int4-ov"
hf_hub.snapshot_download(model_id, local_dir=model_path)

# Build the LLM pipeline on CPU and open a chat session so conversation
# state is kept across generate() calls.
pipe = ov_genai.LLMPipeline(model_path, "CPU")
pipe.start_chat()

# 建立推論函式:使用 streamer 並回傳 generator 結果
# Inference function: runs generation in a background thread and yields
# the accumulated text so Gradio can stream it into the output textbox.
def generate_stream(prompt):
    """Generate a response for *prompt*, yielding cumulative partial output.

    Runs ``pipe.generate`` on a background thread; the pipeline's streamer
    callback pushes each decoded subword onto a queue, and this generator
    drains the queue, yielding the text accumulated so far after each token.

    Args:
        prompt: User prompt string to generate from.

    Yields:
        str: The generated text accumulated so far (grows each step).
    """
    q = queue.Queue()

    def streamer(subword):
        # Invoked by the pipeline per decoded subword; hand it to the
        # consumer loop and tell the pipeline to keep generating.
        q.put(subword)
        return ov_genai.StreamingStatus.RUNNING

    def worker():
        # Run inference off the caller's thread so partial results can be
        # yielded while tokens are still being produced.
        try:
            pipe.generate([prompt], streamer=streamer, max_new_tokens=512)
        finally:
            # Always enqueue the end-of-stream sentinel, even when
            # generate() raises — otherwise the q.get() loop below would
            # block forever waiting for a sentinel that never arrives.
            q.put(None)

    # daemon=True so a stuck generation cannot keep the process alive
    # after the main thread exits.
    threading.Thread(target=worker, daemon=True).start()

    result = ""
    while True:
        token = q.get()
        if token is None:  # end-of-stream sentinel from worker()
            break
        result += token
        yield result  # push the cumulative text to the output textbox


with gr.Blocks() as demo:
    gr.Markdown("## 🧠 OpenVINO Streaming Demo with Gradio Textbox")

    # Prompt input and streamed-output display.
    textbox_input = gr.Textbox(label="Prompt", lines=3, placeholder="Enter prompt here...")
    textbox_output = gr.Textbox(label="Output", lines=10)

    # Button that triggers inference.
    button = gr.Button("Submit")

    # On click, call generate_stream and update textbox_output. Because
    # generate_stream is a generator, Gradio streams each yielded value
    # into the output textbox as it arrives.
    button.click(fn=generate_stream, inputs=textbox_input, outputs=textbox_output)

demo.launch()