File size: 2,076 Bytes
259675b
9424b30
6bda22b
d45acbf
9424b30
259675b
b307fb6
6bda22b
fd95a73
 
1353c4b
d45acbf
9424b30
3e39aa5
259675b
d7d5739
 
 
 
 
 
 
 
3e39aa5
d45acbf
6bda22b
259675b
6bda22b
3e39aa5
6bda22b
 
259675b
6bda22b
3e39aa5
f123151
3e39aa5
9424b30
6bda22b
259675b
d45acbf
6bda22b
 
 
 
d45acbf
3e39aa5
 
d45acbf
cbbc528
 
 
 
 
 
 
 
3e39aa5
d7d5739
 
 
1e27ea1
 
d7d5739
 
1e27ea1
3818ee6
cbbc528
d45acbf
3e39aa5
b307fb6
d45acbf
3e39aa5
 
b307fb6
3e39aa5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import gradio as gr
import openvino_genai as ov_genai
import queue
import threading
import time

import huggingface_hub as hf_hub
# Initialize the OpenVINO model: fetch the int4-quantized Qwen3-8B weights
# from the Hugging Face Hub into a local directory, then build a CPU
# text-generation pipeline shared by all handlers below.
model_id = "OpenVINO/Qwen3-8B-int4-ov"
model_path = "Qwen3-8B-int4-ov"
hf_hub.snapshot_download(model_id, local_dir=model_path)

pipe = ov_genai.LLMPipeline(model_path, "CPU")
# NOTE(review): chat mode is not started here — the UI's start button
# calls pipe.start_chat() explicitly instead.
#pipe.start_chat()

def start_chat():
    """Open a chat session on the shared OpenVINO pipeline.

    Returns:
        A status string shown in the Gradio status textbox.
    """
    status = "✅ 開始對話!"
    pipe.start_chat()
    return status

def finish_chat():
    """Close the current chat session on the shared OpenVINO pipeline.

    Returns:
        A status string shown in the Gradio status textbox.
    """
    status = "🛑 結束對話!"
    pipe.finish_chat()
    return status
	
# Inference function: runs generation in a background thread and streams
# partial results back through a queue as a Python generator.
def generate_stream(prompt):
    """Generate a response for *prompt*, yielding the cumulative text.

    A background thread drives ``pipe.generate`` with a streamer callback
    that forwards each decoded subword through a queue; this generator
    consumes the queue and yields the growing output string so a Gradio
    Textbox can render partial results.

    Args:
        prompt: The user prompt string to generate from.

    Yields:
        The accumulated generated text after each streamed subword.
    """
    q = queue.Queue()

    def streamer(subword):
        # Called by the pipeline for every decoded subword.
        print(subword, end='', flush=True)
        q.put(subword)
        return ov_genai.StreamingStatus.RUNNING

    def worker():
        # Run inference off the calling thread so this generator can yield.
        try:
            pipe.generate([prompt], streamer=streamer, max_new_tokens=4096)
        finally:
            # Always enqueue the end-of-stream sentinel — even if
            # generate() raises — otherwise the consumer loop below
            # would block on q.get() forever.
            q.put(None)

    # Daemon thread: a stuck generation must not prevent interpreter exit.
    threading.Thread(target=worker, daemon=True).start()

    # Accumulate subwords in a list and join per yield, avoiding the
    # quadratic cost of repeated string concatenation.
    parts = []
    while True:
        token = q.get()
        if token is None:  # sentinel: generation finished (or failed)
            break
        parts.append(token)
        yield "".join(parts)  # push the step-wise result to the output textbox


# Build the Gradio UI. The CSS pins the output textbox to a fixed height
# with its own scrollbar so streamed text scrolls instead of growing.
with gr.Blocks(css="""
#scrollbox textarea {
    overflow-y: auto !important;
    height: 300px !important;
    resize: none;
    white-space: pre-wrap;
}
""") as demo:
    gr.Markdown("## 🧠 OpenVINO Streaming Demo with Gradio Textbox")

    # Buttons that open/close the pipeline's chat session.
    start_btn = gr.Button("開始對話")
    end_btn = gr.Button("結束對話")

    # Read-only box showing the status string returned by the handlers.
    status_box = gr.Textbox(label="狀態", interactive=False)
    start_btn.click(fn=start_chat, outputs=status_box)
    end_btn.click(fn=finish_chat, outputs=status_box)

    textbox_input = gr.Textbox(label="Prompt", lines=1, placeholder="Enter prompt here...")
    textbox_output = gr.Textbox(label="Output", elem_id="scrollbox", lines=10)

    # Button that triggers inference.
    button = gr.Button("Submit")

    # On click, call generate_stream; because it is a generator, Gradio
    # streams each yielded value into textbox_output incrementally.
    button.click(fn=generate_stream, inputs=textbox_input, outputs=textbox_output)

demo.launch()