import atexit
import queue
import threading

import gradio as gr
import huggingface_hub as hf_hub
import openvino_genai as ov_genai

model_id = "OpenVINO/Qwen3-0.6B-int4-ov"
model_path = "Qwen3-0.6B-int4-ov"

# Fetch the pre-quantized (int4) OpenVINO model from the Hugging Face Hub.
hf_hub.snapshot_download(model_id, local_dir=model_path)


pipe = ov_genai.LLMPipeline(model_path, "CPU")
pipe.start_chat()  # Initialize chat state; the pipeline tracks conversation history internally
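
# Generation parameters can also be grouped into a GenerationConfig object
# instead of being passed as keyword arguments to pipe.generate(); a minimal
# sketch (equivalent to the max_new_tokens=100 call used below):
#
#   config = ov_genai.GenerationConfig()
#   config.max_new_tokens = 100
#   pipe.generate(prompt, config, streamer=streamer)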

def generate(prompt, history):
    """
    Send the prompt to the LLM and stream the reply back to Gradio via `yield`.
    """
    tokens = queue.Queue()

    def streamer(subword):
        # Called by openvino_genai for every decoded subword; hand it to the
        # queue. (The original version yielded from this callback, which turns
        # it into a generator that never runs; a plain callable is required.)
        tokens.put(subword)
        return ov_genai.StreamingStatus.RUNNING

    def worker():
        # pipe.generate() blocks until generation ends, so run it in a thread
        # and let the streamer feed partial results back through the queue.
        pipe.generate(prompt, streamer=streamer, max_new_tokens=100)
        tokens.put(None)  # sentinel: generation finished

    threading.Thread(target=worker, daemon=True).start()

    # Gradio passes in `history`, which could be used to build richer prompts
    # (e.g. a system prompt, or the full past conversation). Here start_chat()
    # already keeps the history inside the pipeline; see the sketch after this
    # function for a manual alternative.
    full_response = ""
    while True:
        subword = tokens.get()
        if subword is None:
            break
        full_response += subword
        yield full_response  # Gradio re-renders the chat bubble with the partial text

    # After generation finishes you could add logic here, e.g. logging the
    # conversation or updating state.
    # ...
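
# A minimal sketch of the manual prompt construction mentioned above. It is
# only needed if you skip start_chat() and manage the conversation yourself,
# and it assumes Gradio's tuple-style history (a list of (user, assistant)
# pairs); the flat "User:/Assistant:" format is an illustrative assumption,
# not the model's actual chat template.
def build_prompt(prompt, history):
    parts = []
    for user_msg, bot_msg in history:
        parts.append(f"User: {user_msg}")
        parts.append(f"Assistant: {bot_msg}")
    parts.append(f"User: {prompt}")
    return "\n".join(parts)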

def on_close():
    pipe.finish_chat()  # Release the chat state when the app shuts down
    print("Chat finished and pipeline closed.")


if __name__ == "__main__":
    demo = gr.ChatInterface(
        generate,
        chatbot=gr.Chatbot(height=300),
        textbox=gr.Textbox(placeholder="Type your message...", container=False, scale=7),
        title="LLM Streaming Demo (OpenVINO)",
        description="This demo shows how to stream OpenVINO GenAI responses through Gradio.",
        theme="soft",
        examples=["Hello", "Please introduce yourself", "How is the weather today?"],
    )
    # demo.close() does not take a callback, so register the cleanup to run
    # when the process exits instead.
    atexit.register(on_close)
    demo.launch()
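
# Usage: run this script with `python <script>.py`; Gradio serves the UI at
# http://127.0.0.1:7860 by default.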