import atexit
from queue import Queue
from threading import Thread

import huggingface_hub as hf_hub
import openvino_genai as ov_genai
import gradio as gr
model_id = "OpenVINO/Qwen3-0.6B-int4-ov"
model_path = "Qwen3-0.6B-int4-ov"
hf_hub.snapshot_download(model_id, local_dir=model_path)

pipe = ov_genai.LLMPipeline(model_path, "CPU")
pipe.start_chat()  # initialize the chat session state
def generate(prompt, history):
    """
    Interact with the LLM and stream the response back to Gradio with `yield`.
    """
    q = Queue()

    def streamer(subword):
        # Called by the pipeline once per generated subword; push each
        # piece onto the queue so the outer generator can yield it.
        q.put(subword)
        return ov_genai.StreamingStatus.RUNNING

    def worker():
        # pipe.generate() blocks until generation finishes, so run it in
        # a background thread and mark completion with a None sentinel.
        pipe.generate(prompt, streamer=streamer, max_new_tokens=100)
        q.put(None)

    Thread(target=worker, daemon=True).start()

    # Gradio passes the chat history in `history`; since start_chat() was
    # called, the pipeline tracks the conversation itself. More elaborate
    # prompt handling (e.g. a custom system prompt) could be added here.
    full_response = ""
    while True:
        subword = q.get()
        if subword is None:  # sentinel: generation is done
            break
        full_response += subword
        yield full_response  # stream the growing response to the UI
def on_close():
    pipe.finish_chat()  # release the chat session when the app shuts down
    print("Chat finished and pipeline closed.")
if __name__ == "__main__":
    demo = gr.ChatInterface(
        generate,
        chatbot=gr.Chatbot(height=300),
        textbox=gr.Textbox(placeholder="Type your message...", container=False, scale=7),
        title="LLM streaming output example (OpenVINO)",
        description="This example shows how to stream OpenVINO GenAI responses through Gradio.",
        theme="soft",
        examples=["Hello", "Please introduce yourself", "How is the weather today?"],
    )
    atexit.register(on_close)  # clean up the pipeline when the process exits
    demo.launch()
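
# If this app runs as a Hugging Face Space, a requirements.txt along these
# lines is assumed (package names inferred from the imports above, not taken
# from the original source):
#
#   huggingface_hub
#   openvino-genai
#   gradio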