# Hugging Face Space (status: Running)
import gradio as gr | |
import openvino_genai as ov_genai | |
import queue | |
import threading | |
import time | |
from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig | |
import nncf | |
import huggingface_hub as hf_hub | |
# Initialize the OpenVINO model: download the repository named by `model_id`
# from the Hugging Face Hub into the local `model_path` directory.
model_id = "OpenVINO/Qwen2.5-7B-Instruct-int4-ov"
model_path = "ov"
hf_hub.snapshot_download(model_id, local_dir=model_path)

# Generation settings passed to every `pipe.generate` call in this file.
config = ov_genai.GenerationConfig()
config.max_new_tokens = 4096
config.top_p = 0.9
config.top_k = 30

# Build the pipeline on the CPU device. Chat mode is deliberately not started
# here; it is toggled on demand through start_chat() / finish_chat().
pipe = ov_genai.LLMPipeline(model_path, "CPU")
def start_chat():
    """Call ``pipe.start_chat()`` and return the status text shown in the UI."""
    pipe.start_chat()
    status = "✅ 開始對話!"
    return status
def finish_chat():
    """Call ``pipe.finish_chat()`` and return the status text shown in the UI."""
    pipe.finish_chat()
    status = "🛑 結束對話!"
    return status
# Inference function: runs generation with a streamer callback and exposes the
# partial output as a generator.
def generate_stream(prompt):
    """Stream the model's answer to *prompt* while it is being generated.

    Generation runs on a background thread. Each sub-word handed to the
    streamer callback is forwarded through a queue to this generator, which
    yields the accumulated text after every sub-word.

    Args:
        prompt: Prompt text; passed to ``pipe.generate`` as a one-element list.

    Yields:
        ``(text_so_far, tps)`` tuples. ``tps`` is ``""`` while sub-words are
        still arriving; the final tuple carries the mean throughput formatted
        as ``"<value> tokens/s"``.

    Raises:
        Exception: Any exception raised on the worker thread (by
            ``pipe.generate`` or the throughput lookup) is re-raised here once
            the queue has been drained, instead of leaving the caller blocked.
    """
    token_queue = queue.Queue()
    tps_result = ""
    worker_error = None

    def streamer(subword):
        # Echo to stdout for server-side logs, then hand the piece to the
        # consuming generator.
        print(subword, end='', flush=True)
        token_queue.put(subword)
        return ov_genai.StreamingStatus.RUNNING

    def worker():
        # Runs inference on the background thread.
        nonlocal tps_result, worker_error
        try:
            gen_result = pipe.generate([prompt], streamer=streamer, config=config)
            tps = gen_result.perf_metrics.get_throughput().mean
            tps_result = f"{tps:.2f} tokens/s"
        except Exception as exc:
            # Keep the failure so the generator can surface it to the caller.
            worker_error = exc
        finally:
            # Always send the end-of-stream sentinel; without it a failed
            # generation would leave the consumer blocked on get() forever.
            token_queue.put(None)

    threading.Thread(target=worker).start()

    result = ""
    while True:
        token = token_queue.get()
        if token is None:
            break
        result += token
        yield result, ""  # push the incremental text to the output widget

    if worker_error is not None:
        raise worker_error
    yield result, tps_result
# NOTE(review): the source lost its indentation; the nesting below (the Row
# holding the two chat buttons and the two status boxes) is reconstructed.
# Confirm it against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 OpenVINO Streaming Demo with Gradio Textbox")

    with gr.Row():
        start_btn = gr.Button("開始對話")
        end_btn = gr.Button("結束對話")
        status_box = gr.Textbox(label="狀態", interactive=False)
        TPS_box = gr.Textbox(label="TPS", interactive=False)

    textbox_input = gr.Textbox(
        label="Prompt",
        lines=1,
        placeholder="Enter prompt here...",
    )
    # The answer is shown in a Markdown component (a plain Textbox output was
    # used here previously).
    textbox_output = gr.Markdown(min_height=30, label="robot answer:")

    # Button that triggers inference.
    button = gr.Button("Submit")

    # Event wiring: chat start/stop report into the status box; Submit calls
    # generate_stream and updates both the answer area and the TPS box.
    start_btn.click(fn=start_chat, outputs=status_box)
    end_btn.click(fn=finish_chat, outputs=status_box)
    button.click(
        fn=generate_stream,
        inputs=textbox_input,
        outputs=[textbox_output, TPS_box],
    )

demo.launch()