maxtest01 / app.py
hsuwill000's picture
Update app.py
f0109a7 verified
raw
history blame
2.57 kB
import gradio as gr
import openvino_genai as ov_genai
import queue
import threading
import time
from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig
import nncf
import huggingface_hub as hf_hub
# Initialise the OpenVINO model: download the model snapshot from the
# Hugging Face Hub into a local directory, then load it on the CPU device.
model_id = "OpenVINO/Qwen2.5-7B-Instruct-int4-ov"
model_path = "ov"
hf_hub.snapshot_download(model_id, local_dir=model_path)

# Generation settings shared by every request.
# NOTE(review): top_p / top_k normally only take effect when sampling is
# enabled (do_sample) -- confirm whether greedy decoding is intended here.
config = ov_genai.GenerationConfig()
config.max_new_tokens = 4096
config.top_p = 0.9
config.top_k = 30

# A chat session is deliberately not opened at import time; start_chat()
# below does that on demand.
pipe = ov_genai.LLMPipeline(model_path, "CPU")
def start_chat():
    """Call ``pipe.start_chat()`` on the shared pipeline.

    Returns:
        The status message to display once the call has returned.
    """
    pipe.start_chat()
    status_message = "✅ 開始對話!"
    return status_message
def finish_chat():
    """Call ``pipe.finish_chat()`` on the shared pipeline.

    Returns:
        The status message to display once the call has returned.
    """
    pipe.finish_chat()
    status_message = "🛑 結束對話!"
    return status_message
def generate_stream(prompt):
    """Stream the model's answer to *prompt* while it is being generated.

    Inference runs on a background thread. Every piece of text passed to the
    streamer callback is pushed onto a queue, and this generator drains the
    queue so the caller can display the answer as it grows.

    Args:
        prompt: Prompt text forwarded to ``pipe.generate``.

    Yields:
        ``(text, tps)`` tuples: the text accumulated so far paired with an
        empty string while streaming, then one final tuple with the complete
        text and the mean throughput formatted as ``"<n.nn> tokens/s"``.

    Raises:
        Exception: Whatever was raised on the worker thread, re-raised here
            after the pieces received so far have been yielded.
    """
    q = queue.Queue()
    stop_requested = threading.Event()
    tps_result = ""
    worker_error = None

    def streamer(subword):
        print(subword, end='', flush=True)
        q.put(subword)
        # Once the consumer has gone away there is nobody left to read the
        # output, so ask the pipeline to stop instead of running to the limit.
        if stop_requested.is_set():
            return ov_genai.StreamingStatus.STOP
        return ov_genai.StreamingStatus.RUNNING

    def worker():
        # Runs the inference on the background thread.
        nonlocal tps_result, worker_error
        try:
            # NOTE(review): the prompt is passed as a one-element list; confirm
            # this form is accepted while a chat session is active.
            gen_result = pipe.generate([prompt], streamer=streamer, config=config)
            tps = gen_result.perf_metrics.get_throughput().mean
            tps_result = f"{tps:.2f} tokens/s"
        except Exception as exc:
            # Hand the failure to the generator so it surfaces to the caller
            # instead of dying silently on this thread.
            worker_error = exc
        finally:
            # Always send the end-of-stream sentinel; without it the consumer
            # loop below would block on q.get() forever after a failure.
            q.put(None)

    threading.Thread(target=worker).start()

    result = ""
    try:
        # iter(callable, sentinel) keeps calling q.get() until it returns None.
        for token in iter(q.get, None):
            result += token
            yield result, ""  # partial answer; throughput is not known yet
    finally:
        # Reached on normal completion and when the generator is closed early.
        stop_requested.set()

    if worker_error is not None:
        raise worker_error
    yield result, tps_result
# Build the Gradio UI: chat-session buttons, status and TPS read-outs, a
# prompt box, and a Markdown area that receives the streamed answer.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 OpenVINO Streaming Demo with Gradio Textbox")
    # NOTE(review): the original indentation was not preserved in this copy;
    # the four widgets below are assumed to sit inside the Row -- confirm.
    with gr.Row():
        start_btn = gr.Button("開始對話")
        end_btn = gr.Button("結束對話")
        status_box = gr.Textbox(label="狀態", interactive=False)
        TPS_box = gr.Textbox(label="TPS", interactive=False)
    # Both session buttons write the returned status message to status_box.
    start_btn.click(fn=start_chat, outputs=status_box)
    end_btn.click(fn=finish_chat, outputs=status_box)
    textbox_input = gr.Textbox(label="Prompt", lines=1, placeholder="Enter prompt here...")
    #textbox_output = gr.Textbox(label="Output", elem_id="scrollbox", lines=10)
    textbox_output = gr.Markdown(min_height=30, label="robot answer:")
    # Button that triggers inference.
    button = gr.Button("Submit")
    # On click, call generate_stream; each yielded (text, tps) pair updates
    # textbox_output and TPS_box respectively.
    button.click(fn=generate_stream, inputs=textbox_input, outputs=[textbox_output, TPS_box])
# Start the Gradio server for the app.
demo.launch()