Spaces:
Running
Running
File size: 2,544 Bytes
259675b 9424b30 6bda22b d45acbf 9424b30 259675b b307fb6 6bda22b 766289c 145ef29 0dedb6c 5adf214 0dedb6c 1353c4b d45acbf 9424b30 3e39aa5 259675b d7d5739 b75b9d4 d7d5739 3e39aa5 d45acbf 30994f6 085daf3 6bda22b 06ea06c 6bda22b 3e39aa5 6bda22b 259675b 6bda22b 3e39aa5 06ea06c 885df1a 4a7c181 3e39aa5 9424b30 6bda22b 259675b d45acbf 6bda22b 4a7c181 d45acbf cbbc528 3e39aa5 b75b9d4 38b3f94 b75b9d4 d7d5739 1e27ea1 3818ee6 cbbc528 d45acbf 3e39aa5 b307fb6 d45acbf 3e39aa5 4a7c181 b307fb6 4a7c181 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
import gradio as gr
import openvino_genai as ov_genai
import queue
import threading
import time
import huggingface_hub as hf_hub
# Initialize the OpenVINO model: download the INT4 Qwen3 weights and build a CPU pipeline.
model_id = "OpenVINO/Qwen3-8B-int4-ov"
model_path = "ov"
# BUG FIX: module is imported as `ov_genai`; `openvino_genai.GenerationConfig()` raised NameError.
config = ov_genai.GenerationConfig()
config.max_new_tokens = 2048
config.top_p = 0.9
config.top_k = 10
hf_hub.snapshot_download(model_id, local_dir=model_path)
pipe = ov_genai.LLMPipeline(model_path, "CPU")
def start_chat():
    """Open a chat session on the shared pipeline and return a status message."""
    status = "✅ 開始對話!"
    pipe.start_chat()
    return status
def finish_chat():
    """Close the chat session on the shared pipeline and return a status message."""
    status = "🛑 結束對話!"
    pipe.finish_chat()
    return status
# 建立推論函式:使用 streamer 並回傳 generator 結果
def generate_stream(prompt):
    """Run LLM inference in a background thread and stream partial results.

    Yields (partial_text, tps_text) tuples for Gradio: tps_text stays empty
    until generation finishes, then the final yield carries the measured
    throughput in tokens/s.
    """
    q = queue.Queue()
    tps_result = ""

    def streamer(subword):
        # Forward each generated subword to the queue (and echo to console).
        print(subword, end='', flush=True)
        q.put(subword)
        return ov_genai.StreamingStatus.RUNNING

    def worker():
        # Run inference in a background thread so the generator below can
        # yield partial output to the UI while tokens arrive.
        nonlocal tps_result
        try:
            gen_result = pipe.generate([prompt], streamer=streamer, config=config)
            tps = gen_result.perf_metrics.get_throughput().mean
            tps_result = f"{tps:.2f} tokens/s"
        finally:
            # BUG FIX: the sentinel must be enqueued even if generate() raises,
            # otherwise the consumer loop below blocks on q.get() forever.
            q.put(None)

    # daemon=True so a stuck inference thread cannot keep the process alive on exit.
    threading.Thread(target=worker, daemon=True).start()

    result = ""
    while True:
        token = q.get()
        if token is None:
            break
        result += token
        yield result, ""  # incremental text; TPS not known yet
    yield result, tps_result
# Build the Gradio UI: chat session controls, prompt input, and a streaming output box.
with gr.Blocks(css="""
#scrollbox textarea {
    overflow-y: auto !important;
    height: 300px !important;
    resize: none;
    white-space: pre-wrap;
}
""") as demo:
    gr.Markdown("## 🧠 OpenVINO Streaming Demo with Gradio Textbox")
    with gr.Row():
        start_btn = gr.Button("開始對話")
        end_btn = gr.Button("結束對話")
        status_box = gr.Textbox(label="狀態", interactive=False)
        TPS_box = gr.Textbox(label="TPS", interactive=False)
    start_btn.click(fn=start_chat, outputs=status_box)
    end_btn.click(fn=finish_chat, outputs=status_box)
    textbox_input = gr.Textbox(label="Prompt", lines=1, placeholder="Enter prompt here...")
    textbox_output = gr.Textbox(label="Output", elem_id="scrollbox", lines=10)
    # Submit triggers the streaming generator; each yield updates both output boxes.
    button = gr.Button("Submit")
    button.click(fn=generate_stream, inputs=textbox_input, outputs=[textbox_output, TPS_box])
# BUG FIX: removed trailing " |" scrape artifact after launch() that broke the syntax.
demo.launch()