# NOTE(review): removed Hugging Face Spaces page-scrape residue here
# (status text, file size, commit hashes, line-number gutter) — it was
# never part of the script and is not valid Python.
import os
import queue
import subprocess
import threading
import time
import urllib.error
import urllib.request

import gradio as gr
import jieba
import nncf
import openvino_genai as ov_genai
from llama_index.core import SimpleDirectoryReader
from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig
from rank_bm25 import BM25Okapi
# Download the 30-day tide forecast page from Taiwan's Central Weather
# Administration into ./data/ so it can be indexed for retrieval below.
os.makedirs("./data/", exist_ok=True)
url = "https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000311.html"
output_dir = "./data/"
try:
    # Use stdlib urllib instead of spawning `wget` (the original raised an
    # uncaught FileNotFoundError on systems without the wget binary).
    # Saving under the URL's basename matches `wget -P <dir> <url>` behavior.
    dest = os.path.join(output_dir, os.path.basename(url))
    urllib.request.urlretrieve(url, dest)
    print("下載成功")
except (urllib.error.URLError, OSError) as e:
    # Best-effort: a failed download is reported, not fatal, matching the
    # original script's behavior.
    print("下載失敗:", e)
import huggingface_hub as hf_hub

# ---- OpenVINO model setup ----
# Fetch a pre-quantized INT4 model snapshot from the Hugging Face Hub and
# build an openvino_genai pipeline that runs inference on CPU.
# Alternative smaller model: "hsuwill000/BitCPM4-1B_int4_ov"
model_id = "hsuwill000/MiniCPM3-4B_int4_ov"
model_path = "ov"
hf_hub.snapshot_download(model_id, local_dir=model_path)

# Generation parameters shared by every request.
config = ov_genai.GenerationConfig()
config.max_new_tokens = 4096
config.top_p = 0.9  # stray C-style trailing semicolons removed
config.top_k = 30

pipe = ov_genai.LLMPipeline(model_path, "CPU")
# Re-apply the tokenizer's own chat template so multi-turn prompts are
# formatted the way the model expects.
pipe.get_tokenizer().set_chat_template(pipe.get_tokenizer().chat_template)
# 載入文件(放 ./data 資料夾,支持多檔案)
documents = SimpleDirectoryReader("./data").load_data()
texts = [doc.get_content() for doc in documents]
# 使用 jieba 斷詞做 BM25
tokenized_corpus = [list(jieba.cut(text)) for text in texts]
bm25 = BM25Okapi(tokenized_corpus)
def start_chat():
    """Open a stateful chat session on the shared pipeline; return a status string."""
    status = "✅ 開始對話!"
    pipe.start_chat()
    return status
def finish_chat():
    """Close the pipeline's chat session; return a status string."""
    status = "🛑 結束對話!"
    pipe.finish_chat()
    return status
# 建立推論函式:使用 streamer 並回傳 generator 結果
def generate_stream(prompt):
    """RAG answer generator for the Gradio UI.

    Retrieves the BM25 top-k passages for *prompt*, splices them into an
    augmented prompt, and streams the LLM answer token by token.

    Yields:
        (partial_answer, tps_text) tuples for the two Gradio outputs;
        tps_text stays empty until generation finishes, then carries the
        measured throughput.
    """
    # Segment the query exactly like the corpus was tokenized for BM25.
    tokenized_query = list(jieba.cut(prompt))

    # BM25 retrieval: indices of the top_k highest-scoring passages.
    top_k = 1
    doc_scores = bm25.get_scores(tokenized_query)
    top_k_indices = sorted(range(len(doc_scores)), key=lambda i: doc_scores[i], reverse=True)[:top_k]
    retrieved_texts = [texts[i] for i in top_k_indices]
    print("=== 檢索到的相關段落 ===")
    for i, txt in enumerate(retrieved_texts, 1):
        print(f"--- 段落 {i} ---\n{txt}\n")

    # Only the retrieved passages go into the prompt, not the whole corpus.
    context = "\n\n".join(retrieved_texts)
    # BUG FIX: the original interpolated an undefined name `query` here,
    # raising NameError on every call — the user question is `prompt`.
    final_prompt = f"根據以下資訊,請簡潔回答問題:\n{context}\n\n問題:{prompt}\n回答:"
    print("=== 最終 prompt ===")
    print(final_prompt)

    q = queue.Queue()
    tps_result = ""

    def streamer(subword):
        # Invoked by the pipeline per generated subword; hand it to the
        # main thread via the queue and keep generation running.
        print(subword, end='', flush=True)
        q.put(subword)
        return ov_genai.StreamingStatus.RUNNING

    def worker():
        # Run blocking inference in a background thread so this generator
        # can yield partial results while tokens are still being produced.
        nonlocal tps_result
        gen_result = pipe.generate([final_prompt], streamer=streamer, config=config)
        tps = gen_result.perf_metrics.get_throughput().mean
        tps_result = f"{tps:.2f} tokens/s"
        q.put(None)  # sentinel: generation finished

    # daemon=True so a still-running generation cannot block interpreter exit.
    threading.Thread(target=worker, daemon=True).start()

    result = ""
    while True:
        token = q.get()
        if token is None:
            break
        result += token
        yield result, ""  # stream the growing answer; TPS box stays empty
    yield result, tps_result  # final answer plus measured throughput
# ---- Gradio UI ----
# NOTE(review): the source's indentation was stripped by the page scrape;
# the Row/Column nesting below is a plausible reconstruction — confirm
# against the deployed Space's layout.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 OpenVINO Streaming Demo with Gradio Textbox")
    with gr.Row():
        with gr.Column():
            start_btn = gr.Button("開始對話")
            end_btn = gr.Button("結束對話")
            status_box = gr.Textbox(label="狀態", interactive=False)
            TPS_box = gr.Textbox(label="TPS", interactive=False)
    with gr.Row():
        textbox_input = gr.Textbox(label="Prompt", lines=1, placeholder="Enter prompt here...")
        button = gr.Button("Submit")
    textbox_output = gr.Markdown(label="robot answer:", elem_id="scroll_output")

    start_btn.click(fn=start_chat, outputs=status_box)
    end_btn.click(fn=finish_chat, outputs=status_box)
    # generate_stream is a generator, so Gradio streams partial answers into
    # textbox_output and fills TPS_box from the final yield.
    button.click(fn=generate_stream, inputs=textbox_input, outputs=[textbox_output, TPS_box])

# Fixed: removed the stray trailing "|" (scrape residue) that made this
# line a syntax error.
demo.launch()