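"""Gradio streaming RAG demo.

Downloads the CWA 30-day tide forecast page, indexes it with BM25 over
jieba-tokenized text, retrieves the best-matching passage for each question,
and streams an answer from an int4 OpenVINO MiniCPM3 model on CPU.
"""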
import os
import queue
import subprocess
import threading

import gradio as gr
import huggingface_hub as hf_hub
import jieba
import openvino_genai as ov_genai
from llama_index.core import SimpleDirectoryReader
from rank_bm25 import BM25Okapi

os.makedirs("./data/", exist_ok=True)
url = "https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000311.html"
output_dir = "./data/"
cmd = ["wget", "-P", output_dir, url]

try:
    subprocess.run(cmd, check=True)
    print("Download succeeded")
except subprocess.CalledProcessError as e:
    print("Download failed:", e)

# Initialize the OpenVINO model
#model_id = "hsuwill000/BitCPM4-1B_int4_ov"
model_id = "hsuwill000/MiniCPM3-4B_int4_ov"
model_path = "ov"
hf_hub.snapshot_download(model_id, local_dir=model_path)

config = ov_genai.GenerationConfig()
config.max_new_tokens = 4096
config.top_p = 0.9
config.top_k = 30


pipe = ov_genai.LLMPipeline(model_path, "CPU")
# Re-apply the tokenizer's built-in chat template so prompts are formatted for chat.
pipe.get_tokenizer().set_chat_template(pipe.get_tokenizer().chat_template)

# Load documents from the ./data folder (multiple files supported)
documents = SimpleDirectoryReader("./data").load_data()
texts = [doc.get_content() for doc in documents]

# Tokenize with jieba and build a BM25 index
tokenized_corpus = [list(jieba.cut(text)) for text in texts]
bm25 = BM25Okapi(tokenized_corpus)
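# Note: bm25.get_scores(query_tokens) returns one relevance score per corpus
# document (higher = more relevant); generate_stream() below uses these scores
# to select the top-k passages as prompt context.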


def start_chat():
    pipe.start_chat()
    return "✅ Chat started!"

def finish_chat():
    pipe.finish_chat()
    return "🛑 Chat ended!"


# Inference function: stream tokens through a callback and yield partial results
def generate_stream(prompt):
    tokenized_query = list(jieba.cut(prompt))

    # BM25: score every passage and keep the top-k (k = 1 here)
    top_k = 1
    doc_scores = bm25.get_scores(tokenized_query)
    top_k_indices = sorted(range(len(doc_scores)), key=lambda i: doc_scores[i], reverse=True)[:top_k]

    retrieved_texts = [texts[i] for i in top_k_indices]

    print("=== Retrieved passages ===")
    for i, txt in enumerate(retrieved_texts, 1):
        print(f"--- Passage {i} ---\n{txt}\n")

    # Build the prompt from only the retrieved passages, not the whole corpus
    context = "\n\n".join(retrieved_texts)
    final_prompt = f"Based on the following information, answer the question concisely:\n{context}\n\nQuestion: {prompt}\nAnswer:"

    print("=== Final prompt ===")
    print(final_prompt)
    
    q = queue.Queue()
    tps_result = ""
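    # ov_genai invokes this callback once per generated subword; returning
    # StreamingStatus.RUNNING tells the pipeline to continue generating.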
    def streamer(subword):
        print(subword, end='', flush=True)
        q.put(subword)
        return ov_genai.StreamingStatus.RUNNING

    def worker():
        # Run inference in a background thread so the UI generator stays responsive
        nonlocal tps_result
        gen_result = pipe.generate([final_prompt], streamer=streamer, config=config)
        tps = gen_result.perf_metrics.get_throughput().mean
        tps_result = f"{tps:.2f} tokens/s"
        q.put(None)  # sentinel: generation finished

    threading.Thread(target=worker, daemon=True).start()

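    # Because generate_stream is a generator, Gradio streams each yielded value
    # to the bound outputs; drain the queue and yield the accumulated text.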
    result = ""
    while True:
        token = q.get()
        if token is None:
            break
        result += token
        yield result,""  # 把逐步結果傳給 output textbox
    yield result, tps_result

with gr.Blocks() as demo:
    gr.Markdown("## 🧠 OpenVINO Streaming Demo with Gradio Textbox")
    with gr.Row():
        with gr.Column():
            start_btn = gr.Button("Start chat")
            end_btn = gr.Button("End chat")
        status_box = gr.Textbox(label="Status", interactive=False)
        TPS_box = gr.Textbox(label="TPS", interactive=False)
    with gr.Row():
        textbox_input = gr.Textbox(label="Prompt", lines=1, placeholder="Enter prompt here...")
        button = gr.Button("Submit")
        
    textbox_output = gr.Markdown(label="Robot answer", elem_id="scroll_output")

    start_btn.click(fn=start_chat, outputs=status_box)
    end_btn.click(fn=finish_chat, outputs=status_box)

    # When Submit is clicked, call generate_stream and stream into the outputs
    button.click(fn=generate_stream, inputs=textbox_input, outputs=[textbox_output, TPS_box])

demo.launch()