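# app.py for a Hugging Face Space: download a text corpus, index it with BM25
# (jieba word segmentation), and answer questions with an int4 OpenVINO GenAI
# model, streaming tokens into a Gradio UI.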
import gradio as gr
import openvino_genai as ov_genai
import queue
import threading
from llama_index.core import SimpleDirectoryReader
from rank_bm25 import BM25Okapi
import jieba
import requests
from bs4 import BeautifulSoup  # only needed if the HTML-extraction branch below is re-enabled
import os
import huggingface_hub as hf_hub
# Download the source documents and save them as .txt files
os.makedirs("./data", exist_ok=True)
urls = [
    #"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000311.html",
    #"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000305.html",
    #"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000306.html",
    #"https://www.cwa.gov.tw/V8/C/M/Fishery/tide_30day_MOD/T000312.html",
    #"https://huggingface.co/spaces/hsuwill000/maxtest01/resolve/main/SoushenJi.txt",  # too many tokens
    "https://huggingface.co/spaces/hsuwill000/maxtest01/resolve/main/mirrorstory.txt",
]
for i, url in enumerate(urls):
    resp = requests.get(url, timeout=30)
    resp.encoding = 'utf-8'
    with open(f"./data/doc_{i}.txt", "w", encoding="utf-8") as f:
        f.write(resp.text)
    # For HTML pages, extract the visible text instead of saving raw markup:
    #soup = BeautifulSoup(resp.text, "html.parser")
    #text = soup.get_text(separator="\n", strip=True)
    #with open(f"./data/doc_{i}.txt", "w", encoding="utf-8") as f:
    #    f.write(text)
# Reuse the file saved above as the default story text (avoids a second download)
with open("./data/doc_0.txt", "r", encoding="utf-8") as f:
    story_default_text = f.read().strip()
# Initialize the OpenVINO model
model_id = "hsuwill000/BitCPM4-1B_int4_ov"
#model_id = "hsuwill000/MiniCPM4-0.5B_int4_ov"  # can't finish.
#model_id = "OpenVINO/Qwen3-0.6B-int4-ov"  # can't finish.
model_path = "ov"
hf_hub.snapshot_download(model_id, local_dir=model_path)

config = ov_genai.GenerationConfig()
config.max_new_tokens = 1024
config.do_sample = True  # top_p/top_k are ignored under greedy decoding, so enable sampling
config.top_p = 0.9
config.top_k = 40
config.repetition_penalty = 1.2

pipe = ov_genai.LLMPipeline(model_path, "CPU")
pipe.get_tokenizer().set_chat_template(pipe.get_tokenizer().chat_template)  # re-apply the tokenizer's built-in chat template
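# Note: LLMPipeline compiles the model for the device string given above;
# "CPU" is used here, and "GPU" should also work where an Intel GPU plugin
# is available.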
# Load the .txt files saved above
documents = SimpleDirectoryReader("./data").load_data()
texts = [doc.get_content() for doc in documents]
print("==================")
print(texts[0][:500])
print("==================")

# Build a BM25 index over the corpus, tokenized with jieba (Chinese word segmentation)
tokenized_corpus = [list(jieba.cut(text)) for text in texts]
bm25 = BM25Okapi(tokenized_corpus)
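# start_chat()/finish_chat() open and close an ov_genai chat session; between
# the two calls, successive generate() calls share conversation history.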
def start_chat():
    pipe.start_chat()
    return "✅ Chat started!"

def finish_chat():
    pipe.finish_chat()
    return "🛑 Chat ended!"
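# Streaming design: generation runs on a worker thread, and the streamer
# callback pushes each subword into a queue; the generator below drains the
# queue and yields the accumulated text, so Gradio can update incrementally.
# A None item marks end-of-stream, after which the tokens/s figure is emitted.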
def generate_stream(prompt):
    # Retrieve the most relevant documents via BM25
    tokenized_query = list(jieba.cut(prompt))
    top_k = 1  # number of documents to retrieve
    doc_scores = bm25.get_scores(tokenized_query)
    top_k_indices = sorted(range(len(doc_scores)), key=lambda i: doc_scores[i], reverse=True)[:top_k]
    retrieved_texts = [texts[i] for i in top_k_indices]

    print("=== Retrieved passages ===")
    for i, txt in enumerate(retrieved_texts, 1):
        print(f"--- Passage {i} ---\n{txt}\n")

    # Build the final prompt (in Chinese, matching the corpus):
    # "Based on the following information, answer the question concisely"
    context = "\n\n".join(retrieved_texts)
    final_prompt = f"根據以下資訊,請簡潔回答問題:\n{context}\n\n問題:{prompt}\n回答:"
    print("=== Final prompt ===")
    print(final_prompt)

    q = queue.Queue()
    tps_result = ""

    def streamer(subword):
        print(subword, end='', flush=True)
        q.put(subword)
        return ov_genai.StreamingStatus.RUNNING

    def worker():
        nonlocal tps_result
        gen_result = pipe.generate([final_prompt], streamer=streamer, config=config)
        tps = gen_result.perf_metrics.get_throughput().mean
        tps_result = f"{tps:.2f} tokens/s"
        q.put(None)  # end-of-stream sentinel

    threading.Thread(target=worker).start()

    result = ""
    while True:
        token = q.get()
        if token is None:
            break
        result += token
        yield result, ""
    yield result, tps_result
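# UI layout: chat-session controls and status/TPS readouts on top; below,
# the prompt/answer column sits beside the source story for reference.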
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 OpenVINO Streaming Demo with Gradio Textbox")
    with gr.Row():
        with gr.Column():
            start_btn = gr.Button("Start chat")
            end_btn = gr.Button("End chat")
            status_box = gr.Textbox(label="Status", interactive=False)
            TPS_box = gr.Textbox(label="TPS", interactive=False)
    with gr.Row():
        with gr.Column():
            textbox_input = gr.Textbox(label="Prompt", lines=1, placeholder="Enter prompt here...")
            button = gr.Button("Submit")
            textbox_output = gr.Textbox(label="Robot answer", lines=20, elem_id="scroll_output")
        with gr.Column():
            StoryBox = gr.Textbox(label="Story", lines=50, placeholder="Story here...", value=story_default_text)

    start_btn.click(fn=start_chat, outputs=status_box)
    end_btn.click(fn=finish_chat, outputs=status_box)
    button.click(fn=generate_stream, inputs=textbox_input, outputs=[textbox_output, TPS_box])

demo.launch()
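# Run locally with `python app.py`; on Spaces, demo.launch() serves the app
# on the default Gradio port.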