Spaces: Build error
init
- app.py +87 -0
- requirements.txt +6 -0
app.py
ADDED
@@ -0,0 +1,87 @@
import pandas as pd
import numpy as np
import faiss
import pickle
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from llama_cpp import Llama
import gradio as gr

# =========================
# STEP 1: Load the Hugging Face dataset
# =========================
dataset = load_dataset("pcreem/37", split="train")
df = dataset.to_pandas()
df.columns = df.columns.str.strip()  # strip stray whitespace from column names

def make_passage(row):
    return f"""藥品名稱:{row['中文品名']}
英文品名:{row['英文品名']}
主成分:{row['主成分略述']}
劑型:{row['劑型']}
適應症:{row['適應症']}
用法用量:{row['用法用量']}
申請商:{row['申請商名稱']}
製造商:{row['製造商名稱']}
製造廠地址:{row['製造廠廠址']}
包裝:{row['包裝']}
有效日期:{row['有效日期']}
許可證字號:{row['許可證字號']}"""

df["retrieval_passage"] = df.apply(make_passage, axis=1)
passages = df["retrieval_passage"].tolist()

# =========================
# STEP 2: Build the FAISS retrieval index
# =========================
embedding_model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')
embeddings = embedding_model.encode(passages, show_progress_bar=True)
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.array(embeddings).astype("float32"))

# =========================
# STEP 3: Load the Llama model
# =========================
from huggingface_hub import hf_hub_download

model_path = hf_hub_download(
    repo_id="chienweichang/Llama-3-Taiwan-8B-Instruct-GGUF",
    filename="llama-3-taiwan-8B-instruct-q5_1.gguf"
)

llm = Llama(
    model_path=model_path,
    n_gpu_layers=35,
    n_ctx=2048,
    seed=42,
    verbose=False,
)

# =========================
# STEP 4: Define the query function
# =========================
def rag_qa(query, k=3):
    query_embedding = embedding_model.encode([query])
    # D: distances, I: indices of the k nearest passages
    D, I = index.search(np.array(query_embedding).astype("float32"), k)
    top_passages = [passages[idx] for idx in I[0]]

    context = "\n\n---\n\n".join(top_passages)
    system_prompt = "你是一位專業藥師,根據以下藥品資料,回答使用者的問題,請用簡潔中文說明並避免虛構資訊。\n"
    user_prompt = f"{system_prompt}\n以下是參考資料:\n\n{context}\n\n使用者問題:{query}"
    chat_prompt = f"<|start_header_id|>user<|end_header_id|>\n{user_prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n"

    output = llm(chat_prompt, max_tokens=512, temperature=0.7, top_p=0.9, stop=["<|eot_id|>"])
    answer = output["choices"][0]["text"]
    return answer.strip()

# =========================
# STEP 5: Gradio interface
# =========================
gr.Interface(
    fn=rag_qa,
    inputs=gr.Textbox(label="請輸入問題", placeholder="例如:感冒藥有什麼選擇?"),
    outputs=gr.Textbox(label="藥師回答"),
    title="台灣藥品問答系統",
    description="輸入藥品相關問題,我會根據台灣合法藥品資料庫回答你!"
).launch()
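A minimal usage sketch (not part of the commit): assuming the code above has already run in the same Python session, so that embedding_model, index, df, passages and rag_qa exist, the pipeline can be exercised directly without the Gradio UI. The question string is just the placeholder example from the interface.

# Sketch: inspect retrieval only, without calling the LLM
q_emb = embedding_model.encode(["感冒藥有什麼選擇?"])
D, I = index.search(np.array(q_emb).astype("float32"), 3)
print(df.iloc[I[0]]["中文品名"].tolist())  # names of the 3 nearest drug entries

# Sketch: full RAG answer, through the same function the Gradio UI calls
print(rag_qa("感冒藥有什麼選擇?", k=3))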
requirements.txt
ADDED
@@ -0,0 +1,6 @@
sentence-transformers
faiss-cpu
llama-cpp-python
datasets
gradio
huggingface_hub
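Note (not part of the commit): none of the dependencies above are pinned, and pandas/numpy are only pulled in transitively even though app.py imports them directly. Since the Space currently shows a build error, one common thing to try is a pinned requirements.txt; the sketch below is an assumption, with version numbers chosen only as examples of released versions, not verified against this Space.

sentence-transformers==2.7.0
faiss-cpu==1.8.0
llama-cpp-python==0.2.90
datasets==2.19.0
gradio==4.44.0
huggingface_hub==0.23.0
pandas
numpy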