Spaces:
Running
Running
use only model bge
Browse files
app.py
CHANGED
@@ -34,20 +34,9 @@ with open("keyword_whitelist.pkl", "rb") as f:
|
|
34 |
keyword_whitelist = pickle.load(f)
|
35 |
|
36 |
# Preload Models
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
"collection": "product_E5_finetune",
|
41 |
-
"threshold": 0.8,
|
42 |
-
"prefix": "query: "
|
43 |
-
},
|
44 |
-
"BGE M3": {
|
45 |
-
"model": SentenceTransformer("BAAI/bge-m3"),
|
46 |
-
"collection": "product_bge-m3",
|
47 |
-
"threshold": 0.45,
|
48 |
-
"prefix": ""
|
49 |
-
}
|
50 |
-
}
|
51 |
|
52 |
reranker = FlagReranker('BAAI/bge-reranker-v2-m3', use_fp16=True)
|
53 |
|
@@ -105,21 +94,14 @@ def correct_query_merge_phrases(query: str, whitelist, threshold=80, max_ngram=3
|
|
105 |
latest_query_result = {"query": "", "result": "", "raw_query": "", "time": ""}
|
106 |
|
107 |
# Search Function
|
108 |
-
def search_product(query
|
109 |
start_time = time.time()
|
110 |
latest_query_result["raw_query"] = query
|
111 |
|
112 |
-
selected = models[model_choice]
|
113 |
-
model = selected["model"]
|
114 |
-
collection_name = selected["collection"]
|
115 |
-
threshold = selected["threshold"]
|
116 |
-
prefix = selected["prefix"]
|
117 |
-
|
118 |
corrected_query = correct_query_merge_phrases(query, keyword_whitelist)
|
119 |
-
query_embed = model.encode(
|
120 |
|
121 |
try:
|
122 |
-
# 🔍 ดึง top-50 ก่อน rerank
|
123 |
result = qdrant_client.query_points(
|
124 |
collection_name=collection_name,
|
125 |
query=query_embed.tolist(),
|
@@ -130,14 +112,12 @@ def search_product(query, model_choice):
|
|
130 |
except Exception as e:
|
131 |
return f"<p>❌ Qdrant error: {str(e)}</p>"
|
132 |
|
133 |
-
# ✅ Rerank Top 10
|
134 |
-
if
|
135 |
topk = 10
|
136 |
docs = [r.payload.get("name", "") for r in result[:topk]]
|
137 |
pairs = [[corrected_query, d] for d in docs]
|
138 |
scores = reranker.compute_score(pairs, normalize=True)
|
139 |
-
|
140 |
-
# ผสมคะแนน: 0.6 จาก embedding, 0.4 จาก reranker
|
141 |
result[:topk] = sorted(
|
142 |
zip(result[:topk], scores),
|
143 |
key=lambda x: 0.6 * x[0].score + 0.4 * x[1],
|
@@ -189,11 +169,11 @@ def search_product(query, model_choice):
|
|
189 |
return html_output
|
190 |
|
191 |
# Feedback Function
|
192 |
-
def log_feedback(feedback
|
193 |
try:
|
194 |
now = datetime.now().strftime("%Y-%m-%d")
|
195 |
table.create({
|
196 |
-
"model":
|
197 |
"timestamp": now,
|
198 |
"raw_query": latest_query_result["raw_query"],
|
199 |
"query": latest_query_result["query"],
|
@@ -207,12 +187,9 @@ def log_feedback(feedback, model_choice):
|
|
207 |
|
208 |
# Gradio UI
|
209 |
with gr.Blocks() as demo:
|
210 |
-
gr.Markdown("## 🔎 Product Semantic Search (
|
211 |
-
|
212 |
-
with gr.Row():
|
213 |
-
model_selector = gr.Dropdown(label="🔍 เลือกโมเดล", choices=list(models.keys()), value="E5 Finetuned")
|
214 |
-
query_input = gr.Textbox(label="พิมพ์คำค้นหา")
|
215 |
|
|
|
216 |
result_output = gr.HTML(label="📋 ผลลัพธ์")
|
217 |
|
218 |
with gr.Row():
|
@@ -221,8 +198,8 @@ with gr.Blocks() as demo:
|
|
221 |
|
222 |
feedback_status = gr.Textbox(label="📬 สถานะ Feedback")
|
223 |
|
224 |
-
query_input.submit(search_product, inputs=[query_input
|
225 |
-
match_btn.click(fn=lambda
|
226 |
-
not_match_btn.click(fn=lambda
|
227 |
|
228 |
demo.launch(share=True)
|
|
|
34 |
keyword_whitelist = pickle.load(f)
|
35 |
|
36 |
# Preload Models
|
37 |
+
model = SentenceTransformer("BAAI/bge-m3")
|
38 |
+
collection_name = "product_bge-m3"
|
39 |
+
threshold = 0.45
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
reranker = FlagReranker('BAAI/bge-reranker-v2-m3', use_fp16=True)
|
42 |
|
|
|
94 |
latest_query_result = {"query": "", "result": "", "raw_query": "", "time": ""}
|
95 |
|
96 |
# Search Function
|
97 |
+
def search_product(query):
|
98 |
start_time = time.time()
|
99 |
latest_query_result["raw_query"] = query
|
100 |
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
corrected_query = correct_query_merge_phrases(query, keyword_whitelist)
|
102 |
+
query_embed = model.encode(corrected_query)
|
103 |
|
104 |
try:
|
|
|
105 |
result = qdrant_client.query_points(
|
106 |
collection_name=collection_name,
|
107 |
query=query_embed.tolist(),
|
|
|
112 |
except Exception as e:
|
113 |
return f"<p>❌ Qdrant error: {str(e)}</p>"
|
114 |
|
115 |
+
# ✅ Rerank Top 10
|
116 |
+
if len(result) > 0:
|
117 |
topk = 10
|
118 |
docs = [r.payload.get("name", "") for r in result[:topk]]
|
119 |
pairs = [[corrected_query, d] for d in docs]
|
120 |
scores = reranker.compute_score(pairs, normalize=True)
|
|
|
|
|
121 |
result[:topk] = sorted(
|
122 |
zip(result[:topk], scores),
|
123 |
key=lambda x: 0.6 * x[0].score + 0.4 * x[1],
|
|
|
169 |
return html_output
|
170 |
|
171 |
# Feedback Function
|
172 |
+
def log_feedback(feedback):
|
173 |
try:
|
174 |
now = datetime.now().strftime("%Y-%m-%d")
|
175 |
table.create({
|
176 |
+
"model": "BGE M3",
|
177 |
"timestamp": now,
|
178 |
"raw_query": latest_query_result["raw_query"],
|
179 |
"query": latest_query_result["query"],
|
|
|
187 |
|
188 |
# Gradio UI
|
189 |
with gr.Blocks() as demo:
|
190 |
+
gr.Markdown("## 🔎 Product Semantic Search (BGE M3 + Qdrant)")
|
|
|
|
|
|
|
|
|
191 |
|
192 |
+
query_input = gr.Textbox(label="พิมพ์คำค้นหา")
|
193 |
result_output = gr.HTML(label="📋 ผลลัพธ์")
|
194 |
|
195 |
with gr.Row():
|
|
|
198 |
|
199 |
feedback_status = gr.Textbox(label="📬 สถานะ Feedback")
|
200 |
|
201 |
+
query_input.submit(search_product, inputs=[query_input], outputs=result_output)
|
202 |
+
match_btn.click(fn=lambda: log_feedback("match"), outputs=feedback_status)
|
203 |
+
not_match_btn.click(fn=lambda: log_feedback("not_match"), outputs=feedback_status)
|
204 |
|
205 |
demo.launch(share=True)
|