Spaces:
Running
Running
File size: 6,304 Bytes
28b8e02 9ddaa27 ee48c56 7c23eb0 9ddaa27 39b722c 5629bb7 6d417ec 28b8e02 41cf03d 28b8e02 6d417ec 41cf03d 6d417ec 7c23eb0 6d417ec 28b8e02 6d417ec 41cf03d 9ddaa27 6d417ec 9ddaa27 6d417ec 9ddaa27 50c341d 6d417ec 50c341d 6d417ec 50c341d 6d417ec 50c341d 6d417ec 28b8e02 cddab55 6d417ec 5629bb7 49c543f 66a3591 d3b2a2e 49c543f 5629bb7 66a3591 28b8e02 8c1aede 49c543f 66a3591 8c1aede 6d417ec 49c543f 6d417ec 28b8e02 6d417ec 8c1aede 6d417ec 66a3591 8c1aede 28b8e02 c68ca70 6d417ec 28b8e02 6d417ec 28b8e02 6d417ec 28b8e02 6d417ec 28b8e02 7c23eb0 66a3591 7c23eb0 ab848e6 7c23eb0 cddab55 7c23eb0 ef6809f 7c23eb0 28b8e02 6d417ec 28b8e02 6d417ec 28b8e02 6d417ec 28b8e02 6d417ec |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 |
import gradio as gr
import time
from datetime import datetime
import pandas as pd
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue
import os
from rapidfuzz import process, fuzz
from pythainlp.tokenize import word_tokenize
from pyairtable import Table
from pyairtable import Api
import pickle
import re
import unicodedata
# Qdrant vector-database client. The endpoint and API key come from the
# environment; os.environ.get yields None for a variable that is not set.
qdrant_client = QdrantClient(
    url=os.environ.get("Qdrant_url"),
    api_key=os.environ.get("Qdrant_api"),
)

# Airtable table handle used to store search feedback records.
AIRTABLE_API_KEY = os.environ.get("airtable_api")
BASE_ID = os.environ.get("airtable_baseid")
TABLE_NAME = "Feedback_search"
api = Api(AIRTABLE_API_KEY)
table = api.table(BASE_ID, TABLE_NAME)

# Sentence-embedding model used to encode queries, and the Qdrant
# collection that is searched with those embeddings.
model = SentenceTransformer('intfloat/multilingual-e5-small')
collection_name = "product_E5"

# Keyword whitelist used for fuzzy query correction.
# NOTE(security): pickle.load can execute arbitrary code embedded in the
# file. Only ship a whitelist file that comes from a trusted source.
with open("keyword_whitelist.pkl", "rb") as f:
    keyword_whitelist = pickle.load(f)
# Utils
def normalize(text: str) -> str:
    """Return *text* in the canonical form used before fuzzy matching.

    Steps, in order:
      1. Unicode NFC composition.
      2. Repair of a Thai typing slip: the vowel SARA AE "แ" (U+0E41)
         entered as "เแ" (U+0E40 U+0E41) or as two SARA E "เเ"
         (U+0E40 U+0E40) is collapsed to a single "แ".
      3. Surrounding whitespace is stripped and the text is lower-cased.

    Args:
        text: Raw text to normalize.

    Returns:
        The normalized string (possibly empty).
    """
    text = unicodedata.normalize("NFC", text)
    # Order matters: "เแ" is handled before "เเ", exactly as chained replaces.
    text = text.replace("เแ", "แ").replace("เเ", "แ")
    return text.strip().lower()
def smart_tokenize(text: str) -> list:
    """Split *text* into tokens with ``word_tokenize`` (engine "newmm").

    The whole stripped input is returned as a single token when the
    tokenizer yields nothing, or when the tokens joined together are
    shorter than half of the stripped input.

    Args:
        text: Text to tokenize; surrounding whitespace is ignored.

    Returns:
        A list of token strings, or ``[text.strip()]`` on fallback.
    """
    stripped = text.strip()  # computed once; the original stripped three times
    tokens = word_tokenize(stripped, engine="newmm")
    if tokens and len("".join(tokens)) >= len(stripped) * 0.5:
        return tokens
    return [stripped]
def correct_query_merge_phrases(query: str, whitelist, threshold=80, max_ngram=3):
    """Replace fuzzy-matching token runs in *query* with whitelist entries.

    The query is normalized and tokenized. At each position the longest
    window of up to ``max_ngram`` tokens is tried first; the tokens are
    concatenated without a separator and compared against ``whitelist``
    with ``fuzz.token_sort_ratio``. The first window whose best match
    scores at least ``threshold`` is replaced by that match. A position
    with no qualifying window keeps its original token.

    Args:
        query: Raw user query.
        whitelist: Collection of known keywords passed to
            ``process.extractOne``; also used for the final ``in`` test.
        threshold: Minimum score (0-100) for a match to be accepted.
        max_ngram: Largest number of consecutive tokens merged into one
            candidate phrase.

    Returns:
        The corrected words joined without a separator. Words of length
        0 or 1 are dropped unless they are themselves in ``whitelist``.
    """
    tokens = smart_tokenize(normalize(query))
    corrected = []
    i = 0
    while i < len(tokens):
        for n in range(min(max_ngram, len(tokens) - i), 0, -1):
            phrase = "".join(tokens[i:i + n])
            # score_cutoff makes extractOne return None when nothing reaches
            # the threshold. It also returns None for an empty whitelist,
            # which previously crashed on tuple unpacking.
            best = process.extractOne(
                phrase,
                whitelist,
                scorer=fuzz.token_sort_ratio,
                score_cutoff=threshold,
            )
            if best is not None:
                corrected.append(best[0])
                i += n
                break
        else:
            # No window starting at i matched: keep the token unchanged.
            corrected.append(tokens[i])
            i += 1
    return "".join(
        word for word in corrected if len(word) > 1 or word in whitelist
    )
# Module-level record of the most recent search; the feedback logger reads it.
latest_query_result = dict(query="", result="", raw_query="", time="")
# Main Search
def _render_product_card(name, brand, price, img_url, score):
    """Return the HTML card for one product, escaping every dynamic value."""
    from html import escape  # local import keeps this block self-contained

    return f"""
    <div style="border: 1px solid #ddd; border-radius: 8px; padding: 10px; text-align: center; box-shadow: 1px 1px 5px rgba(0,0,0,0.1); background: #fff;">
    <img src="{escape(str(img_url))}" style="width: 100%; max-height: 150px; object-fit: contain; border-radius: 4px;">
    <div style="margin-top: 10px;">
    <div style="font-weight: bold; font-size: 14px;">{escape(str(name))}</div>
    <div style="color: gray; font-size: 12px;">{escape(str(brand))}</div>
    <div style="color: green; margin: 4px 0;">฿{escape(str(price))}</div>
    <div style="font-size: 12px; color: #555;">score: {escape(str(score))}</div>
    </div>
    </div>
    """


def search_product(query):
    """Run a vector search for *query* and return the results as HTML.

    The query is corrected against ``keyword_whitelist``, encoded with the
    embedding model (prefixed with ``"query: "``), and sent to Qdrant
    restricted to points whose ``type`` is ``"product"`` (at most 50 hits).
    Only hits with a score above 0.8 are rendered.

    Side effects:
        Updates the module-level ``latest_query_result`` on every path
        (success, no hits, Qdrant error) so feedback logged afterwards
        always describes this search rather than a previous one.
        NOTE(review): that dict is a single module global, so it is
        presumably shared by all concurrent users of the app - confirm
        whether per-session state is needed.

    Args:
        query: Raw text typed by the user.

    Returns:
        An HTML string: timing line, optional "query corrected" line, a
        grid of product cards, and a "not found" banner when no hit
        passes the score threshold. On a Qdrant failure, a single error
        paragraph is returned instead.
    """
    from html import escape  # local import keeps this block self-contained

    start_time = time.time()
    latest_query_result["raw_query"] = query
    corrected_query = correct_query_merge_phrases(query, keyword_whitelist)
    query_embed = model.encode("query: " + corrected_query)
    try:
        result = qdrant_client.query_points(
            collection_name=collection_name,
            query=query_embed.tolist(),
            with_payload=True,
            query_filter=Filter(must=[FieldCondition(key="type", match=MatchValue(value="product"))]),
            limit=50
        ).points
    except Exception as e:
        # UI boundary: report the failure instead of crashing the handler,
        # and make sure no stale result stays attached to this query.
        latest_query_result.update({
            "query": corrected_query,
            "result": "",
            "time": time.time() - start_time,
        })
        return f"<p>❌ Qdrant error: {escape(str(e))}</p>"
    elapsed = time.time() - start_time

    parts = [f"<p>⏱ <strong>{elapsed:.2f} วินาที</strong></p>"]
    if corrected_query != query:
        # The query is user input: escape it before embedding it in HTML.
        parts.append(
            f"<p>🔧 แก้คำค้นจาก: <code>{escape(query)}</code> → <code>{escape(corrected_query)}</code></p>"
        )
    parts.append('<div style="display: grid; grid-template-columns: repeat(auto-fill, minmax(220px, 1fr)); gap: 20px;">')

    summary = []
    for res in result:
        if res.score > 0.8:
            payload = res.payload or {}
            name = payload.get("name", "ไม่ทราบชื่อสินค้า")
            score = f"{res.score:.4f}"
            img_url = payload.get("imageUrl", "")
            price = payload.get("price", "ไม่ระบุ")
            brand = payload.get("brand", "")
            parts.append(_render_product_card(name, brand, price, img_url, score))
            # The summary goes to the feedback log, not to HTML: keep it raw.
            summary.append(f"{name} (score: {score}) | ")
    parts.append("</div>")

    if not summary:
        parts.append('<div style="text-align: center; font-size: 18px; color: #a00; padding: 30px;">❌ ไม่พบสินค้าที่เกี่ยวข้องกับคำค้นนี้</div>')

    latest_query_result.update({
        "query": corrected_query,
        "result": "".join(summary).strip(),
        "time": elapsed,
    })
    return "".join(parts)
# Feedback logging
def log_feedback(feedback):
    """Store the latest search together with the user's verdict in Airtable.

    One record is created from the module-level ``latest_query_result``
    plus *feedback*. The ``timestamp`` field holds today's local date
    formatted as ``YYYY-MM-DD`` (no time of day).

    Args:
        feedback: Value written to the record's ``feedback`` field.

    Returns:
        A status message. Any ``Exception`` raised while building or
        sending the record is caught and reported in the message.
    """
    try:
        now = datetime.now().strftime("%Y-%m-%d")
        record = {
            "model": "E5 (intfloat/multilingual-e5-small)",
            "timestamp": now,
            "raw_query": latest_query_result["raw_query"],
            "query": latest_query_result["query"],
            "result": latest_query_result["result"],
            "time(second)": latest_query_result["time"],
            "feedback": feedback
        }
        table.create(record)
        return "✅ Feedback saved to Airtable!"
    except Exception as e:
        # UI boundary: surface the failure to the user rather than raising.
        return f"❌ Failed to save feedback: {str(e)}"
# Gradio UI
# NOTE(review): the source lost its indentation; the nesting below (both
# buttons inside the Row, the status box below it) is reconstructed -
# confirm against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("## 🔎 Product Semantic Search (Vector Search + Qdrant)")
    query_input = gr.Textbox(label="พิมพ์คำค้นหา")
    result_output = gr.HTML(label="📋 ผลลัพธ์")
    with gr.Row():
        match_btn = gr.Button("✅ ตรง")
        not_match_btn = gr.Button("❌ ไม่ตรง")
    feedback_status = gr.Textbox(label="📬 สถานะ Feedback")

    # Event wiring must happen inside the Blocks context.
    # Submitting the textbox runs the search and renders its HTML.
    query_input.submit(search_product, inputs=[query_input], outputs=result_output)
    # Each button logs a fixed verdict for the most recent search.
    match_btn.click(lambda: log_feedback("match"), outputs=feedback_status)
    not_match_btn.click(lambda: log_feedback("not_match"), outputs=feedback_status)

# Run
demo.launch(share=True)
|