Spaces:
Running
Running
File size: 5,339 Bytes
28b8e02 49c543f 5629bb7 28b8e02 5e7c746 28b8e02 5629bb7 28b8e02 49c543f 14cda9a 49c543f 28b8e02 5629bb7 28b8e02 49c543f 5629bb7 49c543f 5629bb7 49c543f 28b8e02 49c543f 28b8e02 49c543f 28b8e02 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
import gradio as gr
import time
from datetime import datetime
import pandas as pd
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue
import os
from symspellpy.symspellpy import SymSpell, Verbosity
# Qdrant vector-database client. Connection details come from environment
# variables ("Qdrant_url" / "Qdrant_api") so no credentials live in the code;
# both resolve to None if unset, which QdrantClient will then reject at call time.
qdrant_client = QdrantClient(
    url=os.environ.get("Qdrant_url"),
    api_key=os.environ.get("Qdrant_api"),
)
# Pre-loaded embedding models (loaded once at import time; each key is the
# display name shown in the UI dropdown and reused as the key in model_config).
models = {
    "E5 (intfloat/multilingual-e5-small)": SentenceTransformer('intfloat/multilingual-e5-small'),
    "E5 large instruct (multilingual-e5-large-instruct)": SentenceTransformer("intfloat/multilingual-e5-large-instruct"),
    "Kalm (KaLM-embedding-multilingual-mini-v1)": SentenceTransformer('HIT-TMG/KaLM-embedding-multilingual-mini-v1')
}
# Per-model search configuration, keyed by the same display names as `models`:
#   "func"       -- embeds a raw query string with that model (the E5 models
#                   are given their query prefixes here: "query: " for the
#                   small model, an instruction header for the instruct model)
#   "collection" -- the Qdrant collection holding that model's product vectors
model_config = {
    "E5 (intfloat/multilingual-e5-small)": {
        # E5 expects a "query: " prefix on search queries
        "func": lambda query: models["E5 (intfloat/multilingual-e5-small)"].encode("query: " + query),
        "collection": "product_E5"
    },
    "E5 large instruct (multilingual-e5-large-instruct)": {
        # Instruct variant takes a task instruction followed by the query;
        # embeddings are normalized so cosine/dot scores are comparable
        "func": lambda query: models["E5 large instruct (multilingual-e5-large-instruct)"].encode(
            "Instruct: Given a product search query, retrieve relevant product listings\nQuery: " + query, convert_to_tensor=False, normalize_embeddings=True),
        "collection": "product_E5_large_instruct"
    },
    "Kalm (KaLM-embedding-multilingual-mini-v1)": {
        # KaLM needs no prefix, only normalized embeddings
        "func": lambda query: models["Kalm (KaLM-embedding-multilingual-mini-v1)"].encode(query, normalize_embeddings=True),
        "collection": "product_kalm"
    }
}
# Global memory to hold feedback state: the last corrected query, a one-line
# summary of its results, and the model used — written by search_product()
# and read by log_feedback() when a feedback button is pressed.
latest_query_result = {"query": "", "result": "", "model": ""}
# SymSpell spelling corrector (edit distance up to 2). The dictionary file is
# a term/frequency list (term in column 0, count in column 1), presumably
# generated with PyThaiNLP — TODO confirm the file ships with this Space.
symspell = SymSpell(max_dictionary_edit_distance=2)
symspell.load_dictionary("symspell_dict_pythainlp.txt", term_index=0, count_index=1)
# Spelling correction
def correct_query_with_symspell(query: str) -> str:
    """Return a spell-corrected version of *query* using SymSpell.

    A single-word query goes through plain ``lookup``; anything with more
    than one whitespace-separated token uses ``lookup_compound``. When
    SymSpell has no suggestion, the query is returned unchanged.
    """
    is_single_word = len(query.strip().split()) == 1
    if is_single_word:
        candidates = symspell.lookup(query, Verbosity.CLOSEST, max_edit_distance=2)
    else:
        candidates = symspell.lookup_compound(query, 2)
    return candidates[0].term if candidates else query
# 🌟 Main search function
def search_product(query, model_name):
    """Embed *query* with the selected model and vector-search Qdrant.

    The query is first spell-corrected, then embedded via the model's
    configured encoder and searched against its configured collection
    (filtered to payload ``type == "product"``, top 10). Returns a
    human-readable result string and records the corrected query, a
    result summary, and the model name in ``latest_query_result`` for
    the feedback buttons.
    """
    started = time.time()
    if model_name not in model_config:
        return "❌ ไม่พบโมเดล"
    # ✨ Fuzzy spelling correction before embedding
    corrected_query = correct_query_with_symspell(query)
    cfg = model_config[model_name]
    vector = cfg["func"](corrected_query)
    try:
        points = qdrant_client.query_points(
            collection_name=cfg["collection"],
            query=vector.tolist(),
            with_payload=True,
            query_filter=Filter(
                must=[FieldCondition(key="type", match=MatchValue(value="product"))]
            ),
            limit=10
        ).points
    except Exception as e:
        return f"❌ Qdrant error: {str(e)}"
    elapsed = time.time() - started
    pieces = [f"⏱ Time: {elapsed:.2f}s\n"]
    if corrected_query != query:
        pieces.append(f"🔧 แก้คำค้นจาก: `{query}` → `{corrected_query}`\n\n")
    pieces.append("📦 ผลลัพธ์:\n")
    hit_lines = [f"- {res.payload.get('name', '')} (score: {res.score:.4f})" for res in points]
    pieces.extend(line + "\n" for line in hit_lines)
    # Summary keeps the historical "line | line | ... |" shape (trailing bar
    # survives .strip(), matching the original accumulation behavior).
    latest_query_result["query"] = corrected_query
    latest_query_result["result"] = "".join(line + " | " for line in hit_lines).strip()
    latest_query_result["model"] = model_name
    return "".join(pieces)
# 📝 Logging feedback
def log_feedback(feedback, log_path="feedback_log.csv"):
    """Append a feedback row for the most recent search to a CSV log.

    Args:
        feedback: Feedback label, e.g. "match" or "not_match".
        log_path: CSV file to append to; the header row is written only
            when the file does not exist yet.

    Returns:
        A short status string for display in the UI.
    """
    log_entry = {
        "timestamp": datetime.now().isoformat(),
        "model": latest_query_result["model"],
        "query": latest_query_result["query"],
        "result": latest_query_result["result"],
        "feedback": feedback,
    }
    # os.path.exists replaces pd.io.common.file_exists, a private pandas
    # internal that is not part of the public API and may disappear between
    # pandas versions.
    pd.DataFrame([log_entry]).to_csv(
        log_path, mode='a', header=not os.path.exists(log_path), index=False
    )
    return f"✅ Feedback saved: {feedback}"
# 🎨 Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 🔎 Product Semantic Search (Vector Search + Qdrant)")
    with gr.Row():
        model_selector = gr.Dropdown(
            choices=list(models.keys()),
            label="เลือกโมเดล",
            value="E5 (intfloat/multilingual-e5-small)"
        )
    query_input = gr.Textbox(label="พิมพ์คำค้นหา")
    result_output = gr.Textbox(label="📋 ผลลัพธ์")
    with gr.Row():
        match_btn = gr.Button("✅ ตรง")
        not_match_btn = gr.Button("❌ ไม่ตรง")
    feedback_status = gr.Textbox(label="📬 สถานะ Feedback")
    # Events: Enter in the query box runs the search; the redundant lambda
    # wrapper around search_product was dropped (a direct reference is
    # equivalent). The buttons still need lambdas to bind their labels.
    query_input.submit(search_product, inputs=[query_input, model_selector], outputs=result_output)
    match_btn.click(lambda: log_feedback("match"), outputs=feedback_status)
    not_match_btn.click(lambda: log_feedback("not_match"), outputs=feedback_status)
# Run app (share=True publishes a temporary public URL). A stray trailing
# "|" scrape artifact after this call was removed — it was a syntax error.
demo.launch(share=True)