Spaces:

sibthinon
/

environment

Running

App Files Files Community

environment / app.py

sibthinon

fix rapidfuzz partial ratio

4ccade9 verified 17 days ago

raw

history blame

6.88 kB

	import gradio as gr
	import time
	from datetime import datetime
	from sentence_transformers import SentenceTransformer
	from qdrant_client import QdrantClient
	from qdrant_client.models import Filter, FieldCondition, MatchValue
	import os
	from rapidfuzz import fuzz
	from pyairtable import Table
	from pyairtable import Api
	import re
	import unicodedata

	# Qdrant connection: endpoint and API key are read from the environment.
	qdrant_client = QdrantClient(
	    url=os.getenv("Qdrant_url"),
	    api_key=os.getenv("Qdrant_api"),
	    timeout=30.0,
	)

	# Airtable table that receives the feedback rows written by log_feedback.
	AIRTABLE_API_KEY = os.getenv("airtable_api")
	BASE_ID = os.getenv("airtable_baseid")
	TABLE_NAME = "Feedback_search"
	api = Api(AIRTABLE_API_KEY)
	table = api.table(BASE_ID, TABLE_NAME)

	# The embedding model is loaded once at import time and reused by every search.
	model = SentenceTransformer("BAAI/bge-m3")
	# Qdrant collection queried by search_product.
	collection_name = "product_bge-m3"
	# Minimum hybrid score a hit needs in order to be displayed.
	threshold = 0.45

	# Utils
	def is_non_thai(text):
	    """Tell whether ``text`` consists only of ASCII letters, digits, ``&``, ``-`` and whitespace.

	    Returns False for the empty string and for any text containing another
	    character (for example Thai script or punctuation such as ``!``).
	    """
	    ascii_only = re.match(r'^[A-Za-z0-9&\-\s]+$', text)
	    return bool(ascii_only)

	def normalize(text: str) -> str:
	    """Clean up a search query before it is embedded and fuzzy-matched.

	    Text accepted by ``is_non_thai`` is only stripped of surrounding
	    whitespace (its letter case is kept). Any other text is NFC-normalized,
	    has the sequences "เแ" and "เเ" rewritten to "แ", and is then stripped
	    and lower-cased.
	    """
	    if not is_non_thai(text):
	        cleaned = unicodedata.normalize("NFC", text)
	        # Order matters: "เแ" is collapsed before "เเ".
	        for wrong, right in (("เแ", "แ"), ("เเ", "แ")):
	            cleaned = cleaned.replace(wrong, right)
	        return cleaned.strip().lower()
	    return text.strip()

	# Most recent search, kept at module level: search_product writes it and
	# log_feedback reads it when a feedback button is pressed.
	# NOTE(review): a single module-level dict looks like it would be shared by
	# every visitor of the app - confirm whether per-session state is needed.
	latest_query_result = dict.fromkeys(("query", "result", "raw_query", "time"), "")

	# Search Function
	def _fuzzy_score(query_text, name):
	    """Return the rapidfuzz partial ratio of query vs. product name, scaled to 0..1.

	    Queries shorter than 3 characters (and products without a name) score 0.0
	    so that very short queries do not distort the ranking.
	    """
	    if len(query_text) >= 3 and name:
	        return fuzz.partial_ratio(query_text, name) / 100.0
	    return 0.0


	def _hybrid_score(semantic_score, fuzzy_score):
	    """Blend the semantic and fuzzy scores; a weak fuzzy match (< 0.5) is ignored."""
	    if fuzzy_score < 0.5:
	        return semantic_score
	    return 0.7 * semantic_score + 0.3 * fuzzy_score


	def _render_product_card(payload):
	    """Build the HTML card for one product payload, escaping every payload value."""
	    from html import escape

	    # Payload values come from the database and are not trusted as HTML.
	    name = escape(str(payload.get("name", "ไม่ทราบชื่อสินค้า")))
	    brand = escape(str(payload.get("brand", "")))
	    price = escape(str(payload.get("price", "ไม่ระบุ")))
	    img_url = escape(str(payload.get("imageUrl", "")), quote=True)
	    score = f"{payload['score']:.4f}"
	    return (
	        '<div style="border: 1px solid #ddd; border-radius: 8px; padding: 10px; text-align: center; box-shadow: 1px 1px 5px rgba(0,0,0,0.1); background: #fff;">'
	        f'<img src="{img_url}" style="width: 100%; max-height: 150px; object-fit: contain; border-radius: 4px;">'
	        '<div style="margin-top: 10px;">'
	        f'<div style="font-weight: bold; font-size: 14px;">{name}</div>'
	        f'<div style="color: gray; font-size: 12px;">{brand}</div>'
	        f'<div style="color: green; margin: 4px 0;">฿{price}</div>'
	        f'<div style="font-size: 12px; color: #555;">score: {score}</div>'
	        '</div>'
	        '</div>'
	    )


	def search_product(query):
	    """Search products for ``query`` and stream UI updates.

	    This is a generator used as a Gradio event handler. It first yields a
	    "searching" status with empty HTML, then yields either an error message
	    (when the Qdrant call raises) or the final status with the result grid.

	    Steps: normalize the query, embed it, fetch up to 50 points of type
	    "product" from Qdrant, re-rank them by a hybrid of the semantic score and
	    a fuzzy name match, and render those whose hybrid score reaches
	    ``threshold``.

	    Side effects: stores the hybrid, fuzzy and semantic scores in each point's
	    payload, and overwrites ``latest_query_result`` with this search (also on
	    a Qdrant failure, with an empty result) so that feedback logged afterwards
	    never pairs this query with a previous search's results.

	    Yields:
	        Tuples of ``(status textbox update, HTML string)``.
	    """
	    from html import escape

	    yield gr.update(value="🔄 กำลังค้นหา..."), ""

	    start_time = time.time()
	    corrected_query = normalize(query)
	    query_embed = model.encode(corrected_query)

	    try:
	        points = qdrant_client.query_points(
	            collection_name=collection_name,
	            query=query_embed.tolist(),
	            with_payload=True,
	            query_filter=Filter(must=[FieldCondition(key="type", match=MatchValue(value="product"))]),
	            limit=50,  # only the top 50 Qdrant hits are re-ranked
	        ).points
	    except Exception as e:
	        # Record the failed search as a whole so stale results are not kept.
	        latest_query_result.update({
	            "raw_query": query,
	            "query": corrected_query,
	            "result": "",
	            "time": time.time() - start_time,
	        })
	        # The exception text may contain arbitrary characters; escape it.
	        yield gr.update(value="❌ Qdrant error"), f"<p>❌ Qdrant error: {escape(str(e))}</p>"
	        return

	    ranked = []
	    for point in points:
	        payload = point.payload
	        fuzzy = _fuzzy_score(corrected_query, payload.get("name", ""))
	        payload["score"] = _hybrid_score(point.score, fuzzy)  # compared with threshold below
	        payload["fuzzy_score"] = fuzzy  # kept for debugging
	        payload["semantic_score"] = point.score  # kept for debugging
	        ranked.append(point)

	    # Best hybrid score first; the sort is stable, so ties keep Qdrant's order.
	    ranked.sort(key=lambda p: p.payload["score"], reverse=True)

	    elapsed = time.time() - start_time
	    hits = [p.payload for p in ranked if p.payload["score"] >= threshold]

	    parts = [f"<p>⏱ <strong>{elapsed:.2f} วินาที</strong></p>"]
	    if corrected_query != query:
	        # The query is user input and must not be interpreted as HTML.
	        parts.append(
	            f"<p>🔧 แก้คำค้นจาก: <code>{escape(query)}</code> → <code>{escape(corrected_query)}</code></p>"
	        )
	    parts.append('<div style="display: grid; grid-template-columns: repeat(auto-fill, minmax(220px, 1fr)); gap: 20px;">')
	    parts.extend(_render_product_card(payload) for payload in hits)
	    parts.append("</div>")
	    if not hits:
	        parts.append('<div style="text-align: center; font-size: 18px; color: #a00; padding: 30px;">❌ ไม่พบสินค้าที่เกี่ยวข้องกับคำค้นนี้</div>')

	    # Plain-text summary for the feedback log (not HTML, so not escaped).
	    result_summary = " | ".join(
	        f"{payload.get('name', 'ไม่ทราบชื่อสินค้า')} (score: {payload['score']:.4f})"
	        for payload in hits
	    )

	    latest_query_result.update({
	        "raw_query": query,
	        "query": corrected_query,
	        "result": result_summary,
	        "time": elapsed,
	    })

	    yield gr.update(value="✅ ค้นหาเสร็จแล้ว!"), "".join(parts)

	# Feedback Function
	def log_feedback(feedback):
	    """Write the user's verdict on the latest search to the Airtable table.

	    The row combines ``feedback`` with the contents of
	    ``latest_query_result`` and today's date (``YYYY-MM-DD``).

	    Returns:
	        A status message: a success text, or a failure text that includes
	        the exception message when building or saving the row raises.
	    """
	    try:
	        today = datetime.now().strftime("%Y-%m-%d")
	        record = {
	            "model": "BGE M3",
	            "timestamp": today,
	            "raw_query": latest_query_result["raw_query"],
	            "query": latest_query_result["query"],
	            "result": latest_query_result["result"],
	            "time(second)": latest_query_result["time"],
	            "feedback": feedback,
	        }
	        table.create(record)
	    except Exception as e:
	        return f"❌ Failed to save feedback: {str(e)}"
	    else:
	        return "✅ Feedback saved to Airtable!"

	# Gradio UI: a query box, the result grid, a status line and two feedback buttons.
	with gr.Blocks() as demo:
	    gr.Markdown(value="## 🔎 Product Semantic Search (BGE M3 + Qdrant)")

	    query_input = gr.Textbox(label="พิมพ์คำค้นหา")
	    result_output = gr.HTML(label="📋 ผลลัพธ์")
	    status_output = gr.Textbox(label="🕒 สถานะ", interactive=False)

	    with gr.Row():
	        match_btn = gr.Button(value="✅ ตรง")
	        not_match_btn = gr.Button(value="❌ ไม่ตรง")

	    feedback_status = gr.Textbox(label="📬 สถานะ Feedback")

	    # Pressing Enter in the query box runs the search; the generator's
	    # yields update the status line and the result grid.
	    query_input.submit(
	        fn=search_product,
	        inputs=[query_input],
	        outputs=[status_output, result_output],
	    )

	    # Each button logs a fixed verdict for the most recent search.
	    match_btn.click(fn=lambda: log_feedback("match"), outputs=feedback_status)
	    not_match_btn.click(fn=lambda: log_feedback("not_match"), outputs=feedback_status)

	demo.launch(share=True)