Spaces:
Running
Running
fix rapidfuzz partial ratio
Browse files
app.py
CHANGED
@@ -6,7 +6,6 @@ from qdrant_client import QdrantClient
|
|
6 |
from qdrant_client.models import Filter, FieldCondition, MatchValue
|
7 |
import os
|
8 |
from rapidfuzz import fuzz
|
9 |
-
from pythainlp.tokenize import word_tokenize
|
10 |
from pyairtable import Table
|
11 |
from pyairtable import Api
|
12 |
import re
|
@@ -73,15 +72,20 @@ def search_product(query):
|
|
73 |
scored = []
|
74 |
for r in result:
|
75 |
name = r.payload.get("name", "")
|
76 |
-
|
77 |
# ถ้า query สั้นเกินไป ให้ fuzzy_score = 0 เพื่อกันเพี้ยน
|
78 |
if len(corrected_query) >= 3 and name:
|
79 |
-
|
80 |
else:
|
81 |
-
|
82 |
-
|
83 |
# รวม hybrid score
|
84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
scored.append((r, hybrid_score))
|
86 |
|
87 |
# เรียงตาม hybrid score แล้วกรองผลลัพธ์ที่ hybrid score ต่ำเกิน
|
@@ -96,10 +100,10 @@ def search_product(query):
|
|
96 |
result_summary, found = "", False
|
97 |
|
98 |
for res in result:
|
99 |
-
if res.score >= threshold:
|
100 |
found = True
|
101 |
name = res.payload.get("name", "ไม่ทราบชื่อสินค้า")
|
102 |
-
score = f"{res.score:.4f}"
|
103 |
img_url = res.payload.get("imageUrl", "")
|
104 |
price = res.payload.get("price", "ไม่ระบุ")
|
105 |
brand = res.payload.get("brand", "")
|
|
|
6 |
from qdrant_client.models import Filter, FieldCondition, MatchValue
|
7 |
import os
|
8 |
from rapidfuzz import fuzz
|
|
|
9 |
from pyairtable import Table
|
10 |
from pyairtable import Api
|
11 |
import re
|
|
|
72 |
scored = []
|
73 |
for r in result:
|
74 |
name = r.payload.get("name", "")
|
75 |
+
|
76 |
# ถ้า query สั้นเกินไป ให้ fuzzy_score = 0 เพื่อกันเพี้ยน
|
77 |
if len(corrected_query) >= 3 and name:
|
78 |
+
fuzzy_score = fuzz.partial_ratio(corrected_query, name) / 100.0
|
79 |
else:
|
80 |
+
fuzzy_score = 0.0
|
|
|
81 |
# รวม hybrid score
|
82 |
+
if fuzzy_score < 0.5:
|
83 |
+
hybrid_score = r.score
|
84 |
+
else:
|
85 |
+
hybrid_score = 0.7 * r.score + 0.3 * fuzzy_score
|
86 |
+
r.payload["score"] = hybrid_score # เก็บลง payload ใช้เทียบ threshold ตอนแสดงผล
|
87 |
+
r.payload["fuzzy_score"] = fuzzy_score # เก็บไว้เผื่อ debug
|
88 |
+
r.payload['semantic_score'] = r.score # เก็บไว้เผื่อ debug
|
89 |
scored.append((r, hybrid_score))
|
90 |
|
91 |
# เรียงตาม hybrid score แล้วกรองผลลัพธ์ที่ hybrid score ต่ำเกิน
|
|
|
100 |
result_summary, found = "", False
|
101 |
|
102 |
for res in result:
|
103 |
+
if res.payload["score"] >= threshold:
|
104 |
found = True
|
105 |
name = res.payload.get("name", "ไม่ทราบชื่อสินค้า")
|
106 |
+
score = f"{res.payload['score']:.4f}"
|
107 |
img_url = res.payload.get("imageUrl", "")
|
108 |
price = res.payload.get("price", "ไม่ระบุ")
|
109 |
brand = res.payload.get("brand", "")
|