Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,19 +1,16 @@
|
|
1 |
import gradio as gr
|
2 |
import time
|
3 |
from datetime import datetime
|
4 |
-
import pandas as pd
|
5 |
from sentence_transformers import SentenceTransformer
|
6 |
from qdrant_client import QdrantClient
|
7 |
from qdrant_client.models import Filter, FieldCondition, MatchValue
|
8 |
import os
|
9 |
-
from rapidfuzz import
|
10 |
from pythainlp.tokenize import word_tokenize
|
11 |
from pyairtable import Table
|
12 |
from pyairtable import Api
|
13 |
-
import pickle
|
14 |
import re
|
15 |
import unicodedata
|
16 |
-
from FlagEmbedding import FlagReranker
|
17 |
|
18 |
# Setup Qdrant Client
|
19 |
qdrant_client = QdrantClient(
|
@@ -34,12 +31,10 @@ model = SentenceTransformer("BAAI/bge-m3")
|
|
34 |
collection_name = "product_bge-m3"
|
35 |
threshold = 0.45
|
36 |
|
37 |
-
reranker = FlagReranker('BAAI/bge-reranker-v2-m3', use_fp16=True)
|
38 |
-
|
39 |
# Utils
|
40 |
def is_non_thai(text):
|
41 |
return re.match(r'^[A-Za-z0-9&\-\s]+$', text) is not None
|
42 |
-
|
43 |
def normalize(text: str) -> str:
|
44 |
if is_non_thai(text):
|
45 |
return text.strip()
|
@@ -72,16 +67,26 @@ def search_product(query):
|
|
72 |
return
|
73 |
|
74 |
if len(result) > 0:
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
|
86 |
elapsed = time.time() - start_time
|
87 |
html_output = f"<p>⏱ <strong>{elapsed:.2f} วินาที</strong></p>"
|
|
|
1 |
import gradio as gr
|
2 |
import time
|
3 |
from datetime import datetime
|
|
|
4 |
from sentence_transformers import SentenceTransformer
|
5 |
from qdrant_client import QdrantClient
|
6 |
from qdrant_client.models import Filter, FieldCondition, MatchValue
|
7 |
import os
|
8 |
+
from rapidfuzz import fuzz
|
9 |
from pythainlp.tokenize import word_tokenize
|
10 |
from pyairtable import Table
|
11 |
from pyairtable import Api
|
|
|
12 |
import re
|
13 |
import unicodedata
|
|
|
14 |
|
15 |
# Setup Qdrant Client
|
16 |
qdrant_client = QdrantClient(
|
|
|
31 |
collection_name = "product_bge-m3"
|
32 |
threshold = 0.45
|
33 |
|
|
|
|
|
34 |
# Utils
|
35 |
def is_non_thai(text):
|
36 |
return re.match(r'^[A-Za-z0-9&\-\s]+$', text) is not None
|
37 |
+
|
38 |
def normalize(text: str) -> str:
|
39 |
if is_non_thai(text):
|
40 |
return text.strip()
|
|
|
67 |
return
|
68 |
|
69 |
if len(result) > 0:
|
70 |
+
topk = 50 # ดึงมา rerank แค่ 50 อันดับแรกจาก Qdrant
|
71 |
+
result = result[:topk]
|
72 |
+
|
73 |
+
scored = []
|
74 |
+
for r in result:
|
75 |
+
name = r.payload.get("name", "")
|
76 |
+
|
77 |
+
# ถ้า query สั้นเกินไป ให้ fuzzy_score = 0 เพื่อกันเพี้ยน
|
78 |
+
if len(corrected_query) >= 3 and name:
|
79 |
+
fuzzy_score = fuzz.partial_ratio(corrected_query, name) / 100.0
|
80 |
+
else:
|
81 |
+
fuzzy_score = 0.0
|
82 |
+
|
83 |
+
# รวม hybrid score
|
84 |
+
hybrid_score = 0.6 * r.score + 0.4 * fuzzy_score
|
85 |
+
scored.append((r, hybrid_score))
|
86 |
+
|
87 |
+
# เรียงตาม hybrid score แล้วกรองผลลัพธ์ที่ hybrid score ต่ำเกิน
|
88 |
+
scored = sorted(scored, key=lambda x: x[1], reverse=True)
|
89 |
+
result = [r[0] for r in scored]
|
90 |
|
91 |
elapsed = time.time() - start_time
|
92 |
html_output = f"<p>⏱ <strong>{elapsed:.2f} วินาที</strong></p>"
|