sibthinon committed on
Commit
ee48c56
·
verified ·
1 Parent(s): 14cda9a

add new fuzzy

Browse files
Files changed (1) hide show
  1. app.py +23 -8
app.py CHANGED
@@ -7,6 +7,7 @@ from qdrant_client import QdrantClient
7
  from qdrant_client.models import Filter, FieldCondition, MatchValue
8
  import os
9
  from symspellpy.symspellpy import SymSpell, Verbosity
 
10
 
11
 
12
  qdrant_client = QdrantClient(
@@ -41,19 +42,33 @@ model_config = {
41
  latest_query_result = {"query": "", "result": "", "model": ""}
42
 
43
  symspell = SymSpell(max_dictionary_edit_distance=2)
44
- symspell.load_dictionary("symspell_dict_pythainlp.txt", term_index=0, count_index=1)
45
 
46
  # แก้คำผิด
47
  def correct_query_with_symspell(query: str) -> str:
48
- # ถ้า query มีคำเดียว ใช้ lookup ปกติ
49
  if len(query.strip().split()) == 1:
50
  suggestions = symspell.lookup(query, Verbosity.CLOSEST, max_edit_distance=2)
51
- else:
52
- suggestions = symspell.lookup_compound(query, 2)
53
-
54
- if suggestions:
55
- return suggestions[0].term
56
- return query
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
  # 🌟 Main search function
59
  def search_product(query, model_name):
 
7
  from qdrant_client.models import Filter, FieldCondition, MatchValue
8
  import os
9
  from symspellpy.symspellpy import SymSpell, Verbosity
10
+ from pythainlp.tokenize import word_tokenize
11
 
12
 
13
  qdrant_client = QdrantClient(
 
42
  latest_query_result = {"query": "", "result": "", "model": ""}
43
 
44
  symspell = SymSpell(max_dictionary_edit_distance=2)
45
+ symspell.load_pickle("symspell_fast.pkl", term_index=0, count_index=1)
46
 
47
  # แก้คำผิด
48
  def correct_query_with_symspell(query: str) -> str:
49
+ # ถ้าคำเดียว ใช้ lookup ปกติ (ดีที่สุด)
50
  if len(query.strip().split()) == 1:
51
  suggestions = symspell.lookup(query, Verbosity.CLOSEST, max_edit_distance=2)
52
+ return suggestions[0].term if suggestions else query
53
+
54
+ # ตัดคำ
55
+ words = word_tokenize(query.strip(), engine="newmm")
56
+ corrected = []
57
+
58
+ for word in words:
59
+ # หากความยาวคำเดิม > 4 และแก้ไม่ได้ → ลองแก้ทั้งคำเดิมแทน
60
+ suggestions = symspell.lookup(word, Verbosity.CLOSEST, max_edit_distance=2)
61
+ if suggestions:
62
+ corrected.append(suggestions[0].term)
63
+ else:
64
+ # ลองแก้ทั้ง word แบบ raw (กรณี word ถูกตัดผิด เช่น "ปิดปอง")
65
+ alt_suggestions = symspell.lookup_compound(word, 2)
66
+ if alt_suggestions and alt_suggestions[0].term != word:
67
+ corrected.append(alt_suggestions[0].term)
68
+ else:
69
+ corrected.append(word)
70
+
71
+ return " ".join(corrected)
72
 
73
  # 🌟 Main search function
74
  def search_product(query, model_name):