sibthinon committed on
Commit
39b722c
·
verified ·
1 Parent(s): ccd63be

fix thai "เเ"

Browse files
Files changed (1) hide show
  1. app.py +6 -0
app.py CHANGED
@@ -9,6 +9,7 @@ import os
9
  from pythainlp.tokenize import word_tokenize
10
  from pyairtable import Table
11
  from pyairtable import Api
 
12
 
13
 
14
  qdrant_client = QdrantClient(
@@ -48,6 +49,11 @@ model_config = {
48
  # Global memory to hold feedback state
49
  latest_query_result = {"query": "", "result": "", "model": "", "raw_query": "", "time": ""}
50
 
 
 
 
 
 
51
  # 🌟 Main search function
52
  def search_product(query, model_name):
53
  start_time = time.time()
 
9
  from pythainlp.tokenize import word_tokenize
10
  from pyairtable import Table
11
  from pyairtable import Api
12
+ import unicodedata
13
 
14
 
15
  qdrant_client = QdrantClient(
 
49
  # Global memory to hold feedback state
50
  latest_query_result = {"query": "", "result": "", "model": "", "raw_query": "", "time": ""}
51
 
52
def fix_common_thai_typos(text: str) -> str:
    """Return *text* with a common Thai vowel typo repaired.

    The input is first NFC-normalized so combining sequences are in
    composed form. Then two mistyped sequences are collapsed into the
    single vowel SARA AE ("แ", U+0E41):

    * two consecutive SARA E ("เเ", U+0E40 U+0E40)
    * SARA E followed by SARA AE ("เแ", U+0E40 U+0E41)

    Only these two exact sequences are rewritten; all other text is
    returned unchanged apart from the NFC normalization.
    """
    cleaned = unicodedata.normalize("NFC", text)
    # Order matters: doubled SARA E is collapsed first, then any
    # remaining SARA E + SARA AE pair is reduced in a second pass.
    for mistyped in ("เเ", "เแ"):
        cleaned = cleaned.replace(mistyped, "แ")
    return cleaned
56
+
57
  # 🌟 Main search function
58
  def search_product(query, model_name):
59
  start_time = time.time()