SamiKoen Claude committed on
Commit
03d4524
·
1 Parent(s): ceaee51

Akıllı kelime filtreleme - spesifik kelime yok

Browse files

- Consonant/vowel oranına göre filtreleme
- Alphanumeric kodları koru
- 2-3 harfli product kodları tanı
- Tamamen genel algoritma

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>

Files changed (1) hide show
  1. app.py +33 -9
app.py CHANGED
@@ -53,18 +53,42 @@ def get_warehouse_stock(product_name):
53
  query = normalize(product_name.strip()).replace('(2026)', '').replace('(2025)', '').strip()
54
  words = query.split()
55
 
56
- # Words to ignore in product search
57
- ignore_words = ['var', 'mi', 'mı', 'mu', 'mü', 'varmi', 'varmı', 'beden', 'size', 'boy',
58
- 'stok', 'stokta', 'mevcut', 'hangi', 'magazada', 'nerede', 'kaç', 'adet', 'tane',
59
- 'trek', 'bisiklet', 'bike']
60
-
61
- # Find size
62
  sizes = ['s', 'm', 'l', 'xl', 'xs', 'xxl', 'ml']
63
  size = next((w for w in words if w in sizes), None)
64
 
65
- # Filter product words - remove sizes and question words
66
- # Keep numbers (like 6, 7, 8) and words longer than 1 char
67
- product_words = [w for w in words if w not in sizes and w not in ignore_words and (len(w) > 1 or w.isdigit())]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
  print(f"DEBUG - Searching: {' '.join(product_words)}, Size: {size}")
70
 
 
53
  query = normalize(product_name.strip()).replace('(2026)', '').replace('(2025)', '').strip()
54
  words = query.split()
55
 
56
+ # Find size markers (S, M, L, etc.)
 
 
 
 
 
57
  sizes = ['s', 'm', 'l', 'xl', 'xs', 'xxl', 'ml']
58
  size = next((w for w in words if w in sizes), None)
59
 
60
+ # Smart filtering: Keep only meaningful product identifiers
61
+ product_words = []
62
+
63
+ # First pass: identify what looks like product terms
64
+ for word in words:
65
+ # Skip if it's a size marker
66
+ if word in sizes:
67
+ continue
68
+
69
+ # Always keep numbers (model numbers like 6, 7, 8)
70
+ if word.isdigit():
71
+ product_words.append(word)
72
+
73
+ # Keep alphanumeric codes (like "sl6", "gen8")
74
+ elif any(c.isdigit() for c in word) and any(c.isalpha() for c in word):
75
+ product_words.append(word)
76
+
77
+ # Keep 2-3 letter codes (often product codes like "sl", "slr", "emx")
78
+ elif 2 <= len(word) <= 3 and word.isalpha():
79
+ # Check if it has consonants (likely a code, not a particle)
80
+ if any(c not in 'aeiou' for c in word):
81
+ product_words.append(word)
82
+
83
+ # Keep longer words that have good consonant/vowel mix (likely product names)
84
+ elif len(word) > 3:
85
+ # Calculate consonant ratio
86
+ consonants = sum(1 for c in word if c not in 'aeiou')
87
+ vowels = len(word) - consonants
88
+ # Product names usually have balanced or consonant-heavy distribution
89
+ # Turkish question words are often vowel-heavy
90
+ if consonants >= vowels * 0.5: # At least 1 consonant per 2 vowels
91
+ product_words.append(word)
92
 
93
  print(f"DEBUG - Searching: {' '.join(product_words)}, Size: {size}")
94