BF / smart_warehouse_with_price.py
SamiKoen's picture
Add 12-hour caching system to reduce token usage
827a20a
raw
history blame
18.3 kB
"""Smart warehouse stock finder with price and link information"""
import requests
import re
import os
import json
import xml.etree.ElementTree as ET
import time
# Cache configuration: every cached entry is considered fresh for 12 hours.
CACHE_DURATION = 12 * 60 * 60  # seconds (= 43200)
# The feed-level slots all share one shape: the cached payload plus the
# time.time() value recorded when it was stored (0 means "never filled").
cache = {
    slot: {'data': None, 'time': 0}
    for slot in ('warehouse_xml', 'trek_xml', 'products_summary')
}
# Cache for specific searches; starts empty and is filled per query.
cache['search_results'] = {}
def get_cached_trek_xml():
    """Return the Trek product XML feed, served from a 12-hour in-memory cache.

    Returns:
        The raw response body (bytes) of the feed, or None when the feed
        could not be downloaded (network error or non-200 status).

    This function never raises for download problems: failures are printed
    and reported to the caller as None. Failed downloads are not cached, so
    the next call tries again.
    """
    current_time = time.time()
    entry = cache['trek_xml']
    if entry['data'] and (current_time - entry['time'] < CACHE_DURATION):
        print("🚴 Using cached Trek XML (12-hour cache)")
        return entry['data']

    print("📡 Fetching fresh Trek XML...")
    try:
        url = 'https://www.trekbisiklet.com.tr/output/8582384479'
        # NOTE(review): verify=False disables TLS certificate verification for
        # this request. Kept as in the original; confirm whether the host's
        # certificate really requires it.
        response = requests.get(url, verify=False, timeout=10)
        if response.status_code != 200:
            # Previously this path returned None without any trace, unlike the
            # exception path below.
            print(f"Trek XML fetch error: HTTP {response.status_code}")
            return None
        content = response.content
    except Exception as e:
        # Deliberate best-effort boundary: callers only distinguish data/None.
        print(f"Trek XML fetch error: {e}")
        return None

    entry['data'] = content
    entry['time'] = current_time
    return content
# Lower-case Turkish letters folded to ASCII. U+0307 (combining dot above) is
# dropped because str.lower() turns 'İ' into 'i' followed by U+0307, which
# would otherwise stop "İÇLİK" and "İçlik" from normalizing to the same text.
_TR_FOLD_TABLE = str.maketrans({
    'ı': 'i', 'ğ': 'g', 'ü': 'u', 'ş': 's', 'ö': 'o', 'ç': 'c',
    '\u0307': None,
})

# Parsed view of the most recent Trek feed object, so that a burst of lookups
# against the same cached feed parses the XML only once.
_trek_items_memo = {'source': None, 'items': []}


def _normalize_tr(text):
    """Lower-case *text* and fold Turkish letters to their ASCII look-alikes."""
    return text.lower().translate(_TR_FOLD_TABLE)


def _trek_items(xml_content):
    """Return [(normalized_name, item_element), ...] for the given feed.

    The result is memoized on the identity of *xml_content*; a different feed
    object triggers a fresh parse. Items without a non-empty <rootlabel> are
    skipped.
    """
    if _trek_items_memo['source'] is not xml_content:
        root = ET.fromstring(xml_content)
        items = []
        for item in root.findall('item'):
            label = item.findtext('rootlabel')
            if label:
                items.append((_normalize_tr(label), item))
        # Assign only after a successful parse so a malformed feed is retried.
        _trek_items_memo['items'] = items
        _trek_items_memo['source'] = xml_content
    return _trek_items_memo['items']


def _format_price(raw_price):
    """Round a numeric price string and format it as e.g. '12.300 TL'.

    The rounding step grows with the price: 5000 above 200000, 1000 above
    30000, 100 above 10000, otherwise 10. Text that cannot be parsed as a
    number is returned unchanged with ' TL' appended.
    """
    try:
        value = float(raw_price)
        if value > 200000:
            step = 5000
        elif value > 30000:
            step = 1000
        elif value > 10000:
            step = 100
        else:
            step = 10
        rounded = round(value / step) * step
    except (ValueError, OverflowError):
        # float() rejects non-numeric text; round() rejects nan/inf.
        return f"{raw_price} TL"
    # Thousands separator is '.', so swap the ',' produced by the format spec.
    return f"{int(rounded):,}".replace(',', '.') + " TL"


def get_product_price_and_link(product_name, variant=None):
    """Get price and link for a product from the Trek website XML.

    Every <item> in the feed is scored against the query: one point for each
    whitespace-separated word of *product_name* contained in the item's
    <rootlabel>, plus two points when *variant* is contained in it. The first
    item with the highest non-zero score wins.

    Args:
        product_name: Product name to look up.
        variant: Optional variant text (for example a size) that boosts
            items mentioning it.

    Returns:
        A (price, link) tuple. price is a formatted string such as
        '12.300 TL' and link is the item's <productLink> text; either may be
        None when the field is missing. (None, None) is returned when the
        feed is unavailable, nothing matches, or any error occurs.

    NOTE(review): any item sharing a single word with the query is accepted,
    so an unrelated item can be returned for products absent from the feed —
    confirm whether a minimum score is wanted.
    """
    try:
        if not product_name:
            return None, None

        # Get cached Trek XML
        xml_content = get_cached_trek_xml()
        if not xml_content:
            return None, None

        name_parts = _normalize_tr(product_name).split()
        search_variant = _normalize_tr(variant) if variant else ""

        best_match = None
        best_score = 0
        for item_name, item in _trek_items(xml_content):
            score = sum(1 for part in name_parts if part in item_name)
            if search_variant and search_variant in item_name:
                score += 2  # Variant match is important
            if score > best_score:
                best_score = score
                best_match = item

        # Compare against None explicitly: the truth value of an Element
        # depends on whether it has children and is deprecated.
        if best_match is None:
            return None, None

        raw_price = (best_match.findtext('priceTaxWithCur') or '').strip()
        price = _format_price(raw_price) if raw_price else None

        # The link field is named productLink (not productUrl).
        link = best_match.findtext('productLink') or None
        return price, link
    except Exception as e:
        # Best-effort lookup: a missing price must never break the caller.
        print(f"Error getting price/link: {e}")
        return None, None
def get_cached_warehouse_xml():
    """Return the warehouse stock XML feed as text, cached for 12 hours.

    Up to three download attempts are made, with the timeout growing by five
    seconds per attempt (10s, 15s, 20s). Timeouts, non-200 responses and empty
    bodies move on to the next attempt; any other error aborts immediately.

    Returns:
        The feed as a str, or None when no attempt produced a usable body.
        Only successful downloads are cached, so a failure is retried on the
        next call instead of being remembered for 12 hours.
    """
    current_time = time.time()
    entry = cache['warehouse_xml']
    if entry['data'] and (current_time - entry['time'] < CACHE_DURATION):
        print("📦 Using cached warehouse XML (12-hour cache)")
        return entry['data']

    print("📡 Fetching fresh warehouse XML...")
    url = 'https://video.trek-turkey.com/bizimhesap-warehouse-xml-b2b-api-v2.php'
    for attempt in range(3):
        timeout_val = 10 + (attempt * 5)
        try:
            # NOTE(review): verify=False disables TLS certificate verification
            # for this request. Kept as in the original; confirm it is needed.
            response = requests.get(url, verify=False, timeout=timeout_val)
            status = response.status_code
            xml_text = response.text
        except requests.exceptions.Timeout:
            print(f"XML fetch timeout (attempt {attempt+1}/3, timeout={timeout_val}s)")
            continue
        except Exception as e:
            # Deliberate best-effort boundary: callers only check for None.
            print(f"XML fetch error: {e}")
            return None

        if status != 200:
            # Without this check an HTTP error page would be cached and then
            # scanned for products as if it were the feed.
            print(f"XML fetch error: HTTP {status} (attempt {attempt+1}/3)")
            continue

        print(f"DEBUG - XML fetched: {len(xml_text)} characters (attempt {attempt+1})")
        if not xml_text:
            continue

        entry['data'] = xml_text
        entry['time'] = current_time
        return xml_text

    return None
# Patterns for the regex-based scan of the warehouse feed, compiled once.
_PRODUCT_BLOCK_RE = re.compile(r'<Product>(.*?)</Product>', re.DOTALL)
_PRODUCT_NAME_RE = re.compile(r'<ProductName><!\[CDATA\[(.*?)\]\]></ProductName>')
_PRODUCT_VARIANT_RE = re.compile(r'<ProductVariant><!\[CDATA\[(.*?)\]\]></ProductVariant>')
_WAREHOUSE_RE = re.compile(
    r'<Warehouse>.*?<Name><!\[CDATA\[(.*?)\]\]></Name>.*?<Stock>(.*?)</Stock>.*?</Warehouse>',
    re.DOTALL,
)

# Lower-case keywords in a user message -> store name used for filtering.
# Checked in this order; the first keyword found wins.
_WAREHOUSE_KEYWORDS = {
    'caddebostan': 'Caddebostan',
    'ortaköy': 'Ortaköy',
    'ortakoy': 'Ortaköy',
    'alsancak': 'Alsancak',
    'izmir': 'Alsancak',
    'bahçeköy': 'Bahçeköy',
    'bahcekoy': 'Bahçeköy'
}


def _detect_asked_warehouse(user_message):
    """Return the store name mentioned in *user_message*, or None."""
    # str.lower() turns 'İ' into 'i' + U+0307 (combining dot above); dropping
    # the mark lets "İzmir" / "İZMİR" match the plain 'izmir' keyword.
    lowered = user_message.lower().replace('\u0307', '')
    for keyword, warehouse in _WAREHOUSE_KEYWORDS.items():
        if keyword in lowered:
            return warehouse
    return None


def _parse_product(product_block):
    """Return (name, variant, [(raw_warehouse_name, stock), ...]) for a block.

    Only warehouses with a positive integer stock are listed. Returns None
    when the block has no <ProductName>.
    """
    name_match = _PRODUCT_NAME_RE.search(product_block)
    if not name_match:
        return None
    variant_match = _PRODUCT_VARIANT_RE.search(product_block)
    in_stock = []
    for wh_name, wh_stock in _WAREHOUSE_RE.findall(product_block):
        try:
            stock = int(wh_stock.strip())
        except ValueError:
            continue  # non-numeric stock value: treat as not in stock
        if stock > 0:
            in_stock.append((wh_name, stock))
    variant = variant_match.group(1) if variant_match else ""
    return name_match.group(1), variant, in_stock


def _build_match_prompt(user_message, products_summary, asked_warehouse):
    """Build the user prompt asking the model for matching product indices."""
    warehouse_filter = ""
    if asked_warehouse:
        warehouse_filter = f"\nIMPORTANT: User is asking specifically about {asked_warehouse} warehouse. Only return products available in that warehouse."
    return f"""User is asking: "{user_message}"
Find ALL products that match this query from the list below.
If user asks about specific size (S, M, L, XL, XXL, SMALL, MEDIUM, LARGE, X-LARGE), return only that size.
If user asks generally (without size), return ALL variants of the product.
{warehouse_filter}
IMPORTANT BRAND AND PRODUCT TYPE RULES:
- GOBIK: Spanish textile brand we import. When user asks about "gobik", return ALL products with "GOBIK" in the name.
- Product names contain type information: FORMA (jersey/cycling shirt), TAYT (tights), İÇLİK (base layer), YAĞMURLUK (raincoat), etc.
- Understand Turkish/English terms:
* "erkek forma" / "men's jersey" -> Find products with FORMA in name
* "tayt" / "tights" -> Find products with TAYT in name
* "içlik" / "base layer" -> Find products with İÇLİK in name
* "yağmurluk" / "raincoat" -> Find products with YAĞMURLUK in name
- Gender: UNISEX means for both men and women. If no gender specified, it's typically men's.
Products list (with warehouse availability):
{json.dumps(products_summary, ensure_ascii=False, indent=2)}
Return ONLY index numbers of ALL matching products as comma-separated list (e.g., "5,8,12,15").
If no products found, return ONLY: -1
DO NOT return empty string or any explanation, ONLY numbers or -1
Examples of correct responses:
- "2,5,8,12,15,20" (multiple products found)
- "45" (single product found)
- "-1" (no products found)"""


def _parse_indices(indices_str, product_count):
    """Parse a comma-separated index list, keeping valid, distinct indices in order."""
    picked = {}
    for token in indices_str.split(','):
        token = token.strip()
        if not token.isdigit():
            continue
        try:
            idx = int(token)
        except ValueError:
            continue  # e.g. superscript digits pass isdigit() but not int()
        if 0 <= idx < product_count:
            picked.setdefault(idx, None)  # dict keeps first-seen order, drops repeats
    return list(picked)


def _format_search_result(all_variants, asked_warehouse):
    """Render the matched in-stock variants as a list of display lines."""
    lines = []
    if asked_warehouse:
        # Filter for specific warehouse
        in_store = [
            v for v in all_variants
            if any(asked_warehouse in wh['name'] for wh in v['warehouses'])
        ]
        # asked_warehouse is already a display-ready store name, so it is used
        # directly in the sentence.
        if in_store:
            lines.append(f"{asked_warehouse} mağazasında mevcut:")
            for v in in_store:
                variant_text = f" ({v['variant']})" if v['variant'] else ""
                lines.append(f"• {v['name']}{variant_text}")
                if v['price']:
                    lines.append(f" Fiyat: {v['price']}")
                if v['link']:
                    lines.append(f" Link: {v['link']}")
        else:
            lines.append(f"{asked_warehouse} mağazasında bu ürün mevcut değil")
        return lines

    if not all_variants:
        lines.append("Hiçbir mağazada stok yok")
        return lines

    # Group by product name for cleaner display
    product_groups = {}
    for v in all_variants:
        product_groups.setdefault(v['name'], []).append(v)

    lines.append("Bulunan ürünler:")
    for product_name, variants in product_groups.items():
        lines.append(f"\n{product_name}:")
        # Show first variant's price and link (usually same for all variants)
        first = variants[0]
        if first['price']:
            lines.append(f"Fiyat: {first['price']}")
        if first['link']:
            lines.append(f"Link: {first['link']}")
        # NOTE(review): products without a variant label get no availability
        # line here — confirm whether that is intended.
        for v in variants:
            if v['variant']:
                warehouses_str = ", ".join(
                    w['name'].replace(' mağazası', '') for w in v['warehouses']
                )
                lines.append(f"• {v['variant']}: {warehouses_str}")
    return lines


def get_warehouse_stock_smart_with_price(user_message, previous_result=None):
    """Enhanced smart warehouse search with price and link info.

    The warehouse feed is scanned into a compact product list, the OpenAI chat
    completions API picks the indices matching the user's question, and every
    picked product that is in stock somewhere is rendered together with its
    price and link from the Trek website feed.

    Args:
        user_message: The user's free-text question. Its lower-cased form is
            the key of the 12-hour search cache.
        previous_result: Accepted for interface compatibility; not used here.

    Returns:
        A list of display lines (str) on success, ["Ürün bulunamadı"] when
        the model reports no match, or None when the warehouse feed or the
        model call is unavailable. Only rendered results are cached.
    """
    # Check search cache first
    cache_key = user_message.lower()
    current_time = time.time()
    search_cache = cache['search_results']
    cached = search_cache.get(cache_key)
    if cached is not None and current_time - cached['time'] < CACHE_DURATION:
        print(f"✅ Using cached result for '{user_message}' (12-hour cache)")
        # Hand out a copy so callers cannot alter the cached lines.
        return list(cached['data'])

    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        # Avoid sending "Bearer None"; the request could only be rejected.
        print("GPT API error: OPENAI_API_KEY is not set")
        return None

    # Check if user is asking about specific warehouse
    asked_warehouse = _detect_asked_warehouse(user_message)

    # Get cached XML data
    xml_text = get_cached_warehouse_xml()
    if not xml_text:
        return None

    # Parse every product block once; the position in this list is the index
    # the model is asked to return.
    parsed_products = [
        _parse_product(block) for block in _PRODUCT_BLOCK_RE.findall(xml_text)
    ]

    # Create simplified product list for GPT
    products_summary = [
        {
            "index": i,
            "name": parsed[0],
            "variant": parsed[1],
            "warehouses": [wh_name for wh_name, _ in parsed[2]]
        }
        for i, parsed in enumerate(parsed_products)
        if parsed is not None
    ]

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    payload = {
        "model": "gpt-5-chat-latest",
        "messages": [
            {"role": "system", "content": "You are a product matcher. Find ALL matching products. Return only index numbers."},
            {"role": "user", "content": _build_match_prompt(user_message, products_summary, asked_warehouse)}
        ],
        "temperature": 0,
        "max_tokens": 100
    }

    try:
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=10
        )
        if response.status_code != 200:
            print(f"GPT API error: {response.status_code}")
            return None

        indices_str = response.json()['choices'][0]['message']['content'].strip()
        print(f"DEBUG - GPT-5 response: '{indices_str}'")

        # Handle empty response
        if not indices_str or indices_str == "-1":
            return ["Ürün bulunamadı"]

        # Collect all matching products with price/link
        all_variants = []
        for idx in _parse_indices(indices_str, len(parsed_products)):
            parsed = parsed_products[idx]
            if parsed is None:
                continue
            product_name, variant, in_stock = parsed
            if not in_stock:
                # Out-of-stock products are never shown, so skip the lookup.
                continue
            # Get price and link from Trek website
            price, link = get_product_price_and_link(product_name, variant)
            all_variants.append({
                'name': product_name,
                'variant': variant,
                'price': price,
                'link': link,
                'warehouses': [
                    {'name': format_warehouse_name(wh_name), 'stock': stock}
                    for wh_name, stock in in_stock
                ]
            })

        result = _format_search_result(all_variants, asked_warehouse)

        # Drop expired entries so the cache does not grow without bound, then
        # cache the result before returning.
        expired = [
            key for key, item in search_cache.items()
            if current_time - item['time'] >= CACHE_DURATION
        ]
        for key in expired:
            del search_cache[key]
        search_cache[cache_key] = {
            'data': list(result),
            'time': current_time
        }
        print(f"💾 Cached result for '{user_message}' (12-hour cache)")
        return result
    except Exception as e:
        # Best-effort boundary: any failure is reported to the caller as None.
        print(f"Error calling GPT: {e}")
        return None
def format_warehouse_name(wh_name):
    """Format a raw warehouse name as a customer-facing store label.

    Args:
        wh_name: Warehouse name as it appears in the stock feed.

    Returns:
        The store label for a recognized warehouse; otherwise *wh_name* with
        the "MAGAZA DEPO" marker removed and surrounding whitespace stripped.

    Matching is case-sensitive against upper-case markers, so a name that is
    not upper-case (for example "Caddebostan") passes through unchanged.
    """
    # (markers, label) pairs, checked in order. Both the Turkish and the ASCII
    # spelling are listed where the name contains Turkish letters.
    known_stores = (
        (("CADDEBOSTAN",), "Caddebostan mağazası"),
        (("ORTAKÖY", "ORTAKOY"), "Ortaköy mağazası"),
        (("ALSANCAK",), "İzmir Alsancak mağazası"),
        (("BAHCEKOY", "BAHÇEKÖY"), "Bahçeköy mağazası"),
    )
    for markers, label in known_stores:
        if any(marker in wh_name for marker in markers):
            return label
    return wh_name.replace("MAGAZA DEPO", "").strip()