|
"""
|
|
Optimized Game Fuzzy Matcher for G-Assist LLM Integration
|
|
Handles game name variations, prioritizes Steam API data, and provides intelligent matching.
|
|
"""
|
|
|
|
import re
|
|
import logging
|
|
from collections import defaultdict
|
|
from typing import List, Tuple, Dict, Optional
|
|
import asyncio
|
|
from src.rtx_llm_analyzer import GAssistLLMAnalyzer
|
|
|
|
|
|
class OptimizedGameFuzzyMatcher:
    """
    Fuzzy matcher for video game titles, built for G-Assist LLM integration.

    Combines hand-maintained lookup tables (aliases, acronyms, roman
    numerals, edition words) with token-based scoring.

    Attributes:
        threshold: Minimum confidence accepted by ``match_with_steam_fallback``.
        cache: Memo of ``preprocess_title`` results keyed by ``"prep_<title>"``.
            It is never evicted, so it grows with the number of distinct titles.
        logger: Logger named after this module.
        llm_analyzer: ``GAssistLLMAnalyzer`` instance created at construction;
            no method of this class reads it.
        game_map: Lowercase alias -> preferred lowercase title.
        acronym_map: Lowercase acronym -> list of expanded words.
        roman_map: Roman numeral (``i`` .. ``xv``) -> arabic digit string.
        number_to_roman: Inverse of ``roman_map``.
        edition_words: Words such as "remastered" that describe an edition
            rather than the game itself.
    """

    def __init__(self, threshold: float = 0.75):
        """
        Create a matcher.

        Args:
            threshold: Minimum confidence for ``match_with_steam_fallback`` to
                accept a match.
        """
        self.threshold = threshold
        self.cache = {}
        self.logger = logging.getLogger(__name__)
        self.llm_analyzer = GAssistLLMAnalyzer()
        self._build_lookup_tables()

    def _build_lookup_tables(self) -> None:
        """Populate the per-instance alias, acronym, numeral and edition tables."""
        self.game_map = {
            'diablo': 'diablo',
            'diablo i': 'diablo',
            'diablo 2': 'diablo ii',
            'diablo 3': 'diablo iii',
            'diablo 4': 'diablo iv',

            'grand theft auto': 'grand theft auto',
            'gta': 'grand theft auto',
            'gta 3': 'grand theft auto 3',
            'gta iii': 'grand theft auto 3',
            'gta 4': 'grand theft auto 4',
            'gta iv': 'grand theft auto 4',
            'gta 5': 'grand theft auto 5',
            'gta v': 'grand theft auto 5',
        }

        self.acronym_map = {
            'gta': ['grand', 'theft', 'auto'],
            'cod': ['call', 'of', 'duty'],
            'cs': ['counter', 'strike'],
            'csgo': ['counter', 'strike', 'global', 'offensive'],
            'pubg': ['playerunknowns', 'battlegrounds'],
            'ac': ['assassins', 'creed'],
            'ds': ['dark', 'souls'],
            'gow': ['god', 'of', 'war'],
            'hzd': ['horizon', 'zero', 'dawn'],
            'botw': ['breath', 'of', 'the', 'wild'],
            'mw': ['modern', 'warfare'],
            'nfs': ['need', 'for', 'speed'],
            'ff': ['final', 'fantasy'],
            'lol': ['league', 'of', 'legends'],
            'wow': ['world', 'of', 'warcraft'],
            'diablo': ['diablo'],
        }

        self.roman_map = {
            'i': '1', 'ii': '2', 'iii': '3', 'iv': '4', 'v': '5',
            'vi': '6', 'vii': '7', 'viii': '8', 'ix': '9', 'x': '10',
            'xi': '11', 'xii': '12', 'xiii': '13', 'xiv': '14', 'xv': '15',
        }

        self.number_to_roman = {v: k for k, v in self.roman_map.items()}

        self.edition_words = {
            'edition', 'remastered', 'remake', 'definitive', 'ultimate',
            'goty', 'complete', 'deluxe', 'special', 'anniversary', 'enhanced',
        }

    def _is_numeral(self, word: str) -> bool:
        """Return True when *word* is all digits or a known roman numeral."""
        return word.isdigit() or word in self.roman_map

    def _comparison_tokens(self, text: str) -> List[str]:
        """Lowercase *text*, drop punctuation, split, and spell roman numerals as digits."""
        words = re.sub(r'[^a-z0-9\s]', ' ', text.lower()).split()
        return [self.roman_map.get(word, word) for word in words]

    @staticmethod
    def _contains_token_run(tokens: List[str], run: List[str]) -> bool:
        """Return True when *run* occurs as a contiguous slice of *tokens*."""
        width = len(run)
        if not width:
            return False
        return any(tokens[start:start + width] == run
                   for start in range(len(tokens) - width + 1))

    def _numbered_query_matches(self, query_words: List[str], candidate: str) -> bool:
        """
        Check a numbered query against one candidate.

        Numeral words must equal a whole candidate token (roman and arabic
        spellings compare equal); every other word only has to occur as a
        substring of the lowercased candidate.
        """
        candidate_lower = candidate.lower()
        candidate_tokens = set(self._comparison_tokens(candidate))
        for word in query_words:
            if self._is_numeral(word):
                # Whole-token test: "1" must not match inside "10", nor "v" inside "vii".
                if self.roman_map.get(word, word) not in candidate_tokens:
                    return False
            elif word not in candidate_lower:
                return False
        return True

    def preprocess_title(self, title: str) -> List[str]:
        """
        Tokenize a game title and expand known acronyms.

        The title is lowercased and every character outside ``[a-z0-9]`` and
        whitespace becomes a separator. Titles of two or more tokens that
        contain a digit or roman-numeral token are returned as those cleaned
        tokens, with no acronym expansion and no numeral conversion. Otherwise
        acronyms are expanded and remaining tokens go through
        ``normalize_token``.

        Args:
            title: Raw game title.

        Returns:
            A new list of tokens; mutating it does not affect the cache.
        """
        cache_key = f"prep_{title}"
        cached = self.cache.get(cache_key)
        if cached is not None:
            return list(cached)

        tokens = re.sub(r'[^a-z0-9\s]', ' ', title.lower()).split()

        if len(tokens) >= 2 and any(self._is_numeral(t) for t in tokens):
            self.logger.info("Preserving numbered game title: %s", title)
            self.cache[cache_key] = tokens
            return list(tokens)

        processed_tokens = []
        i = 0
        while i < len(tokens):
            token = tokens[i]
            pair = token + tokens[i + 1] if i + 1 < len(tokens) else None

            if pair in self.acronym_map:
                # Longest match first, so "cs go" expands as "csgo", not "cs" + "go".
                processed_tokens.extend(self.acronym_map[pair])
                i += 2
            elif token in self.acronym_map:
                processed_tokens.extend(self.acronym_map[token])
                i += 1
            else:
                processed_tokens.append(self.normalize_token(token))
                i += 1

        self.cache[cache_key] = processed_tokens
        return list(processed_tokens)

    def normalize_token(self, token: str) -> str:
        """
        Convert a single numeral token.

        A roman numeral becomes its digit string, a digit string from 1 to 15
        becomes its roman numeral, and any other two-digit number is read as a
        year (below 50 -> 20xx, otherwise 19xx). Anything else is returned
        unchanged.

        NOTE(review): the two numeral spellings are swapped rather than
        collapsed to one canonical form, so "4" and "iv" still normalize to
        different strings. Kept as-is because callers may rely on it.
        """
        if token in self.roman_map:
            return self.roman_map[token]

        if token in self.number_to_roman:
            return self.number_to_roman[token]

        if token.isdigit() and len(token) == 2:
            return f"20{token}" if int(token) < 50 else f"19{token}"

        return token

    def fuzzy_match_with_variants(self, query: str, target: str) -> float:
        """
        Score two titles using every pair of their generated variants.

        Handles cases such as "Diablo 4" vs "Diablo IV".

        Args:
            query: Title being searched for.
            target: Title to compare against.

        Returns:
            1.0 for a case-insensitive exact match; otherwise the highest
            ``basic_fuzzy_match`` score over all variant pairs, returning as
            soon as one pair scores at least 0.95.
        """
        if query.lower() == target.lower():
            return 1.0

        query_variants = self.generate_variants(query)
        target_variants = self.generate_variants(target)

        max_score = 0.0
        for q_variant in query_variants:
            for t_variant in target_variants:
                score = self.basic_fuzzy_match(q_variant, t_variant)
                if score >= 0.95:
                    return score
                max_score = max(max_score, score)

        return max_score

    def generate_variants(self, title: str) -> List[str]:
        """
        Generate alternative spellings of a title for matching.

        Args:
            title: Raw game title.

        Returns:
            De-duplicated list, in order: the original title, its preprocessed
            form, then one lowercase variant per numeral token with that
            numeral converted (digits to roman first, then roman to digits).
        """
        variants = [title]

        tokens = self.preprocess_title(title)
        if tokens:
            variants.append(' '.join(tokens))

        lower_title = title.lower()
        present = set(re.findall(r'[a-z0-9]+', lower_title))

        conversions = list(self.number_to_roman.items()) + list(self.roman_map.items())
        for source, replacement in conversions:
            if source not in present:
                continue
            # Replace whole tokens only; a plain str.replace would turn
            # "diablo" into "d1ablo" and "civilization" into "ci5ilization".
            pattern = rf'(?<![a-z0-9]){re.escape(source)}(?![a-z0-9])'
            variants.append(re.sub(pattern, replacement, lower_title))

        # dict.fromkeys drops duplicates while keeping first-seen order.
        return list(dict.fromkeys(variants))

    def basic_fuzzy_match(self, title1: str, title2: str) -> float:
        """
        Score two titles by the overlap of their preprocessed token sets.

        The Jaccard index is multiplied by a weight: 1.0, plus 0.2 when the
        titles share at least one non-edition word, minus 0.05 for every
        edition word present in only one title.

        Returns:
            Score capped at 1.0; 0.0 when either title has no tokens.
        """
        set1 = set(self.preprocess_title(title1))
        set2 = set(self.preprocess_title(title2))

        if not set1 or not set2:
            return 0.0

        shared = set1 & set2
        jaccard = len(shared) / len(set1 | set2)

        weight = 1.0
        if shared - self.edition_words:
            weight += 0.2

        edition_diff = (set1 ^ set2) & self.edition_words
        weight -= 0.05 * len(edition_diff)

        return min(1.0, jaccard * weight)

    def normalize_game_name(self, game_name: str) -> str:
        """
        Normalize a game name for consistent caching and matching.

        Args:
            game_name: Original game name.

        Returns:
            The ``preprocess_title`` tokens joined with single spaces.
        """
        return ' '.join(self.preprocess_title(game_name))

    async def find_best_match(self, query: str, candidates: List[str],
                              steam_priority: bool = True) -> Optional[Tuple[str, float]]:
        """
        Pick the candidate that best matches *query*.

        Stages, in order; the first hit wins:
          1. Candidate equals the ``game_map`` alias target (1.0).
          2. Candidate equals the query, ignoring case (1.0).
          3. Candidate contains the alias target as a contiguous run of
             tokens, with roman and arabic numerals compared as equal (0.95).
          4. Numbered queries (two or more words, one a numeral): every word
             must be found in the candidate, numerals as whole tokens (1.0).
             If no candidate qualifies the result is None.
          5. Word overlap: more than half the query words are shared and the
             overlap ratio exceeds 0.6.
          6. Fallback: the first candidate with confidence 0.5.

        Args:
            query: Game name to search for.
            candidates: Candidate game names.
            steam_priority: Accepted for interface compatibility; this method
                does not currently consult it.

        Returns:
            Tuple of (best_match, confidence_score), or None when *candidates*
            is empty or a numbered query has no match.
        """
        if not candidates:
            return None

        query_lower = query.lower()
        mapped_name = self.game_map.get(query_lower)

        if mapped_name is not None:
            self.logger.info("Direct game map match: '%s' -> '%s'", query, mapped_name)
            mapped_lower = mapped_name.lower()
            for candidate in candidates:
                if candidate.lower() == mapped_lower:
                    return candidate, 1.0

        # Exact query matches are tried before the 0.95 partial alias match so
        # that a perfect candidate is never shadowed by a weaker one.
        for candidate in candidates:
            if candidate.lower() == query_lower:
                return candidate, 1.0

        if mapped_name is not None:
            mapped_tokens = self._comparison_tokens(mapped_name)
            for candidate in candidates:
                if self._contains_token_run(self._comparison_tokens(candidate), mapped_tokens):
                    self.logger.info(
                        "Partial match for mapped name: '%s' found in '%s'",
                        mapped_name, candidate,
                    )
                    return candidate, 0.95

        query_words = query_lower.split()
        if len(query_words) >= 2 and any(self._is_numeral(w) for w in query_words):
            self.logger.info("Preserving numbered query: %s", query)

            for candidate in candidates:
                if self._numbered_query_matches(query_words, candidate):
                    return candidate, 1.0

            # Numbered titles never fall through to the looser stages, so that
            # one sequel is not silently substituted for another.
            self.logger.info("No exact match for numbered game: '%s'", query)
            return None

        query_set = set(query_words)
        matches = []
        for candidate in candidates:
            candidate_set = set(candidate.lower().split())
            shared = query_set & candidate_set
            if shared and len(shared) / len(query_set) > 0.5:
                score = len(shared) / max(len(query_set), len(candidate_set))
                matches.append((candidate, score))

        if matches:
            # max() keeps the earliest candidate on ties, like a stable sort.
            best_match, best_score = max(matches, key=lambda match: match[1])
            if best_score > 0.6:
                return best_match, best_score

        return candidates[0], 0.5

    def looks_like_steam_title(self, title: str) -> bool:
        """
        Heuristic to identify Steam-style game titles.

        Returns:
            True when the title is longer than five characters, contains none
            of the substrings "gta", "cod" or "cs" (case-insensitive), and
            has no colon.
        """
        lowered = title.lower()
        return (
            len(title) > 5
            and not any(abbrev in lowered for abbrev in ('gta', 'cod', 'cs'))
            and ':' not in title
        )

    async def match_with_steam_fallback(self, query: str, steam_candidates: List[str],
                                        cache_candidates: List[str]) -> Optional[Tuple[str, float, str]]:
        """
        Match against Steam candidates first, then the local cache.

        Args:
            query: Game name to search for.
            steam_candidates: Candidate names from Steam; may be empty or None.
            cache_candidates: Candidate names from the cache; may be empty or None.

        Returns:
            Tuple of (matched_name, confidence_score, source) for the first
            pool whose best match reaches ``self.threshold``, where source is
            "Steam API" or "Local Cache"; otherwise None.
        """
        pools = (
            (steam_candidates, "Steam API", True),
            (cache_candidates, "Local Cache", False),
        )
        for pool, source, prefer_steam in pools:
            if not pool:
                continue
            match = await self.find_best_match(query, pool, steam_priority=prefer_steam)
            if match and match[1] >= self.threshold:
                return match[0], match[1], source

        # "or ()" keeps the message safe when a pool was passed as None.
        self.logger.warning(
            "No fuzzy match found for '%s' in %d Steam + %d cache candidates",
            query, len(steam_candidates or ()), len(cache_candidates or ()),
        )
        return None
|
|
|
|
|
|
|
|
# Confidence threshold used by the shared, import-time matcher instance below.
_SHARED_MATCHER_THRESHOLD = 0.7

# Module-level matcher instance, constructed once when this module is imported.
game_fuzzy_matcher = OptimizedGameFuzzyMatcher(threshold=_SHARED_MATCHER_THRESHOLD)
|
|
|