Spaces:
Running
on
Zero
Running
on
Zero
| import torch | |
| import numpy as np | |
| from typing import List, Dict, Tuple, Optional | |
| from dataclasses import dataclass | |
| from breed_health_info import breed_health_info | |
| from breed_noise_info import breed_noise_info | |
| from dog_database import dog_data | |
| from scoring_calculation_system import UserPreferences | |
| from sentence_transformers import SentenceTransformer, util | |
| class SmartBreedMatcher: | |
def __init__(self, dog_data: List[Tuple], model_name: str = 'paraphrase-MiniLM-L6-v2'):
    """Store the breed table and load the sentence-embedding model.

    Args:
        dog_data: Breed records. The methods of this class read index 1 as
            the breed name, 2 as size, 4 as temperament, 7 as exercise
            needs and 9 as the free-text description.
        model_name: Model name or path handed to ``SentenceTransformer``.
            The default is the model this class has always used, so
            existing ``SmartBreedMatcher(dog_data)`` callers are unaffected.
    """
    self.dog_data = dog_data
    # The encoder is created once here and reused for every similarity call.
    self.model = SentenceTransformer(model_name)
| def _categorize_breeds(self) -> Dict: | |
| """自動將狗品種分類""" | |
| categories = { | |
| 'working_dogs': [], | |
| 'herding_dogs': [], | |
| 'hunting_dogs': [], | |
| 'companion_dogs': [], | |
| 'guard_dogs': [] | |
| } | |
| for breed_info in self.dog_data: | |
| description = breed_info[9].lower() | |
| temperament = breed_info[4].lower() | |
| # 根據描述和性格特徵自動分類 | |
| if any(word in description for word in ['herding', 'shepherd', 'cattle', 'flock']): | |
| categories['herding_dogs'].append(breed_info[1]) | |
| elif any(word in description for word in ['hunting', 'hunt', 'retriever', 'pointer']): | |
| categories['hunting_dogs'].append(breed_info[1]) | |
| elif any(word in description for word in ['companion', 'toy', 'family', 'lap']): | |
| categories['companion_dogs'].append(breed_info[1]) | |
| elif any(word in description for word in ['guard', 'protection', 'watchdog']): | |
| categories['guard_dogs'].append(breed_info[1]) | |
| elif any(word in description for word in ['working', 'draft', 'cart']): | |
| categories['working_dogs'].append(breed_info[1]) | |
| return categories | |
def find_similar_breeds(self, breed_name: str, top_n: int = 5) -> List[Tuple[str, float]]:
    """Rank every other breed by its similarity to ``breed_name``.

    Args:
        breed_name: Name to look up; compared for equality against index 1
            of each record in ``self.dog_data``.
        top_n: Maximum number of results to return.

    Returns:
        ``(breed name, similarity)`` pairs sorted by descending similarity
        (ties keep their order in ``self.dog_data``), or an empty list when
        ``breed_name`` is not in the table.
    """
    def extract_features(record) -> Dict:
        # The subset of a record that the similarity calculation consumes.
        return {
            'breed_name': record[1],
            'size': record[2],
            'temperament': record[4],
            'exercise': record[7],
            'description': record[9],
        }

    def by_score(pair):
        return pair[1]

    reference = next(
        (record for record in self.dog_data if record[1] == breed_name),
        None,
    )
    if not reference:
        return []

    reference_features = extract_features(reference)
    scored = [
        (
            record[1],
            self._calculate_breed_similarity(reference_features, extract_features(record)),
        )
        for record in self.dog_data
        if record[1] != breed_name
    ]
    # list.sort is stable even with reverse=True, so tied breeds keep
    # their table order.
    scored.sort(key=by_score, reverse=True)
    return scored[:top_n]
def _calculate_breed_similarity(self, breed1_features: Dict, breed2_features: Dict) -> float:
    """Return a weighted similarity score for two breed feature dicts.

    Both dicts must provide the keys ``'breed_name'``, ``'size'``,
    ``'temperament'``, ``'exercise'`` and ``'description'``.

    The score is a weighted sum of six components:

    * description  (0.25): cosine similarity of the embedded descriptions
    * temperament  (0.20): cosine similarity of the embedded temperaments
    * exercise     (0.15): 1.0 when the values are equal, otherwise 0.5
    * size         (0.10): 1.0 when the values are equal, otherwise 0.5
    * health       (0.15): 1.0 minus the absolute health-score difference
    * noise        (0.15): result of ``_calculate_noise_similarity``

    Returns:
        The weighted sum as a float.
    """
    def text_similarity(field: str) -> float:
        # Embed both texts with the shared model and compare them.
        first = self.model.encode(breed1_features[field])
        second = self.model.encode(breed2_features[field])
        return float(util.pytorch_cos_sim(first, second))

    def exact_match(field: str) -> float:
        # Categorical fields: a mismatch still earns half credit.
        return 1.0 if breed1_features[field] == breed2_features[field] else 0.5

    description_similarity = text_similarity('description')
    size_similarity = exact_match('size')
    exercise_similarity = exact_match('exercise')
    temperament_similarity = text_similarity('temperament')

    # Breeds with close health scores are treated as more alike.
    health_score1 = self._calculate_health_score(breed1_features['breed_name'])
    health_score2 = self._calculate_health_score(breed2_features['breed_name'])
    health_similarity = 1.0 - abs(health_score1 - health_score2)

    noise_similarity = self._calculate_noise_similarity(
        breed1_features['breed_name'],
        breed2_features['breed_name'],
    )

    weights = {
        'description': 0.25,
        'temperament': 0.20,
        'exercise': 0.15,
        'size': 0.10,
        'health': 0.15,
        'noise': 0.15,
    }
    final_similarity = (
        description_similarity * weights['description'] +
        temperament_similarity * weights['temperament'] +
        exercise_similarity * weights['exercise'] +
        size_similarity * weights['size'] +
        health_similarity * weights['health'] +
        noise_similarity * weights['noise']
    )
    return final_similarity
| def _calculate_final_scores(self, breed_name: str, base_scores: Dict, | |
| smart_score: float, is_preferred: bool, | |
| similarity_score: float = 0.0) -> Dict: | |
| """ | |
| 計算最終分數,包含基礎分數和獎勵分數 | |
| Args: | |
| breed_name: 品種名稱 | |
| base_scores: 基礎評分 (空間、運動等) | |
| smart_score: 智能匹配分數 | |
| is_preferred: 是否為用戶指定品種 | |
| similarity_score: 與指定品種的相似度 (0-1) | |
| """ | |
| # 基礎權重 | |
| weights = { | |
| 'base': 0.6, # 基礎分數權重 | |
| 'smart': 0.25, # 智能匹配權重 | |
| 'bonus': 0.15 # 獎勵分數權重 | |
| } | |
| # 計算基礎分數 | |
| base_score = base_scores.get('overall', 0.7) | |
| # 計算獎勵分數 | |
| bonus_score = 0.0 | |
| if is_preferred: | |
| # 用戶指定品種獲得最高獎勵 | |
| bonus_score = 0.95 | |
| elif similarity_score > 0: | |
| # 相似品種獲得部分獎勵,但不超過80%的最高獎勵 | |
| bonus_score = min(0.8, similarity_score) * 0.95 | |
| # 計算最終分數 | |
| final_score = ( | |
| base_score * weights['base'] + | |
| smart_score * weights['smart'] + | |
| bonus_score * weights['bonus'] | |
| ) | |
| # 更新各項分數 | |
| scores = base_scores.copy() | |
| # 如果是用戶指定品種,稍微提升各項基礎分數,但保持合理範圍 | |
| if is_preferred: | |
| for key in scores: | |
| if key != 'overall': | |
| scores[key] = min(1.0, scores[key] * 1.1) # 最多提升10% | |
| # 為相似品種調整分數 | |
| elif similarity_score > 0: | |
| boost_factor = 1.0 + (similarity_score * 0.05) # 最多提升5% | |
| for key in scores: | |
| if key != 'overall': | |
| scores[key] = min(0.95, scores[key] * boost_factor) # 確保不超過95% | |
| return { | |
| 'final_score': round(final_score, 4), | |
| 'base_score': round(base_score, 4), | |
| 'bonus_score': round(bonus_score, 4), | |
| 'scores': {k: round(v, 4) for k, v in scores.items()} | |
| } | |
def _calculate_health_score(self, breed_name: str) -> float:
    """Score a breed's health from keywords in its health notes.

    The score starts at 1.0 and loses 0.1 for every severe-condition
    keyword and 0.05 for every moderate-condition keyword found in the
    lower-cased ``health_notes`` text. Each keyword counts at most once.

    When the instance has a ``user_preferences`` attribute, the score is
    additionally multiplied by 0.9 if the user has children and the notes
    mention 'requires frequent' or 'regular monitoring', and by 0.9 again
    if ``health_sensitivity`` is ``'high'``.

    Args:
        breed_name: Key to look up in ``breed_health_info``.

    Returns:
        0.5 when the breed has no entry in ``breed_health_info``;
        otherwise the score clamped to the range [0.3, 1.0].
    """
    import re  # local import so this method does not rely on file-level imports

    if breed_name not in breed_health_info:
        return 0.5
    health_notes = breed_health_info[breed_name]['health_notes'].lower()

    severe_conditions = (
        'cancer', 'cardiomyopathy', 'epilepsy', 'dysplasia',
        'bloat', 'progressive', 'syndrome',
    )
    moderate_conditions = (
        'allergies', 'infections', 'thyroid', 'luxation',
        'skin problems',
    )
    severe_count = sum(1 for condition in severe_conditions if condition in health_notes)
    moderate_count = sum(1 for condition in moderate_conditions if condition in health_notes)
    # 'ear' has to be matched as a whole word: as a bare substring it also
    # fires on "heart", "years", "early", "hearing", ... and would wrongly
    # penalise breeds that have no ear problems at all.
    if re.search(r'\bears?\b', health_notes):
        moderate_count += 1

    health_score = 1.0
    health_score -= (severe_count * 0.1)
    health_score -= (moderate_count * 0.05)

    # Optional adjustments driven by the user's preferences, when present.
    if hasattr(self, 'user_preferences'):
        if self.user_preferences.has_children:
            if 'requires frequent' in health_notes or 'regular monitoring' in health_notes:
                health_score *= 0.9
        if self.user_preferences.health_sensitivity == 'high':
            health_score *= 0.9

    return max(0.3, min(1.0, health_score))
def _calculate_noise_similarity(self, breed1: str, breed2: str) -> float:
    """Compare the noise levels recorded for two breeds.

    Each breed's ``'noise_level'`` label is read from ``breed_noise_info``
    and mapped to a number (Low=1, Moderate=2, High=3). A breed without an
    entry, without a label, or with an unrecognised label is treated as
    level 2.

    Returns:
        1.0 for equal levels, 0.5 for levels one step apart and 0.0 for
        Low versus High.
    """
    level_by_label = {'Low': 1, 'Moderate': 2, 'High': 3, 'Unknown': 2}

    def numeric_level(breed: str) -> int:
        label = breed_noise_info.get(breed, {}).get('noise_level', 'Unknown')
        return level_by_label.get(label, 2)

    gap = abs(numeric_level(breed1) - numeric_level(breed2))
    # The largest possible gap is 2, so dividing by 2 normalises to [0, 1].
    return 1.0 - (gap / 2)
def match_user_preference(self, description: str, top_n: int = 10) -> List[Dict]:
    """Return the breeds that best match a free-text user description.

    When the description names a breed (see ``_detect_breed_preference``),
    that breed is listed with score 1.0, followed by its most similar
    breeds. Each similar breed is scored as
    ``0.6 * similarity + 0.2 * health score + 0.2 * noise similarity``,
    capped at 0.95 so it never outranks the named breed. Otherwise the
    work is delegated to ``self._general_matching(description, top_n)``
    (not shown in this part of the file; its results are expected to carry
    at least ``'breed'`` and ``'score'`` keys, which the final sort reads).

    Args:
        description: The user's free-text description.
        top_n: Maximum number of matches to return.

    Returns:
        At most ``top_n`` match dicts, ordered with the preferred breed
        first, then by descending score, then by breed name.
    """
    def noise_level_of(name: str) -> str:
        # Mirrors _calculate_noise_similarity: a missing entry or a missing
        # 'noise_level' key both fall back to 'Unknown' instead of raising.
        return breed_noise_info.get(name, {}).get('noise_level', 'Unknown')

    preferred_breed = self._detect_breed_preference(description)
    matches = []
    if preferred_breed:
        similar_breeds = self.find_similar_breeds(preferred_breed, top_n=top_n)

        # The preferred breed itself always goes in with the top score.
        breed_info = next(
            (breed for breed in self.dog_data if breed[1] == preferred_breed),
            None,
        )
        if breed_info:
            matches.append({
                'breed': preferred_breed,
                'score': 1.0,
                'is_preferred': True,
                'similarity': 1.0,
                'health_score': self._calculate_health_score(preferred_breed),
                'noise_level': noise_level_of(preferred_breed),
                'reason': "Directly matched your preferred breed"
            })

        for breed_name, similarity in similar_breeds:
            if breed_name == preferred_breed:
                continue
            health_score = self._calculate_health_score(breed_name)
            base_similarity = similarity * 0.6
            health_factor = health_score * 0.2
            noise_factor = self._calculate_noise_similarity(preferred_breed, breed_name) * 0.2
            # Capped so that a similar breed never outranks the preferred one.
            final_score = min(0.95, base_similarity + health_factor + noise_factor)
            matches.append({
                'breed': breed_name,
                'score': final_score,
                'is_preferred': False,
                'similarity': similarity,
                'health_score': health_score,
                'noise_level': noise_level_of(breed_name),
                'reason': f"Similar to {preferred_breed} in characteristics, health profile, and noise level"
            })
    else:
        matches = self._general_matching(description, top_n)

    return sorted(
        matches,
        key=lambda x: (
            -int(x.get('is_preferred', False)),
            -x['score'],  # descending score
            x['breed'],
        ),
    )[:top_n]
| def _detect_breed_preference(self, description: str) -> Optional[str]: | |
| """檢測用戶是否提到特定品種""" | |
| description_lower = description.lower() | |
| for breed_info in self.dog_data: | |
| breed_name = breed_info[1] | |
| normalized_breed = breed_name.lower().replace('_', ' ') | |
| if any(phrase in description_lower for phrase in [ | |
| f"love {normalized_breed}", | |
| f"like {normalized_breed}", | |
| f"prefer {normalized_breed}", | |
| f"want {normalized_breed}", | |
| normalized_breed | |
| ]): | |
| return breed_name | |
| return None | |