# models/price_analysis.py

import re
from datetime import datetime

from .model_loader import load_model
from .logging_config import logger

# Candidate labels handed to the zero-shot classifier.
_PRICE_CATEGORIES = [
    "reasonable market price",
    "suspiciously low price",
    "suspiciously high price",
    "average market price",
    "luxury property price",
    "budget property price",
    "premium property price",
    "mid-range property price",
    "overpriced for location",
    "underpriced for location",
    "price matches amenities",
    "price matches property age",
    "price matches location value",
    "price matches property condition",
    "price matches market trends",
]

# (keyword in the winning label, resulting price_range), checked in order.
_PRICE_RANGE_KEYWORDS = (
    ('luxury', 'luxury'),
    ('premium', 'premium'),
    ('mid-range', 'mid_range'),
    ('budget', 'budget'),
)

_METRO_CITIES = ("mumbai", "delhi", "bangalore", "hyderabad", "chennai", "kolkata", "pune")

# Expected price-per-sq.ft. band and reference average for each city tier.
_CITY_TIERS = {
    'metro': {'min': 5000, 'max': 30000, 'market_avg': 15000},
    'non-metro': {'min': 1500, 'max': 15000, 'market_avg': 7500},
}

# Weight of each signal in the overall confidence; the weights sum to 1.0.
_CONFIDENCE_WEIGHTS = {
    'primary_classification': 0.3,
    'location_assessment': 0.25,
    'age_factor': 0.2,
    'size_factor': 0.15,
    'amenities_factor': 0.1,
}

# Only the top five labels are considered, and only above this score.
_MAX_CLASSIFICATIONS = 5
_MIN_CLASSIFICATION_SCORE = 0.25


def _fallback_result(assessment, location_assessment):
    """Build the zero-valued result returned when no analysis is possible."""
    return {
        'assessment': assessment,
        'confidence': 0.0,
        'price': 0,
        'formatted_price': '₹0',
        'price_per_sqft': 0,
        'formatted_price_per_sqft': '₹0',
        'price_range': 'unknown',
        'location_price_assessment': location_assessment,
        'has_price': False,
        'market_trends': {},
        'price_factors': {},
        'risk_indicators': [],
        'top_classifications': [],
    }


def _parse_price(raw_price):
    """Convert a raw price into a float; ``None`` and blank input give 0.

    Currency symbols (``$``, ``₹``) and thousands separators are removed
    first. Text that is still not a number raises ``ValueError``.
    """
    if raw_price is None:
        return 0
    price_str = str(raw_price)
    for symbol in ('$', '₹', ','):
        price_str = price_str.replace(symbol, '')
    price_str = price_str.strip()
    return float(price_str) if price_str else 0


def _parse_area(raw_area):
    """Convert a raw area into a float, keeping only digits and dots.

    Returns 0 (size unknown) when nothing numeric is left, so an unusable
    size does not abort an analysis that has a valid price.
    """
    if raw_area is None:
        return 0
    digits = re.sub(r'[^\d.]', '', str(raw_area))
    if not digits:
        return 0
    try:
        return float(digits)
    except ValueError:
        # e.g. "1.2.3" -- digits and dots that do not form a number.
        return 0


def _assess_location(city, price_per_sqft):
    """Return ``(assessment, market_trends)`` for a price per sq.ft. in *city*."""
    city_lower = str(city).lower()
    is_metro = any(metro in city_lower for metro in _METRO_CITIES)
    tier_name = 'metro' if is_metro else 'non-metro'
    tier = _CITY_TIERS[tier_name]
    market_avg = tier['market_avg']

    market_trends = {
        'city_tier': tier_name,
        'avg_price_range': {
            'min': tier['min'],
            'max': tier['max'],
            'trend': 'stable',
        },
        'price_per_sqft': {
            'current': price_per_sqft,
            'market_avg': market_avg,
            'deviation': abs(price_per_sqft - market_avg) / market_avg * 100,
        },
    }

    if price_per_sqft < tier['min']:
        assessment = "suspiciously low"
    elif price_per_sqft > tier['max']:
        assessment = "suspiciously high"
    else:
        assessment = "reasonable"
    return assessment, market_trends


def _age_factor(raw_year_built):
    """Return the age factor dict, an error dict, or ``None`` for no entry.

    A missing, non-integer or non-positive year yields the error dict. A year
    that is the current year or later yields ``None``.
    """
    try:
        year_built = int(raw_year_built)
    except (TypeError, ValueError):
        return {'error': 'Invalid year built'}
    if year_built <= 0:
        # The default of 0 would otherwise read as a ~2000-year-old building.
        return {'error': 'Invalid year built'}

    property_age = datetime.now().year - year_built
    if property_age <= 0:
        return None

    # 1% depreciation per year, floored at 50%.
    depreciation_factor = max(0.5, 1 - (property_age * 0.01))
    if property_age > 30:
        impact = 'high'
    elif property_age > 15:
        impact = 'medium'
    else:
        impact = 'low'
    return {
        'property_age': property_age,
        'depreciation_factor': depreciation_factor,
        'impact': impact,
    }


def _size_factor(sq_ft, price_per_sqft):
    """Return the size factor dict for a positive area."""
    if 800 <= sq_ft <= 2000:
        efficiency = 'high'
    elif 500 <= sq_ft <= 3000:
        efficiency = 'medium'
    else:
        efficiency = 'low'
    return {
        'size': sq_ft,
        'price_per_sqft': price_per_sqft,
        'efficiency': efficiency,
    }


def _amenities_factor(raw_amenities):
    """Return the amenities factor dict, or ``None`` when nothing is listed.

    Accepts a comma-separated string or a list/tuple; blank entries are
    ignored so that "pool,,gym" counts as two amenities.
    """
    if not raw_amenities:
        return None
    if isinstance(raw_amenities, (list, tuple)):
        entries = raw_amenities
    else:
        entries = str(raw_amenities).split(',')
    amenities_list = [str(entry).strip() for entry in entries]
    amenities_list = [entry for entry in amenities_list if entry]
    if not amenities_list:
        return None

    # 10% per amenity, capped at 100%.
    amenities_score = min(1.0, len(amenities_list) * 0.1)
    if amenities_score > 0.7:
        impact = 'high'
    elif amenities_score > 0.4:
        impact = 'medium'
    else:
        impact = 'low'
    return {
        'count': len(amenities_list),
        'score': amenities_score,
        'impact': impact,
    }


def analyze_price(data):
    """Assess whether a property listing's price looks plausible.

    Combines a zero-shot classification of the listing with rule-based checks
    on price per sq.ft., property age, size and amenities.

    Args:
        data: Mapping describing the listing. Keys read: ``market_value``,
            ``sq_ft``, ``property_type``, ``city``, ``state``, ``status``,
            ``year_built``, ``bedrooms``, ``bathrooms``, ``amenities``.

    Returns:
        A dict with the keys ``assessment``, ``confidence``, ``price``,
        ``formatted_price``, ``price_per_sqft``, ``formatted_price_per_sqft``,
        ``price_range``, ``location_price_assessment``, ``has_price``,
        ``market_trends``, ``price_factors``, ``risk_indicators`` and
        ``top_classifications``. When the price is missing or zero the
        assessment is ``'no price'``; when anything raises, the error is
        logged and the assessment is ``'error'``. Both cases return zeroed
        values with ``has_price`` set to ``False``.
    """
    try:
        price = _parse_price(data.get('market_value'))
        sq_ft = _parse_area(data.get('sq_ft'))
        price_per_sqft = price / sq_ft if sq_ft else 0

        if not price:
            # Nothing to assess; return before loading the model.
            return _fallback_result('no price', 'cannot assess')

        classifier = load_model("zero-shot-classification", "typeform/mobilebert-uncased-mnli")

        # Free-text summary of the listing that the classifier scores
        # against each candidate label.
        price_context = f"""
        Property Type: {data.get('property_type', '')}
        Location: {data.get('city', '')}, {data.get('state', '')}
        Size: {sq_ft} sq.ft.
        Price: ₹{price:,.2f}
        Price per sq.ft.: ₹{price_per_sqft:,.2f}
        Property Status: {data.get('status', '')}
        Year Built: {data.get('year_built', '')}
        Bedrooms: {data.get('bedrooms', '')}
        Bathrooms: {data.get('bathrooms', '')}
        Amenities: {data.get('amenities', '')}
        """

        price_result = classifier(price_context, _PRICE_CATEGORIES, multi_label=True)

        top_labels = price_result['labels'][:_MAX_CLASSIFICATIONS]
        top_scores = price_result['scores'][:_MAX_CLASSIFICATIONS]
        top_classifications = [
            {'classification': label, 'confidence': float(score)}
            for label, score in zip(top_labels, top_scores)
            if score > _MIN_CLASSIFICATION_SCORE
        ]

        # The price range is derived from the winning label only.
        price_range = 'unknown'
        if top_classifications:
            primary_class = top_classifications[0]['classification']
            for keyword, range_name in _PRICE_RANGE_KEYWORDS:
                if keyword in primary_class:
                    price_range = range_name
                    break

        location_assessment = "unknown"
        market_trends = {}
        if data.get('city') and price_per_sqft:
            location_assessment, market_trends = _assess_location(
                data['city'], price_per_sqft
            )

        price_factors = {}
        risk_indicators = []

        age_factor = _age_factor(data.get('year_built', 0))
        if age_factor is not None:
            price_factors['age_factor'] = age_factor

        if sq_ft > 0:
            price_factors['size_factor'] = _size_factor(sq_ft, price_per_sqft)
            if sq_ft < 300:
                risk_indicators.append('Unusually small property size')
            elif sq_ft > 10000:
                risk_indicators.append('Unusually large property size')

        amenities_factor = _amenities_factor(data.get('amenities'))
        if amenities_factor is not None:
            price_factors['amenities_factor'] = amenities_factor

        # Weighted confidence. Signals that are absent add nothing, and the
        # sum is still divided by the full weight total, so missing
        # information lowers the score.
        confidence_scores = []

        if top_classifications:
            confidence_scores.append(
                price_result['scores'][0] * _CONFIDENCE_WEIGHTS['primary_classification']
            )

        location_confidence = 0.8 if location_assessment == "reasonable" else 0.4
        confidence_scores.append(
            location_confidence * _CONFIDENCE_WEIGHTS['location_assessment']
        )

        recorded_age = price_factors.get('age_factor', {})
        if 'depreciation_factor' in recorded_age:
            confidence_scores.append(
                recorded_age['depreciation_factor'] * _CONFIDENCE_WEIGHTS['age_factor']
            )

        if 'size_factor' in price_factors:
            is_efficient = price_factors['size_factor']['efficiency'] == 'high'
            size_confidence = 0.8 if is_efficient else 0.6
            confidence_scores.append(size_confidence * _CONFIDENCE_WEIGHTS['size_factor'])

        if 'amenities_factor' in price_factors:
            confidence_scores.append(
                price_factors['amenities_factor']['score']
                * _CONFIDENCE_WEIGHTS['amenities_factor']
            )

        overall_confidence = sum(confidence_scores) / sum(_CONFIDENCE_WEIGHTS.values())

        if top_classifications:
            assessment = top_classifications[0]['classification']
        else:
            assessment = 'could not classify'

        return {
            'assessment': assessment,
            'confidence': float(overall_confidence),
            'price': price,
            'formatted_price': f"₹{price:,.0f}",
            'price_per_sqft': price_per_sqft,
            'formatted_price_per_sqft': f"₹{price_per_sqft:,.2f}",
            'price_range': price_range,
            'location_price_assessment': location_assessment,
            'has_price': True,
            'market_trends': market_trends,
            'price_factors': price_factors,
            'risk_indicators': risk_indicators,
            'top_classifications': top_classifications,
        }
    except Exception as e:
        # Top-level boundary: callers get an 'error' result, never an exception.
        logger.error(f"Error analyzing price: {str(e)}")
        return _fallback_result('error', 'error')