# real_estate/models/recommender_model.py
# hardik8588's picture
# Update models/recommender_model.py
# 65271df verified
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity
class PropertyRecommender:
    """Content-based property recommender over a listings table.

    Each listing becomes a numeric feature vector (one-hot property type and
    location, min-max scaled bedrooms / bathrooms / square footage, and
    amenity counts). A user's preferences are encoded in the same feature
    space and listings are ranked by cosine similarity to that vector.

    The listings table must provide the columns ``Property ID``,
    ``Property Type``, ``Location``, ``Bedrooms``, ``Bathrooms``,
    ``Square Footage``, ``Amenities`` (``', '``-separated), ``Price (INR)``
    and ``Property Description``; ``Year Built`` is optional.
    """

    # Feature column name -> source column in the listings table.
    _NUMERIC_FEATURES = {
        'bedrooms': 'Bedrooms',
        'bathrooms': 'Bathrooms',
        'square_footage': 'Square Footage',
    }
    # Preference strings that mean "no preference".
    _NO_PREFERENCE = ("Any", "")

    def __init__(self, data_path):
        """Initialize the recommender with property data read from *data_path* (CSV)."""
        self.df = pd.read_csv(data_path)
        self.prepare_data()

    def prepare_data(self):
        """Prepare and transform the data for recommendation.

        Adds a numeric ``Price`` column to ``self.df`` and builds
        ``self.features``: one row per listing (same row order as
        ``self.df``), indexed by property id, all columns float.
        """
        # One column per amenity. Listings with a missing amenities value get
        # an all-zero amenity row instead of NaN.
        amenity_lists = self.df['Amenities'].fillna('').astype(str).str.split(', ')
        exploded = amenity_lists.explode()
        exploded = exploded[exploded != '']
        amenities = (
            pd.get_dummies(exploded, dtype=float)
            .groupby(level=0)
            .sum()
            .reindex(self.df.index, fill_value=0.0)
        )
        self._amenity_columns = set(amenities.columns)

        # Convert price to numeric; used for budget filtering and output.
        self.df['Price'] = pd.to_numeric(self.df['Price (INR)'])

        # One-hot encode property type and location.
        property_type = pd.get_dummies(self.df['Property Type'], prefix='type', dtype=float)
        location = pd.get_dummies(self.df['Location'], prefix='location', dtype=float)

        # Min-max scale the numeric features to [0, 1]. The fitted minimum and
        # range are kept so user preferences can be scaled the same way.
        numeric = pd.DataFrame(
            {
                feature: pd.to_numeric(self.df[column])
                for feature, column in self._NUMERIC_FEATURES.items()
            },
            index=self.df.index,
        ).astype(float)
        self._numeric_min = numeric.min()
        value_range = numeric.max() - self._numeric_min
        # A constant (or empty) column has no usable range; dividing by 1
        # maps every value in it to 0.
        self._numeric_range = value_range.where(value_range > 0, 1.0)
        numeric = (numeric - self._numeric_min) / self._numeric_range

        features = pd.concat([property_type, location, numeric, amenities], axis=1)
        features = features.fillna(0.0)
        # Index by property id; row order still matches self.df.
        features.index = pd.Index(self.df['Property ID'], name='property_id')
        self.features = features

    def get_recommendations(self, preferences, top_n=5):
        """
        Get property recommendations based on user preferences

        Listings are ranked by cosine similarity to the preference vector
        (ties keep the listing order of the data). When a valid budget is
        given, the whole ranking is filtered by price before the top
        ``top_n`` are taken; if no listing is within budget, the unfiltered
        top ``top_n`` are returned instead.

        Parameters:
        -----------
        preferences: dict
            Dictionary containing user preferences with keys (all optional;
            ``"Any"``, ``""`` and ``None`` mean no preference):
            - propertyType: str
            - budget: str (price range ``"min-max"``; a single number is
              treated as a minimum; unparsable values are ignored)
            - location: str
            - bedrooms: str
            - bathrooms: str
            - amenities: list of str
        top_n: int
            Number of recommendations to return

        Returns:
        --------
        list of dict
            List of recommended properties with details (at most ``top_n``)
        """
        # Function-scope import: logging is not part of the module's import block.
        import logging
        logger = logging.getLogger(__name__)

        preferences = preferences or {}
        if top_n <= 0 or len(self.features) == 0:
            return []
        logger.info("Total properties before filtering: %d", len(self.df))

        profile = self._build_profile(preferences)
        scores = self._cosine_scores(
            self.features.to_numpy(dtype=float),
            profile.to_numpy(dtype=float),
        )
        # Stable descending sort: equal scores keep their data order.
        ranked = np.argsort(-scores, kind='stable')
        logger.info("Top similarity scores: %s", scores[ranked[:10]].tolist())
        logger.info(
            "Top indices before budget filter: %s",
            list(self.features.index[ranked[:top_n * 2]]),
        )

        bounds = self._parse_budget(preferences.get('budget'))
        if bounds is not None:
            low, high = bounds
            prices = self.df['Price'].to_numpy(dtype=float)[ranked]
            affordable = ranked[(prices >= low) & (prices <= high)]
            logger.info(
                "Top indices after budget filter: %s",
                list(self.features.index[affordable[:top_n]]),
            )
            if affordable.size:
                ranked = affordable
            else:
                # If budget filter removes all, fallback to top N by similarity
                logger.info(
                    "No properties matched budget filter, "
                    "returning top N by similarity instead."
                )

        chosen = ranked[:top_n]
        logger.info("Final recommended indices: %s", list(self.features.index[chosen]))
        return [self._describe(int(position)) for position in chosen]

    def _build_profile(self, preferences):
        """Encode *preferences* as a vector over the feature columns."""
        profile = pd.Series(0.0, index=self.features.columns)

        for feature in ('bedrooms', 'bathrooms'):
            count = self._coerce_count(preferences.get(feature))
            if count is not None:
                profile[feature] = self._scale_numeric(feature, count)

        for key, prefix in (('propertyType', 'type'), ('location', 'location')):
            choice = preferences.get(key)
            if self._is_specified(choice):
                column = f"{prefix}_{str(choice).strip()}"
                # Values not present in the data have no column and are ignored.
                if column in profile.index:
                    profile[column] = 1.0

        wanted = preferences.get('amenities') or []
        if isinstance(wanted, str):
            wanted = [wanted]
        for amenity in wanted:
            if amenity in self._amenity_columns:
                profile[amenity] = 1.0
        return profile

    def _scale_numeric(self, feature, value):
        """Scale *value* like the listing column *feature*, clipped to [0, 1]."""
        scaled = (value - self._numeric_min[feature]) / self._numeric_range[feature]
        if not np.isfinite(scaled):
            return 0.0
        return float(min(max(scaled, 0.0), 1.0))

    @classmethod
    def _is_specified(cls, value):
        """Return True when *value* expresses an actual preference."""
        if value is None:
            return False
        if isinstance(value, str):
            return value.strip() not in cls._NO_PREFERENCE
        return bool(value)

    @classmethod
    def _coerce_count(cls, value):
        """Return *value* as an int, or None when unspecified or not a number."""
        if not cls._is_specified(value):
            return None
        try:
            return int(value)
        except (ValueError, TypeError):
            return None

    @staticmethod
    def _parse_budget(budget):
        """Parse ``"min-max"`` into ``(min, max)``; None when absent or invalid."""
        if not budget:
            return None
        parts = str(budget).split('-')
        try:
            low = float(parts[0].strip())
            high = float(parts[1].strip()) if len(parts) > 1 else float('inf')
        except ValueError:
            return None
        return low, high

    @staticmethod
    def _cosine_scores(matrix, query):
        """Cosine similarity of each row of *matrix* to *query* (0 for zero vectors)."""
        dots = matrix @ query
        norms = np.linalg.norm(matrix, axis=1) * np.linalg.norm(query)
        return np.divide(dots, norms, out=np.zeros_like(dots), where=norms > 0)

    def _describe(self, position):
        """Build the result dict for the listing at row *position* of ``self.df``."""
        row = self.df.iloc[position]
        raw_amenities = row['Amenities']
        amenities = [] if pd.isna(raw_amenities) else str(raw_amenities).split(', ')
        return {
            'id': str(self.features.index[position]),
            'title': f"{str(row['Property Type'])} in {str(row['Location'])}",
            'type': str(row['Property Type']),
            'price': int(row['Price']),
            'location': str(row['Location']),
            'bedrooms': int(row['Bedrooms']),
            'bathrooms': int(row['Bathrooms']),
            'squareFeet': int(row['Square Footage']),
            'amenities': amenities,
            'description': str(row['Property Description']),
            'yearBuilt': str(row['Year Built']) if 'Year Built' in row else 'N/A',
        }