# Spaces: Sleeping
import logging

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
class PropertyRecommender:
    """Content-based property recommender built on a CSV of listings.

    Every listing is encoded as a feature vector made of one-hot property
    type and location columns, min-max scaled bedrooms / bathrooms / square
    footage, and one flag column per amenity. User preferences are encoded
    in the same feature space and listings are ranked by cosine similarity
    to that preference vector.

    The min-max parameters are kept on the instance so that the bedroom and
    bathroom preferences can be mapped onto exactly the same scale as the
    listings.
    """

    # Feature column name -> source column in the listings table.
    _NUMERIC_COLUMNS = {
        'bedrooms': 'Bedrooms',
        'bathrooms': 'Bathrooms',
        'square_footage': 'Square Footage',
    }

    def __init__(self, data_path):
        """Initialize the recommender with property data.

        Parameters:
        -----------
        data_path:
            Anything accepted by ``pandas.read_csv`` (path, URL, buffer).
        """
        self.df = pd.read_csv(data_path)
        self.prepare_data()

    def prepare_data(self):
        """Build ``self.features`` (one row per listing) from ``self.df``.

        Side effects: adds a numeric ``Price`` column to ``self.df`` and
        stores the scaling parameters and amenity column names used later by
        ``get_recommendations``.
        """
        df = self.df

        # Numeric copy of the price, used for budget filtering and output.
        df['Price'] = pd.to_numeric(df['Price (INR)'])

        property_type = pd.get_dummies(df['Property Type'], prefix='type')
        location = pd.get_dummies(df['Location'], prefix='location')

        # Min-max scale the numeric columns. A constant (or empty) column has
        # no spread, so its range is replaced by 1 to avoid dividing by zero.
        numeric = pd.DataFrame(
            {
                name: pd.to_numeric(df[source], errors='coerce')
                for name, source in self._NUMERIC_COLUMNS.items()
            },
            index=df.index,
        ).astype(float)
        col_min = numeric.min()
        col_range = numeric.max() - col_min
        col_range = col_range.where(col_range > 0, 1.0)
        col_min = col_min.fillna(0.0)
        self._numeric_min = col_min
        self._numeric_range = col_range
        scaled = (numeric - col_min) / col_range

        # One column per amenity. Listings without any amenity simply get an
        # all-zero row instead of NaN.
        tokens = df['Amenities'].fillna('').astype(str).str.split(', ').explode()
        tokens = tokens[tokens != '']
        amenities = (
            pd.get_dummies(tokens)
            .groupby(level=0)
            .sum()
            .reindex(df.index, fill_value=0)
        )
        self._amenity_columns = list(amenities.columns)

        features = pd.concat(
            [property_type, location, scaled, amenities], axis=1
        )
        # A single float matrix without NaN is required for the similarity
        # computation; missing numeric values count as 0.
        features = features.astype(float).fillna(0.0)
        features.index = pd.Index(df['Property ID'], name='property_id')
        self.features = features

    def get_recommendations(self, preferences, top_n=5):
        """
        Get property recommendations based on user preferences

        Parameters:
        -----------
        preferences: dict
            Dictionary containing user preferences with keys (all optional;
            ``None``, ``""`` and ``"Any"`` mean "no preference"):
            - propertyType: str
            - budget: str (price range: "min-max", "min", "min+" or "-max")
            - location: str
            - bedrooms: str or int
            - bathrooms: str or int
            - amenities: list of str
        top_n: int
            Number of recommendations to return

        Returns:
        --------
        list of dict
            List of recommended properties with details, best match first.
            When a budget is given and at least one listing falls inside it,
            only in-budget listings are returned (possibly fewer than
            ``top_n``); if no listing matches the budget, the budget is
            ignored and the best matches by similarity are returned.
        """
        logger = logging.getLogger(__name__)
        columns = self.features.columns
        profile = pd.Series(0.0, index=columns)

        # Bedrooms / bathrooms use the same min-max scale as the listings.
        # NOTE: a request equal to the dataset minimum maps to 0 and so adds
        # nothing to the cosine score.
        for feature in ('bedrooms', 'bathrooms'):
            wanted = self._as_int(preferences.get(feature))
            if wanted is not None and feature in columns:
                profile[feature] = (
                    (wanted - self._numeric_min[feature])
                    / self._numeric_range[feature]
                )

        # One-hot preferences: only set when the value exists in the data.
        for prefix, key in (('type', 'propertyType'), ('location', 'location')):
            value = preferences.get(key)
            if self._has_preference(value):
                column = f"{prefix}_{str(value).strip()}"
                if column in columns:
                    profile[column] = 1.0

        known_amenities = set(self._amenity_columns)
        for amenity in preferences.get('amenities') or []:
            if amenity in known_amenities:
                profile[amenity] = 1.0

        scores = self._cosine_scores(
            self.features.to_numpy(dtype=float),
            profile.to_numpy(dtype=float),
        )

        # Apply the budget to ALL listings before ranking, so in-budget
        # listings are never lost to a similarity cut-off.
        eligible = np.ones(len(scores), dtype=bool)
        bounds = self._parse_budget(preferences.get('budget'))
        if bounds is not None:
            prices = self.df['Price'].to_numpy(dtype=float)
            in_budget = (prices >= bounds[0]) & (prices <= bounds[1])
            if in_budget.any():
                eligible = in_budget
            else:
                logger.info(
                    "No properties matched budget filter, "
                    "returning top N by similarity instead."
                )

        # Stable sort keeps the original row order among equal scores.
        ranking = np.argsort(-scores, kind='stable')
        chosen = [int(pos) for pos in ranking if eligible[pos]][:max(top_n, 0)]
        logger.info(
            "Final recommended indices: %s",
            [self.features.index[pos] for pos in chosen],
        )

        # Rows of self.features are in the same order as rows of self.df, so
        # positions can be used directly instead of re-scanning by id.
        recommendations = []
        for pos in chosen:
            row = self.df.iloc[pos]
            recommendations.append({
                'id': str(self.features.index[pos]),
                'title': f"{str(row['Property Type'])} in {str(row['Location'])}",
                'type': str(row['Property Type']),
                'price': int(row['Price']),
                'location': str(row['Location']),
                'bedrooms': int(row['Bedrooms']),
                'bathrooms': int(row['Bathrooms']),
                'squareFeet': int(row['Square Footage']),
                'amenities': self._split_amenities(row['Amenities']),
                'description': str(row['Property Description']),
                'yearBuilt': str(row['Year Built']) if 'Year Built' in row else 'N/A',
            })
        return recommendations

    @staticmethod
    def _has_preference(value):
        """Return True unless *value* means "no preference"."""
        if value is None:
            return False
        if isinstance(value, str):
            return value.strip() not in ("", "Any")
        return bool(value)

    @classmethod
    def _as_int(cls, value):
        """Return *value* as an int, or None if absent or not an integer."""
        if not cls._has_preference(value):
            return None
        try:
            return int(value)
        except (ValueError, TypeError):
            return None

    @staticmethod
    def _parse_budget(budget):
        """Parse a budget into ``(low, high)``; None if absent or invalid.

        Accepts "min-max", "min" or "min+" (no upper bound) and "-max"
        (no lower bound). Thousands separators (",") are ignored.
        """
        if budget is None:
            return None
        text = str(budget).replace(',', '').strip()
        if not text:
            return None
        parts = text.split('-')
        low_text = parts[0].strip().rstrip('+').strip()
        high_text = parts[1].strip() if len(parts) > 1 else ''
        if not low_text and not high_text:
            return None
        try:
            low = float(low_text) if low_text else 0.0
            high = float(high_text) if high_text else float('inf')
        except ValueError:
            return None
        return low, high

    @staticmethod
    def _split_amenities(raw):
        """Split the raw amenities cell into a list; [] when it is missing."""
        if not isinstance(raw, str) or not raw:
            return []
        return [str(item) for item in raw.split(', ')]

    @staticmethod
    def _cosine_scores(matrix, vector):
        """Cosine similarity of every row of *matrix* with *vector*.

        Rows (or a vector) with zero norm get a similarity of 0.
        """
        denom = np.linalg.norm(matrix, axis=1) * np.linalg.norm(vector)
        scores = np.zeros(matrix.shape[0], dtype=float)
        np.divide(matrix @ vector, denom, out=scores, where=denom > 0)
        return scores