real_estate

Sleeping

App Files Files Community

real_estate / models /recommender_model.py

hardik8588

Update models/recommender_model.py

65271df verified 5 months ago

raw

history blame contribute delete

7.9 kB

	import pandas as pd
	import numpy as np
	from sklearn.preprocessing import MinMaxScaler
	from sklearn.metrics.pairwise import cosine_similarity

	class PropertyRecommender:
	    """Content-based property recommender driven by cosine similarity.

	    Every listing is turned into a numeric feature vector: one-hot property
	    type, one-hot location, min-max scaled bedrooms / bathrooms / square
	    footage, and one indicator column per amenity.  User preferences are
	    mapped into the same feature space and listings are ranked by cosine
	    similarity to that preference vector.

	    Attributes set by ``prepare_data``:
	        df: the listing table as loaded, plus a numeric ``Price`` column.
	        features: float feature matrix with one row per listing, in the
	            same row order as ``df``, indexed by ``Property ID`` (the index
	            is named ``property_id``).
	        scaler: the ``MinMaxScaler`` fitted on the numerical columns.
	    """

	    # Order matters: it is the column order the scaler is fitted with, and
	    # ``_scale_value`` uses the position to pick the matching coefficients.
	    _NUMERICAL_COLS = ('bedrooms', 'bathrooms', 'square_footage')

	    def __init__(self, data_path):
	        """Load the property CSV at ``data_path`` and build the features."""
	        self.df = pd.read_csv(data_path)
	        self.prepare_data()

	    def prepare_data(self):
	        """Build ``self.features`` and ``self.scaler`` from ``self.df``.

	        Also adds a numeric ``Price`` column to ``self.df`` (parsed from
	        ``Price (INR)``), which budget filtering and the output rely on.
	        """
	        # One indicator column per distinct amenity.  Filling missing values
	        # with '' first means a listing without amenities still gets a row
	        # (all zeros) instead of being absent from the amenity table.
	        amenities = (
	            self.df['Amenities'].fillna('').astype(str).str.get_dummies(sep=', ')
	        )

	        # Convert price to numeric once; every later price use reads this.
	        self.df['Price'] = pd.to_numeric(self.df['Price (INR)'])

	        # One-hot encode the categorical columns.
	        property_type = pd.get_dummies(self.df['Property Type'], prefix='type')
	        location = pd.get_dummies(self.df['Location'], prefix='location')

	        # Scale the numerical columns to [0, 1].  The scaler is kept on the
	        # instance so user preferences can be mapped with the same
	        # coefficients in ``_scale_value``.
	        raw_numeric = pd.DataFrame({
	            'bedrooms': self.df['Bedrooms'],
	            'bathrooms': self.df['Bathrooms'],
	            'square_footage': self.df['Square Footage'],
	        })[list(self._NUMERICAL_COLS)]
	        self.scaler = MinMaxScaler()
	        numeric = pd.DataFrame(
	            self.scaler.fit_transform(raw_numeric),
	            index=raw_numeric.index,
	            columns=raw_numeric.columns,
	        )

	        # A single float dtype avoids handing a mixed bool/int/float frame
	        # to the similarity computation.
	        features = pd.concat(
	            [property_type, location, numeric, amenities], axis=1
	        ).astype(float)
	        features.index = pd.Index(
	            self.df['Property ID'].to_numpy(), name='property_id'
	        )
	        self.features = features

	    @staticmethod
	    def _is_specified(value):
	        """Return True when a preference value expresses an actual choice.

	        Falsy values (``None``, ``""``, ``0``) and the literal ``"Any"``
	        mean "no preference".
	        """
	        return bool(value) and value != "Any"

	    @staticmethod
	    def _parse_budget(budget):
	        """Parse a ``"min-max"`` budget into ``(low, high)`` floats.

	        A single number is treated as a minimum with no upper bound.
	        Non-string values are converted with ``str()`` first.  Returns
	        ``None`` when the budget is missing or cannot be parsed, in which
	        case no budget filtering should be applied.
	        """
	        if not budget:
	            return None
	        parts = str(budget).split('-')
	        try:
	            low = float(parts[0].strip())
	            high = float(parts[1].strip()) if len(parts) > 1 else float('inf')
	        except ValueError:
	            return None
	        return low, high

	    def _scale_value(self, column, raw_value):
	        """Map ``raw_value`` into the scaled space of numerical ``column``.

	        Applies the fitted scaler's own transform (``x * scale_ + min_``)
	        and clips to [0, 1] so a request outside the range seen in the data
	        cannot dominate the preference vector.
	        """
	        pos = self._NUMERICAL_COLS.index(column)
	        scaled = raw_value * self.scaler.scale_[pos] + self.scaler.min_[pos]
	        return float(np.clip(scaled, 0.0, 1.0))

	    def _build_user_profile(self, preferences):
	        """Return the preference vector as a Series over ``features`` columns.

	        Columns the user expressed no (usable) preference for stay at 0.
	        """
	        profile = pd.Series(0.0, index=self.features.columns)

	        # Bedroom / bathroom counts go through the same scaling as the data.
	        for key in ('bedrooms', 'bathrooms'):
	            value = preferences.get(key)
	            if not self._is_specified(value):
	                continue
	            try:
	                profile[key] = self._scale_value(key, int(value))
	            except (ValueError, TypeError):
	                # Deliberate best effort: a value that is not a whole
	                # number (e.g. "3+") is treated as "no preference".
	                pass

	        # Property type and location select a one-hot column, if it exists.
	        for key, prefix in (('propertyType', 'type'), ('location', 'location')):
	            value = preferences.get(key)
	            if self._is_specified(value):
	                column = f"{prefix}_{str(value).strip()}"
	                if column in profile.index:
	                    profile[column] = 1.0

	        # Amenities unknown to the dataset are ignored.
	        for amenity in preferences.get('amenities') or []:
	            if amenity in profile.index:
	                profile[amenity] = 1.0

	        return profile

	    def _filter_by_budget(self, ranked_positions, budget):
	        """Keep the ranked row positions whose price lies within ``budget``.

	        Parameters:
	        -----------
	        ranked_positions: iterable of int
	            Row positions into ``self.df``, best match first.
	        budget: str
	            Budget in the format accepted by ``_parse_budget``.

	        Returns:
	        --------
	        list of int or None
	            In-budget positions in their original ranking order, or
	            ``None`` when there is no usable budget to filter by.
	        """
	        bounds = self._parse_budget(budget)
	        if bounds is None:
	            return None
	        low, high = bounds
	        prices = self.df['Price'].to_numpy()
	        return [int(pos) for pos in ranked_positions if low <= prices[pos] <= high]

	    @staticmethod
	    def _to_recommendation(property_id, row):
	        """Convert one listing row (a Series) into the output dictionary."""
	        amenities = row['Amenities']
	        # A missing amenities cell is NaN (a float), not a string.
	        amenity_list = (
	            [str(a) for a in amenities.split(', ')]
	            if isinstance(amenities, str) else []
	        )
	        return {
	            'id': str(property_id),
	            'title': f"{str(row['Property Type'])} in {str(row['Location'])}",
	            'type': str(row['Property Type']),
	            'price': int(row['Price']),
	            'location': str(row['Location']),
	            'bedrooms': int(row['Bedrooms']),
	            'bathrooms': int(row['Bathrooms']),
	            'squareFeet': int(row['Square Footage']),
	            'amenities': amenity_list,
	            'description': str(row['Property Description']),
	            'yearBuilt': str(row['Year Built']) if 'Year Built' in row.index else 'N/A',
	        }

	    def get_recommendations(self, preferences, top_n=5):
	        """
	        Get property recommendations based on user preferences

	        Parameters:
	        -----------
	        preferences: dict
	            Dictionary containing user preferences with keys (all optional;
	            ``"Any"``, ``""`` and ``None`` mean "no preference"):
	            - propertyType: str
	            - budget: str (price range "min-max", or a single minimum)
	            - location: str
	            - bedrooms: str or int
	            - bathrooms: str or int
	            - amenities: list of str

	        top_n: int
	            Number of recommendations to return

	        Returns:
	        --------
	        list of dict
	            At most ``top_n`` recommended properties with details, best
	            match first.  When a budget is given, the in-budget properties
	            are returned; if none is in budget, the top matches by
	            similarity are returned instead.
	        """
	        # Function-scope import: the module's import block does not bring
	        # ``logging`` into scope.
	        import logging
	        logger = logging.getLogger(__name__)

	        if top_n <= 0 or len(self.features) == 0:
	            return []
	        preferences = preferences or {}

	        profile = self._build_user_profile(preferences)
	        logger.info("Total properties considered: %d", len(self.df))

	        # Rank every listing by similarity.  A stable sort on the negated
	        # scores keeps dataset order among ties, so results are repeatable.
	        scores = cosine_similarity(
	            profile.to_numpy().reshape(1, -1), self.features.to_numpy()
	        )[0]
	        ranked = np.argsort(-scores, kind='stable')
	        logger.info("Top similarity scores: %s", scores[ranked[:10]].tolist())

	        # Budget is applied to the full ranking, not just the first few
	        # candidates, so a lower-ranked in-budget listing is never lost.
	        in_budget = self._filter_by_budget(ranked, preferences.get('budget'))
	        if in_budget is None:
	            chosen = ranked[:top_n]
	        elif in_budget:
	            chosen = in_budget[:top_n]
	        else:
	            logger.info("No properties matched budget filter, returning top N by similarity instead.")
	            chosen = ranked[:top_n]
	        chosen = [int(pos) for pos in chosen]
	        logger.info(
	            "Final recommended indices: %s",
	            [self.features.index[pos] for pos in chosen],
	        )

	        # ``features`` rows are in ``df`` row order, so positions address both.
	        return [
	            self._to_recommendation(self.features.index[pos], self.df.iloc[pos])
	            for pos in chosen
	        ]