# NOTE: page residue from the hosting site ("Spaces: Sleeping") -- not part of the program.
import base64
import io
import json
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
class MarketAnalysisModel:
    """Neighborhood-level housing market analysis.

    Loads a CSV with one row per (neighborhood, time period) and produces a
    dictionary with headline metrics, a ranked neighborhood list, text
    insights and base64-encoded PNG charts.  Every public entry point falls
    back to hard-coded placeholder data instead of raising.
    """

    # Columns coerced to numeric when the CSV is loaded.
    _NUMERIC_COLUMNS = (
        'Median Home Price', 'Number of Sales', 'Days on Market',
        'Price per Square Foot', 'Inventory Levels', 'Year-over-Year Price Change',
    )

    # How each metric is aggregated per neighborhood within one time period.
    _AGGREGATIONS = {
        'Median Home Price': 'mean',
        'Number of Sales': 'sum',
        'Days on Market': 'mean',
        'Price per Square Foot': 'mean',
        'Inventory Levels': 'mean',
        'Year-over-Year Price Change': 'mean',
    }

    # Change column -> metric column it is derived from.
    _CHANGE_SOURCES = {
        'price_change': 'Median Home Price',
        'sales_change': 'Number of Sales',
        'dom_change': 'Days on Market',
        'ppsf_change': 'Price per Square Foot',
        'inventory_change': 'Inventory Levels',
    }

    _MONTH_NAMES = (
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December',
    )

    def __init__(self, data_path=None):
        """Create an empty model and, if data_path is given, load it immediately."""
        self.df = None
        self.neighborhoods = []
        self.latest_data = None
        self.trends_cache = {}  # get_market_trends results keyed by "<location>_<months>"
        if data_path:
            self.load_data(data_path)

    def load_data(self, data_path):
        """Read the market CSV, clean it and pre-compute per-neighborhood latest rows.

        data_path is handed straight to pandas.read_csv.  On any failure a
        single-row 'Default' dataset is installed instead of raising.
        """
        # Results computed from a previous dataset must not be served for the new one.
        self.trends_cache = {}
        try:
            df = pd.read_csv(data_path)
            # Convert time period to datetime for time series analysis
            df['Time Period'] = pd.to_datetime(df['Time Period'])
            # Unparseable numbers become NaN, then are filled below
            for col in self._NUMERIC_COLUMNS:
                df[col] = pd.to_numeric(df[col], errors='coerce')
            # Assign the filled column back: in-place fillna on a column selection
            # is chained assignment and does not update the frame under Copy-on-Write.
            for col in self._NUMERIC_COLUMNS:
                fill_value = 0 if col == 'Year-over-Year Price Change' else df[col].median()
                df[col] = df[col].fillna(fill_value)
            # Sort by neighborhood and time
            self.df = df.sort_values(['Neighborhood', 'Time Period'])
            self.neighborhoods = self.df['Neighborhood'].unique().tolist()
            # Row with the most recent time period for each neighborhood
            self.latest_data = self.df.loc[self.df.groupby('Neighborhood')['Time Period'].idxmax()]
            print(f"Successfully loaded data with {len(self.df)} records and {len(self.neighborhoods)} neighborhoods")
        except Exception as e:
            print(f"Error loading data: {str(e)}")
            # Create minimal dataframe if loading fails
            self.df = pd.DataFrame({
                'Neighborhood': ['Default'],
                'Time Period': [datetime.now()],
                'Median Home Price': [10000000],
                'Number of Sales': [100],
                'Days on Market': [30],
                'Price per Square Foot': [8000],
                'Inventory Levels': [200],
                'Year-over-Year Price Change': [5.0]
            })
            self.neighborhoods = ['Default']
            self.latest_data = self.df.copy()

    def _resolve_location(self, location):
        """Return the neighborhood name as stored in the data, or None if unknown/empty."""
        if not location:
            return None
        if location in self.neighborhoods:
            return location
        wanted = str(location).lower()
        for name in self.neighborhoods:
            if str(name).lower() == wanted:
                return name
        return None

    def get_market_trends(self, location=None, months=12):
        """
        Get comprehensive market trends data.

        Parameters:
        -----------
        location: str, optional
            Neighborhood to analyze (matched case-insensitively).  When empty
            or not present in the data, all neighborhoods are analyzed.
        months: int, optional
            Number of months before the latest period to include.

        Returns:
        --------
        dict
            Keys "marketTrends", "hotNeighborhoods", "insights" and "charts".
            Hard-coded fallback data is returned if anything fails.
        """
        cache_key = f"{location}_{months}"
        if cache_key in self.trends_cache:
            return self.trends_cache[cache_key]
        try:
            if self.df is None or self.df.empty:
                raise ValueError("Data not loaded or empty")
            print(f"Analyzing location: {location}, available neighborhoods: {self.neighborhoods[:5]}...")

            # Use the canonical name everywhere below so the filtered data and the
            # downstream helpers agree on which neighborhood is being analyzed.
            resolved = self._resolve_location(location)
            if resolved is not None:
                if resolved != location:
                    print(f"Found case-insensitive match: {resolved}")
                filtered_data = self.df[self.df['Neighborhood'] == resolved].copy()
                location_latest = self.latest_data[self.latest_data['Neighborhood'] == resolved].copy()
                print(f"Found data for {resolved}: {len(filtered_data)} records")
            else:
                if location:
                    print(f"Location '{location}' not found in data, using all data")
                filtered_data = self.df.copy()
                location_latest = self.latest_data.copy()
            if filtered_data.empty:
                raise ValueError(f"No data available for location: {location}")

            # Restrict the time series to the requested window
            latest_date = filtered_data['Time Period'].max()
            start_date = latest_date - pd.DateOffset(months=months)
            recent_data = filtered_data[filtered_data['Time Period'] >= start_date].copy()
            if recent_data.empty:
                recent_data = filtered_data.tail(min(months, len(filtered_data))).copy()

            response = {
                "marketTrends": self._calculate_market_metrics(filtered_data, location_latest, recent_data),
                "hotNeighborhoods": self._identify_hot_neighborhoods(resolved),
                "insights": self._generate_insights(location_latest, recent_data, resolved),
                "charts": self._generate_charts(recent_data, resolved)
            }
            self.trends_cache[cache_key] = response
            return response
        except Exception as e:
            print(f"Error in get_market_trends: {str(e)}")
            # Return fallback data
            return self._get_fallback_data(location)

    @staticmethod
    def _with_default_changes(frame):
        """Add change columns when no previous period exists: price uses YoY, the rest are 0."""
        frame['price_change'] = frame['Year-over-Year Price Change']
        frame['sales_change'] = 0.0
        frame['dom_change'] = 0.0
        frame['ppsf_change'] = 0.0
        frame['inventory_change'] = 0.0
        return frame

    def _period_over_period(self, recent_data):
        """Return per-neighborhood metrics for the latest period with % change vs the one before.

        Raises ValueError when fewer than two distinct periods exist or when no
        neighborhood has rows in both of them.
        """
        periods = recent_data['Time Period'].sort_values(ascending=False).unique()
        if len(periods) < 2:
            raise ValueError("Not enough unique time periods in data")
        current_rows = recent_data[recent_data['Time Period'] == periods[0]]
        previous_rows = recent_data[recent_data['Time Period'] == periods[1]]

        previous_aggs = {col: how for col, how in self._AGGREGATIONS.items()
                         if col != 'Year-over-Year Price Change'}
        current_metrics = current_rows.groupby('Neighborhood').agg(dict(self._AGGREGATIONS)).reset_index()
        previous_metrics = previous_rows.groupby('Neighborhood').agg(previous_aggs).reset_index()
        merged = pd.merge(current_metrics, previous_metrics,
                          on='Neighborhood', suffixes=('', '_prev'))
        if merged.empty:
            raise ValueError("No neighborhood has data in both of the two latest periods")

        for change_col, source_col in self._CHANGE_SOURCES.items():
            previous = merged[f'{source_col}_prev']
            merged[change_col] = (merged[source_col] - previous) / previous * 100
        # A previous value of 0 yields inf/NaN; report "no change" instead of
        # poisoning the averages and the int() conversions downstream.
        change_cols = list(self._CHANGE_SOURCES)
        merged[change_cols] = merged[change_cols].replace([np.inf, -np.inf], np.nan).fillna(0.0)
        return merged

    def _calculate_market_metrics(self, data, latest_data, recent_data):
        """Build the "marketTrends" list: one dict per metric with value, change and isPositive.

        data is the full (optionally location-filtered) history, latest_data the
        latest row per neighborhood and recent_data the rows inside the analysis
        window.  Values are averaged across neighborhoods when there are several.
        """
        try:
            if len(recent_data) >= 2:
                try:
                    metrics = self._period_over_period(recent_data)
                except Exception as time_error:
                    print(f"Error processing time periods: {str(time_error)}")
                    # Instead of a hard fallback, aggregate whatever history we have
                    metrics = self._with_default_changes(
                        data.groupby('Neighborhood').agg(dict(self._AGGREGATIONS)).reset_index())
            else:
                # If not enough data, use the latest data with default changes
                metrics = self._with_default_changes(latest_data.copy())

            # Calculate averages across neighborhoods if needed
            if len(metrics) > 1:
                avg_metrics = metrics.mean(numeric_only=True)
            elif not metrics.empty:
                avg_metrics = metrics.iloc[0]
            else:
                avg_metrics = pd.Series({
                    'Median Home Price': 10000000,
                    'Number of Sales': 100,
                    'Days on Market': 30,
                    'Price per Square Foot': 8000,
                    'Inventory Levels': 200,
                    'Year-over-Year Price Change': 5.0,
                    'price_change': 5.0,
                    'sales_change': 0.0,
                    'dom_change': 0.0,
                    'ppsf_change': 5.0,
                    'inventory_change': 0.0
                })

            def entry(metric, value, change, lower_is_better=False):
                change = float(change)
                return {
                    "metric": metric,
                    "value": value,
                    "change": change,
                    "isPositive": change < 0 if lower_is_better else change > 0
                }

            yoy = float(avg_metrics['Year-over-Year Price Change'])
            # Format the metrics for the frontend
            return [
                entry("Median Home Price", float(avg_metrics['Median Home Price']), avg_metrics['price_change']),
                entry("Number of Sales", int(avg_metrics['Number of Sales']), avg_metrics['sales_change']),
                # Lower is better for DOM
                entry("Days on Market", int(avg_metrics['Days on Market']), avg_metrics['dom_change'],
                      lower_is_better=True),
                entry("Price per Square Foot", float(avg_metrics['Price per Square Foot']),
                      avg_metrics['ppsf_change']),
                # Lower inventory typically means seller's market
                entry("Inventory Levels", int(avg_metrics['Inventory Levels']), avg_metrics['inventory_change'],
                      lower_is_better=True),
                entry("Year-over-Year Price Change", yoy, yoy)
            ]
        except Exception as e:
            print(f"Error calculating market metrics: {str(e)}")
            # Return fallback metrics
            return self._get_fallback_metrics()

    @staticmethod
    def _neighborhood_card(row):
        """Format one neighborhood (Series or dict) for the frontend."""
        return {
            "name": row['Neighborhood'],
            "growth": f"{row['Year-over-Year Price Change']:.1f}%",
            "medianPrice": float(row['Median Home Price']),
            "pricePerSqFt": float(row['Price per Square Foot'])
        }

    def _identify_hot_neighborhoods(self, location=None):
        """Return up to five neighborhoods for the "hotNeighborhoods" list.

        With a location, the neighborhoods most similar to it are returned.
        Otherwise neighborhoods are ranked by a weighted "hotness" score built
        from their latest rows.  Fallback data is returned on error or when
        there is at most one neighborhood.
        """
        try:
            if location:
                # If location is specified, return similar neighborhoods
                return self._find_similar_neighborhoods(location)
            latest_data = self.latest_data.copy()
            if len(latest_data) <= 1:
                return self._get_fallback_neighborhoods()
            # The ranking uses only this score; no clustering is needed for it.
            latest_data['HotScore'] = (
                latest_data['Year-over-Year Price Change'] * 0.4 +   # Higher price growth is better
                (100 - latest_data['Days on Market']) * 0.3 +        # Lower days on market is better
                latest_data['Price per Square Foot'] / 1000 * 0.2 -  # Higher price per sq ft is better
                latest_data['Inventory Levels'] / 100 * 0.1          # Lower inventory is better (seller's market)
            )
            ranked = latest_data.dropna(subset=['HotScore']).sort_values('HotScore', ascending=False).head(5)
            if ranked.empty:
                return self._get_fallback_neighborhoods()
            return [self._neighborhood_card(row) for _, row in ranked.iterrows()]
        except Exception as e:
            print(f"Error identifying hot neighborhoods: {str(e)}")
            return self._get_fallback_neighborhoods()

    def _find_similar_neighborhoods(self, target_location):
        """Return up to five neighborhoods closest to target_location.

        Closeness is 1 / (1 + sum of relative differences) in median price, days
        on market and price per square foot of the latest rows.  Fallback data is
        returned when the target is unknown or an error occurs.
        """
        try:
            target_name = self._resolve_location(target_location)
            if target_name is None:
                return self._get_fallback_neighborhoods()
            target = self.latest_data[self.latest_data['Neighborhood'] == target_name].iloc[0]

            def relative_gap(value, reference, floor=None):
                denominator = reference if floor is None else max(floor, reference)
                if not denominator:
                    # Zero reference: identical values are a perfect match, anything else is not comparable
                    return 0.0 if value == reference else float('inf')
                return abs(value - reference) / denominator

            scored = []
            for _, row in self.latest_data.iterrows():
                if row['Neighborhood'] == target_name:
                    continue
                gap = (
                    relative_gap(row['Median Home Price'], target['Median Home Price']) +
                    relative_gap(row['Days on Market'], target['Days on Market'], floor=1) +
                    relative_gap(row['Price per Square Foot'], target['Price per Square Foot'])
                )
                # Higher score means more similar
                scored.append((1 / (1 + gap), row))
            # sorted() is stable, so ties keep their order in latest_data
            closest = sorted(scored, key=lambda pair: pair[0], reverse=True)[:5]
            return [self._neighborhood_card(row) for _, row in closest]
        except Exception as e:
            print(f"Error finding similar neighborhoods: {str(e)}")
            return self._get_fallback_neighborhoods()

    @staticmethod
    def _location_insights(latest_data, recent_data, location):
        """Sentences about a single neighborhood: yearly change, selling speed, monthly trend."""
        sentences = []
        location_data = latest_data[latest_data['Neighborhood'] == location]
        if not location_data.empty:
            avg_price_change = location_data['Year-over-Year Price Change'].mean()
            avg_dom = location_data['Days on Market'].mean()
            sentences.append(f"{location} real estate has shown {abs(avg_price_change):.1f}% "
                             f"{'growth' if avg_price_change > 0 else 'decline'} in the past year.")
            if avg_dom < 30:
                sentences.append(f"Properties in {location} are selling quickly, averaging just {avg_dom:.0f} days on market.")
            elif avg_dom > 60:
                sentences.append(f"Properties in {location} are taking longer to sell, averaging {avg_dom:.0f} days on market.")
        # Price trend analysis
        if len(recent_data) >= 3:
            location_recent = recent_data[recent_data['Neighborhood'] == location]
            if not location_recent.empty:
                price_trend = location_recent['Median Home Price'].pct_change().mean() * 100
                if abs(price_trend) > 1:
                    sentences.append(f"Monthly price trend in {location} shows a {abs(price_trend):.1f}% "
                                     f"{'increase' if price_trend > 0 else 'decrease'} on average.")
        return sentences

    @staticmethod
    def _market_insights(latest_data):
        """Sentences about the whole market, built from the latest row of every neighborhood."""
        sentences = []
        avg_price_change = latest_data['Year-over-Year Price Change'].mean()
        if avg_price_change > 5:
            sentences.append(f"The Delhi real estate market is showing strong growth with prices increasing {avg_price_change:.1f}% year-over-year.")
        elif avg_price_change > 0:
            sentences.append(f"The Delhi real estate market is stable with modest price appreciation of {avg_price_change:.1f}%.")
        else:
            sentences.append(f"The Delhi real estate market is experiencing a slight correction with prices decreasing {abs(avg_price_change):.1f}% year-over-year.")
        # Identify neighborhoods with exceptional growth
        high_growth = latest_data[latest_data['Year-over-Year Price Change'] > 7].sort_values('Year-over-Year Price Change', ascending=False)
        if not high_growth.empty:
            top_growth = high_growth.iloc[0]
            sentences.append(f"{top_growth['Neighborhood']} is showing exceptional growth with prices up {top_growth['Year-over-Year Price Change']:.1f}% year-over-year.")
        # Identify neighborhoods with quick sales
        quick_sales = latest_data[latest_data['Days on Market'] < 30].sort_values('Days on Market')
        if not quick_sales.empty:
            top_quick = quick_sales.iloc[0]
            sentences.append(f"Properties in {top_quick['Neighborhood']} are selling quickly, with an average of just {top_quick['Days on Market']:.0f} days on market.")
        # Price per square foot analysis (skipped when there are no rows at all)
        if not latest_data.empty:
            high_ppsf = latest_data.sort_values('Price per Square Foot', ascending=False).iloc[0]
            sentences.append(f"{high_ppsf['Neighborhood']} commands the highest price per square foot at ₹{high_ppsf['Price per Square Foot']:,.0f}.")
        return sentences

    def _seasonal_insights(self, recent_data):
        """One sentence naming the highest/lowest-priced calendar months, if they differ by >5%."""
        if len(recent_data) < 6:
            return []
        monthly_avg = recent_data.groupby(recent_data['Time Period'].dt.month)['Median Home Price'].mean()
        if monthly_avg.empty or not monthly_avg.max() > monthly_avg.min() * 1.05:  # 5% difference
            return []
        high_month = self._MONTH_NAMES[int(monthly_avg.idxmax()) - 1]
        low_month = self._MONTH_NAMES[int(monthly_avg.idxmin()) - 1]
        return [f"Seasonal analysis shows prices tend to be higher in {high_month} and lower in {low_month}."]

    def _generate_insights(self, latest_data, recent_data, location=None):
        """Return at most five plain-text insights derived from the data.

        With a location the insights describe that neighborhood, otherwise the
        overall market.  A seasonal sentence is appended when enough rows exist.
        Canned insights are returned if anything fails.
        """
        try:
            if location:
                canonical = self._resolve_location(location) or location
                insights = self._location_insights(latest_data, recent_data, canonical)
            else:
                insights = self._market_insights(latest_data)
            insights.extend(self._seasonal_insights(recent_data))
            # Limit to top 5 insights
            return insights[:5]
        except Exception as e:
            print(f"Error generating insights: {str(e)}")
            return [
                "The Delhi real estate market has shown strong resilience with a 5.2% increase in median home prices.",
                "Luxury properties in South Delhi continue to appreciate faster than other segments.",
                "Inventory levels have increased by 8.2%, indicating a potential shift towards a buyer's market.",
                "Properties in Vasant Kunj are selling 15% faster than the market average."
            ]

    def _generate_charts(self, data, location=None):
        """Return a dict of base64 PNG charts keyed by chart name.

        Each chart is built independently, so one failing chart only omits its
        own key.  An empty dict is returned if the setup itself fails.
        """
        try:
            # Filter data by location if specified; fall back to everything if it has no rows
            chart_data = data
            if location:
                location_rows = data[data['Neighborhood'] == location]
                if not location_rows.empty:
                    chart_data = location_rows
            plt.style.use('ggplot')
        except Exception as e:
            print(f"Error generating charts: {str(e)}")
            return {}

        builders = {
            'priceTrend': self._create_price_trend_chart,
            'inventory': self._create_inventory_chart,
            'priceDistribution': self._create_price_distribution_chart,
            'daysOnMarket': self._create_dom_chart,
        }
        charts = {}
        for chart_name, build in builders.items():
            try:
                charts[chart_name] = build(chart_data)
            except Exception as e:
                print(f"Error generating charts: {str(e)}")
        return charts

    @staticmethod
    def _figure_to_base64(fig):
        """Render fig as a PNG and return it as a base64 string."""
        with io.BytesIO() as buffer:
            fig.savefig(buffer, format='png', dpi=100)
            return base64.b64encode(buffer.getvalue()).decode('utf-8')

    def _create_trend_chart(self, data, column, title, ylabel, color, thousands_separator=False):
        """Plot column over 'Time Period', one line per neighborhood, and return a base64 PNG."""
        fig = plt.figure(figsize=(10, 6))
        try:
            multiple = 'Neighborhood' in data.columns and len(data['Neighborhood'].unique()) > 1
            if multiple:
                for neighborhood in data['Neighborhood'].unique():
                    neighborhood_data = data[data['Neighborhood'] == neighborhood]
                    if not neighborhood_data.empty:
                        plt.plot(neighborhood_data['Time Period'], neighborhood_data[column],
                                 marker='o', markersize=4, label=neighborhood)
            else:
                # If only one neighborhood or no neighborhood column
                plt.plot(data['Time Period'], data[column],
                         marker='o', markersize=4, color=color)
            plt.title(title, fontsize=14, fontweight='bold')
            plt.xlabel('Date', fontsize=12)
            plt.ylabel(ylabel, fontsize=12)
            plt.grid(True, alpha=0.3)
            plt.tight_layout()
            if multiple:
                plt.legend(fontsize=10)
            if thousands_separator:
                # Format y-axis with commas for thousands
                plt.gca().get_yaxis().set_major_formatter(plt.matplotlib.ticker.StrMethodFormatter('{x:,.0f}'))
            return self._figure_to_base64(fig)
        finally:
            # Always release the figure, even when plotting fails
            plt.close(fig)

    def _create_price_trend_chart(self, data):
        """Create the median home price trend chart."""
        return self._create_trend_chart(data, 'Median Home Price', 'Median Home Price Trends',
                                        'Price (₹)', '#1f77b4', thousands_separator=True)

    def _create_inventory_chart(self, data):
        """Create the inventory levels trend chart."""
        return self._create_trend_chart(data, 'Inventory Levels', 'Inventory Level Trends',
                                        'Inventory', '#ff7f0e')

    def _create_price_distribution_chart(self, data):
        """Create a histogram (with KDE) of median home prices in millions."""
        fig = plt.figure(figsize=(10, 6))
        try:
            sns.histplot(data['Median Home Price'] / 1000000, bins=15, kde=True)
            plt.title('Distribution of Home Prices', fontsize=14, fontweight='bold')
            plt.xlabel('Price (Million ₹)', fontsize=12)
            plt.ylabel('Frequency', fontsize=12)
            plt.grid(True, alpha=0.3)
            plt.tight_layout()
            return self._figure_to_base64(fig)
        finally:
            plt.close(fig)

    def _create_dom_chart(self, data):
        """Create the days on market trend chart."""
        return self._create_trend_chart(data, 'Days on Market', 'Days on Market Trends',
                                        'Days', '#2ca02c')

    def _get_fallback_metrics(self):
        """Return the hard-coded placeholder metrics used when real ones cannot be computed."""
        return [
            {"metric": "Median Home Price", "value": 12500000, "change": 5.2, "isPositive": True},
            {"metric": "Number of Sales", "value": 245, "change": -2.8, "isPositive": False},
            {"metric": "Days on Market", "value": 32, "change": -15.8, "isPositive": True},
            {"metric": "Price per Square Foot", "value": 9800, "change": 3.5, "isPositive": True},
            {"metric": "Inventory Levels", "value": 320, "change": 8.2, "isPositive": False},
            {"metric": "Year-over-Year Price Change", "value": 5.2, "change": 5.2, "isPositive": True}
        ]

    def _get_fallback_data(self, location=None):
        """Return a complete placeholder response (no charts) if an error occurs."""
        location_text = f" in {location}" if location else ""
        return {
            "marketTrends": self._get_fallback_metrics(),
            "hotNeighborhoods": self._get_fallback_neighborhoods(),
            "insights": [
                f"The Delhi real estate market{location_text} has shown strong resilience with a 5.2% increase in median home prices.",
                f"Luxury properties{location_text} continue to appreciate faster than other segments.",
                f"Inventory levels{location_text} have increased by 8.2%, indicating a potential shift towards a buyer's market.",
                "Properties in Vasant Kunj are selling 15% faster than the market average."
            ],
            "charts": {}
        }

    def _get_fallback_neighborhoods(self):
        """Return fallback neighborhood data"""
        return [
            {"name": "Vasant Kunj", "growth": "8.5%", "medianPrice": 15800000, "pricePerSqFt": 12500},
            {"name": "Greater Kailash", "growth": "7.2%", "medianPrice": 18500000, "pricePerSqFt": 14200},
            {"name": "Dwarka", "growth": "6.8%", "medianPrice": 9800000, "pricePerSqFt": 8500},
            {"name": "Saket", "growth": "6.2%", "medianPrice": 14200000, "pricePerSqFt": 11800},
            {"name": "Rohini", "growth": "5.9%", "medianPrice": 8500000, "pricePerSqFt": 7800}
        ]