Spaces:
Runtime error
Runtime error
# models/price_analysis.py | |
import re | |
import requests | |
import time | |
from datetime import datetime | |
from .model_loader import load_model | |
from .logging_config import logger | |
# Cache of recent city lookups so repeated queries do not hit the search API
# (and its rate limits) again. Maps a normalized (stripped, lower-cased) city
# name to {'data': <result dict>, 'timestamp': <time.time() of the lookup>}.
_price_cache = {}
_CACHE_DURATION = 3600  # Cache duration in seconds (1 hour)

_DDG_API_URL = "https://api.duckduckgo.com/"

# One pattern for every "<amount> per sq ft / sq. ft. / sqft / square foot"
# spelling. The look-behind anchors the match at the start of the number so a
# value such as "5000" or the lakh-grouped "1,25,000" is captured whole rather
# than as its trailing digits.
_PRICE_PER_SQFT_RE = re.compile(
    r'(?<![\d,])(\d+(?:,\d+)*(?:\.\d+)?)\s*per\s*(?:sq\.?|square)',
    re.IGNORECASE,
)


def _extract_prices(text):
    """Return every positive per-square-foot amount quoted in *text* as floats."""
    if not isinstance(text, str) or not text:
        return []
    amounts = (float(raw.replace(',', '')) for raw in _PRICE_PER_SQFT_RE.findall(text))
    return [amount for amount in amounts if amount > 0]


def _iter_topic_texts(topics):
    """Yield the 'Text' of each related topic, descending into grouped 'Topics' lists."""
    for topic in topics or []:
        if not isinstance(topic, dict):
            continue
        if 'Text' in topic:
            yield topic['Text']
        elif isinstance(topic.get('Topics'), list):
            yield from _iter_topic_texts(topic['Topics'])


def _summarize_prices(all_prices, price_sources):
    """Build the statistics dictionary returned by get_city_price_data."""
    avg_price = sum(all_prices) / len(all_prices)
    min_price = min(all_prices)
    max_price = max(all_prices)

    budget_ceiling = avg_price * 0.7
    premium_floor = avg_price * 1.3
    price_ranges = {
        'budget': {
            # Clamp so the range is never inverted when the samples cluster
            # tightly around the average (e.g. a single data point).
            'min': min(min_price, budget_ceiling),
            'max': budget_ceiling,
            'description': 'Affordable properties in the area'
        },
        'mid_range': {
            'min': budget_ceiling,
            'max': premium_floor,
            'description': 'Standard properties in the area'
        },
        'premium': {
            'min': premium_floor,
            'max': max(max_price, premium_floor),
            'description': 'High-end properties in the area'
        }
    }

    # Determine city tier based on average price
    if avg_price > 10000:
        city_tier = 'metro'
    elif avg_price > 7000:
        city_tier = 'tier-1'
    elif avg_price > 4000:
        city_tier = 'tier-2'
    else:
        city_tier = 'tier-3'

    # Heuristic only: a wide spread is labelled by whether the most recently
    # collected sample is the maximum; the samples carry no real timestamps.
    price_trend = 'stable'
    if len(all_prices) >= 2 and (max_price - min_price) > avg_price * 0.3:
        price_trend = 'increasing' if max_price == all_prices[-1] else 'decreasing'

    return {
        'avg_price': avg_price,
        'min_price': min_price,
        'max_price': max_price,
        'price_ranges': price_ranges,
        'price_trend': price_trend,
        'city_tier': city_tier,
        'price_sources': price_sources,
        'last_updated': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'data_points': len(all_prices),
        'confidence': min(1.0, len(all_prices) / 10),  # Higher confidence with more data points
        'market_analysis': {
            'trend': price_trend,
            'city_tier': city_tier,
            'price_per_sqft': {
                'market_avg': avg_price,
                'min': min_price,
                'max': max_price
            }
        }
    }


def get_city_price_data(city):
    """Collect per-square-foot property price statistics for a city in India.

    Runs a fixed set of DuckDuckGo Instant Answer queries, extracts every
    "<amount> per sq ft" style figure from the abstract and related topics,
    and summarizes the figures. Successful results are cached for
    _CACHE_DURATION seconds under the normalized city name.

    Args:
        city: City name. Surrounding whitespace and letter case are ignored
            for caching purposes.

    Returns:
        A dict with the keys 'avg_price', 'min_price', 'max_price',
        'price_ranges', 'price_trend', 'city_tier', 'price_sources',
        'last_updated', 'data_points', 'confidence' and 'market_analysis',
        or None when the city is empty, no price could be extracted, or an
        unexpected error occurred. The function never raises.
    """
    city_name = str(city or '').strip()
    if not city_name:
        # Without a city the queries would be meaningless; skip the network entirely.
        logger.warning("No city provided for price lookup")
        return None
    cache_key = city_name.lower()

    try:
        # Check cache first
        current_time = time.time()
        cached_entry = _price_cache.get(cache_key)
        if cached_entry and current_time - cached_entry['timestamp'] < _CACHE_DURATION:
            logger.info(f"Using cached price data for {city_name}")
            return cached_entry['data']

        # Several phrasings of the same question for broader coverage
        queries = [
            f"average real estate price per square foot in {city_name} india 2024",
            f"residential property price per sq ft in {city_name} india current",
            f"apartment price per square foot in {city_name} india latest",
            f"house price per sq ft in {city_name} india today",
            f"property rates in {city_name} india per square foot",
            f"real estate price trends in {city_name} india"
        ]
        # Headers that mimic a browser request
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Accept': 'application/json',
            'Accept-Language': 'en-US,en;q=0.9',
            'Cache-Control': 'no-cache'
        }

        all_prices = []
        price_sources = []
        for index, query in enumerate(queries):
            if index:
                # Pause between requests (also after a failed one) to avoid rate limiting
                time.sleep(1)
            try:
                # params= lets requests URL-encode the query, so spaces, '&'
                # or '#' in a city name cannot corrupt the request.
                response = requests.get(
                    _DDG_API_URL,
                    params={'q': query, 'format': 'json', 'kl': 'wt-wt'},
                    headers=headers,
                    timeout=15,
                )
                if response.status_code != 200:
                    logger.warning(f"Unexpected status {response.status_code} for query '{query}'")
                    continue
                data = response.json()
                if not isinstance(data, dict):
                    continue

                labelled_texts = [('DuckDuckGo Abstract', data.get('Abstract', ''))]
                labelled_texts.extend(
                    ('DuckDuckGo Related', text)
                    for text in _iter_topic_texts(data.get('RelatedTopics', []))
                )
                for source, text in labelled_texts:
                    price_values = _extract_prices(text)
                    if price_values:
                        all_prices.extend(price_values)
                        price_sources.append({
                            'query': query,
                            'prices': price_values,
                            'source': source
                        })
            except Exception as e:
                # Best effort: one failed query (timeout, bad JSON, ...) must
                # not discard what the other queries found.
                logger.error(f"Error fetching data for query '{query}': {str(e)}")
                continue

        if not all_prices:
            logger.warning(f"No price data found for {city_name}")
            return None

        result = _summarize_prices(all_prices, price_sources)
        # Cache the result
        _price_cache[cache_key] = {
            'data': result,
            'timestamp': current_time
        }
        return result
    except Exception as e:
        logger.error(f"Error fetching price data for {city_name}: {str(e)}")
        return None
# Cities that use the metro fallback profile when no live market data exists.
_METRO_CITIES = frozenset({
    "mumbai", "delhi", "new delhi", "bangalore", "bengaluru",
    "hyderabad", "chennai", "kolkata", "pune",
})


def _parse_area(sq_ft_str):
    """Parse a square-footage string, tolerating unit suffixes such as 'sq. ft.'.

    Raises ValueError when no number can be recovered.
    """
    try:
        return float(re.sub(r'[^\d.]', '', sq_ft_str))
    except ValueError:
        # Dots that belong to the unit ("1200 sq. ft.") break the strict
        # parse; recover the first number instead of giving up.
        match = re.search(r'\d+(?:\.\d+)?', sq_ft_str)
        if match is None:
            raise
        return float(match.group())


def _classify_price_range(price_per_sqft, budget_max, mid_range_max):
    """Return 'budget', 'mid_range' or 'premium' for a per-square-foot price."""
    if price_per_sqft <= budget_max:
        return 'budget'
    if price_per_sqft <= mid_range_max:
        return 'mid_range'
    return 'premium'


def _deviation_percent(price_per_sqft, market_avg):
    """Return the absolute deviation from the market average, in percent."""
    if market_avg > 0:
        return abs(price_per_sqft - market_avg) / market_avg * 100
    return 0


def _live_market_view(city_price_data, price_per_sqft):
    """Build (market_trends, price_range, location_assessment) from looked-up city data."""
    market_avg = city_price_data['avg_price']
    price_ranges = city_price_data['price_ranges']
    market_trends = {
        'city_tier': city_price_data['city_tier'],
        'avg_price_range': {
            'min': city_price_data['min_price'],
            'max': city_price_data['max_price'],
            'trend': city_price_data['price_trend']
        },
        'price_per_sqft': {
            'current': price_per_sqft,
            'market_avg': market_avg,
            'deviation': _deviation_percent(price_per_sqft, market_avg)
        },
        'price_ranges': price_ranges,
        'data_confidence': city_price_data['confidence'],
        'last_updated': city_price_data['last_updated']
    }
    price_range = _classify_price_range(
        price_per_sqft, price_ranges['budget']['max'], price_ranges['mid_range']['max']
    )
    if price_per_sqft < city_price_data['min_price']:
        location_assessment = "suspiciously low"
    elif price_per_sqft > city_price_data['max_price']:
        location_assessment = "suspiciously high"
    else:
        location_assessment = "reasonable"
    return market_trends, price_range, location_assessment


def _estimated_market_view(city, price_per_sqft):
    """Build (market_trends, price_range, location_assessment) from fixed city-tier defaults."""
    is_metro = city.lower() in _METRO_CITIES
    min_price = 5000 if is_metro else 1500
    max_price = 30000 if is_metro else 15000
    market_avg = 15000 if is_metro else 7500
    budget_max = market_avg * 0.7
    mid_range_max = market_avg * 1.3
    market_trends = {
        'city_tier': 'metro' if is_metro else 'non-metro',
        'avg_price_range': {
            'min': min_price,
            'max': max_price,
            'trend': 'stable'
        },
        'price_per_sqft': {
            'current': price_per_sqft,
            'market_avg': market_avg,
            'deviation': _deviation_percent(price_per_sqft, market_avg)
        },
        'price_ranges': {
            'budget': {'min': min_price, 'max': budget_max, 'description': 'Affordable properties'},
            'mid_range': {'min': budget_max, 'max': mid_range_max, 'description': 'Standard properties'},
            'premium': {'min': mid_range_max, 'max': max_price, 'description': 'High-end properties'}
        },
        'data_confidence': 0.5,
        'last_updated': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    }
    price_range = _classify_price_range(price_per_sqft, budget_max, mid_range_max)
    return market_trends, price_range, "estimated based on city tier"


def analyze_price(data):
    """Assess a property's asking price against market data for its city.

    Args:
        data: Mapping describing the listing. Keys read: 'market_value'
            (price, may contain '$', '₹' and thousands separators), 'sq_ft'
            (area, may contain a unit suffix), 'city', 'year_built' and
            'amenities' (comma-separated string or a list). All are optional.

    Returns:
        A dict with 'assessment', 'confidence', 'price', 'formatted_price',
        'price_per_sqft', 'formatted_price_per_sqft', 'price_range',
        'location_price_assessment', 'has_price', 'has_sqft',
        'market_trends', 'price_factors' and 'risk_indicators'.

        The function never raises. Missing or invalid price/area values are
        replaced by 1, and 'has_price' / 'has_sqft' are always True because a
        fallback value is always present. Any unexpected failure yields a
        fixed fallback analysis.
    """
    try:
        # Always use defaults if missing/invalid
        price_str = str(data.get('market_value', '1')).replace('$', '').replace('₹', '').replace(',', '').strip()
        try:
            price = float(price_str)
            # One comparison rejects zero, negatives, NaN and infinity.
            if not 0 < price < float('inf'):
                price = 1
        except (TypeError, ValueError) as e:
            logger.warning(f"Invalid price value: {price_str} ({str(e)})")
            price = 1

        sq_ft_str = str(data.get('sq_ft', '1')).replace(',', '').strip()
        try:
            sq_ft = _parse_area(sq_ft_str)
            if not 0 < sq_ft < float('inf'):
                sq_ft = 1
        except (TypeError, ValueError) as e:
            logger.warning(f"Invalid sq_ft value: {sq_ft_str} ({str(e)})")
            sq_ft = 1

        # `or ''` also covers an explicit None, which has no .strip().
        city = str(data.get('city') or '').strip()
        city_is_known = bool(city) and city.lower() != 'unknown'
        if not city_is_known:
            city = 'Unknown'
        price_per_sqft = price / sq_ft  # sq_ft is guaranteed positive above

        # Only query the web for a real city name; searching for the
        # placeholder would cost several slow requests and return noise.
        city_price_data = None
        if city_is_known:
            try:
                city_price_data = get_city_price_data(city)
            except Exception as e:
                logger.error(f"Error getting city price data: {str(e)}")
                city_price_data = None

        try:
            if city_price_data:
                market_trends, price_range, location_assessment = _live_market_view(
                    city_price_data, price_per_sqft
                )
            else:
                market_trends, price_range, location_assessment = _estimated_market_view(
                    city, price_per_sqft
                )
        except Exception as e:
            logger.error(f"Error in price market trend calculation: {str(e)}")
            market_trends = {}
            price_range = 'budget'
            location_assessment = 'unknown'

        price_factors = {}
        risk_indicators = []

        # Age factor: skipped when the year is absent, so a missing value is
        # not mistaken for a building from year 0.
        raw_year = data.get('year_built')
        if raw_year is not None and raw_year != '':
            try:
                year_built = int(float(raw_year))
                property_age = datetime.now().year - year_built
                if year_built > 0 and property_age > 0:
                    # 1% depreciation per year, floored at 50%
                    depreciation_factor = max(0.5, 1 - (property_age * 0.01))
                    price_factors['age_factor'] = {
                        'property_age': property_age,
                        'depreciation_factor': depreciation_factor,
                        'impact': 'high' if property_age > 30 else 'medium' if property_age > 15 else 'low'
                    }
            except (TypeError, ValueError, OverflowError) as e:
                price_factors['age_factor'] = {'error': f'Invalid year built ({str(e)})'}

        # Size factor
        price_factors['size_factor'] = {
            'size': sq_ft,
            'price_per_sqft': price_per_sqft,
            'efficiency': 'high' if 800 <= sq_ft <= 2000 else 'medium' if 500 <= sq_ft <= 3000 else 'low'
        }
        if sq_ft < 300:
            risk_indicators.append('Unusually small property size')
        elif sq_ft > 10000:
            risk_indicators.append('Unusually large property size')

        # Amenities factor
        try:
            amenities = data.get('amenities')
            if amenities:
                if isinstance(amenities, (list, tuple, set)):
                    raw_items = [str(item) for item in amenities]
                else:
                    raw_items = str(amenities).split(',')
                # Drop blanks so "pool,,gym," counts two amenities, not four.
                amenities_list = [item.strip() for item in raw_items if item.strip()]
                if amenities_list:
                    amenities_score = min(1.0, len(amenities_list) * 0.1)
                    price_factors['amenities_factor'] = {
                        'count': len(amenities_list),
                        'score': amenities_score,
                        'impact': 'high' if amenities_score > 0.7 else 'medium' if amenities_score > 0.4 else 'low'
                    }
        except Exception as e:
            logger.warning(f"Error in amenities factor calculation: {str(e)}")

        confidence = 0.8  # Always return a high confidence since we always have fallback data

        # Location outliers take precedence over the price band.
        if location_assessment == "suspiciously low":
            assessment = "potentially underpriced"
        elif location_assessment == "suspiciously high":
            assessment = "potentially overpriced"
        elif price_range == "budget":
            assessment = "budget-friendly"
        elif price_range == "premium":
            assessment = "premium pricing"
        else:
            assessment = "reasonable"

        return {
            'assessment': assessment,
            'confidence': float(confidence),
            'price': price,
            'formatted_price': f"₹{price:,.0f}",
            'price_per_sqft': price_per_sqft,
            'formatted_price_per_sqft': f"₹{price_per_sqft:,.2f}",
            'price_range': price_range,
            'location_price_assessment': location_assessment,
            'has_price': True,
            'has_sqft': True,
            'market_trends': market_trends,
            'price_factors': price_factors,
            'risk_indicators': risk_indicators
        }
    except Exception as e:
        logger.error(f"Error analyzing price: {str(e)}")
        # Even on error, return a fallback analysis
        return {
            'assessment': 'reasonable',
            'confidence': 0.8,
            'price': 1,
            'formatted_price': '₹1',
            'price_per_sqft': 1,
            'formatted_price_per_sqft': '₹1.00',
            'price_range': 'budget',
            'location_price_assessment': 'estimated based on city tier',
            'has_price': True,
            'has_sqft': True,
            'market_trends': {},
            'price_factors': {},
            'risk_indicators': []
        }