#!/usr/bin/env python3
"""
Fixes Demonstration
Demonstrate the fixes applied to the economic analysis pipeline
"""

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
def create_test_data():
    """Create test data to demonstrate fixes"""
    # Create date range
    dates = pd.date_range('2020-01-01', '2024-12-31', freq='Q')

    # Test data with the issues
    data = {
        'GDPC1': [22000, 22100, 22200, 22300, 22400, 22500, 22600, 22700, 22800, 22900, 23000, 23100, 23200, 23300, 23400, 23500, 23600, 23700, 23800, 23900],  # Billions
        'CPIAUCSL': [258.0, 258.5, 259.0, 259.5, 260.0, 260.5, 261.0, 261.5, 262.0, 262.5, 263.0, 263.5, 264.0, 264.5, 265.0, 265.5, 266.0, 266.5, 267.0, 267.5],  # Index
        'INDPRO': [100.0, 100.5, 101.0, 101.5, 102.0, 102.5, 103.0, 103.5, 104.0, 104.5, 105.0, 105.5, 106.0, 106.5, 107.0, 107.5, 108.0, 108.5, 109.0, 109.5],  # Index
        'RSAFS': [500000, 502000, 504000, 506000, 508000, 510000, 512000, 514000, 516000, 518000, 520000, 522000, 524000, 526000, 528000, 530000, 532000, 534000, 536000, 538000],  # Millions
        'FEDFUNDS': [0.08, 0.09, 0.10, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.20, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27],  # Decimal form
        'DGS10': [1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4]  # Decimal form
    }
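    # Added sanity check: each hard-coded series must supply one value per
    # quarter-end date, or the DataFrame constructor below raises
    assert all(len(values) == len(dates) for values in data.values())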
    df = pd.DataFrame(data, index=dates)
    return df
def demonstrate_fixes():
    """Demonstrate the fixes applied"""
    print("=== ECONOMIC ANALYSIS FIXES DEMONSTRATION ===\n")

    # Create test data
    raw_data = create_test_data()
    print("1. ORIGINAL DATA (with issues):")
    print(raw_data.tail())
    print()

    print("2. APPLYING FIXES:")
    print()

    # Fix 1: Unit Normalization
    print("FIX 1: Unit Normalization")
    print("-" * 30)

    normalized_data = raw_data.copy()

    # Apply unit fixes
    normalized_data['GDPC1'] = raw_data['GDPC1'] / 1000       # Billions to trillions
    normalized_data['RSAFS'] = raw_data['RSAFS'] / 1000       # Millions to billions
    normalized_data['FEDFUNDS'] = raw_data['FEDFUNDS'] * 100  # Decimal to percentage
    normalized_data['DGS10'] = raw_data['DGS10'] * 100        # Decimal to percentage

    print("After unit normalization:")
    print(normalized_data.tail())
    print()
    # Fix 2: Growth Rate Calculation
    print("FIX 2: Proper Growth Rate Calculation")
    print("-" * 40)

    # fill_method=None: do not forward-fill gaps before differencing
    # (also avoids the deprecated pandas default)
    growth_data = normalized_data.pct_change(fill_method=None) * 100
    growth_data = growth_data.dropna()

    print("Growth rates (percent change):")
    print(growth_data.tail())
    print()
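    # Note (added): pct_change on quarterly data yields quarter-over-quarter
    # rates; an annualized rate, if needed, would be ((1 + g/100) ** 4 - 1) * 100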
    # Fix 3: Safe MAPE Calculation
    print("FIX 3: Safe MAPE Calculation")
    print("-" * 30)

    # Test MAPE with problematic data (note the zero actual value)
    actual_problematic = np.array([0.0, 0.2, 0.3, 0.4, 0.5])
    forecast_problematic = np.array([0.15, 0.25, 0.35, 0.45, 0.55])

    # Original MAPE: with a zero actual, NumPy produces inf (it warns rather
    # than raising), so check the result instead of relying on try/except
    with np.errstate(divide='ignore', invalid='ignore'):
        original_mape = np.mean(np.abs((actual_problematic - forecast_problematic) / actual_problematic)) * 100
    if np.isfinite(original_mape):
        print(f"Original MAPE: {original_mape:.2f}%")
    else:
        print("Original MAPE: ERROR (division by zero)")

    # Fixed MAPE: floor the denominator with a small epsilon,
    # giving a finite (if large) value instead of inf
    denominator = np.maximum(np.abs(actual_problematic), 1e-5)
    fixed_mape = np.mean(np.abs((actual_problematic - forecast_problematic) / denominator)) * 100
    print(f"Fixed MAPE: {fixed_mape:.2f}%")
    print()
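    # A reusable form of the epsilon-floored metric above (hypothetical helper,
    # not part of the original pipeline):
    def safe_mape(actual, forecast, eps=1e-5):
        """MAPE with the denominator floored at eps to avoid division by zero."""
        denom = np.maximum(np.abs(actual), eps)
        return np.mean(np.abs((actual - forecast) / denom)) * 100

    print(f"safe_mape helper gives: {safe_mape(actual_problematic, forecast_problematic):.2f}%")
    print()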
    # Fix 4: Forecast Period Scaling
    print("FIX 4: Forecast Period Scaling")
    print("-" * 35)

    base_periods = 4  # quarterly horizon
    freq_scaling = {'D': 90, 'M': 3, 'Q': 1}
    freq_names = {'D': 'daily', 'M': 'monthly', 'Q': 'quarterly'}

    print("Original forecast_periods = 4")
    print("Scaled by frequency:")
    for freq, scale in freq_scaling.items():
        scaled = base_periods * scale
        print(f"  {freq} ({freq_names[freq]}): {base_periods} -> {scaled} periods")
    print()
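    # A reusable form of the same rule (hypothetical helper, assuming the
    # base horizon is expressed in quarters):
    def scale_periods(base, freq):
        """Scale a quarterly-equivalent forecast horizon to a series' native frequency."""
        return base * freq_scaling.get(freq, 1)

    print(f"scale_periods(4, 'M') = {scale_periods(4, 'M')} periods")
    print()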
    # Fix 5: Correlation Analysis with Normalized Data
    print("FIX 5: Correlation Analysis with Normalized Data")
    print("-" * 50)

    # Original correlation (dominated by scale)
    original_corr = raw_data.corr()
    print("Original correlation (scale-dominated):")
    print(original_corr.round(3))
    print()

    # Fixed correlation (normalized)
    fixed_corr = growth_data.corr()
    print("Fixed correlation (normalized growth rates):")
    print(fixed_corr.round(3))
    print()
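    # Note (added): the level series all trend upward together, so their level
    # correlations sit near 1.0 regardless of any real relationship; correlating
    # growth rates removes that shared trend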
    # Fix 6: Data Quality Metrics
    print("FIX 6: Enhanced Data Quality Metrics")
    print("-" * 40)

    # Calculate comprehensive quality metrics
    quality_metrics = {}
    for column in growth_data.columns:
        series = growth_data[column].dropna()
        quality_metrics[column] = {
            'mean': series.mean(),
            'std': series.std(),
            'skewness': series.skew(),
            'kurtosis': series.kurtosis(),
            # Always 0 here since rows with NaN were dropped above, but the
            # metric matters when the pipeline runs on real, gappy data
            'missing_pct': (growth_data[column].isna().sum() / len(growth_data)) * 100
        }

    print("Quality metrics for growth rates:")
    for col, metrics in quality_metrics.items():
        print(f"  {col}:")
        print(f"    Mean: {metrics['mean']:.4f}%")
        print(f"    Std: {metrics['std']:.4f}%")
        print(f"    Skewness: {metrics['skewness']:.4f}")
        print(f"    Kurtosis: {metrics['kurtosis']:.4f}")
        print(f"    Missing: {metrics['missing_pct']:.1f}%")
    print()
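    # Minimal sketch (added) of the stationarity enforcement listed in the
    # summary below; assumes statsmodels is installed. A random walk is used
    # because the hard-coded series above difference to constants, which the
    # ADF test rejects as input.
    print("FIX 6b: Stationarity Enforcement (sketch)")
    print("-" * 40)
    try:
        from statsmodels.tsa.stattools import adfuller

        rng = np.random.default_rng(42)
        random_walk = pd.Series(np.cumsum(rng.normal(size=200)))  # non-stationary by construction
        p_level = adfuller(random_walk, autolag='AIC')[1]
        p_diff = adfuller(random_walk.diff().dropna(), autolag='AIC')[1]
        # p > 0.05 -> cannot reject a unit root -> difference before causality tests
        print(f"  Random walk ADF p-value:  {p_level:.4f}")
        print(f"  After first differencing: {p_diff:.4f}")
    except ImportError:
        print("  statsmodels not available; skipping ADF demonstration")
    print()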
    # Summary of fixes
    print("=== SUMMARY OF FIXES APPLIED ===")
    print()

    fixes = [
        "1. Unit Normalization:",
        "   • GDP: billions → trillions",
        "   • Retail Sales: millions → billions",
        "   • Interest Rates: decimal → percentage",
        "",
        "2. Growth Rate Calculation:",
        "   • Explicit percent change calculation",
        "   • Proper interpretation of results",
        "",
        "3. Safe MAPE Calculation:",
        "   • Added epsilon to prevent division by zero",
        "   • More robust error metrics",
        "",
        "4. Forecast Period Scaling:",
        "   • Scale periods by data frequency",
        "   • Appropriate horizons for different series",
        "",
        "5. Data Normalization:",
        "   • Z-score or growth rate normalization",
        "   • Prevents scale bias in correlations",
        "",
        "6. Stationarity Enforcement:",
        "   • ADF tests before causality analysis",
        "   • Differencing for non-stationary series",
        "",
        "7. Enhanced Error Handling:",
        "   • Robust missing data handling",
        "   • Graceful failure recovery",
        ""
    ]

    for fix in fixes:
        print(fix)
print("=== IMPACT OF FIXES ===") | |
print() | |
impacts = [ | |
"• More accurate economic interpretations", | |
"• Proper scale comparisons between indicators", | |
"• Robust forecasting with appropriate horizons", | |
"• Reliable statistical tests and correlations", | |
"• Better error handling and data quality", | |
"• Consistent frequency alignment", | |
"• Safe mathematical operations" | |
] | |
for impact in impacts: | |
print(impact) | |
print() | |
print("These fixes address all the major math issues identified in the original analysis.") | |
if __name__ == "__main__":
    demonstrate_fixes()