#!/usr/bin/env python3
"""
Fixes Demonstration
Demonstrate the fixes applied to the economic analysis pipeline
"""
import pandas as pd
import numpy as np


def create_test_data():
    """Create test data to demonstrate the fixes."""
    # 20 quarter-end dates covering 2020Q1 through 2024Q4
    # ('Q' means quarter-end; newer pandas spells this 'QE')
    dates = pd.date_range('2020-01-01', '2024-12-31', freq='Q')

    # Test data exhibiting the unit and scale issues the fixes address
    data = {
        'GDPC1': [22000, 22100, 22200, 22300, 22400, 22500, 22600, 22700, 22800, 22900, 23000, 23100, 23200, 23300, 23400, 23500, 23600, 23700, 23800, 23900],  # billions of dollars
        'CPIAUCSL': [258.0, 258.5, 259.0, 259.5, 260.0, 260.5, 261.0, 261.5, 262.0, 262.5, 263.0, 263.5, 264.0, 264.5, 265.0, 265.5, 266.0, 266.5, 267.0, 267.5],  # index
        'INDPRO': [100.0, 100.5, 101.0, 101.5, 102.0, 102.5, 103.0, 103.5, 104.0, 104.5, 105.0, 105.5, 106.0, 106.5, 107.0, 107.5, 108.0, 108.5, 109.0, 109.5],  # index
        'RSAFS': [500000, 502000, 504000, 506000, 508000, 510000, 512000, 514000, 516000, 518000, 520000, 522000, 524000, 526000, 528000, 530000, 532000, 534000, 536000, 538000],  # millions of dollars
        'FEDFUNDS': [0.08, 0.09, 0.10, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.20, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27],  # decimal form
        'DGS10': [1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4]  # decimal form
    }
    df = pd.DataFrame(data, index=dates)
    return df
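

# A reusable form of the epsilon-guarded MAPE applied inline in Fix 3 below.
# This helper is an illustrative sketch added for clarity: the name `safe_mape`
# and the default epsilon are assumptions, not part of the original pipeline.
# Example: safe_mape([0.0, 0.2], [0.1, 0.2]) stays finite even with a zero actual.
def safe_mape(actual, forecast, eps=1e-5):
    """Mean absolute percentage error with the denominator clamped away from zero."""
    actual = np.asarray(actual, dtype=float)
    forecast = np.asarray(forecast, dtype=float)
    denominator = np.maximum(np.abs(actual), eps)
    return np.mean(np.abs((actual - forecast) / denominator)) * 100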


def demonstrate_fixes():
    """Demonstrate the fixes applied to the economic analysis pipeline."""
    print("=== ECONOMIC ANALYSIS FIXES DEMONSTRATION ===\n")

    # Create test data
    raw_data = create_test_data()
    print("1. ORIGINAL DATA (with issues):")
    print(raw_data.tail())
    print()

    print("2. APPLYING FIXES:")
    print()

    # Fix 1: Unit Normalization
    print("FIX 1: Unit Normalization")
    print("-" * 30)
    normalized_data = raw_data.copy()
    # Rescale each series into the units used for reporting
    normalized_data['GDPC1'] = raw_data['GDPC1'] / 1000       # billions -> trillions
    normalized_data['RSAFS'] = raw_data['RSAFS'] / 1000       # millions -> billions
    normalized_data['FEDFUNDS'] = raw_data['FEDFUNDS'] * 100  # decimal -> percentage
    normalized_data['DGS10'] = raw_data['DGS10'] * 100        # decimal -> percentage
    print("After unit normalization:")
    print(normalized_data.tail())
    print()
    # Fix 2: Proper Growth Rate Calculation
    print("FIX 2: Proper Growth Rate Calculation")
    print("-" * 40)
    # Quarter-over-quarter percent change; the first row is NaN and is dropped
    growth_data = normalized_data.pct_change() * 100
    growth_data = growth_data.dropna()
    print("Growth rates (percent change):")
    print(growth_data.tail())
    print()
    # Fix 3: Safe MAPE Calculation
    print("FIX 3: Safe MAPE Calculation")
    print("-" * 30)
    # Small actual values: near-zero actuals can blow up a naive MAPE
    actual_problematic = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
    forecast_problematic = np.array([0.15, 0.25, 0.35, 0.45, 0.55])

    # Original MAPE: with NumPy arrays a zero actual does not raise, it
    # silently produces inf, so check the result instead of catching an error
    original_mape = np.mean(np.abs((actual_problematic - forecast_problematic) / actual_problematic)) * 100
    if np.isfinite(original_mape):
        print(f"Original MAPE: {original_mape:.2f}%")
    else:
        print("Original MAPE: ERROR (division by zero produced a non-finite value)")

    # Fixed MAPE: clamp the denominator away from zero. With no zero actuals
    # the two values agree; the guard matters when an actual touches zero.
    denominator = np.maximum(np.abs(actual_problematic), 1e-5)
    fixed_mape = np.mean(np.abs((actual_problematic - forecast_problematic) / denominator)) * 100
    print(f"Fixed MAPE: {fixed_mape:.2f}%")
    print()
    # Fix 4: Forecast Period Scaling
    print("FIX 4: Forecast Period Scaling")
    print("-" * 35)
    base_periods = 4
    # Multipliers that translate a 4-quarter horizon into each frequency's units
    freq_scaling = {'D': ('daily', 90), 'M': ('monthly', 3), 'Q': ('quarterly', 1)}
    print("Original forecast_periods = 4")
    print("Scaled by frequency:")
    for freq, (name, scale) in freq_scaling.items():
        scaled = base_periods * scale
        print(f"  {freq} ({name}): {base_periods} -> {scaled} periods")
    print()
    # Fix 5: Correlation Analysis with Normalized Data
    print("FIX 5: Correlation Analysis with Normalized Data")
    print("-" * 50)
    # Correlations of raw levels are spuriously high: trending series move
    # together regardless of any real relationship
    original_corr = raw_data.corr()
    print("Original correlation (raw levels, trend-dominated):")
    print(original_corr.round(3))
    print()

    # Fixed correlation: computed on growth rates, which removes the common trend
    fixed_corr = growth_data.corr()
    print("Fixed correlation (normalized growth rates):")
    print(fixed_corr.round(3))
    print()
    # Fix 6: Enhanced Data Quality Metrics
    print("FIX 6: Enhanced Data Quality Metrics")
    print("-" * 40)
    # Distributional and completeness metrics for each growth-rate series
    # (growth_data was dropna()'d above, so missing_pct is 0 here; it is
    # kept because real pipeline data does contain gaps)
    quality_metrics = {}
    for column in growth_data.columns:
        series = growth_data[column].dropna()
        quality_metrics[column] = {
            'mean': series.mean(),
            'std': series.std(),
            'skewness': series.skew(),
            'kurtosis': series.kurtosis(),
            'missing_pct': (growth_data[column].isna().sum() / len(growth_data)) * 100,
        }

    print("Quality metrics for growth rates:")
    for col, metrics in quality_metrics.items():
        print(f"  {col}:")
        print(f"    Mean: {metrics['mean']:.4f}%")
        print(f"    Std: {metrics['std']:.4f}%")
        print(f"    Skewness: {metrics['skewness']:.4f}")
        print(f"    Kurtosis: {metrics['kurtosis']:.4f}")
        print(f"    Missing: {metrics['missing_pct']:.1f}%")
    print()
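    # Sketch: Stationarity Enforcement (summary item 6 below). A hedged
    # illustration, assuming statsmodels is installed; the random-walk series
    # is synthetic and not part of the original pipeline.
    print("SKETCH: Stationarity Enforcement (ADF test + differencing)")
    print("-" * 55)
    try:
        from statsmodels.tsa.stattools import adfuller
        rng = np.random.default_rng(42)
        random_walk = pd.Series(np.cumsum(rng.normal(size=200)))
        p_level = adfuller(random_walk)[1]                 # typically > 0.05: non-stationary
        p_diff = adfuller(random_walk.diff().dropna())[1]  # typically near 0: stationary
        print(f"ADF p-value, levels:      {p_level:.3f}")
        print(f"ADF p-value, differenced: {p_diff:.3f}")
        print("Non-stationary series should be differenced before causality tests.")
    except ImportError:
        print("statsmodels not installed; skipping ADF demonstration")
    print()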
    # Summary of fixes
    print("=== SUMMARY OF FIXES APPLIED ===")
    print()
    fixes = [
        "1. Unit Normalization:",
        "   • GDP: billions → trillions",
        "   • Retail Sales: millions → billions",
        "   • Interest Rates: decimal → percentage",
        "",
        "2. Growth Rate Calculation:",
        "   • Explicit percent-change calculation",
        "   • Proper interpretation of results",
        "",
        "3. Safe MAPE Calculation:",
        "   • Added epsilon to prevent division by zero",
        "   • More robust error metrics",
        "",
        "4. Forecast Period Scaling:",
        "   • Scale periods by data frequency",
        "   • Appropriate horizons for different series",
        "",
        "5. Data Normalization:",
        "   • Z-score or growth-rate normalization",
        "   • Prevents spurious trend-driven correlations",
        "",
        "6. Stationarity Enforcement (sketched above):",
        "   • ADF tests before causality analysis",
        "   • Differencing for non-stationary series",
        "",
        "7. Enhanced Error Handling (see the sketch after this summary):",
        "   • Robust missing-data handling",
        "   • Graceful failure recovery",
        "",
    ]
    for fix in fixes:
        print(fix)
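    # Sketch: Enhanced Error Handling (summary item 7). A hedged illustration
    # of robust missing-data handling; the interpolation strategy shown is an
    # assumption, not the pipeline's actual implementation.
    print("SKETCH: Robust Missing-Data Handling")
    print("-" * 40)
    series_with_gap = growth_data['GDPC1'].copy()
    series_with_gap.iloc[3] = np.nan                 # inject a gap
    repaired = series_with_gap.interpolate(limit=2)  # fill short gaps only
    print(f"Gaps before: {int(series_with_gap.isna().sum())}, after: {int(repaired.isna().sum())}")
    print()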
print("=== IMPACT OF FIXES ===")
print()
impacts = [
"• More accurate economic interpretations",
"• Proper scale comparisons between indicators",
"• Robust forecasting with appropriate horizons",
"• Reliable statistical tests and correlations",
"• Better error handling and data quality",
"• Consistent frequency alignment",
"• Safe mathematical operations"
]
for impact in impacts:
print(impact)
print()
print("These fixes address all the major math issues identified in the original analysis.")
if __name__ == "__main__":
demonstrate_fixes()