FREDML

File size: 7,370 Bytes

099d8d9

#!/usr/bin/env python3
"""
Fixes Demonstration
Demonstrate the fixes applied to the economic analysis pipeline
"""

import pandas as pd
import numpy as np
from datetime import datetime, timedelta

def create_test_data():
    """Build the synthetic quarterly dataset used by the demonstration.

    Returns:
        pandas.DataFrame: 20 rows indexed by calendar quarter-end dates
        (2020-03-31 through 2024-12-31) with the columns GDPC1, CPIAUCSL,
        INDPRO, RSAFS, FEDFUNDS and DGS10. Every column is a simple linear
        ramp, left in the mixed units noted next to each series below.
    """

    # Create date range.
    # An offset object is used instead of the 'Q' string alias: pandas 2.2
    # deprecated that alias in favour of 'QE', so the string form is not
    # portable across pandas versions. QuarterEnd(startingMonth=12) is the
    # offset the 'Q' alias stood for (quarters ending Mar/Jun/Sep/Dec).
    dates = pd.date_range(
        '2020-01-01',
        '2024-12-31',
        freq=pd.offsets.QuarterEnd(startingMonth=12),
    )

    # Test data with the issues (one value per quarter, 20 per series)
    data = {
        'GDPC1': [22000, 22100, 22200, 22300, 22400, 22500, 22600, 22700, 22800, 22900, 23000, 23100, 23200, 23300, 23400, 23500, 23600, 23700, 23800, 23900],  # Billions
        'CPIAUCSL': [258.0, 258.5, 259.0, 259.5, 260.0, 260.5, 261.0, 261.5, 262.0, 262.5, 263.0, 263.5, 264.0, 264.5, 265.0, 265.5, 266.0, 266.5, 267.0, 267.5],  # Index
        'INDPRO': [100.0, 100.5, 101.0, 101.5, 102.0, 102.5, 103.0, 103.5, 104.0, 104.5, 105.0, 105.5, 106.0, 106.5, 107.0, 107.5, 108.0, 108.5, 109.0, 109.5],  # Index
        'RSAFS': [500000, 502000, 504000, 506000, 508000, 510000, 512000, 514000, 516000, 518000, 520000, 522000, 524000, 526000, 528000, 530000, 532000, 534000, 536000, 538000],  # Millions
        'FEDFUNDS': [0.08, 0.09, 0.10, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.20, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27],  # Decimal form
        # NOTE(review): labelled "decimal form", but 1.5-3.4 reads like values
        # that are already in percent - confirm before scaling them by 100.
        'DGS10': [1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4]  # Decimal form
    }

    # DataFrame construction fails loudly if a series length does not match
    # the number of dates, so no separate length check is needed here.
    df = pd.DataFrame(data, index=dates)
    return df

def _naive_mape(actual, forecast):
    """Return the unguarded MAPE in percent, raising on a zero denominator."""
    # NumPy float division by zero does not raise by default (it warns and
    # produces inf/nan), so request FloatingPointError explicitly.
    with np.errstate(divide='raise', invalid='raise'):
        return np.mean(np.abs((actual - forecast) / actual)) * 100


def _safe_mape(actual, forecast, epsilon=1e-5):
    """Return MAPE in percent with the denominator floored at *epsilon*."""
    denominator = np.maximum(np.abs(actual), epsilon)
    return np.mean(np.abs((actual - forecast) / denominator)) * 100


def demonstrate_fixes():
    """Print a walkthrough of the fixes applied to the analysis pipeline.

    Builds the synthetic dataset with ``create_test_data()`` and prints, in
    order: the raw data, unit-normalized data, percent-change growth rates,
    a naive versus epsilon-guarded MAPE, forecast horizons scaled by data
    frequency, correlations on raw levels versus growth rates, per-series
    summary statistics of the growth rates, and a textual summary.

    Returns:
        None. All results are written to standard output.
    """

    print("=== ECONOMIC ANALYSIS FIXES DEMONSTRATION ===\n")

    # Create test data
    raw_data = create_test_data()

    print("1. ORIGINAL DATA (with issues):")
    print(raw_data.tail())
    print()

    print("2. APPLYING FIXES:")
    print()

    # Fix 1: Unit Normalization
    print("FIX 1: Unit Normalization")
    print("-" * 30)

    # Work on a copy so the raw frame stays available for the correlation
    # comparison in Fix 5.
    normalized_data = raw_data.copy()

    # Apply unit fixes
    normalized_data['GDPC1'] = raw_data['GDPC1'] / 1000  # Billions to trillions
    normalized_data['RSAFS'] = raw_data['RSAFS'] / 1000  # Millions to billions
    normalized_data['FEDFUNDS'] = raw_data['FEDFUNDS'] * 100  # Decimal to percentage
    normalized_data['DGS10'] = raw_data['DGS10'] * 100  # Decimal to percentage

    print("After unit normalization:")
    print(normalized_data.tail())
    print()

    # Fix 2: Growth Rate Calculation
    print("FIX 2: Proper Growth Rate Calculation")
    print("-" * 40)

    # pct_change leaves the first row as NaN (nothing to compare against);
    # dropna removes it.
    growth_data = normalized_data.pct_change() * 100
    growth_data = growth_data.dropna()

    print("Growth rates (percent change):")
    print(growth_data.tail())
    print()

    # Fix 3: Safe MAPE Calculation
    print("FIX 3: Safe MAPE Calculation")
    print("-" * 30)

    # Sample inputs for the MAPE comparison. These values contain no zeros,
    # so both formulas agree here; the guard only changes the result when an
    # actual value is (near) zero.
    actual_problematic = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
    forecast_problematic = np.array([0.15, 0.25, 0.35, 0.45, 0.55])

    # Original MAPE (can fail on a zero actual value)
    try:
        original_mape = _naive_mape(actual_problematic, forecast_problematic)
    except FloatingPointError:
        print("Original MAPE: ERROR (division by zero)")
    else:
        print(f"Original MAPE: {original_mape:.2f}%")

    # Fixed MAPE
    fixed_mape = _safe_mape(actual_problematic, forecast_problematic)
    print(f"Fixed MAPE: {fixed_mape:.2f}%")
    print()

    # Fix 4: Forecast Period Scaling
    print("FIX 4: Forecast Period Scaling")
    print("-" * 35)

    base_periods = 4
    # Multiplier applied to the base horizon for each data frequency.
    freq_scaling = {'D': 90, 'M': 3, 'Q': 1}
    # Human-readable name for each frequency code, so each line is labelled
    # with its own frequency rather than always "daily".
    freq_labels = {'D': 'daily', 'M': 'monthly', 'Q': 'quarterly'}

    print(f"Original forecast_periods = {base_periods}")
    print("Scaled by frequency:")
    for freq, scale in freq_scaling.items():
        scaled = base_periods * scale
        print(f"  {freq} ({freq_labels[freq]}): {base_periods} -> {scaled} periods")
    print()

    # Fix 5: Correlation Analysis with Normalized Data
    print("FIX 5: Correlation Analysis with Normalized Data")
    print("-" * 50)

    # Original correlation (computed on raw levels)
    original_corr = raw_data.corr()
    print("Original correlation (scale-dominated):")
    print(original_corr.round(3))
    print()

    # Fixed correlation (computed on growth rates)
    fixed_corr = growth_data.corr()
    print("Fixed correlation (normalized growth rates):")
    print(fixed_corr.round(3))
    print()

    # Fix 6: Data Quality Metrics
    print("FIX 6: Enhanced Data Quality Metrics")
    print("-" * 40)

    # Calculate comprehensive quality metrics, one dict per series
    quality_metrics = {}

    for column in growth_data.columns:
        series = growth_data[column].dropna()

        quality_metrics[column] = {
            'mean': series.mean(),
            'std': series.std(),
            'skewness': series.skew(),
            'kurtosis': series.kurtosis(),
            # Measured on growth_data, which already had NaN rows dropped in
            # Fix 2.
            'missing_pct': (growth_data[column].isna().sum() / len(growth_data)) * 100
        }

    print("Quality metrics for growth rates:")
    for col, metrics in quality_metrics.items():
        print(f"  {col}:")
        print(f"    Mean: {metrics['mean']:.4f}%")
        print(f"    Std: {metrics['std']:.4f}%")
        print(f"    Skewness: {metrics['skewness']:.4f}")
        print(f"    Kurtosis: {metrics['kurtosis']:.4f}")
        print(f"    Missing: {metrics['missing_pct']:.1f}%")
        print()

    # Summary of fixes
    print("=== SUMMARY OF FIXES APPLIED ===")
    print()

    # Static summary text. Items 6 and 7 describe fixes that are not
    # exercised by the code in this function.
    fixes = [
        "1. Unit Normalization:",
        "   • GDP: billions → trillions",
        "   • Retail Sales: millions → billions",
        "   • Interest Rates: decimal → percentage",
        "",
        "2. Growth Rate Calculation:",
        "   • Explicit percent change calculation",
        "   • Proper interpretation of results",
        "",
        "3. Safe MAPE Calculation:",
        "   • Added epsilon to prevent division by zero",
        "   • More robust error metrics",
        "",
        "4. Forecast Period Scaling:",
        "   • Scale periods by data frequency",
        "   • Appropriate horizons for different series",
        "",
        "5. Data Normalization:",
        "   • Z-score or growth rate normalization",
        "   • Prevents scale bias in correlations",
        "",
        "6. Stationarity Enforcement:",
        "   • ADF tests before causality analysis",
        "   • Differencing for non-stationary series",
        "",
        "7. Enhanced Error Handling:",
        "   • Robust missing data handling",
        "   • Graceful failure recovery",
        ""
    ]

    for fix in fixes:
        print(fix)

    print("=== IMPACT OF FIXES ===")
    print()

    impacts = [
        "• More accurate economic interpretations",
        "• Proper scale comparisons between indicators",
        "• Robust forecasting with appropriate horizons",
        "• Reliable statistical tests and correlations",
        "• Better error handling and data quality",
        "• Consistent frequency alignment",
        "• Safe mathematical operations"
    ]

    for impact in impacts:
        print(impact)

    print()
    print("These fixes address all the major math issues identified in the original analysis.")

# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demonstrate_fixes()