File size: 7,370 Bytes
099d8d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
#!/usr/bin/env python3
"""
Fixes Demonstration
Demonstrate the fixes applied to the economic analysis pipeline
"""

import pandas as pd
import numpy as np
from datetime import datetime, timedelta

def create_test_data():
    """Create test data to demonstrate fixes"""
    
    # Create date range
    dates = pd.date_range('2020-01-01', '2024-12-31', freq='Q')
    
    # Test data with the issues
    data = {
        'GDPC1': [22000, 22100, 22200, 22300, 22400, 22500, 22600, 22700, 22800, 22900, 23000, 23100, 23200, 23300, 23400, 23500, 23600, 23700, 23800, 23900],  # Billions
        'CPIAUCSL': [258.0, 258.5, 259.0, 259.5, 260.0, 260.5, 261.0, 261.5, 262.0, 262.5, 263.0, 263.5, 264.0, 264.5, 265.0, 265.5, 266.0, 266.5, 267.0, 267.5],  # Index
        'INDPRO': [100.0, 100.5, 101.0, 101.5, 102.0, 102.5, 103.0, 103.5, 104.0, 104.5, 105.0, 105.5, 106.0, 106.5, 107.0, 107.5, 108.0, 108.5, 109.0, 109.5],  # Index
        'RSAFS': [500000, 502000, 504000, 506000, 508000, 510000, 512000, 514000, 516000, 518000, 520000, 522000, 524000, 526000, 528000, 530000, 532000, 534000, 536000, 538000],  # Millions
        'FEDFUNDS': [0.08, 0.09, 0.10, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.20, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27],  # Decimal form
        'DGS10': [1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4]  # Decimal form
    }
    
    df = pd.DataFrame(data, index=dates)
    return df

def demonstrate_fixes():
    """Demonstrate the fixes applied"""
    
    print("=== ECONOMIC ANALYSIS FIXES DEMONSTRATION ===\n")
    
    # Create test data
    raw_data = create_test_data()
    
    print("1. ORIGINAL DATA (with issues):")
    print(raw_data.tail())
    print()
    
    print("2. APPLYING FIXES:")
    print()
    
    # Fix 1: Unit Normalization
    print("FIX 1: Unit Normalization")
    print("-" * 30)
    
    normalized_data = raw_data.copy()
    
    # Apply unit fixes
    normalized_data['GDPC1'] = raw_data['GDPC1'] / 1000  # Billions to trillions
    normalized_data['RSAFS'] = raw_data['RSAFS'] / 1000  # Millions to billions
    normalized_data['FEDFUNDS'] = raw_data['FEDFUNDS'] * 100  # Decimal to percentage
    normalized_data['DGS10'] = raw_data['DGS10'] * 100  # Decimal to percentage
    
    print("After unit normalization:")
    print(normalized_data.tail())
    print()
    
    # Fix 2: Growth Rate Calculation
    print("FIX 2: Proper Growth Rate Calculation")
    print("-" * 40)
    
    growth_data = normalized_data.pct_change() * 100
    growth_data = growth_data.dropna()
    
    print("Growth rates (percent change):")
    print(growth_data.tail())
    print()
    
    # Fix 3: Safe MAPE Calculation
    print("FIX 3: Safe MAPE Calculation")
    print("-" * 30)
    
    # Test MAPE with problematic data
    actual_problematic = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
    forecast_problematic = np.array([0.15, 0.25, 0.35, 0.45, 0.55])
    
    # Original MAPE (can fail)
    try:
        original_mape = np.mean(np.abs((actual_problematic - forecast_problematic) / actual_problematic)) * 100
        print(f"Original MAPE: {original_mape:.2f}%")
    except:
        print("Original MAPE: ERROR (division by zero)")
    
    # Fixed MAPE
    denominator = np.maximum(np.abs(actual_problematic), 1e-5)
    fixed_mape = np.mean(np.abs((actual_problematic - forecast_problematic) / denominator)) * 100
    print(f"Fixed MAPE: {fixed_mape:.2f}%")
    print()
    
    # Fix 4: Forecast Period Scaling
    print("FIX 4: Forecast Period Scaling")
    print("-" * 35)
    
    base_periods = 4
    freq_scaling = {'D': 90, 'M': 3, 'Q': 1}
    
    print("Original forecast_periods = 4")
    print("Scaled by frequency:")
    for freq, scale in freq_scaling.items():
        scaled = base_periods * scale
        print(f"  {freq} (daily): {base_periods} -> {scaled} periods")
    print()
    
    # Fix 5: Correlation Analysis with Normalized Data
    print("FIX 5: Correlation Analysis with Normalized Data")
    print("-" * 50)
    
    # Original correlation (dominated by scale)
    original_corr = raw_data.corr()
    print("Original correlation (scale-dominated):")
    print(original_corr.round(3))
    print()
    
    # Fixed correlation (normalized)
    fixed_corr = growth_data.corr()
    print("Fixed correlation (normalized growth rates):")
    print(fixed_corr.round(3))
    print()
    
    # Fix 6: Data Quality Metrics
    print("FIX 6: Enhanced Data Quality Metrics")
    print("-" * 40)
    
    # Calculate comprehensive quality metrics
    quality_metrics = {}
    
    for column in growth_data.columns:
        series = growth_data[column].dropna()
        
        quality_metrics[column] = {
            'mean': series.mean(),
            'std': series.std(),
            'skewness': series.skew(),
            'kurtosis': series.kurtosis(),
            'missing_pct': (growth_data[column].isna().sum() / len(growth_data)) * 100
        }
    
    print("Quality metrics for growth rates:")
    for col, metrics in quality_metrics.items():
        print(f"  {col}:")
        print(f"    Mean: {metrics['mean']:.4f}%")
        print(f"    Std: {metrics['std']:.4f}%")
        print(f"    Skewness: {metrics['skewness']:.4f}")
        print(f"    Kurtosis: {metrics['kurtosis']:.4f}")
        print(f"    Missing: {metrics['missing_pct']:.1f}%")
        print()
    
    # Summary of fixes
    print("=== SUMMARY OF FIXES APPLIED ===")
    print()
    
    fixes = [
        "1. Unit Normalization:",
        "   • GDP: billions → trillions",
        "   • Retail Sales: millions → billions", 
        "   • Interest Rates: decimal → percentage",
        "",
        "2. Growth Rate Calculation:",
        "   • Explicit percent change calculation",
        "   • Proper interpretation of results",
        "",
        "3. Safe MAPE Calculation:",
        "   • Added epsilon to prevent division by zero",
        "   • More robust error metrics",
        "",
        "4. Forecast Period Scaling:",
        "   • Scale periods by data frequency",
        "   • Appropriate horizons for different series",
        "",
        "5. Data Normalization:",
        "   • Z-score or growth rate normalization",
        "   • Prevents scale bias in correlations",
        "",
        "6. Stationarity Enforcement:",
        "   • ADF tests before causality analysis",
        "   • Differencing for non-stationary series",
        "",
        "7. Enhanced Error Handling:",
        "   • Robust missing data handling",
        "   • Graceful failure recovery",
        ""
    ]
    
    for fix in fixes:
        print(fix)
    
    print("=== IMPACT OF FIXES ===")
    print()
    
    impacts = [
        "• More accurate economic interpretations",
        "• Proper scale comparisons between indicators", 
        "• Robust forecasting with appropriate horizons",
        "• Reliable statistical tests and correlations",
        "• Better error handling and data quality",
        "• Consistent frequency alignment",
        "• Safe mathematical operations"
    ]
    
    for impact in impacts:
        print(impact)
    
    print()
    print("These fixes address all the major math issues identified in the original analysis.")

if __name__ == "__main__":
    demonstrate_fixes()