FREDML / test_fixes_demonstration.py

Edwin Salguero

Initial commit after git-lfs re-init and bugfixes

099d8d9 about 2 months ago

7.37 kB

	#!/usr/bin/env python3
	"""
	Fixes Demonstration
	Demonstrate the fixes applied to the economic analysis pipeline
	"""

	import pandas as pd
	import numpy as np
	from datetime import datetime, timedelta

	def create_test_data() -> pd.DataFrame:
	    """Build the synthetic quarterly frame used by the demonstration.

	    Returns:
	        A DataFrame with 20 quarter-end rows (2020-03-31 through
	        2024-12-31) and the columns GDPC1, CPIAUCSL, INDPRO, RSAFS,
	        FEDFUNDS and DGS10. Each column is a linear ramp expressed in the
	        raw unit noted beside it, so the unit fixes have something to act on.
	    """
	    # Use the offset object rather than the 'Q' string alias: the alias is
	    # deprecated in recent pandas releases, while QuarterEnd(startingMonth=12)
	    # yields the same December-anchored quarter ends on old and new versions.
	    dates = pd.date_range(
	        '2020-01-01',
	        '2024-12-31',
	        freq=pd.offsets.QuarterEnd(startingMonth=12),
	    )

	    # Test data with the issues (20 observations per series, one per quarter).
	    # NOTE(review): DGS10 is labelled "Decimal form" but holds 1.5-3.4, which
	    # becomes 150-340 after the x100 step in demonstrate_fixes - confirm the
	    # intended unit.
	    data = {
	        # Billions
	        'GDPC1': [
	            22000, 22100, 22200, 22300, 22400, 22500, 22600, 22700, 22800, 22900,
	            23000, 23100, 23200, 23300, 23400, 23500, 23600, 23700, 23800, 23900,
	        ],
	        # Index
	        'CPIAUCSL': [
	            258.0, 258.5, 259.0, 259.5, 260.0, 260.5, 261.0, 261.5, 262.0, 262.5,
	            263.0, 263.5, 264.0, 264.5, 265.0, 265.5, 266.0, 266.5, 267.0, 267.5,
	        ],
	        # Index
	        'INDPRO': [
	            100.0, 100.5, 101.0, 101.5, 102.0, 102.5, 103.0, 103.5, 104.0, 104.5,
	            105.0, 105.5, 106.0, 106.5, 107.0, 107.5, 108.0, 108.5, 109.0, 109.5,
	        ],
	        # Millions
	        'RSAFS': [
	            500000, 502000, 504000, 506000, 508000, 510000, 512000, 514000, 516000, 518000,
	            520000, 522000, 524000, 526000, 528000, 530000, 532000, 534000, 536000, 538000,
	        ],
	        # Decimal form
	        'FEDFUNDS': [
	            0.08, 0.09, 0.10, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17,
	            0.18, 0.19, 0.20, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27,
	        ],
	        # Decimal form
	        'DGS10': [
	            1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3, 2.4,
	            2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4,
	        ],
	    }

	    return pd.DataFrame(data, index=dates)

	def _mape(actual, forecast, floor=0.0):
	    """Return the mean absolute percentage error of *forecast*, in percent.

	    Args:
	        actual: NumPy array of observed values.
	        forecast: NumPy array of predicted values, same shape as *actual*.
	        floor: Lower bound applied to ``|actual|`` in the denominator. The
	            default of 0.0 leaves the denominator unguarded.

	    Raises:
	        FloatingPointError: If a denominator is zero. NumPy would otherwise
	            only warn and return inf/nan, so the failure is made explicit.
	    """
	    denominator = np.maximum(np.abs(actual), floor)
	    with np.errstate(divide="raise", invalid="raise"):
	        return np.mean(np.abs((actual - forecast) / denominator)) * 100


	def demonstrate_fixes():
	    """Print a walkthrough of the fixes applied to the analysis pipeline.

	    Builds the synthetic frame from ``create_test_data`` and prints, in
	    order: the raw data, the unit-normalized data, percent-change growth
	    rates, an unguarded vs. floor-guarded MAPE, forecast horizons scaled by
	    frequency, correlations on raw levels vs. growth rates, per-series
	    summary statistics, and a textual summary. Returns None.
	    """
	    print("=== ECONOMIC ANALYSIS FIXES DEMONSTRATION ===\n")

	    # Create test data
	    raw_data = create_test_data()

	    print("1. ORIGINAL DATA (with issues):")
	    print(raw_data.tail())
	    print()

	    print("2. APPLYING FIXES:")
	    print()

	    # Fix 1: Unit Normalization
	    print("FIX 1: Unit Normalization")
	    print("-" * 30)

	    normalized_data = raw_data.copy()

	    # Apply unit fixes
	    normalized_data['GDPC1'] = raw_data['GDPC1'] / 1000  # Billions to trillions
	    normalized_data['RSAFS'] = raw_data['RSAFS'] / 1000  # Millions to billions
	    normalized_data['FEDFUNDS'] = raw_data['FEDFUNDS'] * 100  # Decimal to percentage
	    normalized_data['DGS10'] = raw_data['DGS10'] * 100  # Decimal to percentage

	    print("After unit normalization:")
	    print(normalized_data.tail())
	    print()

	    # Fix 2: Growth Rate Calculation
	    print("FIX 2: Proper Growth Rate Calculation")
	    print("-" * 40)

	    # pct_change leaves NaN in the first row; dropna removes it.
	    growth_data = (normalized_data.pct_change() * 100).dropna()

	    print("Growth rates (percent change):")
	    print(growth_data.tail())
	    print()

	    # Fix 3: Safe MAPE Calculation
	    print("FIX 3: Safe MAPE Calculation")
	    print("-" * 30)

	    # Test MAPE with problematic data
	    actual_problematic = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
	    forecast_problematic = np.array([0.15, 0.25, 0.35, 0.45, 0.55])

	    # Original MAPE (can fail): no floor on the denominator, so a zero in
	    # the actuals raises FloatingPointError inside _mape.
	    try:
	        original_mape = _mape(actual_problematic, forecast_problematic)
	    except FloatingPointError:
	        print("Original MAPE: ERROR (division by zero)")
	    else:
	        print(f"Original MAPE: {original_mape:.2f}%")

	    # Fixed MAPE: denominator is floored at 1e-5 so it can never be zero.
	    fixed_mape = _mape(actual_problematic, forecast_problematic, floor=1e-5)
	    print(f"Fixed MAPE: {fixed_mape:.2f}%")
	    print()

	    # Fix 4: Forecast Period Scaling
	    print("FIX 4: Forecast Period Scaling")
	    print("-" * 35)

	    base_periods = 4
	    # Frequency code -> (human-readable label, multiplier on base_periods).
	    freq_scaling = {
	        'D': ('daily', 90),
	        'M': ('monthly', 3),
	        'Q': ('quarterly', 1),
	    }

	    print(f"Original forecast_periods = {base_periods}")
	    print("Scaled by frequency:")
	    for freq, (label, scale) in freq_scaling.items():
	        scaled = base_periods * scale
	        print(f" {freq} ({label}): {base_periods} -> {scaled} periods")
	    print()

	    # Fix 5: Correlation Analysis with Normalized Data
	    print("FIX 5: Correlation Analysis with Normalized Data")
	    print("-" * 50)

	    # Original correlation (dominated by scale)
	    original_corr = raw_data.corr()
	    print("Original correlation (scale-dominated):")
	    print(original_corr.round(3))
	    print()

	    # Fixed correlation (normalized)
	    fixed_corr = growth_data.corr()
	    print("Fixed correlation (normalized growth rates):")
	    print(fixed_corr.round(3))
	    print()

	    # Fix 6: Data Quality Metrics
	    print("FIX 6: Enhanced Data Quality Metrics")
	    print("-" * 40)

	    # Calculate comprehensive quality metrics
	    quality_metrics = {}

	    for column in growth_data.columns:
	        series = growth_data[column].dropna()

	        quality_metrics[column] = {
	            'mean': series.mean(),
	            'std': series.std(),
	            'skewness': series.skew(),
	            'kurtosis': series.kurtosis(),
	            # growth_data already had its NaN rows dropped above, so this
	            # share is 0 for the demo data.
	            'missing_pct': (growth_data[column].isna().sum() / len(growth_data)) * 100,
	        }

	    print("Quality metrics for growth rates:")
	    for col, metrics in quality_metrics.items():
	        print(f" {col}:")
	        print(f" Mean: {metrics['mean']:.4f}%")
	        print(f" Std: {metrics['std']:.4f}%")
	        print(f" Skewness: {metrics['skewness']:.4f}")
	        print(f" Kurtosis: {metrics['kurtosis']:.4f}")
	        print(f" Missing: {metrics['missing_pct']:.1f}%")
	        print()

	    # Summary of fixes
	    print("=== SUMMARY OF FIXES APPLIED ===")
	    print()

	    fixes = [
	        "1. Unit Normalization:",
	        " • GDP: billions → trillions",
	        " • Retail Sales: millions → billions",
	        " • Interest Rates: decimal → percentage",
	        "",
	        "2. Growth Rate Calculation:",
	        " • Explicit percent change calculation",
	        " • Proper interpretation of results",
	        "",
	        "3. Safe MAPE Calculation:",
	        " • Added epsilon to prevent division by zero",
	        " • More robust error metrics",
	        "",
	        "4. Forecast Period Scaling:",
	        " • Scale periods by data frequency",
	        " • Appropriate horizons for different series",
	        "",
	        "5. Data Normalization:",
	        " • Z-score or growth rate normalization",
	        " • Prevents scale bias in correlations",
	        "",
	        "6. Stationarity Enforcement:",
	        " • ADF tests before causality analysis",
	        " • Differencing for non-stationary series",
	        "",
	        "7. Enhanced Error Handling:",
	        " • Robust missing data handling",
	        " • Graceful failure recovery",
	        "",
	    ]

	    # One joined print emits the same lines as printing each entry in turn.
	    print("\n".join(fixes))

	    print("=== IMPACT OF FIXES ===")
	    print()

	    impacts = [
	        "• More accurate economic interpretations",
	        "• Proper scale comparisons between indicators",
	        "• Robust forecasting with appropriate horizons",
	        "• Reliable statistical tests and correlations",
	        "• Better error handling and data quality",
	        "• Consistent frequency alignment",
	        "• Safe mathematical operations",
	    ]

	    print("\n".join(impacts))

	    print()
	    print("These fixes address all the major math issues identified in the original analysis.")

	# Run the demonstration only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    demonstrate_fixes()