yokoha committed on
Commit
8127935
·
verified ·
1 Parent(s): 270d2c5

Create app-backup.py

Browse files
Files changed (1) hide show
  1. app-backup.py +1688 -0
app-backup.py ADDED
@@ -0,0 +1,1688 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import plotly.graph_objects as go
6
+ from datetime import date
7
+ from pathlib import Path
8
+ import matplotlib.font_manager as fm
9
+ import matplotlib as mpl
10
+ import warnings
11
+ warnings.filterwarnings('ignore')
12
+
13
+ # Load the additional libraries this app requires
14
+ try:
15
+ import statsmodels.api as sm
16
+ from statsmodels.tsa.statespace.sarimax import SARIMAX
17
+ from statsmodels.tsa.holtwinters import ExponentialSmoothing, SimpleExpSmoothing, Holt
18
+ from statsmodels.tsa.seasonal import seasonal_decompose
19
+ from sklearn.linear_model import LinearRegression
20
+ from sklearn.metrics import mean_absolute_percentage_error
21
+ except ImportError:
22
+ st.error("ν•„μš”ν•œ λΌμ΄λΈŒλŸ¬λ¦¬κ°€ μ„€μΉ˜λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€. ν„°λ―Έλ„μ—μ„œ λ‹€μŒ λͺ…령을 μ‹€ν–‰ν•˜μ„Έμš”:")
23
+ st.code("pip install statsmodels scikit-learn")
24
+ st.stop()
25
+
26
+ # -------------------------------------------------
27
+ # CONFIG ------------------------------------------
28
+ # -------------------------------------------------
29
+ CSV_PATH = Path("2025-domae.csv")
30
+ MACRO_START, MACRO_END = "1996-01-01", "2030-12-31"
31
+ MICRO_START, MICRO_END = "2024-01-01", "2026-12-31"
32
+
33
+
34
+ # Korean font configuration for matplotlib
35
+ font_list = [f.name for f in fm.fontManager.ttflist if 'gothic' in f.name.lower() or
36
+ 'gulim' in f.name.lower() or 'malgun' in f.name.lower() or
37
+ 'nanum' in f.name.lower() or 'batang' in f.name.lower()]
38
+
39
+ if font_list:
40
+ font_name = font_list[0]
41
+ plt.rcParams['font.family'] = font_name
42
+ mpl.rcParams['axes.unicode_minus'] = False
43
+ else:
44
+ plt.rcParams['font.family'] = 'DejaVu Sans'
45
+
46
+ st.set_page_config(page_title="ν’ˆλͺ©λ³„ 가격 예츑", page_icon="πŸ“ˆ", layout="wide")
47
+
48
+ # -------------------------------------------------
49
+ # Best-model mapping per item ---------------------
50
+ # -------------------------------------------------
51
+ item_models = {
52
+ "갈치": {"model1": "SARIMA(1,0,1)(1,0,1,12)", "accuracy1": 99.82, "model2": "Holt-Winters", "accuracy2": 99.80},
53
+ "감자": {"model1": "ETS(Multiplicative)", "accuracy1": 99.58, "model2": "SARIMA(1,0,1)(1,0,1,12)", "accuracy2": 98.70},
54
+ "건고좔": {"model1": "SARIMA(1,0,1)(1,0,1,12)", "accuracy1": 99.96, "model2": "Holt", "accuracy2": 99.79},
55
+ "κ±΄λ‹€μ‹œλ§ˆ": {"model1": "Naive", "accuracy1": 99.59, "model2": "SeasonalNaive", "accuracy2": 99.34},
56
+ "고ꡬ마": {"model1": "SARIMA(1,1,1)(1,1,1,12)", "accuracy1": 99.89, "model2": "ETS(Multiplicative)", "accuracy2": 98.91},
57
+ "κ³ λ“±μ–΄": {"model1": "SARIMA(1,0,1)(1,0,1,12)", "accuracy1": 99.48, "model2": "ETS(Additive)", "accuracy2": 99.42},
58
+ "κΉ€": {"model1": "SARIMA(0,1,1)(0,1,1,12)", "accuracy1": 99.99, "model2": "SARIMA(0,1,1)(0,1,1,12)", "accuracy2": 99.93},
59
+ "깐마늘(κ΅­μ‚°)": {"model1": "SeasonalNaive", "accuracy1": 99.79, "model2": "MovingAverage-6 m", "accuracy2": 98.65},
60
+ "깻잎": {"model1": "SARIMA(0,1,1)(0,1,1,12)", "accuracy1": 99.68, "model2": "Holt", "accuracy2": 99.54},
61
+ "녹두": {"model1": "WeightedMA-6 m", "accuracy1": 99.53, "model2": "Fourier + LR", "accuracy2": 99.53},
62
+ "λŠνƒ€λ¦¬λ²„μ„―": {"model1": "SARIMA(0,1,1)(0,1,1,12)", "accuracy1": 99.84, "model2": "LinearTrend", "accuracy2": 99.80},
63
+ "λ‹Ήκ·Ό": {"model1": "Holt", "accuracy1": 99.25, "model2": "ETS(Multiplicative)", "accuracy2": 97.27},
64
+ "λ“€κΉ¨": {"model1": "Holt", "accuracy1": 99.57, "model2": "Holt-Winters", "accuracy2": 99.17},
65
+ "땅콩": {"model1": "SARIMA(1,1,1)(1,1,1,12)", "accuracy1": 99.74, "model2": "ETS(Additive)", "accuracy2": 99.37},
66
+ "레λͺ¬": {"model1": "WeightedMA-6 m", "accuracy1": 99.99, "model2": "LinearTrend", "accuracy2": 98.99},
67
+ "망고": {"model1": "SARIMA(1,0,1)(1,0,1,12)", "accuracy1": 99.38, "model2": "Holt-Winters", "accuracy2": 99.02},
68
+ "λ©”λ°€": {"model1": "SARIMA(1,0,1)(1,0,1,12)", "accuracy1": 99.48, "model2": "SARIMA(0,1,1)(0,1,1,12)", "accuracy2": 98.99},
69
+ "멜둠": {"model1": "Naive", "accuracy1": 99.07, "model2": "ETS(Multiplicative)", "accuracy2": 99.01},
70
+ "λͺ…νƒœ": {"model1": "SARIMA(1,0,1)(1,0,1,12)", "accuracy1": 100.00, "model2": "MovingAverage-6 m", "accuracy2": 99.93},
71
+ "무": {"model1": "SARIMA(1,1,1)(1,1,1,12)", "accuracy1": 99.54, "model2": "SeasonalNaive", "accuracy2": 88.29, "special": "accuracy_drop"},
72
+ "λ¬Όμ˜€μ§•μ–΄": {"model1": "Holt-Winters", "accuracy1": 99.91, "model2": "ETS(Multiplicative)", "accuracy2": 99.36},
73
+ "λ―Έλ‚˜λ¦¬": {"model1": "SARIMA(1,0,1)(1,0,1,12)", "accuracy1": 98.71, "model2": "LinearTrend", "accuracy2": 98.54},
74
+ "λ°”λ‚˜λ‚˜": {"model1": "MovingAverage-6 m", "accuracy1": 99.81, "model2": "ETS(Multiplicative)", "accuracy2": 98.86},
75
+ "λ°©μšΈν† λ§ˆν† ": {"model1": "ETS(Multiplicative)", "accuracy1": 99.62, "model2": "Holt", "accuracy2": 98.28},
76
+ "λ°°": {"model1": "ETS(Additive)", "accuracy1": 99.34, "model2": "LinearTrend", "accuracy2": 98.57},
77
+ "λ°°μΆ”": {"model1": "Holt", "accuracy1": 99.98, "model2": "MovingAverage-6 m", "accuracy2": 99.71},
78
+ "뢁어": {"model1": "Fourier + LR", "accuracy1": 99.96, "model2": "MovingAverage-6 m", "accuracy2": 99.94},
79
+ "뢉은고좔": {"model1": "SARIMA(1,1,1)(1,1,1,12)", "accuracy1": 99.75, "model2": "LinearTrend", "accuracy2": 97.61},
80
+ "브둜콜리": {"model1": "Holt", "accuracy1": 99.54, "model2": "Naive", "accuracy2": 99.93},
81
+ "사과": {"model1": "Holt-Winters", "accuracy1": 99.89, "model2": "ETS(Multiplicative)", "accuracy2": 98.91},
82
+ "상좔": {"model1": "ETS(Additive)", "accuracy1": 99.11, "model2": "Holt-Winters", "accuracy2": 97.61},
83
+ "μƒˆμ†‘μ΄λ²„μ„―": {"model1": "SimpleExpSmoothing", "accuracy1": 99.95, "model2": "Holt-Winters", "accuracy2": 99.40},
84
+ "μƒˆμš°": {"model1": "ETS(Additive)", "accuracy1": 99.87, "model2": "Naive", "accuracy2": 99.96},
85
+ "생강": {"model1": "Naive", "accuracy1": 99.27, "model2": "ETS(Additive)", "accuracy2": 98.53},
86
+ "μˆ˜λ°•": {"model1": "Naive", "accuracy1": 99.91, "model2": "SARIMA(1,1,1)(1,1,1,12)", "accuracy2": 99.45},
87
+ "μ‹œκΈˆμΉ˜": {"model1": "Holt-Winters", "accuracy1": 99.70, "model2": "SeasonalNaive", "accuracy2": 98.73},
88
+ "μŒ€": {"model1": "SARIMA(0,1,1)(0,1,1,12)", "accuracy1": 99.99, "model2": "Holt-Winters", "accuracy2": 99.88},
89
+ "μ•Œλ°°κΈ°λ°°μΆ”": {"model1": "WeightedMA-6 m", "accuracy1": 98.19, "model2": "SeasonalNaive", "accuracy2": 95.73},
90
+ "μ–‘λ°°μΆ”": {"model1": "Holt-Winters", "accuracy1": 99.05, "model2": "WeightedMA-6 m", "accuracy2": 97.85},
91
+ "μ–‘νŒŒ": {"model1": "ETS(Additive)", "accuracy1": 99.93, "model2": "WeightedMA-6 m", "accuracy2": 99.51},
92
+ "μ–Όκ°ˆμ΄λ°°μΆ”": {"model1": "SARIMA(1,1,1)(1,1,1,12)", "accuracy1": 99.77, "model2": "SeasonalNaive", "accuracy2": 98.55},
93
+ "열무": {"model1": "SeasonalNaive", "accuracy1": 99.96, "model2": "Holt", "accuracy2": 99.50},
94
+ "였이": {"model1": "SeasonalNaive", "accuracy1": 99.82, "model2": "ETS(Additive)", "accuracy2": 98.48},
95
+ "전볡": {"model1": "Holt", "accuracy1": 99.90, "model2": "Fourier + LR", "accuracy2": 99.90},
96
+ "μ°ΈκΉ¨": {"model1": "WeightedMA-6 m", "accuracy1": 100.00, "model2": "LinearTrend", "accuracy2": 86.44, "special": "accuracy_drop"},
97
+ "μ°ΉμŒ€": {"model1": "SARIMA(1,0,1)(1,0,1,12)", "accuracy1": 99.71, "model2": "Naive", "accuracy2": 98.64, "special": "accuracy_drop"},
98
+ "콩": {"model1": "SARIMA(0,1,1)(0,1,1,12)", "accuracy1": 99.98, "model2": "ETS(Additive)", "accuracy2": 99.68},
99
+ "ν† λ§ˆν† ": {"model1": "SeasonalNaive", "accuracy1": 97.31, "model2": "MovingAverage-6 m", "accuracy2": 97.57},
100
+ "파": {"model1": "MovingAverage-6 m", "accuracy1": 99.92, "model2": "Holt-Winters", "accuracy2": 97.77},
101
+ "νŒŒμΈμ• ν”Œ": {"model1": "Naive", "accuracy1": 99.51, "model2": "SARIMA(1,0,1)(1,0,1,12)", "accuracy2": 96.39},
102
+ "νŒŒν”„λ¦¬μΉ΄": {"model1": "SARIMA(0,1,1)(0,1,1,12)", "accuracy1": 99.04, "model2": "WeightedMA-6 m", "accuracy2": 99.36},
103
+ "νŒ₯": {"model1": "ETS(Additive)", "accuracy1": 99.87, "model2": "Holt-Winters", "accuracy2": 75.08, "special": "accuracy_drop"},
104
+ "νŒ½μ΄λ²„μ„―": {"model1": "SeasonalNaive", "accuracy1": 99.84, "model2": "Fourier + LR", "accuracy2": 98.49},
105
+ "ν’‹κ³ μΆ”": {"model1": "Holt-Winters", "accuracy1": 98.95, "model2": "ETS(Multiplicative)", "accuracy2": 98.73},
106
+ "피망": {"model1": "Fourier + LR", "accuracy1": 99.64, "model2": "WeightedMA-6 m", "accuracy2": 98.93},
107
+ "ν˜Έλ°•": {"model1": "ETS(Multiplicative)", "accuracy1": 99.98, "model2": "SeasonalNaive", "accuracy2": 96.61},
108
+ "홍합": {"model1": "Naive", "accuracy1": 99.86, "model2": "SeasonalNaive", "accuracy2": 98.56},
109
+ }
110
+
111
+ # Default models for any item that is not listed in the mapping above
112
+ default_models = {
113
+ "model1": "SARIMA(1,0,1)(1,0,1,12)",
114
+ "accuracy1": 99.0,
115
+ "model2": "ETS(Multiplicative)",
116
+ "accuracy2": 98.0
117
+ }
118
+
119
+ # -------------------------------------------------
120
+ # UTILITIES ---------------------------------------
121
+ # -------------------------------------------------
122
+ DATE_CANDIDATES = {"date", "ds", "ymd", "λ‚ μ§œ", "prce_reg_mm", "etl_ldg_dt"}
123
+ ITEM_CANDIDATES = {"item", "ν’ˆλͺ©", "code", "category", "pdlt_nm", "spcs_nm"}
124
+ PRICE_CANDIDATES = {"price", "y", "value", "가격", "avrg_prce"}
125
+
126
def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Rename recognised columns to ``date`` / ``item`` / ``price`` and tidy them.

    Column labels are matched case-insensitively against ``DATE_CANDIDATES``,
    ``PRICE_CANDIDATES`` and ``ITEM_CANDIDATES``.  The first item-like column
    becomes ``item``; any later one becomes ``species`` and is merged into
    ``item`` as ``"<item>-<species>"``.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw frame as read from the data file.  It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with standardised column names.  Text ``date`` values are
        parsed to datetimes (``YYYYMM`` values are moved to the month end);
        unparseable dates become ``NaT``.
    """
    col_map = {}
    item_assigned = False
    for c in df.columns:
        # Labels are not guaranteed to be strings (e.g. integer headers).
        lc = str(c).lower()
        if lc in DATE_CANDIDATES:
            col_map[c] = "date"
        elif lc in PRICE_CANDIDATES:
            col_map[c] = "price"
        elif lc in ITEM_CANDIDATES:
            # First hit is the item, every later hit is treated as the species.
            col_map[c] = "species" if item_assigned else "item"
            item_assigned = True
    df = df.rename(columns=col_map)

    # Several source columns can map to the same name; keep the first of each.
    if df.columns.duplicated().any():
        df = df.loc[:, ~df.columns.duplicated()].copy()

    # A datetime index with no date column: promote the index to ``date``.
    if "date" not in df.columns and df.index.dtype.kind == "M":
        df = df.reset_index()
        df = df.rename(columns={df.columns[0]: "date"})

    # Parse textual dates.  The format is sniffed from the first non-missing
    # value so that a leading blank cell does not hide a YYYYMM column.
    if "date" in df.columns and (
        pd.api.types.is_object_dtype(df["date"]) or pd.api.types.is_string_dtype(df["date"])
    ):
        present = df["date"].dropna()
        if len(present) > 0:
            try:
                sample = str(present.iloc[0])
                if sample.isdigit() and len(sample) == 6:
                    # YYYYMM -> last day of that month
                    parsed = pd.to_datetime(df["date"].astype(str), format="%Y%m", errors="coerce")
                    df["date"] = parsed + pd.offsets.MonthEnd(0)
                elif sample.isdigit() and len(sample) == 8:
                    # YYYYMMDD
                    df["date"] = pd.to_datetime(df["date"].astype(str), format="%Y%m%d", errors="coerce")
                else:
                    # Anything else: let pandas infer the format.
                    df["date"] = pd.to_datetime(df["date"], errors="coerce")
            except Exception:
                # Last resort: generic parsing; failures become NaT.
                df["date"] = pd.to_datetime(df["date"], errors="coerce")

    # Build ``item`` from the raw product/species columns if they survived
    # the rename above.
    if "item" not in df.columns and {"pdlt_nm", "spcs_nm"}.issubset(df.columns):
        df["item"] = df["pdlt_nm"].str.strip() + "-" + df["spcs_nm"].str.strip()

    # Merge item + species.  Missing values must stay missing: ``astype(str)``
    # alone would turn them into the literal text "nan" and create bogus items.
    if {"item", "species"}.issubset(df.columns):
        item_text = df["item"].astype(str).str.strip()
        species_text = df["species"].astype(str).str.strip()
        merged = item_text + "-" + species_text
        merged = merged.where(df["species"].notna(), item_text)  # no species -> item alone
        df["item"] = merged.where(df["item"].notna())            # no item -> stays missing
        df = df.drop(columns=["species"])

    return df
186
+
187
@st.cache_data(show_spinner=False)
def load_data() -> pd.DataFrame:
    """Load, standardise and clean the price data stored at ``CSV_PATH``.

    Progress and diagnostics are written to the Streamlit sidebar.  The script
    is stopped via ``st.stop()`` when the file is missing, a required column
    (``date``, ``item``, ``price``) is absent, or an unexpected error occurs.

    Returns
    -------
    pandas.DataFrame
        Rows with a valid ``date``, ``item`` and numeric ``price``, sorted by
        ``date``.  May be empty, in which case an error is also shown.
    """
    try:
        if not CSV_PATH.exists():
            st.error(f"πŸ’Ύ {CSV_PATH} νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€.")
            st.stop()

        # Read the CSV file directly.
        df = pd.read_csv(CSV_PATH)
        st.sidebar.success(f"CSV 데이터 λ‘œλ“œ μ™„λ£Œ: {len(df)}개 ν–‰")

        # Show the raw column names before standardisation.
        st.sidebar.write("원본 데이터 컬럼:", list(df.columns))

        before_std = len(df)
        df = _standardize_columns(df)
        after_std = len(df)
        if before_std != after_std:
            st.sidebar.warning(f"ν‘œμ€€ν™” 쀑 {before_std - after_std}개 행이 μ œμ™Έλ˜μ—ˆμŠ΅λ‹ˆλ‹€.")

        st.sidebar.write("ν‘œμ€€ν™” ν›„ 컬럼:", list(df.columns))

        # Required columns, reported in a stable order.
        missing = [c for c in ("date", "item", "price") if c not in df.columns]
        if missing:
            st.error(f"ν•„μˆ˜ 컬럼 λˆ„λ½: {', '.join(missing)} β€” 파일 컬럼λͺ…을 ν™•μΈν•˜μ„Έμš”.")
            st.stop()

        st.sidebar.write("λ‚ μ§œ 컬럼 데이터 μƒ˜ν”Œ:", df["date"].head().tolist())

        before_date_convert = len(df)

        try:
            raw_dates = df["date"]

            # Numeric dates (e.g. 202401).  A column with blanks is read as
            # float, so handle every numeric dtype, not only integers.
            if pd.api.types.is_numeric_dtype(raw_dates):
                raw_dates = raw_dates.map(lambda v: str(int(v)) if pd.notna(v) else None)

            if not pd.api.types.is_datetime64_any_dtype(raw_dates):
                present = raw_dates.dropna().astype(str)
                if len(present) > 0 and present.str.match(r'^\d{6}$').all():
                    # YYYYMM -> last day of that month; bad months become NaT.
                    parsed = pd.to_datetime(raw_dates.astype(str), format="%Y%m", errors="coerce")
                    df["date"] = parsed + pd.offsets.MonthEnd(0)
                else:
                    # Generic parsing; failures become NaT.
                    df["date"] = pd.to_datetime(raw_dates, errors="coerce")
        except Exception as e:
            st.sidebar.warning(f"λ‚ μ§œ λ³€ν™˜ 였λ₯˜: {str(e)}")
            # Last-resort attempts.
            try:
                df["date"] = pd.to_datetime(df["date"].astype(str), format="%Y%m", errors="coerce")
                df["date"] = df["date"] + pd.offsets.MonthEnd(0)
            except Exception:
                df["date"] = pd.to_datetime(df["date"], errors="coerce")

        st.sidebar.write("λ‚ μ§œ λ³€ν™˜ ν›„ μƒ˜ν”Œ:", df["date"].head().tolist())
        after_date_convert = df.dropna(subset=["date"]).shape[0]
        if before_date_convert != after_date_convert:
            st.sidebar.warning(f"λ‚ μ§œ λ³€ν™˜ 쀑 {before_date_convert - after_date_convert}개 행이 μ œμ™Έλ˜μ—ˆμŠ΅λ‹ˆλ‹€.")

        # Prices must be numeric; anything else becomes NaN and is dropped below.
        df["price"] = pd.to_numeric(df["price"], errors="coerce")

        before_na_drop = len(df)
        df = df.dropna(subset=["date", "item", "price"])
        after_na_drop = len(df)
        if before_na_drop != after_na_drop:
            st.sidebar.warning(f"NA 제거 쀑 {before_na_drop - after_na_drop}개 행이 μ œμ™Έλ˜μ—ˆμŠ΅λ‹ˆλ‹€.")

        df = df.sort_values("date")

        # Summary of the cleaned data.
        if len(df) > 0:
            st.sidebar.write(f"μ΅œμ’… 데이터: {len(df)}개 ν–‰")
            if pd.api.types.is_datetime64_dtype(df["date"]):
                st.sidebar.write(f"데이터 λ‚ μ§œ λ²”μœ„: {df['date'].min().strftime('%Y-%m-%d')} ~ {df['date'].max().strftime('%Y-%m-%d')}")
            else:
                st.sidebar.write(f"데이터 λ‚ μ§œ λ²”μœ„: λ‚ μ§œ ν˜•μ‹ λ³€ν™˜ μ‹€νŒ¨. ν˜„μž¬ ν˜•μ‹: {type(df['date'].iloc[0])}")
            st.sidebar.write(f"총 ν’ˆλͺ© 수: {df['item'].nunique()}")
            st.sidebar.write(f"ν’ˆλͺ©λ³„ 평균 데이터 수: {len(df)/df['item'].nunique():.1f}개")
        else:
            st.error("μœ νš¨ν•œ 데이터가 μ—†μŠ΅λ‹ˆλ‹€!")

        return df
    except Exception as e:
        st.error(f"데이터 λ‘œλ“œ 쀑 였λ₯˜ λ°œμƒ: {str(e)}")
        import traceback
        st.code(traceback.format_exc())
        st.stop()
293
+
294
@st.cache_data(show_spinner=False)
def get_items(df: pd.DataFrame):
    """Return the distinct values of the ``item`` column in ascending order."""
    distinct_items = list(df["item"].unique())
    distinct_items.sort()
    return distinct_items
297
+
298
def get_best_model_for_item(item):
    """Look up the model configuration registered for ``item``.

    Returns the ``item_models`` entry when one exists, otherwise the shared
    ``default_models`` dictionary.
    """
    try:
        return item_models[item]
    except KeyError:
        return default_models
301
+
302
def format_currency(value):
    """Format a number as a thousands-separated amount with the currency suffix.

    Parameters
    ----------
    value : number-like
        The amount to format.

    Returns
    -------
    str
        The amount rounded to zero decimals with ``,`` separators and the
        currency suffix, or ``"N/A"`` when the value is missing, not finite,
        or not a number at all (for example a stray string).
    """
    try:
        if pd.isna(value) or not np.isfinite(value):
            return "N/A"
        return f"{value:,.0f}원"
    except (TypeError, ValueError):
        # np.isfinite and the numeric format spec both reject non-numeric
        # input; a display helper should degrade gracefully instead.
        return "N/A"
307
+
308
+ # -------------------------------------------------
309
+ # Model implementations ---------------------------
310
+ # -------------------------------------------------
311
@st.cache_data(show_spinner=False, ttl=3600)
def prepare_monthly_data(df):
    """Aggregate price observations into a gap-free month-end series.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime ``date`` column and a numeric ``price`` column.

    Returns
    -------
    pandas.DataFrame
        A single ``price`` column holding the mean price per calendar month,
        indexed by the month-end ``date``.  When there is more than one month,
        missing months are inserted and forward-filled.
    """
    monthly_df = df[['date', 'price']].copy()

    # Snap every observation to the last day of its month.  Without this, a
    # month whose latest observation is not a month-end falls outside the
    # month-end grid built by asfreq() and the most recent month is lost.
    monthly_df['date'] = monthly_df['date'].dt.normalize() + pd.offsets.MonthEnd(0)

    # Mean price per month (rows with a missing date are dropped by groupby).
    monthly_df = monthly_df.groupby('date')[['price']].mean().sort_index()

    # Fill months with no observations.  The offset object is used instead of
    # the 'M' alias, which newer pandas versions deprecate.
    if len(monthly_df) > 1:
        monthly_df = monthly_df.asfreq(pd.offsets.MonthEnd(), method='ffill')

    return monthly_df
328
+
329
def fit_sarima(df, order, seasonal_order, horizon_end):
    """Fit a SARIMA model on monthly prices and forecast up to ``horizon_end``.

    Parameters
    ----------
    df : pandas.DataFrame
        Price data accepted by ``prepare_monthly_data``.
    order : tuple
        Non-seasonal ``(p, d, q)`` order passed to ``SARIMAX``.
    seasonal_order : tuple
        Seasonal ``(P, D, Q, s)`` order passed to ``SARIMAX``.
    horizon_end : str or datetime-like
        Last month to forecast.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``ds``, ``yhat``, ``yhat_lower``, ``yhat_upper``, ``yearly``
        and ``trend``.  Historical rows carry the observed price in all three
        ``yhat*`` columns and are followed by one row per forecast month.
        If ``horizon_end`` is not after the last observed month, only the
        historical rows are returned.  ``None`` is returned, after showing a
        Streamlit error, when fitting fails.
    """
    monthly_df = prepare_monthly_data(df)

    try:
        history = monthly_df['price']
        observed = history.to_numpy(dtype=float)

        # Whole months between the last observation and the horizon.
        last_date = monthly_df.index[-1]
        end_date = pd.Timestamp(horizon_end)
        periods = max((end_date.year - last_date.year) * 12 + (end_date.month - last_date.month), 0)
        future_dates = pd.date_range(
            start=last_date + pd.DateOffset(months=1),
            periods=periods,
            freq=pd.offsets.MonthEnd(),
        )

        if periods > 0:
            model = SARIMAX(
                history,
                order=order,
                seasonal_order=seasonal_order,
                enforce_stationarity=False,
                enforce_invertibility=False
            )
            results = model.fit(disp=False)

            forecast = results.get_forecast(steps=periods)
            pred_ci = forecast.conf_int()
            yhat = forecast.predicted_mean.to_numpy(dtype=float)
            yhat_lower = pred_ci.iloc[:, 0].to_numpy(dtype=float)
            yhat_upper = pred_ci.iloc[:, 1].to_numpy(dtype=float)
        else:
            # Nothing to forecast: the data already reaches the horizon.
            yhat = yhat_lower = yhat_upper = np.array([], dtype=float)

        # History followed by forecast, in the Prophet-style column layout.
        fc_df_monthly = pd.DataFrame({
            'ds': monthly_df.index.append(future_dates),
            'yhat': np.concatenate([observed, yhat]),
            'yhat_lower': np.concatenate([observed, yhat_lower]),
            'yhat_upper': np.concatenate([observed, yhat_upper]),
        })

        # Prophet-compatible component columns (float so that seasonal
        # values can be stored without a dtype change).
        fc_df_monthly['yearly'] = 0.0
        fc_df_monthly['trend'] = 0.0

        try:
            # Best effort: average seasonal factor per calendar month.
            seasonal = seasonal_decompose(history, model='multiplicative', period=12).seasonal
            month_means = seasonal.groupby(seasonal.index.month).mean()
            fc_df_monthly['yearly'] = fc_df_monthly['ds'].dt.month.map(month_means).fillna(0.0)
        except Exception:
            # Decomposition needs at least two full years of strictly
            # positive data; without it the components stay at zero.
            pass

        return fc_df_monthly

    except Exception as e:
        st.error(f"SARIMA λͺ¨λΈ 였λ₯˜: {str(e)}")
        return None
408
+
409
def fit_ets(df, seasonal_type, horizon_end):
    """Fit a damped-trend ETS model on monthly prices and forecast to ``horizon_end``.

    Parameters
    ----------
    df : pandas.DataFrame
        Price data accepted by ``prepare_monthly_data``.
    seasonal_type : str
        ``'multiplicative'`` selects a multiplicative seasonal component; any
        other value selects an additive one.  The trend is always additive.
    horizon_end : str or datetime-like
        Last month to forecast.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``ds``, ``yhat``, ``yhat_lower``, ``yhat_upper``, ``yearly``
        and ``trend``.  Historical rows carry the observed price in all three
        ``yhat*`` columns and are followed by one row per forecast month.
        If ``horizon_end`` is not after the last observed month, only the
        historical rows are returned.  ``None`` is returned, after showing a
        Streamlit error, when fitting fails.
    """
    monthly_df = prepare_monthly_data(df)

    seasonal_mode = 'mul' if seasonal_type == 'multiplicative' else 'add'

    try:
        history = monthly_df['price']
        observed = history.to_numpy(dtype=float)

        # Whole months between the last observation and the horizon.
        last_date = monthly_df.index[-1]
        end_date = pd.Timestamp(horizon_end)
        periods = max((end_date.year - last_date.year) * 12 + (end_date.month - last_date.month), 0)
        future_dates = pd.date_range(
            start=last_date + pd.DateOffset(months=1),
            periods=periods,
            freq=pd.offsets.MonthEnd(),
        )

        if periods > 0:
            model = ExponentialSmoothing(
                history,
                trend='add',
                seasonal=seasonal_mode,
                seasonal_periods=12,
                damped_trend=True  # current name of the deprecated ``damped`` keyword
            )
            results = model.fit(optimized=True)
            yhat = results.forecast(periods).to_numpy(dtype=float)

            # The model gives no interval here; approximate a constant-width
            # 95% band from the in-sample residual spread.
            std_error = np.std(results.resid)
            yhat_lower = yhat - 1.96 * std_error
            yhat_upper = yhat + 1.96 * std_error
        else:
            # Nothing to forecast: the data already reaches the horizon.
            yhat = yhat_lower = yhat_upper = np.array([], dtype=float)

        # History followed by forecast, in the Prophet-style column layout.
        fc_df_monthly = pd.DataFrame({
            'ds': monthly_df.index.append(future_dates),
            'yhat': np.concatenate([observed, yhat]),
            'yhat_lower': np.concatenate([observed, yhat_lower]),
            'yhat_upper': np.concatenate([observed, yhat_upper]),
        })

        # Prophet-compatible component columns (float so that seasonal
        # values can be stored without a dtype change).
        fc_df_monthly['yearly'] = 0.0
        fc_df_monthly['trend'] = 0.0

        try:
            # Best effort: average seasonal component per calendar month.
            seasonal = seasonal_decompose(history, model=seasonal_type, period=12).seasonal
            month_means = seasonal.groupby(seasonal.index.month).mean()
            fc_df_monthly['yearly'] = fc_df_monthly['ds'].dt.month.map(month_means).fillna(0.0)
        except Exception:
            # Decomposition can fail on short or unsuitable series; the
            # components then stay at zero.
            pass

        return fc_df_monthly

    except Exception as e:
        st.error(f"ETS λͺ¨λΈ 였λ₯˜: {str(e)}")
        return None
495
+
496
def fit_holt(df, horizon_end):
    """Fit a damped Holt (trend-only) model on monthly prices and forecast to ``horizon_end``.

    Parameters
    ----------
    df : pandas.DataFrame
        Price data accepted by ``prepare_monthly_data``.
    horizon_end : str or datetime-like
        Last month to forecast.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``ds``, ``yhat``, ``yhat_lower``, ``yhat_upper``, ``yearly``
        (always 0) and ``trend`` (equal to ``yhat``).  Historical rows carry
        the observed price in all three ``yhat*`` columns and are followed by
        one row per forecast month.  If ``horizon_end`` is not after the last
        observed month, only the historical rows are returned.  ``None`` is
        returned, after showing a Streamlit error, when fitting fails.
    """
    monthly_df = prepare_monthly_data(df)

    try:
        history = monthly_df['price']
        observed = history.to_numpy(dtype=float)

        # Whole months between the last observation and the horizon.
        last_date = monthly_df.index[-1]
        end_date = pd.Timestamp(horizon_end)
        periods = max((end_date.year - last_date.year) * 12 + (end_date.month - last_date.month), 0)
        future_dates = pd.date_range(
            start=last_date + pd.DateOffset(months=1),
            periods=periods,
            freq=pd.offsets.MonthEnd(),
        )

        if periods > 0:
            # ``damped_trend`` is the current name of the deprecated ``damped``.
            model = Holt(history, damped_trend=True)
            results = model.fit(optimized=True)
            yhat = results.forecast(periods).to_numpy(dtype=float)

            # Constant-width 95% band from the in-sample residual spread.
            std_error = np.std(results.resid)
            yhat_lower = yhat - 1.96 * std_error
            yhat_upper = yhat + 1.96 * std_error
        else:
            # Nothing to forecast: the data already reaches the horizon.
            yhat = yhat_lower = yhat_upper = np.array([], dtype=float)

        # History followed by forecast, in the Prophet-style column layout.
        fc_df_monthly = pd.DataFrame({
            'ds': monthly_df.index.append(future_dates),
            'yhat': np.concatenate([observed, yhat]),
            'yhat_lower': np.concatenate([observed, yhat_lower]),
            'yhat_upper': np.concatenate([observed, yhat_upper]),
        })

        # Holt models the trend only, so there is no seasonal part.
        fc_df_monthly['yearly'] = 0
        fc_df_monthly['trend'] = fc_df_monthly['yhat']

        return fc_df_monthly

    except Exception as e:
        st.error(f"Holt λͺ¨λΈ 였λ₯˜: {str(e)}")
        return None
553
+
554
def fit_holt_winters(df, horizon_end):
    """Fit a Holt-Winters model (additive damped trend, multiplicative season).

    Parameters
    ----------
    df : pandas.DataFrame
        Price data accepted by ``prepare_monthly_data``.
    horizon_end : str or datetime-like
        Last month to forecast.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``ds``, ``yhat``, ``yhat_lower``, ``yhat_upper``, ``yearly``
        and ``trend``.  Historical rows carry the observed price in all three
        ``yhat*`` columns and are followed by one row per forecast month.
        ``trend`` is ``yhat`` with the average seasonal factor of its calendar
        month divided out and ``yearly`` is the remainder, so
        ``trend + yearly == yhat``; both are 0 when no factor is available.
        If ``horizon_end`` is not after the last observed month, only the
        historical rows are returned (with both components at 0).  ``None``
        is returned, after showing a Streamlit error, when fitting fails.
    """
    monthly_df = prepare_monthly_data(df)

    try:
        history = monthly_df['price']
        observed = history.to_numpy(dtype=float)

        # Whole months between the last observation and the horizon.
        last_date = monthly_df.index[-1]
        end_date = pd.Timestamp(horizon_end)
        periods = max((end_date.year - last_date.year) * 12 + (end_date.month - last_date.month), 0)
        future_dates = pd.date_range(
            start=last_date + pd.DateOffset(months=1),
            periods=periods,
            freq=pd.offsets.MonthEnd(),
        )

        results = None
        if periods > 0:
            model = ExponentialSmoothing(
                history,
                trend='add',
                seasonal='mul',  # multiplicative seasonality, as chosen by the original author
                seasonal_periods=12,
                damped_trend=True  # current name of the deprecated ``damped`` keyword
            )
            results = model.fit(optimized=True)
            yhat = results.forecast(periods).to_numpy(dtype=float)

            # Constant-width 95% band from the in-sample residual spread.
            std_error = np.std(results.resid)
            yhat_lower = yhat - 1.96 * std_error
            yhat_upper = yhat + 1.96 * std_error
        else:
            # Nothing to forecast: the data already reaches the horizon.
            yhat = yhat_lower = yhat_upper = np.array([], dtype=float)

        # History followed by forecast, in the Prophet-style column layout.
        fc_df_monthly = pd.DataFrame({
            'ds': monthly_df.index.append(future_dates),
            'yhat': np.concatenate([observed, yhat]),
            'yhat_lower': np.concatenate([observed, yhat_lower]),
            'yhat_upper': np.concatenate([observed, yhat_upper]),
        })

        fc_df_monthly['yearly'] = 0.0
        fc_df_monthly['trend'] = 0.0

        try:
            if results is not None:
                # The fitted seasonal component is exposed as ``season``.  The
                # original read ``seasonal_``, which is not a results
                # attribute, so this step always failed silently.  Factors are
                # averaged per calendar month so they line up with ``ds``
                # whatever month the series starts in.
                # NOTE(review): the split below (trend = yhat / factor) replaces
                # the original arithmetic, which never ran — confirm it matches
                # what the charts are meant to show.
                season = np.asarray(results.season, dtype=float)
                by_month = pd.Series(season, index=monthly_df.index.month).groupby(level=0).mean()
                factor = fc_df_monthly['ds'].dt.month.map(by_month)
                usable = factor.notna() & (factor != 0)
                trend_part = (fc_df_monthly['yhat'] / factor).where(usable)
                fc_df_monthly['trend'] = trend_part.fillna(0.0)
                fc_df_monthly['yearly'] = (fc_df_monthly['yhat'] - trend_part).fillna(0.0)
        except Exception:
            # Best effort only: leave both components at zero on any failure.
            fc_df_monthly['yearly'] = 0.0
            fc_df_monthly['trend'] = 0.0

        return fc_df_monthly

    except Exception as e:
        st.error(f"Holt-Winters λͺ¨λΈ 였λ₯˜: {str(e)}")
        return None
630
+
631
def fit_moving_average(df, window, horizon_end):
    """Forecast a flat line at the mean of the last ``window`` monthly prices.

    Parameters
    ----------
    df : pandas.DataFrame
        Price data accepted by ``prepare_monthly_data``.
    window : int
        Number of most recent months to average; must be at least 1.
    horizon_end : str or datetime-like
        Last month to forecast.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``ds``, ``yhat``, ``yhat_lower``, ``yhat_upper``, ``yearly``
        (always 0) and ``trend`` (equal to ``yhat``).  Historical rows carry
        the observed price in all three ``yhat*`` columns and are followed by
        one row per forecast month.  If ``horizon_end`` is not after the last
        observed month, only the historical rows are returned.  ``None`` is
        returned, after showing a Streamlit error, on failure (including an
        invalid ``window``).
    """
    monthly_df = prepare_monthly_data(df)

    try:
        window = int(window)
        if window < 1:
            # ``iloc[-0:]`` would silently select the whole series.
            raise ValueError("window must be a positive number of months")

        history = monthly_df['price']
        observed = history.to_numpy(dtype=float)

        # Mean and spread of the most recent ``window`` months.
        last_values = history.iloc[-window:]
        ma_value = last_values.mean()
        std_error = last_values.std()
        if pd.isna(std_error):
            # A single observation has no sample spread; use a zero-width band.
            std_error = 0.0
        lower_bound = ma_value - 1.96 * std_error
        upper_bound = ma_value + 1.96 * std_error

        # Whole months between the last observation and the horizon.
        last_date = monthly_df.index[-1]
        end_date = pd.Timestamp(horizon_end)
        periods = max((end_date.year - last_date.year) * 12 + (end_date.month - last_date.month), 0)
        future_dates = pd.date_range(
            start=last_date + pd.DateOffset(months=1),
            periods=periods,
            freq=pd.offsets.MonthEnd(),
        )

        # Every future month gets the same value and the same band.
        fc_df_monthly = pd.DataFrame({
            'ds': monthly_df.index.append(future_dates),
            'yhat': np.concatenate([observed, np.full(periods, ma_value, dtype=float)]),
            'yhat_lower': np.concatenate([observed, np.full(periods, lower_bound, dtype=float)]),
            'yhat_upper': np.concatenate([observed, np.full(periods, upper_bound, dtype=float)]),
        })

        # No seasonal part in a moving average.
        fc_df_monthly['yearly'] = 0
        fc_df_monthly['trend'] = fc_df_monthly['yhat']

        return fc_df_monthly

    except Exception as e:
        st.error(f"이동 평균 λͺ¨λΈ 였λ₯˜: {str(e)}")
        return None
685
+
686
def fit_weighted_ma(df, window, horizon_end):
    """Forecast monthly prices as a linearly weighted mean of recent months.

    The most recent observation gets the largest weight (weights 1..k,
    normalised to sum to 1, where k is the number of values actually
    available in the trailing window).

    Args:
        df: Raw price data; handed unchanged to ``prepare_monthly_data``, whose
            result is read through its ``price`` column and its date index.
        window: Maximum number of trailing monthly observations to use (>= 1).
        horizon_end: Anything ``pd.Timestamp`` accepts. One forecast row is
            produced per month after the last observation, up to and including
            the month of this date.

    Returns:
        DataFrame with columns ``ds``, ``yhat``, ``yhat_lower``, ``yhat_upper``,
        ``yearly`` and ``trend`` (Prophet-style layout), or ``None`` after
        reporting through ``st.error`` when anything inside the fit fails.
    """
    # Aggregate to one row per month.
    monthly_df = prepare_monthly_data(df)

    try:
        if window < 1:
            # iloc[-0:] would select the whole series rather than nothing.
            raise ValueError(f"window must be >= 1, got {window}")

        last_values = monthly_df['price'].iloc[-window:].to_numpy()

        # Size the weights from the values we really have: a series shorter
        # than `window` previously failed with a shape mismatch.
        weights = np.arange(1, len(last_values) + 1)
        weights = weights / np.sum(weights)

        wma_value = np.sum(last_values * weights)

        # Whole months between the last observation and the horizon.
        last_date = monthly_df.index[-1]
        end_date = pd.Timestamp(horizon_end)
        periods = (end_date.year - last_date.year) * 12 + (end_date.month - last_date.month)

        # Offset object rather than the 'M' string alias: the alias is
        # deprecated in pandas >= 2.2, the object works on every version.
        month_end = pd.offsets.MonthEnd()
        future_dates = pd.date_range(start=last_date + pd.DateOffset(months=1), periods=periods, freq=month_end)

        # Band derived from the spread of the windowed values (population std).
        std_error = np.std(last_values)
        lower_bound = wma_value - 1.96 * std_error
        upper_bound = wma_value + 1.96 * std_error

        # One row per month from the first observation to the horizon.
        fc_df_monthly = pd.DataFrame({
            'ds': pd.date_range(start=monthly_df.index[0], end=future_dates[-1], freq=month_end),
        })

        history = monthly_df['price'].values
        n_hist = len(monthly_df)
        n_future = len(future_dates)
        for column, level in (('yhat', wma_value), ('yhat_lower', lower_bound), ('yhat_upper', upper_bound)):
            # History rows echo the observations; forecast rows get the level.
            fc_df_monthly.loc[:n_hist - 1, column] = history
            fc_df_monthly.loc[n_hist:, column] = [level] * n_future

        # Component columns expected by Prophet-style consumers.
        fc_df_monthly['yearly'] = 0
        fc_df_monthly['trend'] = fc_df_monthly['yhat']

        return fc_df_monthly

    except Exception as e:
        st.error(f"가쀑 이동 평균 λͺ¨λΈ 였λ₯˜: {str(e)}")
        return None
745
+
746
def fit_naive(df, horizon_end):
    """Forecast every future month as the last observed monthly price.

    Args:
        df: Raw price data; handed unchanged to ``prepare_monthly_data``, whose
            result is read through its ``price`` column and its date index.
        horizon_end: Anything ``pd.Timestamp`` accepts. One forecast row is
            produced per month after the last observation, up to and including
            the month of this date.

    Returns:
        DataFrame with columns ``ds``, ``yhat``, ``yhat_lower``, ``yhat_upper``,
        ``yearly`` and ``trend`` (Prophet-style layout), or ``None`` after
        reporting through ``st.error`` when anything inside the fit fails.
    """
    # Aggregate to one row per month.
    monthly_df = prepare_monthly_data(df)

    try:
        prices = monthly_df['price']
        last_value = prices.iloc[-1]

        # Whole months between the last observation and the horizon.
        last_date = monthly_df.index[-1]
        end_date = pd.Timestamp(horizon_end)
        periods = (end_date.year - last_date.year) * 12 + (end_date.month - last_date.month)

        # Offset object rather than the 'M' string alias: the alias is
        # deprecated in pandas >= 2.2, the object works on every version.
        month_end = pd.offsets.MonthEnd()
        future_dates = pd.date_range(start=last_date + pd.DateOffset(months=1), periods=periods, freq=month_end)

        # Spread of (at most) the last 12 months; iloc[-12:] already falls
        # back to the whole series when fewer than 12 rows exist.
        history_std = prices.iloc[-12:].std()
        lower_bound = last_value - 1.96 * history_std
        upper_bound = last_value + 1.96 * history_std

        # One row per month from the first observation to the horizon.
        fc_df_monthly = pd.DataFrame({
            'ds': pd.date_range(start=monthly_df.index[0], end=future_dates[-1], freq=month_end),
        })

        history = prices.values
        n_hist = len(monthly_df)
        n_future = len(future_dates)
        for column, level in (('yhat', last_value), ('yhat_lower', lower_bound), ('yhat_upper', upper_bound)):
            # History rows echo the observations; forecast rows get the level.
            fc_df_monthly.loc[:n_hist - 1, column] = history
            fc_df_monthly.loc[n_hist:, column] = [level] * n_future

        # Component columns expected by Prophet-style consumers.
        fc_df_monthly['yearly'] = 0
        fc_df_monthly['trend'] = fc_df_monthly['yhat']

        return fc_df_monthly

    except Exception as e:
        st.error(f"Naive λͺ¨λΈ 였λ₯˜: {str(e)}")
        return None
799
+
800
def fit_seasonal_naive(df, horizon_end):
    """Forecast each future month with the latest observation of that month.

    For a future month the forecast is the most recent historical price of
    the same calendar month; if that month never occurs in the history the
    overall mean price is used instead.

    Args:
        df: Raw price data; handed unchanged to ``prepare_monthly_data``, whose
            result is read through its ``price`` column and its date index.
        horizon_end: Anything ``pd.Timestamp`` accepts. One forecast row is
            produced per month after the last observation, up to and including
            the month of this date.

    Returns:
        DataFrame with columns ``ds``, ``yhat``, ``yhat_lower``, ``yhat_upper``,
        ``yearly`` and ``trend`` (Prophet-style layout), or ``None`` after
        reporting through ``st.error`` when anything inside the fit fails.
    """
    # Aggregate to one row per month.
    monthly_df = prepare_monthly_data(df)

    try:
        # Whole months between the last observation and the horizon.
        last_date = monthly_df.index[-1]
        end_date = pd.Timestamp(horizon_end)
        periods = (end_date.year - last_date.year) * 12 + (end_date.month - last_date.month)

        # Offset object rather than the 'M' string alias: the alias is
        # deprecated in pandas >= 2.2, the object works on every version.
        month_end = pd.offsets.MonthEnd()
        future_dates = pd.date_range(start=last_date + pd.DateOffset(months=1), periods=periods, freq=month_end)

        prices = monthly_df['price']
        row_months = monthly_df.index.month
        overall_std = prices.std()

        # Calendar month -> (forecast value, std error). Only twelve distinct
        # answers exist, so compute each once instead of once per future date.
        per_month = {}

        def month_forecast(month):
            if month not in per_month:
                same_month_values = prices[row_months == month]
                if len(same_month_values) > 0:
                    value = same_month_values.iloc[-1]
                    # A single sample has no spread of its own.
                    spread = same_month_values.std() if len(same_month_values) > 1 else overall_std
                else:
                    value = prices.mean()
                    spread = overall_std
                per_month[month] = (value, spread)
            return per_month[month]

        future_values = []
        lower_bounds = []
        upper_bounds = []
        for date in future_dates:
            forecast_value, std_error = month_forecast(date.month)
            future_values.append(forecast_value)
            lower_bounds.append(forecast_value - 1.96 * std_error)
            upper_bounds.append(forecast_value + 1.96 * std_error)

        # One row per month from the first observation to the horizon.
        fc_df_monthly = pd.DataFrame({
            'ds': pd.date_range(start=monthly_df.index[0], end=future_dates[-1], freq=month_end),
        })

        history = prices.values
        n_hist = len(monthly_df)
        for column, future_column in (('yhat', future_values), ('yhat_lower', lower_bounds), ('yhat_upper', upper_bounds)):
            # History rows echo the observations; forecast rows get the model output.
            fc_df_monthly.loc[:n_hist - 1, column] = history
            fc_df_monthly.loc[n_hist:, column] = future_column

        # The whole signal is attributed to seasonality for this model.
        fc_df_monthly['yearly'] = fc_df_monthly['yhat']
        fc_df_monthly['trend'] = 0

        return fc_df_monthly

    except Exception as e:
        st.error(f"Seasonal Naive λͺ¨λΈ 였λ₯˜: {str(e)}")
        return None
871
+
872
def fit_fourier_lr(df, horizon_end):
    """Fit a linear trend plus two annual Fourier harmonics by least squares.

    The regressors are the month counter ``t`` and sin/cos pairs of period 12
    and period 6 (first and second harmonic of a 12-month cycle).

    Args:
        df: Raw price data; handed unchanged to ``prepare_monthly_data``, whose
            result is read through its ``price`` column and its date index.
        horizon_end: Anything ``pd.Timestamp`` accepts. One forecast row is
            produced per month after the last observation, up to and including
            the month of this date.

    Returns:
        DataFrame with columns ``ds``, ``yhat``, ``yhat_lower``, ``yhat_upper``,
        ``trend`` and ``yearly`` (Prophet-style layout). History rows carry the
        observed price in the ``yhat*`` columns; ``trend`` and ``yearly`` are
        the fitted linear and seasonal parts over the whole frame. ``None`` is
        returned, after reporting through ``st.error``, when the fit fails.
    """
    from sklearn.linear_model import LinearRegression

    # Aggregate to one row per month.
    monthly_df = prepare_monthly_data(df)

    try:
        p = 12  # seasonal period in months (one year)

        def design_matrix(t_values):
            """Columns: t, sin/cos of the 1st harmonic, sin/cos of the 2nd."""
            return np.column_stack([
                t_values,
                np.sin(2 * np.pi * t_values / p),
                np.cos(2 * np.pi * t_values / p),
                np.sin(4 * np.pi * t_values / p),
                np.cos(4 * np.pi * t_values / p),
            ])

        y = monthly_df['price'].values
        t = np.arange(len(y))
        X = design_matrix(t)

        model = LinearRegression()
        model.fit(X, y)

        # Whole months between the last observation and the horizon.
        last_date = monthly_df.index[-1]
        end_date = pd.Timestamp(horizon_end)
        periods = (end_date.year - last_date.year) * 12 + (end_date.month - last_date.month)

        # Offset object rather than the 'M' string alias: the alias is
        # deprecated in pandas >= 2.2, the object works on every version.
        month_end = pd.offsets.MonthEnd()
        future_dates = pd.date_range(start=last_date + pd.DateOffset(months=1), periods=periods, freq=month_end)

        # The month counter simply continues past the training range.
        t_future = np.arange(len(y), len(y) + periods)
        forecast = model.predict(design_matrix(t_future))

        # Band from the in-sample root-mean-square residual.
        residuals = y - model.predict(X)
        std_error = np.sqrt(np.mean(residuals ** 2))
        lower_bound = forecast - 1.96 * std_error
        upper_bound = forecast + 1.96 * std_error

        # One row per month from the first observation to the horizon.
        fc_df_monthly = pd.DataFrame({
            'ds': pd.date_range(start=monthly_df.index[0], end=future_dates[-1], freq=month_end),
        })

        n_hist = len(monthly_df)
        for column, future_column in (('yhat', forecast), ('yhat_lower', lower_bound), ('yhat_upper', upper_bound)):
            # History rows echo the observations; forecast rows get the model output.
            fc_df_monthly.loc[:n_hist - 1, column] = y
            fc_df_monthly.loc[n_hist:, column] = future_column

        # Decompose the fitted model over the whole frame.
        t_all = np.arange(len(fc_df_monthly))
        fc_df_monthly['trend'] = model.coef_[0] * t_all + model.intercept_
        fc_df_monthly['yearly'] = np.dot(design_matrix(t_all)[:, 1:], model.coef_[1:5])

        return fc_df_monthly

    except Exception as e:
        st.error(f"Fourier + LR λͺ¨λΈ 였λ₯˜: {str(e)}")
        return None
968
+
969
def fit_linear_trend(df, horizon_end):
    """Fit a straight line through the monthly prices and extrapolate it.

    Args:
        df: Raw price data; handed unchanged to ``prepare_monthly_data``, whose
            result is read through its ``price`` column and its date index.
        horizon_end: Anything ``pd.Timestamp`` accepts. One forecast row is
            produced per month after the last observation, up to and including
            the month of this date.

    Returns:
        DataFrame with columns ``ds``, ``yhat``, ``yhat_lower``, ``yhat_upper``,
        ``yearly`` and ``trend`` (Prophet-style layout). History rows carry the
        observed price in the ``yhat*`` columns; ``trend`` is the fitted line
        over the whole frame (history and forecast). ``None`` is returned,
        after reporting through ``st.error``, when the fit fails.
    """
    from sklearn.linear_model import LinearRegression

    # Aggregate to one row per month.
    monthly_df = prepare_monthly_data(df)

    try:
        # Regress price on a simple month counter.
        y = monthly_df['price'].values
        t = np.arange(len(y)).reshape(-1, 1)

        model = LinearRegression()
        model.fit(t, y)

        # Whole months between the last observation and the horizon.
        last_date = monthly_df.index[-1]
        end_date = pd.Timestamp(horizon_end)
        periods = (end_date.year - last_date.year) * 12 + (end_date.month - last_date.month)

        # Offset object rather than the 'M' string alias: the alias is
        # deprecated in pandas >= 2.2, the object works on every version.
        month_end = pd.offsets.MonthEnd()
        future_dates = pd.date_range(start=last_date + pd.DateOffset(months=1), periods=periods, freq=month_end)
        t_future = np.arange(len(y), len(y) + periods).reshape(-1, 1)
        forecast = model.predict(t_future)

        # Band from the in-sample root-mean-square residual.
        residuals = y - model.predict(t)
        std_error = np.sqrt(np.mean(residuals ** 2))
        lower_bound = forecast - 1.96 * std_error
        upper_bound = forecast + 1.96 * std_error

        # One row per month from the first observation to the horizon.
        fc_df_monthly = pd.DataFrame({
            'ds': pd.date_range(start=monthly_df.index[0], end=future_dates[-1], freq=month_end),
        })

        n_hist = len(monthly_df)
        for column, future_column in (('yhat', forecast), ('yhat_lower', lower_bound), ('yhat_upper', upper_bound)):
            # History rows echo the observations; forecast rows get the model output.
            fc_df_monthly.loc[:n_hist - 1, column] = y
            fc_df_monthly.loc[n_hist:, column] = future_column

        fc_df_monthly['yearly'] = 0
        # The trend component is the fitted line itself. Copying `yhat` here
        # would expose raw observed prices as "trend" over the history rows.
        t_all = np.arange(len(fc_df_monthly)).reshape(-1, 1)
        fc_df_monthly['trend'] = model.predict(t_all)

        return fc_df_monthly

    except Exception as e:
        st.error(f"Linear Trend λͺ¨λΈ 였λ₯˜: {str(e)}")
        return None
1034
+
1035
def fit_simple_exp_smoothing(df, horizon_end):
    """Forecast monthly prices with ``SimpleExpSmoothing``.

    Args:
        df: Raw price data; handed unchanged to ``prepare_monthly_data``, whose
            result is read through its ``price`` column and its date index.
        horizon_end: Anything ``pd.Timestamp`` accepts. One forecast row is
            produced per month after the last observation, up to and including
            the month of this date.

    Returns:
        DataFrame with columns ``ds``, ``yhat``, ``yhat_lower``, ``yhat_upper``,
        ``yearly`` and ``trend`` (Prophet-style layout), or ``None`` after
        reporting through ``st.error`` when anything inside the fit fails.
    """
    # Aggregate to one row per month.
    monthly_df = prepare_monthly_data(df)

    try:
        # Let the library pick the smoothing parameter.
        model = SimpleExpSmoothing(monthly_df['price'])
        results = model.fit(optimized=True)

        # Whole months between the last observation and the horizon.
        last_date = monthly_df.index[-1]
        end_date = pd.Timestamp(horizon_end)
        periods = (end_date.year - last_date.year) * 12 + (end_date.month - last_date.month)

        forecast = results.forecast(periods)

        # Band from the spread of the in-sample residuals.
        std_error = np.std(results.resid)
        lower_bound = forecast - 1.96 * std_error
        upper_bound = forecast + 1.96 * std_error

        # Offset object rather than the 'M' string alias: the alias is
        # deprecated in pandas >= 2.2, the object works on every version.
        month_end = pd.offsets.MonthEnd()
        future_dates = pd.date_range(start=last_date + pd.DateOffset(months=1), periods=periods, freq=month_end)

        # One row per month from the first observation to the horizon.
        fc_df_monthly = pd.DataFrame({
            'ds': pd.date_range(start=monthly_df.index[0], end=future_dates[-1], freq=month_end),
        })

        history = monthly_df['price'].values
        n_hist = len(monthly_df)
        future_columns = (
            ('yhat', forecast.values),
            ('yhat_lower', lower_bound.values),
            ('yhat_upper', upper_bound.values),
        )
        for column, future_column in future_columns:
            # History rows echo the observations; forecast rows get the model output.
            fc_df_monthly.loc[:n_hist - 1, column] = history
            fc_df_monthly.loc[n_hist:, column] = future_column

        # Component columns expected by Prophet-style consumers.
        fc_df_monthly['yearly'] = 0
        fc_df_monthly['trend'] = fc_df_monthly['yhat']

        return fc_df_monthly

    except Exception as e:
        st.error(f"Simple Exponential Smoothing λͺ¨λΈ 였λ₯˜: {str(e)}")
        return None
1092
+
1093
@st.cache_data(show_spinner=False, ttl=3600)
def fit_optimal_model(df, item_name, horizon_end, model_type="primary"):
    """Fit the model registered as best (or second best) for ``item_name``.

    The registry entry from ``get_best_model_for_item`` supplies a model name
    and accuracy per rank; the name is matched by substring, in a fixed order,
    against the known model families and the corresponding ``fit_*`` function
    is called. Unknown names fall back to SARIMA(1,0,1)(1,0,1,12).

    Args:
        df: Price data with ``date`` and ``price`` columns; rows missing
            either are dropped from a copy before fitting.
        item_name: Item whose registry entry selects the model.
        horizon_end: Forecast horizon, forwarded to the chosen ``fit_*``.
        model_type: ``"primary"`` for the rank-1 model; any other value
            selects the rank-2 (backup) model.

    Returns:
        Whatever the selected ``fit_*`` function returns.
    """
    # Work on a cleaned copy so the caller's frame is untouched.
    df = df.copy().dropna(subset=["date", "price"])

    # Pick the rank-1 or rank-2 entry for this item.
    model_info = get_best_model_for_item(item_name)
    name_key, accuracy_key = (
        ("model1", "accuracy1") if model_type == "primary" else ("model2", "accuracy2")
    )
    model_name = model_info[name_key]
    accuracy = model_info[accuracy_key]

    st.info(f"{item_name}에 μ΅œμ ν™”λœ {model_name} λͺ¨λΈ 적용 (정확도: {accuracy}%)")

    # Items flagged in the registry get an extra caution banner.
    if "special" in model_info and model_info["special"] == "accuracy_drop":
        st.warning(f"⚠️ {item_name}λŠ” νŠΉμ • 월에 정확도가 급락할 수 μžˆλŠ” ν’ˆλͺ©μž…λ‹ˆλ‹€. 예츑 κ²°κ³Όλ₯Ό 주의 깊게 μ‚΄νŽ΄λ³΄μ„Έμš”.")

    def sarima(order, seasonal_order):
        return fit_sarima(df, order=order, seasonal_order=seasonal_order, horizon_end=horizon_end)

    # (applies?, runner) pairs; the first applicable rule wins, so order
    # matters: "Holt-Winters" must precede "Holt", and plain "Naive" must
    # exclude names containing "Seasonal".
    rules = (
        ("SARIMA(1,0,1)(1,0,1,12)" in model_name, lambda: sarima((1, 0, 1), (1, 0, 1, 12))),
        ("SARIMA(1,1,1)(1,1,1,12)" in model_name, lambda: sarima((1, 1, 1), (1, 1, 1, 12))),
        ("SARIMA(0,1,1)(0,1,1,12)" in model_name, lambda: sarima((0, 1, 1), (0, 1, 1, 12))),
        ("ETS(Multiplicative)" in model_name,
         lambda: fit_ets(df, seasonal_type="multiplicative", horizon_end=horizon_end)),
        ("ETS(Additive)" in model_name,
         lambda: fit_ets(df, seasonal_type="additive", horizon_end=horizon_end)),
        ("Holt-Winters" in model_name, lambda: fit_holt_winters(df, horizon_end=horizon_end)),
        ("Holt" in model_name, lambda: fit_holt(df, horizon_end=horizon_end)),
        ("MovingAverage-6 m" in model_name,
         lambda: fit_moving_average(df, window=6, horizon_end=horizon_end)),
        ("WeightedMA-6 m" in model_name,
         lambda: fit_weighted_ma(df, window=6, horizon_end=horizon_end)),
        ("Naive" in model_name and "Seasonal" not in model_name,
         lambda: fit_naive(df, horizon_end=horizon_end)),
        ("SeasonalNaive" in model_name, lambda: fit_seasonal_naive(df, horizon_end=horizon_end)),
        ("Fourier + LR" in model_name, lambda: fit_fourier_lr(df, horizon_end=horizon_end)),
        ("LinearTrend" in model_name, lambda: fit_linear_trend(df, horizon_end=horizon_end)),
        ("SimpleExpSmoothing" in model_name,
         lambda: fit_simple_exp_smoothing(df, horizon_end=horizon_end)),
    )
    for applies, run in rules:
        if applies:
            return run()

    # Nothing matched: warn and use the default SARIMA specification.
    st.warning(f"μ•Œ 수 μ—†λŠ” λͺ¨λΈ: {model_name}. κΈ°λ³Έ λͺ¨λΈ(SARIMA)을 μ‚¬μš©ν•©λ‹ˆλ‹€.")
    return sarima((1, 0, 1), (1, 0, 1, 12))
1149
+
1150
def fit_ensemble_model(df, item_name, horizon_end):
    """Blend the rank-1 and rank-2 forecasts when their accuracies are close.

    Both models are fitted through ``fit_optimal_model``. If both succeed and
    their registered accuracies differ by at most 0.2 percentage points, the
    forecasts are averaged with accuracy-proportional weights; otherwise the
    rank-1 forecast is returned as is.

    Args:
        df: Price data forwarded to ``fit_optimal_model``.
        item_name: Item whose registry entry supplies models and accuracies.
        horizon_end: Forecast horizon forwarded to ``fit_optimal_model``.

    Returns:
        The blended frame (a modified copy of the rank-1 frame), the rank-1
        frame, or - when exactly one fit failed - the frame that succeeded.
        ``None`` when both fits failed.
    """
    fc1 = fit_optimal_model(df, item_name, horizon_end, model_type="primary")
    fc2 = fit_optimal_model(df, item_name, horizon_end, model_type="backup")

    # With only one usable forecast there is nothing to blend.
    if fc1 is None or fc2 is None:
        return fc1 if fc1 is not None else fc2

    model_info = get_best_model_for_item(item_name)
    acc1 = model_info["accuracy1"]
    acc2 = model_info["accuracy2"]

    # Blend only when the two models are within 0.2 percentage points.
    accuracy_diff = abs(acc1 - acc2)
    if accuracy_diff > 0.2:
        st.info(f"정확도 차이가 {accuracy_diff:.2f}%p둜 μ»€μ„œ 1μœ„ λͺ¨λΈλ§Œ μ‚¬μš©ν•©λ‹ˆλ‹€.")
        return fc1

    st.success(f"두 λͺ¨λΈμ˜ 정확도 차이가 {accuracy_diff:.2f}%p둜 μž‘μ•„ 앙상블을 μˆ˜ν–‰ν•©λ‹ˆλ‹€.")

    # Accuracy-proportional weights; equal weights when both accuracies are
    # zero, which would otherwise divide by zero.
    total_acc = acc1 + acc2
    if total_acc:
        w1 = acc1 / total_acc
        w2 = acc2 / total_acc
    else:
        w1 = w2 = 0.5

    # Blend the forecast and its band, plus the trend/yearly components when
    # both frames provide them, so the components stay consistent with the
    # blended yhat instead of silently remaining those of the rank-1 model.
    blended_columns = ['yhat', 'yhat_lower', 'yhat_upper']
    blended_columns += [
        name for name in ('trend', 'yearly')
        if name in fc1.columns and name in fc2.columns
    ]

    fc_ensemble = fc1.copy()
    for column in blended_columns:
        fc_ensemble[column] = w1 * fc1[column] + w2 * fc2[column]

    return fc_ensemble
1189
+
1190
# -------------------------------------------------
# MAIN APP ---------------------------------------
# -------------------------------------------------
# Load the data set; nothing below can run without rows.
raw_df = load_data()

if not len(raw_df):
    st.error("데이터가 λΉ„μ–΄ μžˆμŠ΅λ‹ˆλ‹€. νŒŒμΌμ„ ν™•μΈν•΄μ£Όμ„Έμš”.")
    st.stop()

# Sidebar: item picker and today's date.
sidebar = st.sidebar
sidebar.header("πŸ” ν’ˆλͺ© 선택")
selected_item = sidebar.selectbox("ν’ˆλͺ©", get_items(raw_df))
current_date = date.today()
sidebar.caption(f"였늘: {current_date}")

# Sidebar: the two best models registered for the chosen item.
model_info = get_best_model_for_item(selected_item)
sidebar.subheader("ν’ˆλͺ©λ³„ 졜적 λͺ¨λΈ")
sidebar.markdown(f"**1μœ„ λͺ¨λΈ:** {model_info['model1']} (정확도: {model_info['accuracy1']}%)")
sidebar.markdown(f"**2μœ„ λͺ¨λΈ:** {model_info['model2']} (정확도: {model_info['accuracy2']}%)")

# Keep only the rows of the chosen item; stop when there are none.
item_df = raw_df.query("item == @selected_item").copy()
if item_df.empty:
    st.error("μ„ νƒν•œ ν’ˆλͺ© 데이터 μ—†μŒ")
    st.stop()

# Report how much data is available (fewer than two rows is flagged).
if len(item_df) >= 2:
    st.success(f"μ„ νƒν•œ ν’ˆλͺ© '{selected_item}'에 λŒ€ν•΄ {len(item_df)}개의 데이터가 μžˆμŠ΅λ‹ˆλ‹€.")
else:
    st.warning(f"μ„ νƒν•œ ν’ˆλͺ© '{selected_item}' 데이터가 λ„ˆλ¬΄ μ μŠ΅λ‹ˆλ‹€ (데이터 수: {len(item_df)}). 예츑이 λΆ€μ •ν™•ν•  수 μžˆμŠ΅λ‹ˆλ‹€.")
1222
+
1223
# -------------------------------------------------
# MACRO FORECAST 1996-2030 ------------------------
# -------------------------------------------------
st.header(f"πŸ“ˆ {selected_item} 가격 예츑 λŒ€μ‹œλ³΄λ“œ")


def _history_only_figure(frame):
    """Build the fallback line chart showing only observed prices of ``frame``."""
    chart = go.Figure()
    chart.add_trace(go.Scatter(x=frame["date"], y=frame["price"], mode="lines", name="μ‹€μ œ 가격"))
    chart.update_layout(title=f"{selected_item} κ³Όκ±° 가격")
    return chart


# Restrict to 1996 onwards, or to the first available date if that is later.
try:
    macro_start_dt = pd.Timestamp("1996-01-01")
    earliest_date = item_df["date"].min()
    if earliest_date > macro_start_dt:
        macro_start_dt = earliest_date

    macro_df = item_df[item_df["date"] >= macro_start_dt].copy()
except Exception as e:
    st.error(f"λ‚ μ§œ 필터링 였λ₯˜: {str(e)}")
    macro_df = item_df.copy()  # fall back to the unfiltered data

# Diagnostic summary of what will be analysed.
with st.expander("데이터 진단"):
    st.write(f"- 전체 데이터 수: {len(item_df)}")
    st.write(f"- 뢄석 데이터 수: {len(macro_df)}")
    if len(macro_df) == 0:
        st.write("데이터가 μ—†μŠ΅λ‹ˆλ‹€.")
    else:
        st.write(f"- κΈ°κ°„: {macro_df['date'].min().strftime('%Y-%m-%d')} ~ {macro_df['date'].max().strftime('%Y-%m-%d')}")
        st.dataframe(macro_df.head())

if len(macro_df) >= 2:
    try:
        # Enough data: let the user choose between single model and ensemble.
        use_ensemble = st.checkbox("앙상블 λͺ¨λΈ μ‚¬μš© (1μœ„ + 2μœ„ λͺ¨λΈ κ²°ν•©)", value=False)

        with st.spinner("μž₯κΈ° 예츑 λͺ¨λΈ 생성 쀑..."):
            forecaster = fit_ensemble_model if use_ensemble else fit_optimal_model
            fc_macro = forecaster(macro_df, selected_item, MACRO_END)

        if fc_macro is None:
            # No forecast available: show the observed prices only.
            st.warning("예츑 λͺ¨λΈμ„ 생성할 수 μ—†μŠ΅λ‹ˆλ‹€.")
            fig = _history_only_figure(macro_df)
            st.plotly_chart(fig, use_container_width=True)
        else:
            # Rows before the cutoff are drawn as actuals, later rows as forecast.
            cutoff_date = pd.Timestamp("2025-01-01")

            fig = go.Figure()

            # Observed prices before the cutoff.
            historical_data = macro_df[macro_df["date"] < cutoff_date].copy()
            if not historical_data.empty:
                fig.add_trace(go.Scatter(
                    x=historical_data["date"],
                    y=historical_data["price"],
                    mode="lines",
                    name="μ‹€μ œ 가격 (1996-2024)",
                    line={"color": "blue", "width": 2},
                ))

            # Forecast rows from the cutoff onwards.
            forecast_data = fc_macro[fc_macro["ds"] >= cutoff_date].copy()
            if not forecast_data.empty:
                fig.add_trace(go.Scatter(
                    x=forecast_data["ds"],
                    y=forecast_data["yhat"],
                    mode="lines",
                    name="예츑 가격 (2025-2030)",
                    line={"color": "red", "width": 2, "dash": "dash"},
                ))

                # Interval band: invisible upper edge, then the lower edge
                # filled up to it.
                fig.add_trace(go.Scatter(
                    x=forecast_data["ds"],
                    y=forecast_data["yhat_upper"],
                    mode="lines",
                    line={"width": 0},
                    showlegend=False,
                ))
                fig.add_trace(go.Scatter(
                    x=forecast_data["ds"],
                    y=forecast_data["yhat_lower"],
                    mode="lines",
                    line={"width": 0},
                    fill="tonexty",
                    fillcolor="rgba(255, 0, 0, 0.1)",
                    name="95% μ‹ λ’° ꡬ간",
                ))

            # Start the y axis at zero so negative values are not displayed.
            fig.update_yaxes(range=[0, None])

            fig.update_layout(
                title=f"{selected_item} μž₯κΈ° 가격 예츑 (1996-2030)",
                xaxis_title="연도",
                yaxis_title="가격 (원)",
                legend={
                    "orientation": "h",
                    "yanchor": "bottom",
                    "y": 1.02,
                    "xanchor": "right",
                    "x": 1,
                },
            )

            st.plotly_chart(fig, use_container_width=True)

            # Year-end forecast metrics relative to the last row's price.
            try:
                latest_price = macro_df.iloc[-1]["price"]

                def get_yearly_prediction(year_end):
                    """Return (forecast, % change vs latest price) nearest to Dec 31."""
                    target_date = pd.Timestamp(f"{year_end}-12-31")
                    closest_idx = (fc_macro["ds"] - target_date).abs().idxmin()
                    pred_value = fc_macro.loc[closest_idx, "yhat"]
                    pct_change = (pred_value - latest_price) / latest_price * 100
                    return pred_value, pct_change

                def _show_year_metric(column, label, year_end):
                    """Render one metric tile and hand back the values it shows."""
                    value, change = get_yearly_prediction(year_end)
                    column.metric(label, format_currency(value), f"{change:+.1f}%")
                    return value, change

                col1, col2, col3 = st.columns(3)
                pred_2025, pct_2025 = _show_year_metric(col1, "2025λ…„ μ˜ˆμΈ‘κ°€", 2025)
                pred_2027, pct_2027 = _show_year_metric(col2, "2027λ…„ μ˜ˆμΈ‘κ°€", 2027)
                pred_2030, pct_2030 = _show_year_metric(col3, "2030λ…„ μ˜ˆμΈ‘κ°€", 2030)

                # Remaining years, collapsed by default.
                with st.expander("더 λ§Žμ€ 연도별 μ˜ˆμΈ‘κ°€ 보기"):
                    col4, col5, col6 = st.columns(3)
                    pred_2026, pct_2026 = _show_year_metric(col4, "2026λ…„ μ˜ˆμΈ‘κ°€", 2026)
                    pred_2028, pct_2028 = _show_year_metric(col5, "2028λ…„ μ˜ˆμΈ‘κ°€", 2028)
                    pred_2029, pct_2029 = _show_year_metric(col6, "2029λ…„ μ˜ˆμΈ‘κ°€", 2029)

            except Exception as e:
                st.error(f"μ˜ˆμΈ‘κ°€ 계산 였λ₯˜: {str(e)}")
    except Exception as e:
        # Anything unexpected: report it with a traceback and still show history.
        st.error(f"μž₯κΈ° 예츑 였λ₯˜ λ°œμƒ: {str(e)}")
        import traceback
        st.code(traceback.format_exc())
        fig = _history_only_figure(macro_df)
        st.plotly_chart(fig, use_container_width=True)
else:
    # Too little data to forecast: show whatever history exists for the item.
    st.warning(f"{selected_item}에 λŒ€ν•œ 데이터가 μΆ©λΆ„ν•˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€. 전체 κΈ°κ°„ 데이터λ₯Ό ν‘œμ‹œν•©λ‹ˆλ‹€.")
    fig = _history_only_figure(item_df)
    st.plotly_chart(fig, use_container_width=True)
1400
+
1401
+ # -------------------------------------------------
1402
+ # MICRO FORECAST 2024‑2026 ------------------------
1403
+ # -------------------------------------------------
1404
+ # -------------------------------------------------
1405
+ # MICRO FORECAST 2024‑2026 ------------------------
1406
+ # -------------------------------------------------
1407
+ st.subheader("πŸ”Ž 2024–2026 단기 예츑 (월별)")
1408
+
1409
+ # 데이터 필터링 - 졜근 3λ…„ 데이터 ν™œμš©
1410
+ try:
1411
+ three_years_ago = pd.Timestamp("2021-01-01")
1412
+ if item_df["date"].min() > three_years_ago:
1413
+ three_years_ago = item_df["date"].min()
1414
+
1415
+ micro_df = item_df[item_df["date"] >= three_years_ago].copy()
1416
+ except Exception as e:
1417
+ st.error(f"단기 예츑 데이터 필터링 였λ₯˜: {str(e)}")
1418
+ # 졜근 데이터 μ‚¬μš©
1419
+ micro_df = item_df.sort_values("date").tail(24).copy()
1420
+
1421
+ if len(micro_df) < 2:
1422
+ st.warning(f"졜근 데이터가 μΆ©λΆ„ν•˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€.")
1423
+ fig = go.Figure()
1424
+ fig.add_trace(go.Scatter(x=item_df["date"], y=item_df["price"], mode="lines", name="μ‹€μ œ 가격"))
1425
+ fig.update_layout(title=f"{selected_item} 졜근 가격")
1426
+ st.plotly_chart(fig, use_container_width=True)
1427
+ else:
1428
+ try:
1429
+ with st.spinner("단기 예츑 λͺ¨λΈ 생성 쀑..."):
1430
+ if use_ensemble:
1431
+ fc_micro = fit_ensemble_model(micro_df, selected_item, MICRO_END)
1432
+ else:
1433
+ fc_micro = fit_optimal_model(micro_df, selected_item, MICRO_END)
1434
+
1435
+ if fc_micro is not None:
1436
+ # 2024-01-01λΆ€ν„° 2026-12-31κΉŒμ§€ 필터링
1437
# Display window for the short-term view: 2024-01-01 through 2026-12-31 (inclusive).
window_start = pd.Timestamp("2024-01-01")
window_end = pd.Timestamp("2026-12-31")

# Collapse the price history to one row per calendar month: the first date
# seen in that month paired with the month's mean price.
history = micro_df.copy()
history["year_month"] = history["date"].dt.strftime("%Y-%m")
history = (
    history.groupby("year_month")
    .agg({"date": "first", "price": "mean"})
    .reset_index(drop=True)
)
monthly_historical = history[history["date"].between(window_start, window_end)]

# Forecast rows that fall inside the same window.
monthly_forecast = fc_micro[fc_micro["ds"].between(window_start, window_end)].copy()

fig = go.Figure()

# Observed monthly prices for calendar year 2024.
in_2024 = monthly_historical["date"].between(
    pd.Timestamp("2024-01-01"), pd.Timestamp("2024-12-31")
)
observed_2024 = monthly_historical[in_2024]
if not observed_2024.empty:
    observed_trace = go.Scatter(
        x=observed_2024["date"],
        y=observed_2024["price"],
        mode="lines+markers",
        name="2024 μ‹€μ œ 가격",
        line={"color": "blue", "width": 2},
        marker={"size": 8},
    )
    fig.add_trace(observed_trace)

# Forecast rows strictly after the end of 2024.
cutoff = pd.Timestamp("2024-12-31")
projected = monthly_forecast[monthly_forecast["ds"] > cutoff]
if not projected.empty:
    forecast_trace = go.Scatter(
        x=projected["ds"],
        y=projected["yhat"],
        mode="lines+markers",
        name="2025-2026 예츑 가격",
        line={"color": "red", "width": 2, "dash": "dash"},
        marker={"size": 8},
    )
    # Interval band: an invisible upper line, then the lower line filled up
    # to it ("tonexty" fills toward the previously added trace, so the upper
    # bound must be added immediately before the lower bound).
    upper_trace = go.Scatter(
        x=projected["ds"],
        y=projected["yhat_upper"],
        mode="lines",
        line={"width": 0},
        showlegend=False,
    )
    lower_trace = go.Scatter(
        x=projected["ds"],
        y=projected["yhat_lower"],
        mode="lines",
        line={"width": 0},
        fill="tonexty",
        fillcolor="rgba(255, 0, 0, 0.1)",
        name="95% μ‹ λ’° ꡬ간",
    )
    for trace in (forecast_trace, upper_trace, lower_trace):
        fig.add_trace(trace)

# Pin the y-axis minimum at zero so negative forecast values are not shown.
fig.update_yaxes(range=[0, None])

# Titles, quarterly x ticks, and a horizontal legend above the plot area.
fig.update_layout(
    title=f"{selected_item} 월별 단기 예츑 (2024-2026)",
    xaxis_title="μ›”",
    yaxis_title="가격 (원)",
    xaxis={"tickformat": "%Y-%m", "dtick": "M3", "tickangle": 45},
    legend={
        "orientation": "h",
        "yanchor": "bottom",
        "y": 1.02,
        "xanchor": "right",
        "x": 1,
    },
)

st.plotly_chart(fig, use_container_width=True)

# Table of the post-2024 forecast with currency-formatted value and bounds.
with st.expander("월별 예츑 가격 상세보기"):
    monthly_detail = monthly_forecast[monthly_forecast["ds"] > cutoff].copy()
    monthly_detail["λ‚ μ§œ"] = monthly_detail["ds"].dt.strftime("%Yλ…„ %mμ›”")
    for display_col, source_col in (
        ("μ˜ˆμΈ‘κ°€κ²©", "yhat"),
        ("ν•˜ν•œκ°’", "yhat_lower"),
        ("μƒν•œκ°’", "yhat_upper"),
    ):
        monthly_detail[display_col] = monthly_detail[source_col].apply(format_currency)

    visible_columns = ["λ‚ μ§œ", "μ˜ˆμΈ‘κ°€κ²©", "ν•˜ν•œκ°’", "μƒν•œκ°’"]
    st.dataframe(monthly_detail[visible_columns], hide_index=True)
1546
+
1547
+ # Helper that looks up the forecast price for a given year and month
1548
def get_monthly_prediction(year, month):
    """Return the forecast nearest to a calendar month and its change vs. the latest price.

    Reads ``monthly_forecast`` (columns ``ds`` and ``yhat``),
    ``monthly_historical`` and ``micro_df`` (column ``price``) from the
    enclosing scope.

    Args:
        year: Calendar year of the target month.
        month: Calendar month (1-12) of the target month.

    Returns:
        ``(pred_value, pct_change)`` where ``pred_value`` is the ``yhat`` of
        the forecast row closest to the first day of the target month and
        ``pct_change`` is its percentage difference from the latest
        historical price; ``(None, None)`` when no forecast row lies within
        31 days of the target date.
    """
    target_date = pd.Timestamp(year=year, month=month, day=1)

    # Distance of every usable forecast row from the target date. Dropping
    # missing distances also covers an empty forecast, where idxmin() would
    # raise instead of letting callers show their "no data" fallback.
    date_diffs = (monthly_forecast["ds"] - target_date).abs().dropna()
    if date_diffs.empty:
        return None, None

    # A "nearest" row more than a month away belongs to a different month;
    # reporting it under the requested month's label would be misleading.
    if date_diffs.min() > pd.Timedelta(days=31):
        return None, None

    closest_idx = date_diffs.idxmin()
    pred_value = monthly_forecast.loc[closest_idx, "yhat"]

    # Baseline: last row of the monthly history, falling back to the last raw
    # observation when the monthly history is empty.
    if not monthly_historical.empty:
        latest_price = monthly_historical.iloc[-1]["price"]
    else:
        latest_price = micro_df.iloc[-1]["price"]
    pct_change = (pred_value - latest_price) / latest_price * 100

    return pred_value, pct_change
1564
+
1565
+ # Key monthly forecast prices for 2025 and 2026
1566
def _render_prediction_metric(slot, year, month):
    """Show one forecast metric for ``year``/``month`` in the given column."""
    label = f"{year}λ…„ {month}μ›”"
    predicted, change_pct = get_monthly_prediction(year, month)
    if predicted is None:
        # No forecast available: placeholder value with a neutral delta.
        slot.metric(label, "데이터 μ—†μŒ", "0%")
    else:
        slot.metric(label, format_currency(predicted), f"{change_pct:+.1f}%")


# Headline metrics: mid-2025, end of 2025 and end of 2026, left to right.
st.subheader("μ£Όμš” 월별 μ˜ˆμΈ‘κ°€")
headline_months = [(2025, 6), (2025, 12), (2026, 12)]
for slot, (year, month) in zip(st.columns(3), headline_months):
    _render_prediction_metric(slot, year, month)

# Quarter-end months (March, June, September, December) for each forecast year.
with st.expander("더 λ§Žμ€ 월별 μ˜ˆμΈ‘κ°€ 보기"):
    for year in [2025, 2026]:
        st.write(f"### {year}λ…„ 뢄기별 μ˜ˆμΈ‘κ°€")
        for slot, month in zip(st.columns(4), (3, 6, 9, 12)):
            _render_prediction_metric(slot, year, month)
1625
+
1626
+ else:
1627
+ st.warning("단기 예츑 λͺ¨λΈμ„ 생성할 수 μ—†μŠ΅λ‹ˆλ‹€.")
1628
+ except Exception as e:
1629
+ st.error(f"단기 예츑 였λ₯˜: {str(e)}")
1630
+ st.code(traceback.format_exc())
1631
+
1632
+ # -------------------------------------------------
1633
+ # SEASONALITY & PATTERN ---------------------------
1634
+ # -------------------------------------------------
1635
# Seasonality panel: only rendered when the short-term forecast exists
# (fc_micro is assigned in just one branch above, hence the name check).
if 'fc_micro' in locals() and fc_micro is not None:
    with st.expander("πŸ“† μ‹œμ¦ˆλ„λ¦¬ν‹° & νŒ¨ν„΄ μ„€λͺ…"):
        try:
            # Needs a non-trivial "yearly" column in the forecast frame.
            if "yearly" in fc_micro.columns and fc_micro["yearly"].sum() != 0:
                month_season = fc_micro.copy()
                month_season["month"] = month_season["ds"].dt.month
                # Mean yearly component per calendar month, indexed by month number.
                month_seasonality = month_season.groupby("month")["yearly"].mean()

                month_names = ["1μ›”", "2μ›”", "3μ›”", "4μ›”", "5μ›”", "6μ›”", "7μ›”", "8μ›”", "9μ›”", "10μ›”", "11μ›”", "12μ›”"]

                # Label each bar from the month number it was computed for, so
                # the labels stay aligned with the values even when the
                # forecast does not cover all twelve months.
                bar_labels = [month_names[int(m) - 1] for m in month_seasonality.index]

                fig = go.Figure()
                fig.add_trace(go.Bar(
                    x=bar_labels,
                    y=month_seasonality.values,
                    # Blue for non-negative effects, red for negative ones.
                    marker_color=['blue' if x >= 0 else 'red' for x in month_seasonality.values]
                ))

                fig.update_layout(
                    title=f"{selected_item} 월별 κ³„μ ˆμ„± νŒ¨ν„΄",
                    xaxis_title="μ›”",
                    yaxis_title="μƒλŒ€μ  가격 변동",
                )

                st.plotly_chart(fig, use_container_width=True)

                # Months with the highest / lowest mean effect and their spread.
                peak_month = month_seasonality.idxmax()
                low_month = month_seasonality.idxmin()
                seasonality_range = month_seasonality.max() - month_seasonality.min()

                st.markdown(
                    f"**μ—°κ°„ 피크 μ›”:** {month_names[peak_month-1]} \n"
                    f"**μ—°κ°„ 저점 μ›”:** {month_names[low_month-1]} \n"
                    f"**μ—°κ°„ 변동폭:** {seasonality_range:.1f}")

                # Classify seasonality strength by spread (max - min is never
                # negative, so no abs() is needed): > 30 strong, > 10 moderate.
                if seasonality_range > 30:
                    st.info(f"{selected_item}은(λŠ”) κ³„μ ˆμ„±μ΄ 맀우 κ°•ν•œ ν’ˆλͺ©μž…λ‹ˆλ‹€. νŠΉμ • 달에 가격이 크게 변동할 수 μžˆμŠ΅λ‹ˆλ‹€.")
                elif seasonality_range > 10:
                    # The original message had replacement-character garbage in
                    # place of the first syllable of the word used by the two
                    # sibling messages; restored to match them.
                    st.info(f"{selected_item}은(λŠ”) κ³„μ ˆμ„±μ΄ 쀑간 정도인 ν’ˆλͺ©μž…λ‹ˆλ‹€.")
                else:
                    st.info(f"{selected_item}은(λŠ”) κ³„μ ˆμ„±μ΄ μ•½ν•œ ν’ˆλͺ©μž…λ‹ˆλ‹€. 연쀑 가격이 비ꡐ적 μ•ˆμ •μ μž…λ‹ˆλ‹€.")
            else:
                # Without a yearly component the expander would otherwise be
                # empty; reuse the existing "cannot analyze" notice.
                st.info("이 ν’ˆλͺ©μ— λŒ€ν•œ κ³„μ ˆμ„± νŒ¨ν„΄μ„ 뢄석할 수 μ—†μŠ΅λ‹ˆλ‹€.")
        except Exception as e:
            # Best-effort panel: report the failure without stopping the app.
            st.error(f"κ³„μ ˆμ„± 뢄석 였λ₯˜: {str(e)}")
            st.info("이 ν’ˆλͺ©μ— λŒ€ν•œ κ³„μ ˆμ„± νŒ¨ν„΄μ„ 뢄석할 수 μ—†μŠ΅λ‹ˆλ‹€.")
1683
+
1684
+ # -------------------------------------------------
1685
+ # FOOTER ------------------------------------------
1686
+ # -------------------------------------------------
1687
# Page footer: a horizontal rule followed by the copyright caption.
footer_rule = "---"
footer_caption = "Β© 2025 ν’ˆλͺ©λ³„ 가격 예츑 μ‹œμŠ€ν…œ | 데이터 뢄석 μžλ™ν™”"
st.markdown(footer_rule)
st.caption(footer_caption)