# NH-Prediction / app-backup.py
# (Hugging Face file-viewer residue, kept for reference only: uploader "yokoha",
#  commit 8127935 "Create app-backup.py", verified, 75.7 kB.)
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from datetime import date
from pathlib import Path
import matplotlib.font_manager as fm
import matplotlib as mpl
import warnings
warnings.filterwarnings('ignore')
# Load the additional required libraries (statsmodels, scikit-learn)
try:
import statsmodels.api as sm
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.holtwinters import ExponentialSmoothing, SimpleExpSmoothing, Holt
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_percentage_error
except ImportError:
st.error("ν•„μš”ν•œ λΌμ΄λΈŒλŸ¬λ¦¬κ°€ μ„€μΉ˜λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€. ν„°λ―Έλ„μ—μ„œ λ‹€μŒ λͺ…령을 μ‹€ν–‰ν•˜μ„Έμš”:")
st.code("pip install statsmodels scikit-learn")
st.stop()
# -------------------------------------------------
# CONFIG ------------------------------------------
# -------------------------------------------------
# Location of the input CSV, relative to the working directory.
CSV_PATH = Path("2025-domae.csv")

# Date windows as ISO strings; the code that consumes them is outside this section.
MACRO_START = "1996-01-01"
MACRO_END = "2030-12-31"
MICRO_START = "2024-01-01"
MICRO_END = "2026-12-31"
# Korean font setup.
# Keywords are tried in priority order so that a dedicated Korean family
# (Malgun, Nanum, Gulim, Batang) wins over a generic "gothic" match; the latter
# also matches Latin-only fonts such as "Century Gothic".
_KOREAN_FONT_KEYWORDS = ('malgun', 'nanum', 'gulim', 'batang', 'gothic')
font_list = list(dict.fromkeys(
    f.name
    for keyword in _KOREAN_FONT_KEYWORDS
    for f in fm.fontManager.ttflist
    if keyword in f.name.lower()
))
if font_list:
    font_name = font_list[0]
    plt.rcParams['font.family'] = font_name
    # Render the minus sign with an ASCII hyphen.
    # NOTE(review): presumably because the chosen font may lack U+2212 - confirm.
    mpl.rcParams['axes.unicode_minus'] = False
else:
    plt.rcParams['font.family'] = 'DejaVu Sans'

# Page title/icon restored from mis-encoded text ("Price forecast by item").
st.set_page_config(page_title="품목별 가격 예측", page_icon="📈", layout="wide")
# -------------------------------------------------
# Best model mapping per item ---------------------
# -------------------------------------------------
# Each entry names two models ("model1", "model2") with their accuracy figures,
# plus an optional "special" flag. Keys are Korean item names; they were
# mis-encoded (UTF-8 shown through a single-byte codepage) and are restored here.
# NOTE(review): if model1 is meant to be the more accurate model, these entries
# contradict that (accuracy2 > accuracy1): 브로콜리, 새우, 토마토, 파프리카.
# NOTE(review): 김 lists the same model for model1 and model2 - confirm.
item_models = {
    "갈치": {"model1": "SARIMA(1,0,1)(1,0,1,12)", "accuracy1": 99.82, "model2": "Holt-Winters", "accuracy2": 99.80},
    "감자": {"model1": "ETS(Multiplicative)", "accuracy1": 99.58, "model2": "SARIMA(1,0,1)(1,0,1,12)", "accuracy2": 98.70},
    "건고추": {"model1": "SARIMA(1,0,1)(1,0,1,12)", "accuracy1": 99.96, "model2": "Holt", "accuracy2": 99.79},
    "건다시마": {"model1": "Naive", "accuracy1": 99.59, "model2": "SeasonalNaive", "accuracy2": 99.34},
    "고구마": {"model1": "SARIMA(1,1,1)(1,1,1,12)", "accuracy1": 99.89, "model2": "ETS(Multiplicative)", "accuracy2": 98.91},
    "고등어": {"model1": "SARIMA(1,0,1)(1,0,1,12)", "accuracy1": 99.48, "model2": "ETS(Additive)", "accuracy2": 99.42},
    "김": {"model1": "SARIMA(0,1,1)(0,1,1,12)", "accuracy1": 99.99, "model2": "SARIMA(0,1,1)(0,1,1,12)", "accuracy2": 99.93},
    "깐마늘(국산)": {"model1": "SeasonalNaive", "accuracy1": 99.79, "model2": "MovingAverage-6 m", "accuracy2": 98.65},
    "깻잎": {"model1": "SARIMA(0,1,1)(0,1,1,12)", "accuracy1": 99.68, "model2": "Holt", "accuracy2": 99.54},
    "녹두": {"model1": "WeightedMA-6 m", "accuracy1": 99.53, "model2": "Fourier + LR", "accuracy2": 99.53},
    "느타리버섯": {"model1": "SARIMA(0,1,1)(0,1,1,12)", "accuracy1": 99.84, "model2": "LinearTrend", "accuracy2": 99.80},
    "당근": {"model1": "Holt", "accuracy1": 99.25, "model2": "ETS(Multiplicative)", "accuracy2": 97.27},
    "들깨": {"model1": "Holt", "accuracy1": 99.57, "model2": "Holt-Winters", "accuracy2": 99.17},
    "땅콩": {"model1": "SARIMA(1,1,1)(1,1,1,12)", "accuracy1": 99.74, "model2": "ETS(Additive)", "accuracy2": 99.37},
    "레몬": {"model1": "WeightedMA-6 m", "accuracy1": 99.99, "model2": "LinearTrend", "accuracy2": 98.99},
    "망고": {"model1": "SARIMA(1,0,1)(1,0,1,12)", "accuracy1": 99.38, "model2": "Holt-Winters", "accuracy2": 99.02},
    "메밀": {"model1": "SARIMA(1,0,1)(1,0,1,12)", "accuracy1": 99.48, "model2": "SARIMA(0,1,1)(0,1,1,12)", "accuracy2": 98.99},
    "멜론": {"model1": "Naive", "accuracy1": 99.07, "model2": "ETS(Multiplicative)", "accuracy2": 99.01},
    "명태": {"model1": "SARIMA(1,0,1)(1,0,1,12)", "accuracy1": 100.00, "model2": "MovingAverage-6 m", "accuracy2": 99.93},
    "무": {"model1": "SARIMA(1,1,1)(1,1,1,12)", "accuracy1": 99.54, "model2": "SeasonalNaive", "accuracy2": 88.29, "special": "accuracy_drop"},
    "물오징어": {"model1": "Holt-Winters", "accuracy1": 99.91, "model2": "ETS(Multiplicative)", "accuracy2": 99.36},
    "미나리": {"model1": "SARIMA(1,0,1)(1,0,1,12)", "accuracy1": 98.71, "model2": "LinearTrend", "accuracy2": 98.54},
    "바나나": {"model1": "MovingAverage-6 m", "accuracy1": 99.81, "model2": "ETS(Multiplicative)", "accuracy2": 98.86},
    "방울토마토": {"model1": "ETS(Multiplicative)", "accuracy1": 99.62, "model2": "Holt", "accuracy2": 98.28},
    "배": {"model1": "ETS(Additive)", "accuracy1": 99.34, "model2": "LinearTrend", "accuracy2": 98.57},
    "배추": {"model1": "Holt", "accuracy1": 99.98, "model2": "MovingAverage-6 m", "accuracy2": 99.71},
    "북어": {"model1": "Fourier + LR", "accuracy1": 99.96, "model2": "MovingAverage-6 m", "accuracy2": 99.94},
    "붉은고추": {"model1": "SARIMA(1,1,1)(1,1,1,12)", "accuracy1": 99.75, "model2": "LinearTrend", "accuracy2": 97.61},
    "브로콜리": {"model1": "Holt", "accuracy1": 99.54, "model2": "Naive", "accuracy2": 99.93},
    "사과": {"model1": "Holt-Winters", "accuracy1": 99.89, "model2": "ETS(Multiplicative)", "accuracy2": 98.91},
    "상추": {"model1": "ETS(Additive)", "accuracy1": 99.11, "model2": "Holt-Winters", "accuracy2": 97.61},
    "새송이버섯": {"model1": "SimpleExpSmoothing", "accuracy1": 99.95, "model2": "Holt-Winters", "accuracy2": 99.40},
    "새우": {"model1": "ETS(Additive)", "accuracy1": 99.87, "model2": "Naive", "accuracy2": 99.96},
    "생강": {"model1": "Naive", "accuracy1": 99.27, "model2": "ETS(Additive)", "accuracy2": 98.53},
    "수박": {"model1": "Naive", "accuracy1": 99.91, "model2": "SARIMA(1,1,1)(1,1,1,12)", "accuracy2": 99.45},
    "시금치": {"model1": "Holt-Winters", "accuracy1": 99.70, "model2": "SeasonalNaive", "accuracy2": 98.73},
    "쌀": {"model1": "SARIMA(0,1,1)(0,1,1,12)", "accuracy1": 99.99, "model2": "Holt-Winters", "accuracy2": 99.88},
    "알배기배추": {"model1": "WeightedMA-6 m", "accuracy1": 98.19, "model2": "SeasonalNaive", "accuracy2": 95.73},
    "양배추": {"model1": "Holt-Winters", "accuracy1": 99.05, "model2": "WeightedMA-6 m", "accuracy2": 97.85},
    "양파": {"model1": "ETS(Additive)", "accuracy1": 99.93, "model2": "WeightedMA-6 m", "accuracy2": 99.51},
    "얼갈이배추": {"model1": "SARIMA(1,1,1)(1,1,1,12)", "accuracy1": 99.77, "model2": "SeasonalNaive", "accuracy2": 98.55},
    "열무": {"model1": "SeasonalNaive", "accuracy1": 99.96, "model2": "Holt", "accuracy2": 99.50},
    "오이": {"model1": "SeasonalNaive", "accuracy1": 99.82, "model2": "ETS(Additive)", "accuracy2": 98.48},
    "전복": {"model1": "Holt", "accuracy1": 99.90, "model2": "Fourier + LR", "accuracy2": 99.90},
    "참깨": {"model1": "WeightedMA-6 m", "accuracy1": 100.00, "model2": "LinearTrend", "accuracy2": 86.44, "special": "accuracy_drop"},
    "찹쌀": {"model1": "SARIMA(1,0,1)(1,0,1,12)", "accuracy1": 99.71, "model2": "Naive", "accuracy2": 98.64, "special": "accuracy_drop"},
    "콩": {"model1": "SARIMA(0,1,1)(0,1,1,12)", "accuracy1": 99.98, "model2": "ETS(Additive)", "accuracy2": 99.68},
    "토마토": {"model1": "SeasonalNaive", "accuracy1": 97.31, "model2": "MovingAverage-6 m", "accuracy2": 97.57},
    "파": {"model1": "MovingAverage-6 m", "accuracy1": 99.92, "model2": "Holt-Winters", "accuracy2": 97.77},
    "파인애플": {"model1": "Naive", "accuracy1": 99.51, "model2": "SARIMA(1,0,1)(1,0,1,12)", "accuracy2": 96.39},
    "파프리카": {"model1": "SARIMA(0,1,1)(0,1,1,12)", "accuracy1": 99.04, "model2": "WeightedMA-6 m", "accuracy2": 99.36},
    "팥": {"model1": "ETS(Additive)", "accuracy1": 99.87, "model2": "Holt-Winters", "accuracy2": 75.08, "special": "accuracy_drop"},
    "팽이버섯": {"model1": "SeasonalNaive", "accuracy1": 99.84, "model2": "Fourier + LR", "accuracy2": 98.49},
    "풋고추": {"model1": "Holt-Winters", "accuracy1": 98.95, "model2": "ETS(Multiplicative)", "accuracy2": 98.73},
    "피망": {"model1": "Fourier + LR", "accuracy1": 99.64, "model2": "WeightedMA-6 m", "accuracy2": 98.93},
    "호박": {"model1": "ETS(Multiplicative)", "accuracy1": 99.98, "model2": "SeasonalNaive", "accuracy2": 96.61},
    "홍합": {"model1": "Naive", "accuracy1": 99.86, "model2": "SeasonalNaive", "accuracy2": 98.56},
}
# Fallback model pair for items that are not listed in item_models.
default_models = dict(
    model1="SARIMA(1,0,1)(1,0,1,12)",
    accuracy1=99.0,
    model2="ETS(Multiplicative)",
    accuracy2=98.0,
)
# -------------------------------------------------
# UTILITIES ---------------------------------------
# -------------------------------------------------
DATE_CANDIDATES = {"date", "ds", "ymd", "λ‚ μ§œ", "prce_reg_mm", "etl_ldg_dt"}
ITEM_CANDIDATES = {"item", "ν’ˆλͺ©", "code", "category", "pdlt_nm", "spcs_nm"}
PRICE_CANDIDATES = {"price", "y", "value", "가격", "avrg_prce"}
def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
"""Standardize column names to date/item/price and deduplicate."""
col_map = {}
for c in df.columns:
lc = c.lower()
if lc in DATE_CANDIDATES:
col_map[c] = "date"
elif lc in PRICE_CANDIDATES:
col_map[c] = "price"
elif lc in ITEM_CANDIDATES:
# first hit as item, second as species
if "item" not in col_map.values():
col_map[c] = "item"
else:
col_map[c] = "species"
df = df.rename(columns=col_map)
# ── handle duplicated columns after rename ─────────────────────────
if df.columns.duplicated().any():
df = df.loc[:, ~df.columns.duplicated()]
# ── index datetime to column ───────────────────────────────────────
if "date" not in df.columns and df.index.dtype.kind == "M":
df.reset_index(inplace=True)
df.rename(columns={df.columns[0]: "date"}, inplace=True)
# ── convert YYYYMM string to datetime ──────────────────────────────────────
if "date" in df.columns and pd.api.types.is_object_dtype(df["date"]):
if len(df) > 0:
# 더 μœ μ—°ν•œ λ‚ μ§œ λ³€ν™˜
try:
# μƒ˜ν”Œ 확인 (λ¬Έμžμ—΄λ‘œ λ³€ν™˜ν•˜μ—¬ μ•ˆμ „ν•˜κ²Œ 처리)
sample = str(df["date"].iloc[0])
# YYYYMM ν˜•μ‹ (6자리)
if sample.isdigit() and len(sample) == 6:
df["date"] = pd.to_datetime(df["date"].astype(str), format="%Y%m", errors="coerce")
df["date"] = df["date"] + pd.offsets.MonthEnd(0) # ν•΄λ‹Ή μ›”μ˜ λ§ˆμ§€λ§‰ λ‚ λ‘œ μ„€μ •
# YYYYMMDD ν˜•μ‹ (8자리)
elif sample.isdigit() and len(sample) == 8:
df["date"] = pd.to_datetime(df["date"].astype(str), format="%Y%m%d", errors="coerce")
# 기타 ν˜•μ‹μ€ μžλ™ 감지
else:
df["date"] = pd.to_datetime(df["date"], errors="coerce")
except:
# μ‹€νŒ¨ μ‹œ 일반 λ³€ν™˜ μ‹œλ„
df["date"] = pd.to_datetime(df["date"], errors="coerce")
# ── build item from pdlt_nm + spcs_nm if needed ────────────────────
if "item" not in df.columns and {"pdlt_nm", "spcs_nm"}.issubset(df.columns):
df["item"] = df["pdlt_nm"].str.strip() + "-" + df["spcs_nm"].str.strip()
# ── merge item + species ───────────────────────────────────────────
if {"item", "species"}.issubset(df.columns):
df["item"] = df["item"].astype(str).str.strip() + "-" + df["species"].astype(str).str.strip()
df.drop(columns=["species"], inplace=True)
return df
@st.cache_data(show_spinner=False)
def load_data() -> pd.DataFrame:
    """Load the price CSV at ``CSV_PATH`` and return a cleaned frame.

    The frame is standardized with ``_standardize_columns``, must contain
    ``date``, ``item`` and ``price``, has ``price`` coerced to numeric, has rows
    with missing values in those three columns dropped, and is sorted by date.
    Progress and diagnostics are written to the Streamlit sidebar. On a missing
    file, missing columns or an unexpected error, an error is shown and
    ``st.stop()`` is called.

    User-facing messages were mis-encoded in the source and are restored here.
    """
    try:
        if not CSV_PATH.exists():
            st.error(f"💾 {CSV_PATH} 파일을 찾을 수 없습니다.")
            st.stop()

        # Load the CSV directly.
        df = pd.read_csv(CSV_PATH)
        st.sidebar.success(f"CSV 데이터 로드 완료: {len(df)}개 행")
        st.sidebar.write("원본 데이터 컬럼:", list(df.columns))

        # Standardize and report any change in row count.
        before_std = len(df)
        df = _standardize_columns(df)
        after_std = len(df)
        if before_std != after_std:
            st.sidebar.warning(f"표준화 중 {before_std - after_std}개 행이 제외되었습니다.")
        st.sidebar.write("표준화 후 컬럼:", list(df.columns))

        # Required columns.
        missing = {c for c in ["date", "item", "price"] if c not in df.columns}
        if missing:
            st.error(f"필수 컬럼 누락: {', '.join(missing)} — 파일 컬럼명을 확인하세요.")
            st.stop()

        st.sidebar.write("날짜 컬럼 데이터 샘플:", df["date"].head().tolist())
        before_date_convert = len(df)

        # Dates stored as integers (e.g. YYYYMM) are not handled by
        # _standardize_columns, so convert them here.
        try:
            if pd.api.types.is_integer_dtype(df["date"]):
                df["date"] = df["date"].astype(str)
            if pd.api.types.is_object_dtype(df["date"]):
                if df["date"].str.match(r'^\d{6}$').all():
                    # Use local Series, not temporary "year"/"month" columns,
                    # which would overwrite and then drop same-named CSV columns.
                    years = df["date"].str[:4].astype(int)
                    months = df["date"].str[4:6].astype(int)
                    month_start = pd.to_datetime(dict(year=years, month=months, day=1))
                    df["date"] = month_start + pd.offsets.MonthEnd(0)  # last day of month
                else:
                    df["date"] = pd.to_datetime(df["date"], errors="coerce")
        except Exception as e:
            st.sidebar.warning(f"날짜 변환 오류: {str(e)}")
            # Last resort: try YYYYMM explicitly, then generic parsing.
            try:
                df["date"] = pd.to_datetime(df["date"].astype(str), format="%Y%m", errors="coerce")
                df["date"] = df["date"] + pd.offsets.MonthEnd(0)
            except Exception:
                df["date"] = pd.to_datetime(df["date"], errors="coerce")

        st.sidebar.write("날짜 변환 후 샘플:", df["date"].head().tolist())
        after_date_convert = df.dropna(subset=["date"]).shape[0]
        if before_date_convert != after_date_convert:
            st.sidebar.warning(f"날짜 변환 중 {before_date_convert - after_date_convert}개 행이 제외되었습니다.")

        # Prices must be numeric; anything unparseable becomes NaN and is dropped.
        df["price"] = pd.to_numeric(df["price"], errors="coerce")
        before_na_drop = len(df)
        # Non-inplace sort: avoids mutating the intermediate frame from dropna.
        df = df.dropna(subset=["date", "item", "price"]).sort_values("date")
        after_na_drop = len(df)
        if before_na_drop != after_na_drop:
            st.sidebar.warning(f"NA 제거 중 {before_na_drop - after_na_drop}개 행이 제외되었습니다.")

        # Summary.
        if len(df) > 0:
            st.sidebar.write(f"최종 데이터: {len(df)}개 행")
            if pd.api.types.is_datetime64_dtype(df["date"]):
                st.sidebar.write(f"데이터 날짜 범위: {df['date'].min().strftime('%Y-%m-%d')} ~ {df['date'].max().strftime('%Y-%m-%d')}")
            else:
                st.sidebar.write(f"데이터 날짜 범위: 날짜 형식 변환 실패. 현재 형식: {type(df['date'].iloc[0])}")
            st.sidebar.write(f"총 품목 수: {df['item'].nunique()}")
            st.sidebar.write(f"품목별 평균 데이터 수: {len(df)/df['item'].nunique():.1f}개")
        else:
            st.error("유효한 데이터가 없습니다!")
        return df
    except Exception as e:
        st.error(f"데이터 로드 중 오류 발생: {str(e)}")
        import traceback
        st.code(traceback.format_exc())
        st.stop()
@st.cache_data(show_spinner=False)
def get_items(df: pd.DataFrame):
    """Return the distinct values of ``df["item"]`` as a sorted list."""
    items = list(df["item"].unique())
    items.sort()
    return items
def get_best_model_for_item(item):
    """Return the ``item_models`` entry for *item*, or ``default_models`` if absent."""
    if item in item_models:
        return item_models[item]
    return default_models
def format_currency(value):
    """Format a number as Korean won, e.g. ``1234567`` -> ``"1,234,567원"``.

    Returns ``"N/A"`` for missing values (None, NaN, pd.NA), infinities and
    anything that cannot be converted to a float. The original raised TypeError
    on non-numeric input.
    """
    try:
        number = float(value)
    except (TypeError, ValueError):
        return "N/A"
    if not np.isfinite(number):
        return "N/A"
    # Currency suffix restored from mis-encoded text (원 = won).
    return f"{number:,.0f}원"
# -------------------------------------------------
# λͺ¨λΈ κ΅¬ν˜„λΆ€ --------------------------------------
# -------------------------------------------------
@st.cache_data(show_spinner=False, ttl=3600)
def prepare_monthly_data(df):
    """Aggregate a price frame to one mean price per calendar month.

    Args:
        df: Frame with a datetime ``date`` column and a numeric ``price`` column.

    Returns:
        A frame indexed by month-end ``date`` with a single ``price`` column.
        When there is more than one month, the index is made contiguous and
        months with no data are forward-filled from the previous month.
    """
    monthly_df = df.copy()
    # Snap every date to the end of its month before grouping. Without this,
    # asfreq('M') below builds its range only up to the last raw date and drops
    # the final month whenever that date is not itself a month end.
    monthly_df['date'] = monthly_df['date'].dt.normalize() + pd.offsets.MonthEnd(0)
    monthly_df = monthly_df.groupby('date')[['price']].mean().sort_index()
    # Fill gaps (months missing from the data) with the previous month's value.
    # NOTE(review): recent pandas spells this alias 'ME'; 'M' is kept to match
    # the rest of the file - confirm against the pinned pandas version.
    if len(monthly_df) > 1:
        monthly_df = monthly_df.asfreq('M', method='ffill')
    return monthly_df
def _forecast_steps(last_date, horizon_end):
    """Return the number of monthly steps from *last_date* up to *horizon_end*.

    Raises:
        ValueError: if *horizon_end* is not in a later month than *last_date*.
    """
    end_date = pd.Timestamp(horizon_end)
    periods = (end_date.year - last_date.year) * 12 + (end_date.month - last_date.month)
    if periods <= 0:
        raise ValueError(
            f"예측 종료일({end_date:%Y-%m})은 마지막 관측월({last_date:%Y-%m}) 이후여야 합니다."
        )
    return periods


def _future_month_ends(last_date, periods):
    """Return *periods* month-end dates starting with the month after *last_date*."""
    return pd.date_range(start=last_date + pd.DateOffset(months=1), periods=periods, freq='M')


def _assemble_forecast_frame(monthly_df, future_dates, yhat, yhat_lower, yhat_upper):
    """Build the Prophet-style frame: observed history followed by the forecast.

    For the history rows ``yhat``, ``yhat_lower`` and ``yhat_upper`` all equal
    the observed price. Raises ValueError if history + forecast do not cover
    the monthly date range exactly.
    """
    history = monthly_df['price'].to_numpy(dtype=float)
    frame = pd.DataFrame({
        'ds': pd.date_range(start=monthly_df.index[0], end=future_dates[-1], freq='M'),
    })
    for column, future in (('yhat', yhat), ('yhat_lower', yhat_lower), ('yhat_upper', yhat_upper)):
        frame[column] = np.concatenate([history, np.asarray(future, dtype=float)])
    return frame


def _seasonal_profile(prices, model, dates):
    """Return, for each date in *dates*, the mean seasonal component of its month.

    Best effort: if the decomposition fails, zeros are returned, which is what
    the callers used before.
    """
    try:
        seasonal = seasonal_decompose(prices, model=model, period=12).seasonal
        by_month = seasonal.groupby(seasonal.index.month).mean()
        return dates.dt.month.map(by_month).fillna(0.0)
    except Exception:
        return pd.Series(0.0, index=dates.index)


def _damped_model(model_cls, endog, **kwargs):
    """Instantiate a statsmodels smoothing model with a damped trend.

    ``damped`` was renamed to ``damped_trend``; try the current keyword first
    and fall back to the legacy one for old statsmodels releases.
    """
    try:
        return model_cls(endog, damped_trend=True, **kwargs)
    except TypeError:
        return model_cls(endog, damped=True, **kwargs)


def fit_sarima(df, order, seasonal_order, horizon_end):
    """Fit a SARIMA model on monthly prices and forecast up to *horizon_end*.

    Args:
        df: Frame with ``date`` and ``price`` columns for one item.
        order: Non-seasonal (p, d, q) order passed to SARIMAX.
        seasonal_order: Seasonal (P, D, Q, s) order passed to SARIMAX.
        horizon_end: Last month to forecast (anything ``pd.Timestamp`` accepts).

    Returns:
        A frame with columns ds, yhat, yhat_lower, yhat_upper, yearly, trend
        (history followed by forecast), or None after showing an error.
    """
    monthly_df = prepare_monthly_data(df)
    try:
        periods = _forecast_steps(monthly_df.index[-1], horizon_end)
        model = SARIMAX(
            monthly_df['price'],
            order=order,
            seasonal_order=seasonal_order,
            enforce_stationarity=False,
            enforce_invertibility=False,
        )
        results = model.fit(disp=False)
        forecast = results.get_forecast(steps=periods)
        pred_ci = forecast.conf_int()
        future_dates = _future_month_ends(monthly_df.index[-1], periods)
        frame = _assemble_forecast_frame(
            monthly_df,
            future_dates,
            forecast.predicted_mean.values,
            pred_ci.iloc[:, 0].values,
            pred_ci.iloc[:, 1].values,
        )
        # yearly / trend components for Prophet compatibility.
        frame['yearly'] = _seasonal_profile(monthly_df['price'], 'multiplicative', frame['ds'])
        frame['trend'] = 0.0
        return frame
    except Exception as e:
        st.error(f"SARIMA 모델 오류: {str(e)}")
        return None


def fit_ets(df, seasonal_type, horizon_end):
    """Fit a damped additive-trend ETS model with 12-month seasonality.

    Args:
        df: Frame with ``date`` and ``price`` columns for one item.
        seasonal_type: ``'multiplicative'`` for multiplicative seasonality;
            any other value selects additive seasonality.
        horizon_end: Last month to forecast.

    Returns:
        The forecast frame (see ``fit_sarima``) or None after showing an error.
        The interval is forecast +/- 1.96 times the standard deviation of the
        in-sample residuals.
    """
    monthly_df = prepare_monthly_data(df)
    seasonal_mode = 'mul' if seasonal_type == 'multiplicative' else 'add'
    try:
        periods = _forecast_steps(monthly_df.index[-1], horizon_end)
        model = _damped_model(
            ExponentialSmoothing,
            monthly_df['price'],
            trend='add',
            seasonal=seasonal_mode,
            seasonal_periods=12,
        )
        results = model.fit(optimized=True)
        forecast = results.forecast(periods)
        spread = 1.96 * np.std(results.resid)
        future_dates = _future_month_ends(monthly_df.index[-1], periods)
        frame = _assemble_forecast_frame(
            monthly_df,
            future_dates,
            forecast.values,
            (forecast - spread).values,
            (forecast + spread).values,
        )
        frame['yearly'] = _seasonal_profile(monthly_df['price'], seasonal_type, frame['ds'])
        frame['trend'] = 0.0
        return frame
    except Exception as e:
        st.error(f"ETS 모델 오류: {str(e)}")
        return None


def fit_holt(df, horizon_end):
    """Fit a damped Holt (trend-only) model and forecast up to *horizon_end*.

    Returns the forecast frame (see ``fit_sarima``) with ``yearly`` = 0 and
    ``trend`` = ``yhat``, or None after showing an error.
    """
    monthly_df = prepare_monthly_data(df)
    try:
        periods = _forecast_steps(monthly_df.index[-1], horizon_end)
        results = _damped_model(Holt, monthly_df['price']).fit(optimized=True)
        forecast = results.forecast(periods)
        spread = 1.96 * np.std(results.resid)
        future_dates = _future_month_ends(monthly_df.index[-1], periods)
        frame = _assemble_forecast_frame(
            monthly_df,
            future_dates,
            forecast.values,
            (forecast - spread).values,
            (forecast + spread).values,
        )
        frame['yearly'] = 0
        frame['trend'] = frame['yhat']  # Holt models the trend only
        return frame
    except Exception as e:
        st.error(f"Holt 모델 오류: {str(e)}")
        return None


def fit_holt_winters(df, horizon_end):
    """Fit a damped Holt-Winters model (additive trend, multiplicative season).

    Returns the forecast frame (see ``fit_sarima``) or None after showing an
    error.
    """
    monthly_df = prepare_monthly_data(df)
    try:
        periods = _forecast_steps(monthly_df.index[-1], horizon_end)
        model = _damped_model(
            ExponentialSmoothing,
            monthly_df['price'],
            trend='add',
            seasonal='mul',
            seasonal_periods=12,
        )
        results = model.fit(optimized=True)
        forecast = results.forecast(periods)
        spread = 1.96 * np.std(results.resid)
        future_dates = _future_month_ends(monthly_df.index[-1], periods)
        frame = _assemble_forecast_frame(
            monthly_df,
            future_dates,
            forecast.values,
            (forecast - spread).values,
            (forecast + spread).values,
        )
        frame['yearly'] = 0.0
        frame['trend'] = 0.0
        # NOTE(review): the original read ``results.seasonal_``; that attribute
        # may not exist on the fitted results object, in which case both
        # components stay 0 exactly as before. Confirm the intended attribute.
        seasonal = getattr(results, 'seasonal_', None)
        if seasonal is not None:
            try:
                for i, date in enumerate(frame['ds']):
                    month = date.month - 1  # 0-indexed
                    if month < len(seasonal):
                        frame.loc[i, 'yearly'] = seasonal[month] * frame.loc[i, 'yhat']
                        frame.loc[i, 'trend'] = frame.loc[i, 'yhat'] - frame.loc[i, 'yearly']
            except Exception:
                # Best effort only: the components are optional extras.
                pass
        return frame
    except Exception as e:
        st.error(f"Holt-Winters 모델 오류: {str(e)}")
        return None


def fit_moving_average(df, window, horizon_end):
    """Forecast every future month as the mean of the last *window* months.

    The interval is the mean +/- 1.96 times the sample standard deviation of
    those months. Returns the forecast frame (see ``fit_sarima``) with
    ``yearly`` = 0 and ``trend`` = ``yhat``, or None after showing an error.
    """
    monthly_df = prepare_monthly_data(df)
    try:
        periods = _forecast_steps(monthly_df.index[-1], horizon_end)
        recent = monthly_df['price'].iloc[-window:]
        level = recent.mean()
        spread = 1.96 * recent.std()
        future_dates = _future_month_ends(monthly_df.index[-1], periods)
        frame = _assemble_forecast_frame(
            monthly_df,
            future_dates,
            np.full(periods, level),
            np.full(periods, level - spread),
            np.full(periods, level + spread),
        )
        frame['yearly'] = 0
        frame['trend'] = frame['yhat']
        return frame
    except Exception as e:
        st.error(f"이동 평균 모델 오류: {str(e)}")
        return None


def fit_weighted_ma(df, window, horizon_end):
    """Forecast every future month as a linearly weighted mean of recent months.

    Weights grow linearly towards the most recent month. If fewer than *window*
    months are available, all of them are used (the original failed with a
    shape error in that case).

    Returns the forecast frame (see ``fit_sarima``) with ``yearly`` = 0 and
    ``trend`` = ``yhat``, or None after showing an error.
    """
    monthly_df = prepare_monthly_data(df)
    try:
        periods = _forecast_steps(monthly_df.index[-1], horizon_end)
        if window < 1:
            raise ValueError("window는 1 이상이어야 합니다.")
        recent = monthly_df['price'].iloc[-window:].to_numpy()
        # Size the weights to the data actually available.
        weights = np.arange(1, len(recent) + 1)
        weights = weights / np.sum(weights)
        level = np.sum(recent * weights)
        spread = 1.96 * np.std(recent)
        future_dates = _future_month_ends(monthly_df.index[-1], periods)
        frame = _assemble_forecast_frame(
            monthly_df,
            future_dates,
            np.full(periods, level),
            np.full(periods, level - spread),
            np.full(periods, level + spread),
        )
        frame['yearly'] = 0
        frame['trend'] = frame['yhat']
        return frame
    except Exception as e:
        st.error(f"가중 이동 평균 모델 오류: {str(e)}")
        return None


def fit_naive(df, horizon_end):
    """Forecast every future month as the last observed monthly price.

    The interval uses the sample standard deviation of the last 12 months (or
    of all months when fewer are available). Returns the forecast frame (see
    ``fit_sarima``) with ``yearly`` = 0 and ``trend`` = ``yhat``, or None after
    showing an error.
    """
    monthly_df = prepare_monthly_data(df)
    try:
        periods = _forecast_steps(monthly_df.index[-1], horizon_end)
        last_value = monthly_df['price'].iloc[-1]
        # iloc[-12:] already returns the whole series when it is shorter.
        spread = 1.96 * monthly_df['price'].iloc[-12:].std()
        future_dates = _future_month_ends(monthly_df.index[-1], periods)
        frame = _assemble_forecast_frame(
            monthly_df,
            future_dates,
            np.full(periods, last_value),
            np.full(periods, last_value - spread),
            np.full(periods, last_value + spread),
        )
        frame['yearly'] = 0
        frame['trend'] = frame['yhat']
        return frame
    except Exception as e:
        st.error(f"Naive 모델 오류: {str(e)}")
        return None


def fit_seasonal_naive(df, horizon_end):
    """Forecast each future month with the latest observed price of that month.

    If a calendar month never occurs in the history, the overall mean is used.
    The interval uses the standard deviation of the same-month prices, or the
    overall one when only a single same-month value exists. Returns the
    forecast frame (see ``fit_sarima``) with ``yearly`` = ``yhat`` and
    ``trend`` = 0, or None after showing an error.
    """
    monthly_df = prepare_monthly_data(df)
    try:
        periods = _forecast_steps(monthly_df.index[-1], horizon_end)
        future_dates = _future_month_ends(monthly_df.index[-1], periods)
        prices = monthly_df['price']
        overall_mean = prices.mean()
        overall_std = prices.std()
        # Group once instead of filtering the history for every future date.
        by_month = {month: group for month, group in prices.groupby(prices.index.month)}

        future_values, lower_bounds, upper_bounds = [], [], []
        for date in future_dates:
            same_month = by_month.get(date.month)
            if same_month is not None and len(same_month) > 0:
                value = same_month.iloc[-1]
                std_error = same_month.std() if len(same_month) > 1 else overall_std
            else:
                value = overall_mean
                std_error = overall_std
            future_values.append(value)
            lower_bounds.append(value - 1.96 * std_error)
            upper_bounds.append(value + 1.96 * std_error)

        frame = _assemble_forecast_frame(
            monthly_df, future_dates, future_values, lower_bounds, upper_bounds
        )
        frame['yearly'] = frame['yhat']
        frame['trend'] = 0
        return frame
    except Exception as e:
        st.error(f"Seasonal Naive 모델 오류: {str(e)}")
        return None
def fit_fourier_lr(df, horizon_end):
    """Forecast monthly prices with a linear trend plus annual Fourier terms.

    An ordinary least-squares model is fitted on a time index and the first
    two sine/cosine harmonics of a 12-month cycle, then extrapolated month by
    month up to ``horizon_end``.

    Args:
        df: Price data; converted to a monthly series by
            ``prepare_monthly_data``.
        horizon_end: Final forecast date (anything ``pd.Timestamp`` accepts).

    Returns:
        A DataFrame with columns ``ds``, ``yhat``, ``yhat_lower``,
        ``yhat_upper``, ``trend`` and ``yearly``.  Rows inside the training
        period carry the observed price in all three ``yhat*`` columns; later
        rows carry the forecast with a +/-1.96 * in-sample RMSE band.
        ``None`` is returned, after an ``st.error`` message, if any step
        raises.
    """
    from sklearn.linear_model import LinearRegression

    monthly_df = prepare_monthly_data(df)
    try:
        cycle = 12  # length of the seasonal cycle in months (one year)

        def harmonics(steps):
            # First and second harmonic of the annual cycle.
            return [
                np.sin(2 * np.pi * steps / cycle),
                np.cos(2 * np.pi * steps / cycle),
                np.sin(4 * np.pi * steps / cycle),
                np.cos(4 * np.pi * steps / cycle),
            ]

        observed = monthly_df['price'].values
        n_obs = len(observed)

        # Design matrix: linear time index followed by the four Fourier columns.
        train_steps = np.arange(n_obs)
        train_X = np.column_stack([train_steps, *harmonics(train_steps)])
        regressor = LinearRegression()
        regressor.fit(train_X, observed)

        # Number of months between the last observation and the horizon.
        final_obs = monthly_df.index[-1]
        horizon = pd.Timestamp(horizon_end)
        n_ahead = (horizon.year - final_obs.year) * 12 + (horizon.month - final_obs.month)
        ahead_dates = pd.date_range(
            start=final_obs + pd.DateOffset(months=1), periods=n_ahead, freq='M'
        )

        # Same features, continued past the end of the training index.
        ahead_steps = np.arange(n_obs, n_obs + n_ahead)
        ahead_X = np.column_stack([ahead_steps, *harmonics(ahead_steps)])
        point = regressor.predict(ahead_X)

        # Constant-width band from the in-sample root mean squared error.
        fitted = regressor.predict(train_X)
        resid_sd = np.sqrt(np.mean((observed - fitted) ** 2))
        lower = point - 1.96 * resid_sd
        upper = point + 1.96 * resid_sd

        # One row per month from the first observation to the last forecast.
        result = pd.DataFrame({
            'ds': pd.date_range(start=monthly_df.index[0], end=ahead_dates[-1], freq='M'),
        })
        band_columns = ('yhat', 'yhat_lower', 'yhat_upper')
        # Training period: observed prices, zero-width band.
        for column in band_columns:
            result.loc[:n_obs - 1, column] = observed
        # Forecast period: model output.
        for column, values in zip(band_columns, (point, lower, upper)):
            result.loc[n_obs:, column] = values

        # Prophet-style components rebuilt from the fitted coefficients.
        all_steps = np.arange(len(result))
        result['trend'] = regressor.coef_[0] * all_steps + regressor.intercept_
        result['yearly'] = np.dot(np.column_stack(harmonics(all_steps)), regressor.coef_[1:5])
        return result
    except Exception as err:
        st.error(f"Fourier + LR λͺ¨λΈ 였λ₯˜: {str(err)}")
        return None
def fit_linear_trend(df, horizon_end):
    """Forecast monthly prices by extrapolating a straight-line trend.

    Args:
        df: Price data; converted to a monthly series by
            ``prepare_monthly_data``.
        horizon_end: Final forecast date (anything ``pd.Timestamp`` accepts).

    Returns:
        A DataFrame with columns ``ds``, ``yhat``, ``yhat_lower``,
        ``yhat_upper``, ``yearly`` (always 0) and ``trend`` (a copy of
        ``yhat``).  Training-period rows hold the observed price; forecast
        rows hold the fitted line with a +/-1.96 * in-sample RMSE band.
        ``None`` is returned, after an ``st.error`` message, if any step
        raises.
    """
    from sklearn.linear_model import LinearRegression

    monthly_df = prepare_monthly_data(df)
    try:
        observed = monthly_df['price'].values
        n_obs = len(observed)

        # Regress price on the month index 0..n-1.
        train_steps = np.arange(n_obs).reshape(-1, 1)
        line = LinearRegression()
        line.fit(train_steps, observed)

        # Number of months between the last observation and the horizon.
        final_obs = monthly_df.index[-1]
        horizon = pd.Timestamp(horizon_end)
        n_ahead = (horizon.year - final_obs.year) * 12 + (horizon.month - final_obs.month)
        ahead_dates = pd.date_range(
            start=final_obs + pd.DateOffset(months=1), periods=n_ahead, freq='M'
        )
        ahead_steps = np.arange(n_obs, n_obs + n_ahead).reshape(-1, 1)
        point = line.predict(ahead_steps)

        # Constant-width band from the in-sample root mean squared error.
        fitted = line.predict(train_steps)
        resid_sd = np.sqrt(np.mean((observed - fitted) ** 2))
        lower = point - 1.96 * resid_sd
        upper = point + 1.96 * resid_sd

        # One row per month from the first observation to the last forecast.
        result = pd.DataFrame({
            'ds': pd.date_range(start=monthly_df.index[0], end=ahead_dates[-1], freq='M'),
        })
        band_columns = ('yhat', 'yhat_lower', 'yhat_upper')
        # Training period: observed prices, zero-width band.
        for column in band_columns:
            result.loc[:n_obs - 1, column] = observed
        # Forecast period: model output.
        for column, values in zip(band_columns, (point, lower, upper)):
            result.loc[n_obs:, column] = values

        # Prophet-style components: no seasonality, everything is trend.
        result['yearly'] = 0
        result['trend'] = result['yhat']
        return result
    except Exception as err:
        st.error(f"Linear Trend λͺ¨λΈ 였λ₯˜: {str(err)}")
        return None
def fit_simple_exp_smoothing(df, horizon_end):
    """Forecast monthly prices with simple exponential smoothing.

    Args:
        df: Price data; converted to a monthly series by
            ``prepare_monthly_data``.
        horizon_end: Final forecast date (anything ``pd.Timestamp`` accepts).

    Returns:
        A DataFrame with columns ``ds``, ``yhat``, ``yhat_lower``,
        ``yhat_upper``, ``yearly`` (always 0) and ``trend`` (a copy of
        ``yhat``).  Training-period rows hold the observed price; forecast
        rows hold the smoothed forecast with a band of +/-1.96 times the
        standard deviation of the fit residuals.  ``None`` is returned, after
        an ``st.error`` message, if any step raises.
    """
    monthly_df = prepare_monthly_data(df)
    try:
        # Let statsmodels pick the smoothing level.
        fitted = SimpleExpSmoothing(monthly_df['price']).fit(optimized=True)

        # Number of months between the last observation and the horizon.
        final_obs = monthly_df.index[-1]
        horizon = pd.Timestamp(horizon_end)
        n_ahead = (horizon.year - final_obs.year) * 12 + (horizon.month - final_obs.month)

        point = fitted.forecast(n_ahead)

        # Constant-width band from the spread of the in-sample residuals.
        resid_sd = np.std(fitted.resid)
        lower = point - 1.96 * resid_sd
        upper = point + 1.96 * resid_sd

        ahead_dates = pd.date_range(
            start=final_obs + pd.DateOffset(months=1), periods=n_ahead, freq='M'
        )

        # One row per month from the first observation to the last forecast.
        result = pd.DataFrame({
            'ds': pd.date_range(start=monthly_df.index[0], end=ahead_dates[-1], freq='M'),
        })
        n_obs = len(monthly_df)
        observed = monthly_df['price'].values
        band_columns = ('yhat', 'yhat_lower', 'yhat_upper')
        # Training period: observed prices, zero-width band.
        for column in band_columns:
            result.loc[:n_obs - 1, column] = observed
        # Forecast period: model output (plain arrays, so row labels are ignored).
        for column, values in zip(band_columns, (point.values, lower.values, upper.values)):
            result.loc[n_obs:, column] = values

        # Prophet-style components: no seasonality, everything is trend.
        result['yearly'] = 0
        result['trend'] = result['yhat']
        return result
    except Exception as err:
        st.error(f"Simple Exponential Smoothing λͺ¨λΈ 였λ₯˜: {str(err)}")
        return None
@st.cache_data(show_spinner=False, ttl=3600)
def fit_optimal_model(df, item_name, horizon_end, model_type="primary"):
    """Fit the forecasting model registered for an item and return its forecast.

    Args:
        df: Price observations with ``date`` and ``price`` columns; rows
            missing either value are dropped from a copy before fitting.
        item_name: Item whose model entry is looked up through
            ``get_best_model_for_item``.
        horizon_end: Forecast end date, forwarded unchanged to the fitter.
        model_type: ``"primary"`` selects the ``model1``/``accuracy1`` entry;
            any other value selects ``model2``/``accuracy2``.

    Returns:
        Whatever the selected ``fit_*`` function returns.  A model name that
        matches no known pattern falls back to SARIMA(1,0,1)(1,0,1,12) after
        a warning.
    """
    # Work on a cleaned copy so the caller's frame is left untouched.
    df = df.copy().dropna(subset=["date", "price"])

    # Pick the first- or second-ranked model entry for this item.
    model_info = get_best_model_for_item(item_name)
    rank = "1" if model_type == "primary" else "2"
    model_name = model_info[f"model{rank}"]
    accuracy = model_info[f"accuracy{rank}"]
    st.info(f"{item_name}에 μ΅œμ ν™”λœ {model_name} λͺ¨λΈ 적용 (정확도: {accuracy}%)")

    # Items flagged with special == "accuracy_drop" get an extra caution banner.
    if model_info.get("special") == "accuracy_drop":
        st.warning(f"⚠️ {item_name}λŠ” νŠΉμ • 월에 정확도가 급락할 수 μžˆλŠ” ν’ˆλͺ©μž…λ‹ˆλ‹€. 예츑 κ²°κ³Όλ₯Ό 주의 깊게 μ‚΄νŽ΄λ³΄μ„Έμš”.")

    # SARIMA variants: label -> (order, seasonal_order), tried in this order.
    sarima_specs = {
        "SARIMA(1,0,1)(1,0,1,12)": ((1, 0, 1), (1, 0, 1, 12)),
        "SARIMA(1,1,1)(1,1,1,12)": ((1, 1, 1), (1, 1, 1, 12)),
        "SARIMA(0,1,1)(0,1,1,12)": ((0, 1, 1), (0, 1, 1, 12)),
    }
    for label, (order, seasonal_order) in sarima_specs.items():
        if label in model_name:
            return fit_sarima(df, order=order, seasonal_order=seasonal_order, horizon_end=horizon_end)

    # Remaining models: (substring that must appear, substring that must not,
    # fitter).  Order matters: "Holt-Winters" has to be tested before "Holt",
    # and plain "Naive" must not swallow "SeasonalNaive".
    routes = (
        ("ETS(Multiplicative)", None,
         lambda: fit_ets(df, seasonal_type="multiplicative", horizon_end=horizon_end)),
        ("ETS(Additive)", None,
         lambda: fit_ets(df, seasonal_type="additive", horizon_end=horizon_end)),
        ("Holt-Winters", None, lambda: fit_holt_winters(df, horizon_end=horizon_end)),
        ("Holt", None, lambda: fit_holt(df, horizon_end=horizon_end)),
        ("MovingAverage-6 m", None,
         lambda: fit_moving_average(df, window=6, horizon_end=horizon_end)),
        ("WeightedMA-6 m", None,
         lambda: fit_weighted_ma(df, window=6, horizon_end=horizon_end)),
        ("Naive", "Seasonal", lambda: fit_naive(df, horizon_end=horizon_end)),
        ("SeasonalNaive", None, lambda: fit_seasonal_naive(df, horizon_end=horizon_end)),
        ("Fourier + LR", None, lambda: fit_fourier_lr(df, horizon_end=horizon_end)),
        ("LinearTrend", None, lambda: fit_linear_trend(df, horizon_end=horizon_end)),
        ("SimpleExpSmoothing", None,
         lambda: fit_simple_exp_smoothing(df, horizon_end=horizon_end)),
    )
    for required, forbidden, run in routes:
        if required in model_name and (forbidden is None or forbidden not in model_name):
            return run()

    # Nothing matched: warn and use the default SARIMA specification.
    st.warning(f"μ•Œ 수 μ—†λŠ” λͺ¨λΈ: {model_name}. κΈ°λ³Έ λͺ¨λΈ(SARIMA)을 μ‚¬μš©ν•©λ‹ˆλ‹€.")
    return fit_sarima(df, order=(1, 0, 1), seasonal_order=(1, 0, 1, 12), horizon_end=horizon_end)
def fit_ensemble_model(df, item_name, horizon_end):
    """Blend the item's first- and second-ranked models when they are close.

    Both models are fitted through ``fit_optimal_model``.  If their recorded
    accuracies differ by at most 0.2 percentage points the ``yhat``,
    ``yhat_lower`` and ``yhat_upper`` columns are averaged with weights
    proportional to accuracy; otherwise the first-ranked forecast is used
    as is.

    Args:
        df: Price observations, forwarded to ``fit_optimal_model``.
        item_name: Item whose model entry is looked up through
            ``get_best_model_for_item``.
        horizon_end: Forecast end date, forwarded to ``fit_optimal_model``.

    Returns:
        The blended forecast frame (a copy of the first model's frame with
        the three ``yhat*`` columns replaced), the first model's frame when
        the accuracies are too far apart, whichever single forecast succeeded
        when the other failed, or ``None`` when both failed.
    """
    # Accuracies are decimal percentages; their binary-float difference can be
    # a hair above 0.2 even when the figures differ by exactly 0.2, so compare
    # with a small tolerance to keep the boundary inclusive.
    max_gap = 0.2
    float_slack = 1e-9

    fc1 = fit_optimal_model(df, item_name, horizon_end, model_type="primary")
    fc2 = fit_optimal_model(df, item_name, horizon_end, model_type="backup")

    # If either model failed, return the one that worked (or None).
    if fc1 is None or fc2 is None:
        return fc1 if fc1 is not None else fc2

    model_info = get_best_model_for_item(item_name)
    acc1 = model_info["accuracy1"]
    acc2 = model_info["accuracy2"]
    accuracy_diff = abs(acc1 - acc2)

    if accuracy_diff > max_gap + float_slack:
        st.info(f"정확도 차이가 {accuracy_diff:.2f}%p둜 μ»€μ„œ 1μœ„ λͺ¨λΈλ§Œ μ‚¬μš©ν•©λ‹ˆλ‹€.")
        return fc1

    st.success(f"두 λͺ¨λΈμ˜ 정확도 차이가 {accuracy_diff:.2f}%p둜 μž‘μ•„ 앙상블을 μˆ˜ν–‰ν•©λ‹ˆλ‹€.")

    # Accuracy-proportional weights; fall back to an even split if both
    # accuracies are zero so the division cannot fail.
    total_acc = acc1 + acc2
    if total_acc:
        w1 = acc1 / total_acc
        w2 = acc2 / total_acc
    else:
        w1 = w2 = 0.5

    # Start from the first model's frame so its other columns are kept.
    fc_ensemble = fc1.copy()
    for column in ('yhat', 'yhat_lower', 'yhat_upper'):
        fc_ensemble[column] = w1 * fc1[column] + w2 * fc2[column]
    return fc_ensemble
# -------------------------------------------------
# MAIN APP ---------------------------------------
# -------------------------------------------------
# Load the data set; nothing below can run without it.
raw_df = load_data()
if not len(raw_df):
    st.error("데이터가 λΉ„μ–΄ μžˆμŠ΅λ‹ˆλ‹€. νŒŒμΌμ„ ν™•μΈν•΄μ£Όμ„Έμš”.")
    st.stop()

# Sidebar: item picker, today's date and the models registered for the item.
with st.sidebar:
    st.header("πŸ” ν’ˆλͺ© 선택")
    selected_item = st.selectbox("ν’ˆλͺ©", get_items(raw_df))
    current_date = date.today()
    st.caption(f"였늘: {current_date}")

    model_info = get_best_model_for_item(selected_item)
    st.subheader("ν’ˆλͺ©λ³„ 졜적 λͺ¨λΈ")
    st.markdown(f"**1μœ„ λͺ¨λΈ:** {model_info['model1']} (정확도: {model_info['accuracy1']}%)")
    st.markdown(f"**2μœ„ λͺ¨λΈ:** {model_info['model2']} (정확도: {model_info['accuracy2']}%)")

# Keep only the rows of the selected item.
item_df = raw_df.query("item == @selected_item").copy()
if item_df.empty:
    st.error("μ„ νƒν•œ ν’ˆλͺ© 데이터 μ—†μŒ")
    st.stop()

# Report how many observations are available for the item.
if len(item_df) >= 2:
    st.success(f"μ„ νƒν•œ ν’ˆλͺ© '{selected_item}'에 λŒ€ν•΄ {len(item_df)}개의 데이터가 μžˆμŠ΅λ‹ˆλ‹€.")
else:
    st.warning(f"μ„ νƒν•œ ν’ˆλͺ© '{selected_item}' 데이터가 λ„ˆλ¬΄ μ μŠ΅λ‹ˆλ‹€ (데이터 수: {len(item_df)}). 예츑이 λΆ€μ •ν™•ν•  수 μžˆμŠ΅λ‹ˆλ‹€.")
# -------------------------------------------------
# MACRO FORECAST 1996-2030 ------------------------
# -------------------------------------------------
st.header(f"πŸ“ˆ {selected_item} 가격 예츑 λŒ€μ‹œλ³΄λ“œ")


def _show_price_history(frame):
    """Draw a plain line chart of the observed prices in *frame*."""
    history_fig = go.Figure()
    history_fig.add_trace(
        go.Scatter(x=frame["date"], y=frame["price"], mode="lines", name="μ‹€μ œ 가격")
    )
    history_fig.update_layout(title=f"{selected_item} κ³Όκ±° 가격")
    st.plotly_chart(history_fig, use_container_width=True)


# Restrict the data to 1996 onwards, or to the first observation if later.
try:
    macro_start_dt = max(pd.Timestamp("1996-01-01"), item_df["date"].min())
    macro_df = item_df[item_df["date"] >= macro_start_dt].copy()
except Exception as e:
    st.error(f"λ‚ μ§œ 필터링 였λ₯˜: {str(e)}")
    macro_df = item_df.copy()  # fall back to the unfiltered item data

# Diagnostic panel: row counts, covered period and a preview of the data.
with st.expander("데이터 진단"):
    st.write(f"- 전체 데이터 수: {len(item_df)}")
    st.write(f"- 뢄석 데이터 수: {len(macro_df)}")
    if len(macro_df) > 0:
        st.write(f"- κΈ°κ°„: {macro_df['date'].min().strftime('%Y-%m-%d')} ~ {macro_df['date'].max().strftime('%Y-%m-%d')}")
        st.dataframe(macro_df.head())
    else:
        st.write("데이터가 μ—†μŠ΅λ‹ˆλ‹€.")

if len(macro_df) < 2:
    # Too little data to fit anything: show the raw history only.
    st.warning(f"{selected_item}에 λŒ€ν•œ 데이터가 μΆ©λΆ„ν•˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€. 전체 κΈ°κ°„ 데이터λ₯Ό ν‘œμ‹œν•©λ‹ˆλ‹€.")
    _show_price_history(item_df)
else:
    try:
        # The checkbox value is also read by the short-term section below.
        use_ensemble = st.checkbox("앙상블 λͺ¨λΈ μ‚¬μš© (1μœ„ + 2μœ„ λͺ¨λΈ κ²°ν•©)", value=False)
        with st.spinner("μž₯κΈ° 예츑 λͺ¨λΈ 생성 쀑..."):
            macro_fitter = fit_ensemble_model if use_ensemble else fit_optimal_model
            fc_macro = macro_fitter(macro_df, selected_item, MACRO_END)

        if fc_macro is None:
            st.warning("예츑 λͺ¨λΈμ„ 생성할 수 μ—†μŠ΅λ‹ˆλ‹€.")
            _show_price_history(macro_df)
        else:
            # Observations before the cutoff are drawn as history, forecast
            # rows from the cutoff onwards as prediction.
            cutoff_date = pd.Timestamp("2025-01-01")
            fig = go.Figure()

            historical_data = macro_df[macro_df["date"] < cutoff_date].copy()
            if not historical_data.empty:
                fig.add_trace(go.Scatter(
                    x=historical_data["date"],
                    y=historical_data["price"],
                    mode="lines",
                    name="μ‹€μ œ 가격 (1996-2024)",
                    line=dict(color="blue", width=2),
                ))

            forecast_data = fc_macro[fc_macro["ds"] >= cutoff_date].copy()
            if not forecast_data.empty:
                fig.add_trace(go.Scatter(
                    x=forecast_data["ds"],
                    y=forecast_data["yhat"],
                    mode="lines",
                    name="예츑 가격 (2025-2030)",
                    line=dict(color="red", width=2, dash="dash"),
                ))
                # Band: invisible upper edge, then the lower edge filled up to it.
                fig.add_trace(go.Scatter(
                    x=forecast_data["ds"],
                    y=forecast_data["yhat_upper"],
                    mode="lines",
                    line=dict(width=0),
                    showlegend=False,
                ))
                fig.add_trace(go.Scatter(
                    x=forecast_data["ds"],
                    y=forecast_data["yhat_lower"],
                    mode="lines",
                    line=dict(width=0),
                    fill="tonexty",
                    fillcolor="rgba(255, 0, 0, 0.1)",
                    name="95% μ‹ λ’° ꡬ간",
                ))

            # Start the y axis at zero so negative forecasts are not shown.
            fig.update_yaxes(range=[0, None])
            fig.update_layout(
                title=f"{selected_item} μž₯κΈ° 가격 예츑 (1996-2030)",
                xaxis_title="연도",
                yaxis_title="가격 (원)",
                legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
            )
            st.plotly_chart(fig, use_container_width=True)

            # Year-end forecast metrics relative to the last row's price.
            try:
                latest_price = macro_df.iloc[-1]["price"]

                def get_yearly_prediction(year_end):
                    """Return (forecast nearest to Dec 31 of year_end, % change vs latest price)."""
                    target_date = pd.Timestamp(f"{year_end}-12-31")
                    nearest_row = abs(fc_macro["ds"] - target_date).idxmin()
                    pred_value = fc_macro.loc[nearest_row, "yhat"]
                    return pred_value, (pred_value - latest_price) / latest_price * 100

                # Headline years first ...
                for slot, year_end in zip(st.columns(3), (2025, 2027, 2030)):
                    pred_value, pct_change = get_yearly_prediction(year_end)
                    slot.metric(f"{year_end}λ…„ μ˜ˆμΈ‘κ°€", format_currency(pred_value), f"{pct_change:+.1f}%")

                # ... the remaining years inside a collapsed panel.
                with st.expander("더 λ§Žμ€ 연도별 μ˜ˆμΈ‘κ°€ 보기"):
                    for slot, year_end in zip(st.columns(3), (2026, 2028, 2029)):
                        pred_value, pct_change = get_yearly_prediction(year_end)
                        slot.metric(f"{year_end}λ…„ μ˜ˆμΈ‘κ°€", format_currency(pred_value), f"{pct_change:+.1f}%")
            except Exception as e:
                st.error(f"μ˜ˆμΈ‘κ°€ 계산 였λ₯˜: {str(e)}")
    except Exception as e:
        # Report the failure with its traceback, then still show the history.
        st.error(f"μž₯κΈ° 예츑 였λ₯˜ λ°œμƒ: {str(e)}")
        import traceback
        st.code(traceback.format_exc())
        _show_price_history(macro_df)
# -------------------------------------------------
# MICRO FORECAST 2024-2026 ------------------------
# -------------------------------------------------
st.subheader("πŸ”Ž 2024–2026 단기 예츑 (월별)")

# Fit on data from 2021 onwards, or from the first observation if later.
try:
    three_years_ago = pd.Timestamp("2021-01-01")
    if item_df["date"].min() > three_years_ago:
        three_years_ago = item_df["date"].min()
    micro_df = item_df[item_df["date"] >= three_years_ago].copy()
except Exception as e:
    st.error(f"단기 예츑 데이터 필터링 였λ₯˜: {str(e)}")
    # Fall back to the 24 most recent rows.
    micro_df = item_df.sort_values("date").tail(24).copy()

if len(micro_df) < 2:
    # Too little data to fit anything: show the raw history only.
    st.warning("졜근 데이터가 μΆ©λΆ„ν•˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€.")
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=item_df["date"], y=item_df["price"], mode="lines", name="μ‹€μ œ 가격"))
    fig.update_layout(title=f"{selected_item} 졜근 가격")
    st.plotly_chart(fig, use_container_width=True)
else:
    try:
        # use_ensemble is the checkbox value set in the long-term section.
        with st.spinner("단기 예츑 λͺ¨λΈ 생성 쀑..."):
            if use_ensemble:
                fc_micro = fit_ensemble_model(micro_df, selected_item, MICRO_END)
            else:
                fc_micro = fit_optimal_model(micro_df, selected_item, MICRO_END)

        if fc_micro is not None:
            # Display window for both the observed and the forecast series.
            start_date = pd.Timestamp("2024-01-01")
            end_date = pd.Timestamp("2026-12-31")

            # Observed prices averaged per calendar month, dated by the first
            # observation of each month.
            monthly_historical = micro_df.copy()
            monthly_historical["year_month"] = monthly_historical["date"].dt.strftime("%Y-%m")
            monthly_historical = monthly_historical.groupby("year_month").agg({
                "date": "first",
                "price": "mean"
            }).reset_index(drop=True)
            monthly_historical = monthly_historical[
                (monthly_historical["date"] >= start_date) &
                (monthly_historical["date"] <= end_date)
            ]
            monthly_forecast = fc_micro[
                (fc_micro["ds"] >= start_date) &
                (fc_micro["ds"] <= end_date)
            ].copy()

            fig = go.Figure()

            # Observed 2024 prices.
            actual_2024 = monthly_historical[
                (monthly_historical["date"] >= pd.Timestamp("2024-01-01")) &
                (monthly_historical["date"] <= pd.Timestamp("2024-12-31"))
            ]
            if not actual_2024.empty:
                fig.add_trace(go.Scatter(
                    x=actual_2024["date"],
                    y=actual_2024["price"],
                    mode="lines+markers",
                    name="2024 μ‹€μ œ 가격",
                    line=dict(color="blue", width=2),
                    marker=dict(size=8)
                ))

            # Forecast rows after the end of 2024.
            cutoff = pd.Timestamp("2024-12-31")
            future_data = monthly_forecast[monthly_forecast["ds"] > cutoff]
            if not future_data.empty:
                fig.add_trace(go.Scatter(
                    x=future_data["ds"],
                    y=future_data["yhat"],
                    mode="lines+markers",
                    name="2025-2026 예츑 가격",
                    line=dict(color="red", width=2, dash="dash"),
                    marker=dict(size=8)
                ))
                # Band: invisible upper edge, then the lower edge filled up to it.
                fig.add_trace(go.Scatter(
                    x=future_data["ds"],
                    y=future_data["yhat_upper"],
                    mode="lines",
                    line=dict(width=0),
                    showlegend=False
                ))
                fig.add_trace(go.Scatter(
                    x=future_data["ds"],
                    y=future_data["yhat_lower"],
                    mode="lines",
                    line=dict(width=0),
                    fill="tonexty",
                    fillcolor="rgba(255, 0, 0, 0.1)",
                    name="95% μ‹ λ’° ꡬ간"
                ))

            # Start the y axis at zero so negative forecasts are not shown.
            fig.update_yaxes(range=[0, None])
            fig.update_layout(
                title=f"{selected_item} 월별 단기 예츑 (2024-2026)",
                xaxis_title="μ›”",
                yaxis_title="가격 (원)",
                xaxis=dict(
                    tickformat="%Y-%m",
                    dtick="M3",  # one tick every three months
                    tickangle=45
                ),
                legend=dict(
                    orientation="h",
                    yanchor="bottom",
                    y=1.02,
                    xanchor="right",
                    x=1
                )
            )
            st.plotly_chart(fig, use_container_width=True)

            # Table of the monthly forecasts after the cutoff.
            with st.expander("월별 예츑 가격 상세보기"):
                monthly_detail = monthly_forecast[monthly_forecast["ds"] > cutoff].copy()
                monthly_detail["λ‚ μ§œ"] = monthly_detail["ds"].dt.strftime("%Yλ…„ %mμ›”")
                monthly_detail["μ˜ˆμΈ‘κ°€κ²©"] = monthly_detail["yhat"].apply(format_currency)
                monthly_detail["ν•˜ν•œκ°’"] = monthly_detail["yhat_lower"].apply(format_currency)
                monthly_detail["μƒν•œκ°’"] = monthly_detail["yhat_upper"].apply(format_currency)
                st.dataframe(
                    monthly_detail[["λ‚ μ§œ", "μ˜ˆμΈ‘κ°€κ²©", "ν•˜ν•œκ°’", "μƒν•œκ°’"]],
                    hide_index=True
                )

            def get_monthly_prediction(year, month):
                """Return (forecast, % change vs latest observed month) for a calendar month.

                The row is selected by calendar year and month rather than by
                distance to the first of the month: the fit_* helpers in this
                file date their rows with freq='M' (month end), so a
                nearest-date search from the 1st would land on the previous
                month.  Returns (None, None) when the forecast has no row for
                that month.
                """
                stamps = monthly_forecast["ds"]
                in_month = monthly_forecast[
                    (stamps.dt.year == year) & (stamps.dt.month == month)
                ]
                if in_month.empty:
                    return None, None
                pred_value = in_month.iloc[0]["yhat"]
                # Reference price: latest observed monthly mean inside the
                # display window, else the last row of the fitting data.
                latest_price = monthly_historical.iloc[-1]["price"] if not monthly_historical.empty else micro_df.iloc[-1]["price"]
                pct_change = (pred_value - latest_price) / latest_price * 100
                return pred_value, pct_change

            def show_monthly_metric(column, year, month):
                """Render one month's forecast metric, or a placeholder when it is missing."""
                label = f"{year}λ…„ {month}μ›”"
                pred_value, pct_change = get_monthly_prediction(year, month)
                if pred_value is not None:
                    column.metric(label, format_currency(pred_value), f"{pct_change:+.1f}%")
                else:
                    column.metric(label, "데이터 μ—†μŒ", "0%")

            # Headline months.
            st.subheader("μ£Όμš” 월별 μ˜ˆμΈ‘κ°€")
            for column, (year, month) in zip(st.columns(3), ((2025, 6), (2025, 12), (2026, 12))):
                show_monthly_metric(column, year, month)

            # Quarter-end months of each forecast year.
            with st.expander("더 λ§Žμ€ 월별 μ˜ˆμΈ‘κ°€ 보기"):
                for year in [2025, 2026]:
                    st.write(f"### {year}λ…„ 뢄기별 μ˜ˆμΈ‘κ°€")
                    for column, month in zip(st.columns(4), (3, 6, 9, 12)):
                        show_monthly_metric(column, year, month)
        else:
            st.warning("단기 예츑 λͺ¨λΈμ„ 생성할 수 μ—†μŠ΅λ‹ˆλ‹€.")
    except Exception as e:
        # Imported here because no module-level import of traceback is
        # guaranteed to have run when this handler executes.
        import traceback
        st.error(f"단기 예츑 였λ₯˜: {str(e)}")
        st.code(traceback.format_exc())
# -------------------------------------------------
# SEASONALITY & PATTERN ---------------------------
# -------------------------------------------------
# fc_micro only exists when the short-term section reached its model fit.
if 'fc_micro' in locals() and fc_micro is not None:
    with st.expander("πŸ“† μ‹œμ¦ˆλ„λ¦¬ν‹° & νŒ¨ν„΄ μ„€λͺ…"):
        try:
            # Show the panel only when the model produced a seasonal component
            # with at least one non-zero value.  (Testing the column sum would
            # wrongly hide effects that cancel out.)
            has_seasonality = (
                "yearly" in fc_micro.columns
                and fc_micro["yearly"].fillna(0).ne(0).any()
            )
            if has_seasonality:
                # Average seasonal component per calendar month.
                month_season = fc_micro.copy()
                month_season["month"] = month_season["ds"].dt.month
                month_seasonality = month_season.groupby("month")["yearly"].mean()

                month_names = [f"{m}μ›”" for m in range(1, 13)]
                # Label each bar from the month it belongs to, so labels and
                # values stay aligned even if a month is absent.
                bar_labels = [month_names[m - 1] for m in month_seasonality.index]

                fig = go.Figure()
                fig.add_trace(go.Bar(
                    x=bar_labels,
                    y=month_seasonality.values,
                    marker_color=['blue' if x >= 0 else 'red' for x in month_seasonality.values]
                ))
                fig.update_layout(
                    title=f"{selected_item} 월별 κ³„μ ˆμ„± νŒ¨ν„΄",
                    xaxis_title="μ›”",
                    yaxis_title="μƒλŒ€μ  가격 변동",
                )
                st.plotly_chart(fig, use_container_width=True)

                # Peak month, trough month and the spread between them.
                peak_month = month_seasonality.idxmax()
                low_month = month_seasonality.idxmin()
                seasonality_range = month_seasonality.max() - month_seasonality.min()
                # Two trailing spaces before each newline force a markdown
                # line break so the three figures render on separate lines.
                st.markdown(
                    f"**μ—°κ°„ 피크 μ›”:** {month_names[peak_month-1]}  \n"
                    f"**μ—°κ°„ 저점 μ›”:** {month_names[low_month-1]}  \n"
                    f"**μ—°κ°„ 변동폭:** {seasonality_range:.1f}")

                # Classify the strength of the seasonality by the spread.
                if seasonality_range > 30:
                    st.info(f"{selected_item}은(λŠ”) κ³„μ ˆμ„±μ΄ 맀우 κ°•ν•œ ν’ˆλͺ©μž…λ‹ˆλ‹€. νŠΉμ • 달에 가격이 크게 변동할 수 μžˆμŠ΅λ‹ˆλ‹€.")
                elif seasonality_range > 10:
                    st.info(f"{selected_item}은(λŠ”) κ³„μ ˆμ„±μ΄ 쀑간 정도인 ν’ˆλͺ©μž…λ‹ˆλ‹€.")
                else:
                    st.info(f"{selected_item}은(λŠ”) κ³„μ ˆμ„±μ΄ μ•½ν•œ ν’ˆλͺ©μž…λ‹ˆλ‹€. 연쀑 가격이 비ꡐ적 μ•ˆμ •μ μž…λ‹ˆλ‹€.")
        except Exception as e:
            st.error(f"κ³„μ ˆμ„± 뢄석 였λ₯˜: {str(e)}")
            st.info("이 ν’ˆλͺ©μ— λŒ€ν•œ κ³„μ ˆμ„± νŒ¨ν„΄μ„ 뢄석할 수 μ—†μŠ΅λ‹ˆλ‹€.")

# -------------------------------------------------
# FOOTER ------------------------------------------
# -------------------------------------------------
st.markdown("---")
st.caption("Β© 2025 ν’ˆλͺ©λ³„ 가격 예츑 μ‹œμŠ€ν…œ | 데이터 뢄석 μžλ™ν™”")