Spaces:
Running
Running
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import plotly.graph_objects as go | |
from datetime import date | |
from pathlib import Path | |
import matplotlib.font_manager as fm | |
import matplotlib as mpl | |
import warnings | |
warnings.filterwarnings('ignore') | |
# Load the additional libraries this app requires
try: | |
import statsmodels.api as sm | |
from statsmodels.tsa.statespace.sarimax import SARIMAX | |
from statsmodels.tsa.holtwinters import ExponentialSmoothing, SimpleExpSmoothing, Holt | |
from statsmodels.tsa.seasonal import seasonal_decompose | |
from sklearn.linear_model import LinearRegression | |
from sklearn.metrics import mean_absolute_percentage_error | |
except ImportError: | |
st.error("νμν λΌμ΄λΈλ¬λ¦¬κ° μ€μΉλμ§ μμμ΅λλ€. ν°λ―Έλμμ λ€μ λͺ λ Ήμ μ€ννμΈμ:") | |
st.code("pip install statsmodels scikit-learn") | |
st.stop() | |
# ------------------------------------------------- | |
# CONFIG ------------------------------------------ | |
# ------------------------------------------------- | |
# Input file and the two date windows used by the app.
CSV_PATH = Path("2025-domae.csv")
MACRO_START = "1996-01-01"
MACRO_END = "2030-12-31"
MICRO_START = "2024-01-01"
MICRO_END = "2026-12-31"

# Korean font setup: collect installed fonts whose name contains one of the
# known Korean family hints, preserving font-manager order.
_KOREAN_FONT_HINTS = ('gothic', 'gulim', 'malgun', 'nanum', 'batang')
font_list = [
    entry.name
    for entry in fm.fontManager.ttflist
    if any(hint in entry.name.lower() for hint in _KOREAN_FONT_HINTS)
]
if not font_list:
    # No matching font installed: fall back to the matplotlib default family.
    plt.rcParams['font.family'] = 'DejaVu Sans'
else:
    font_name = font_list[0]
    plt.rcParams['font.family'] = font_name
    mpl.rcParams['axes.unicode_minus'] = False

st.set_page_config(page_title="νλͺ©λ³ κ°κ²© μμΈ‘", page_icon="π", layout="wide")
# -------------------------------------------------
# Per-item best-model mapping ---------------------
# -------------------------------------------------
# Maps an item name to the two models recorded for it:
#   "model1" / "accuracy1" - first model name and its recorded accuracy score
#   "model2" / "accuracy2" - second model name and its recorded accuracy score
#   "special" (optional)   - marker string; only "accuracy_drop" is used here
# NOTE(review): the accuracy values look like percentages - confirm.
# NOTE(review): four entries list accuracy2 above accuracy1 (99.54/99.93,
# 99.87/99.96, 97.31/97.57 and 99.04/99.36), and one entry names the same
# SARIMA(0,1,1)(0,1,1,12) model for both slots - confirm these are intended.
item_models = {
    "κ°μΉ": {"model1": "SARIMA(1,0,1)(1,0,1,12)", "accuracy1": 99.82, "model2": "Holt-Winters", "accuracy2": 99.80},
    "κ°μ": {"model1": "ETS(Multiplicative)", "accuracy1": 99.58, "model2": "SARIMA(1,0,1)(1,0,1,12)", "accuracy2": 98.70},
    "κ±΄κ³ μΆ": {"model1": "SARIMA(1,0,1)(1,0,1,12)", "accuracy1": 99.96, "model2": "Holt", "accuracy2": 99.79},
    "건λ€μλ§": {"model1": "Naive", "accuracy1": 99.59, "model2": "SeasonalNaive", "accuracy2": 99.34},
    "κ³ κ΅¬λ§": {"model1": "SARIMA(1,1,1)(1,1,1,12)", "accuracy1": 99.89, "model2": "ETS(Multiplicative)", "accuracy2": 98.91},
    "κ³ λ±μ΄": {"model1": "SARIMA(1,0,1)(1,0,1,12)", "accuracy1": 99.48, "model2": "ETS(Additive)", "accuracy2": 99.42},
    "κΉ": {"model1": "SARIMA(0,1,1)(0,1,1,12)", "accuracy1": 99.99, "model2": "SARIMA(0,1,1)(0,1,1,12)", "accuracy2": 99.93},
    "κΉλ§λ(κ΅μ°)": {"model1": "SeasonalNaive", "accuracy1": 99.79, "model2": "MovingAverage-6 m", "accuracy2": 98.65},
    "κΉ»μ": {"model1": "SARIMA(0,1,1)(0,1,1,12)", "accuracy1": 99.68, "model2": "Holt", "accuracy2": 99.54},
    "λ Ήλ": {"model1": "WeightedMA-6 m", "accuracy1": 99.53, "model2": "Fourier + LR", "accuracy2": 99.53},
    "λν리λ²μ―": {"model1": "SARIMA(0,1,1)(0,1,1,12)", "accuracy1": 99.84, "model2": "LinearTrend", "accuracy2": 99.80},
    "λΉκ·Ό": {"model1": "Holt", "accuracy1": 99.25, "model2": "ETS(Multiplicative)", "accuracy2": 97.27},
    "λ€κΉ¨": {"model1": "Holt", "accuracy1": 99.57, "model2": "Holt-Winters", "accuracy2": 99.17},
    "λ 콩": {"model1": "SARIMA(1,1,1)(1,1,1,12)", "accuracy1": 99.74, "model2": "ETS(Additive)", "accuracy2": 99.37},
    "λ λͺ¬": {"model1": "WeightedMA-6 m", "accuracy1": 99.99, "model2": "LinearTrend", "accuracy2": 98.99},
    "λ§κ³ ": {"model1": "SARIMA(1,0,1)(1,0,1,12)", "accuracy1": 99.38, "model2": "Holt-Winters", "accuracy2": 99.02},
    "λ©λ°": {"model1": "SARIMA(1,0,1)(1,0,1,12)", "accuracy1": 99.48, "model2": "SARIMA(0,1,1)(0,1,1,12)", "accuracy2": 98.99},
    "λ©λ‘ ": {"model1": "Naive", "accuracy1": 99.07, "model2": "ETS(Multiplicative)", "accuracy2": 99.01},
    "λͺ ν": {"model1": "SARIMA(1,0,1)(1,0,1,12)", "accuracy1": 100.00, "model2": "MovingAverage-6 m", "accuracy2": 99.93},
    "무": {"model1": "SARIMA(1,1,1)(1,1,1,12)", "accuracy1": 99.54, "model2": "SeasonalNaive", "accuracy2": 88.29, "special": "accuracy_drop"},
    "λ¬Όμ€μ§μ΄": {"model1": "Holt-Winters", "accuracy1": 99.91, "model2": "ETS(Multiplicative)", "accuracy2": 99.36},
    "λ―Έλ리": {"model1": "SARIMA(1,0,1)(1,0,1,12)", "accuracy1": 98.71, "model2": "LinearTrend", "accuracy2": 98.54},
    "λ°λλ": {"model1": "MovingAverage-6 m", "accuracy1": 99.81, "model2": "ETS(Multiplicative)", "accuracy2": 98.86},
    "λ°©μΈν λ§ν ": {"model1": "ETS(Multiplicative)", "accuracy1": 99.62, "model2": "Holt", "accuracy2": 98.28},
    "λ°°": {"model1": "ETS(Additive)", "accuracy1": 99.34, "model2": "LinearTrend", "accuracy2": 98.57},
    "λ°°μΆ": {"model1": "Holt", "accuracy1": 99.98, "model2": "MovingAverage-6 m", "accuracy2": 99.71},
    "λΆμ΄": {"model1": "Fourier + LR", "accuracy1": 99.96, "model2": "MovingAverage-6 m", "accuracy2": 99.94},
    "λΆμκ³ μΆ": {"model1": "SARIMA(1,1,1)(1,1,1,12)", "accuracy1": 99.75, "model2": "LinearTrend", "accuracy2": 97.61},
    "λΈλ‘μ½λ¦¬": {"model1": "Holt", "accuracy1": 99.54, "model2": "Naive", "accuracy2": 99.93},
    "μ¬κ³Ό": {"model1": "Holt-Winters", "accuracy1": 99.89, "model2": "ETS(Multiplicative)", "accuracy2": 98.91},
    "μμΆ": {"model1": "ETS(Additive)", "accuracy1": 99.11, "model2": "Holt-Winters", "accuracy2": 97.61},
    "μμ‘μ΄λ²μ―": {"model1": "SimpleExpSmoothing", "accuracy1": 99.95, "model2": "Holt-Winters", "accuracy2": 99.40},
    "μμ°": {"model1": "ETS(Additive)", "accuracy1": 99.87, "model2": "Naive", "accuracy2": 99.96},
    "μκ°": {"model1": "Naive", "accuracy1": 99.27, "model2": "ETS(Additive)", "accuracy2": 98.53},
    "μλ°": {"model1": "Naive", "accuracy1": 99.91, "model2": "SARIMA(1,1,1)(1,1,1,12)", "accuracy2": 99.45},
    "μκΈμΉ": {"model1": "Holt-Winters", "accuracy1": 99.70, "model2": "SeasonalNaive", "accuracy2": 98.73},
    "μ": {"model1": "SARIMA(0,1,1)(0,1,1,12)", "accuracy1": 99.99, "model2": "Holt-Winters", "accuracy2": 99.88},
    "μλ°°κΈ°λ°°μΆ": {"model1": "WeightedMA-6 m", "accuracy1": 98.19, "model2": "SeasonalNaive", "accuracy2": 95.73},
    "μλ°°μΆ": {"model1": "Holt-Winters", "accuracy1": 99.05, "model2": "WeightedMA-6 m", "accuracy2": 97.85},
    "μν": {"model1": "ETS(Additive)", "accuracy1": 99.93, "model2": "WeightedMA-6 m", "accuracy2": 99.51},
    "μΌκ°μ΄λ°°μΆ": {"model1": "SARIMA(1,1,1)(1,1,1,12)", "accuracy1": 99.77, "model2": "SeasonalNaive", "accuracy2": 98.55},
    "μ΄λ¬΄": {"model1": "SeasonalNaive", "accuracy1": 99.96, "model2": "Holt", "accuracy2": 99.50},
    "μ€μ΄": {"model1": "SeasonalNaive", "accuracy1": 99.82, "model2": "ETS(Additive)", "accuracy2": 98.48},
    "μ 볡": {"model1": "Holt", "accuracy1": 99.90, "model2": "Fourier + LR", "accuracy2": 99.90},
    "μ°ΈκΉ¨": {"model1": "WeightedMA-6 m", "accuracy1": 100.00, "model2": "LinearTrend", "accuracy2": 86.44, "special": "accuracy_drop"},
    "μ°Ήμ": {"model1": "SARIMA(1,0,1)(1,0,1,12)", "accuracy1": 99.71, "model2": "Naive", "accuracy2": 98.64, "special": "accuracy_drop"},
    "콩": {"model1": "SARIMA(0,1,1)(0,1,1,12)", "accuracy1": 99.98, "model2": "ETS(Additive)", "accuracy2": 99.68},
    "ν λ§ν ": {"model1": "SeasonalNaive", "accuracy1": 97.31, "model2": "MovingAverage-6 m", "accuracy2": 97.57},
    "ν": {"model1": "MovingAverage-6 m", "accuracy1": 99.92, "model2": "Holt-Winters", "accuracy2": 97.77},
    "νμΈμ ν": {"model1": "Naive", "accuracy1": 99.51, "model2": "SARIMA(1,0,1)(1,0,1,12)", "accuracy2": 96.39},
    "νν리카": {"model1": "SARIMA(0,1,1)(0,1,1,12)", "accuracy1": 99.04, "model2": "WeightedMA-6 m", "accuracy2": 99.36},
    "ν₯": {"model1": "ETS(Additive)", "accuracy1": 99.87, "model2": "Holt-Winters", "accuracy2": 75.08, "special": "accuracy_drop"},
    "ν½μ΄λ²μ―": {"model1": "SeasonalNaive", "accuracy1": 99.84, "model2": "Fourier + LR", "accuracy2": 98.49},
    "νκ³ μΆ": {"model1": "Holt-Winters", "accuracy1": 98.95, "model2": "ETS(Multiplicative)", "accuracy2": 98.73},
    "νΌλ§": {"model1": "Fourier + LR", "accuracy1": 99.64, "model2": "WeightedMA-6 m", "accuracy2": 98.93},
    "νΈλ°": {"model1": "ETS(Multiplicative)", "accuracy1": 99.98, "model2": "SeasonalNaive", "accuracy2": 96.61},
    "νν©": {"model1": "Naive", "accuracy1": 99.86, "model2": "SeasonalNaive", "accuracy2": 98.56},
}
# Default models for any other item (items not present in the table above);
# same keys as an item_models entry, without "special".
default_models = {
    "model1": "SARIMA(1,0,1)(1,0,1,12)",
    "accuracy1": 99.0,
    "model2": "ETS(Multiplicative)",
    "accuracy2": 98.0
}
# -------------------------------------------------
# UTILITIES ---------------------------------------
# -------------------------------------------------
# Lower-cased source column names that _standardize_columns maps onto the
# canonical "date", "item" (or "species" for a second hit) and "price" columns.
DATE_CANDIDATES = {"date", "ds", "ymd", "λ μ§", "prce_reg_mm", "etl_ldg_dt"}
ITEM_CANDIDATES = {"item", "νλͺ©", "code", "category", "pdlt_nm", "spcs_nm"}
PRICE_CANDIDATES = {"price", "y", "value", "κ°κ²©", "avrg_prce"}
def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Rename known source columns to ``date`` / ``item`` / ``price``.

    Column labels are matched case-insensitively (after stripping whitespace)
    against DATE_CANDIDATES, PRICE_CANDIDATES and ITEM_CANDIDATES. The first
    item-like column becomes ``item`` and later ones ``species``; when several
    columns end up with the same name only the first is kept. A textual
    ``date`` column is parsed to datetime (``YYYYMM`` values are moved to the
    last day of that month), and ``item``/``species`` are merged into a single
    ``item`` column of the form ``"<item>-<species>"``.

    Args:
        df: Raw frame as read from the data file. It is not modified.

    Returns:
        A new frame with standardized column names.
    """
    col_map = {}
    for c in df.columns:
        # str() keeps non-string labels (e.g. integer headers) from crashing.
        lc = str(c).strip().lower()
        if lc in DATE_CANDIDATES:
            col_map[c] = "date"
        elif lc in PRICE_CANDIDATES:
            col_map[c] = "price"
        elif lc in ITEM_CANDIDATES:
            # First hit is the item, any later hit is treated as the species.
            col_map[c] = "species" if "item" in col_map.values() else "item"
    df = df.rename(columns=col_map)

    # Several source columns may map to the same name; keep the first one.
    if df.columns.duplicated().any():
        df = df.loc[:, ~df.columns.duplicated()].copy()

    # A datetime index is promoted to the "date" column.
    if "date" not in df.columns and df.index.dtype.kind == "M":
        df = df.reset_index()
        df = df.rename(columns={df.columns[0]: "date"})

    # Parse textual dates. Both object and dedicated string dtypes count as
    # text, so the parse is not skipped when pandas infers a string dtype.
    if "date" in df.columns and len(df) > 0 and (
        pd.api.types.is_object_dtype(df["date"])
        or pd.api.types.is_string_dtype(df["date"])
    ):
        # Sniff the format from the first non-null value rather than row 0,
        # which may be missing.
        non_null = df["date"].dropna()
        sample = str(non_null.iloc[0]).strip() if len(non_null) else ""
        try:
            if sample.isdigit() and len(sample) == 6:
                # YYYYMM -> last day of that month.
                parsed = pd.to_datetime(df["date"].astype(str), format="%Y%m", errors="coerce")
                parsed = parsed + pd.offsets.MonthEnd(0)
            elif sample.isdigit() and len(sample) == 8:
                # YYYYMMDD
                parsed = pd.to_datetime(df["date"].astype(str), format="%Y%m%d", errors="coerce")
            else:
                parsed = pd.to_datetime(df["date"], errors="coerce")
        except (ValueError, TypeError):
            # Fall back to pandas' own format inference.
            parsed = pd.to_datetime(df["date"], errors="coerce")
        df["date"] = parsed

    # Build item from pdlt_nm + spcs_nm if needed.
    # NOTE(review): both names are in ITEM_CANDIDATES and are renamed above,
    # so this branch looks unreachable - confirm before removing.
    if "item" not in df.columns and {"pdlt_nm", "spcs_nm"}.issubset(df.columns):
        df["item"] = df["pdlt_nm"].str.strip() + "-" + df["spcs_nm"].str.strip()

    # Merge item + species into one label.
    if {"item", "species"}.issubset(df.columns):
        df["item"] = df["item"].astype(str).str.strip() + "-" + df["species"].astype(str).str.strip()
        df = df.drop(columns=["species"])
    return df
def load_data() -> pd.DataFrame:
    """Load the price CSV at ``CSV_PATH`` and return cleaned rows.

    The frame is passed through ``_standardize_columns``, the ``date`` column
    is converted to datetime (6-digit ``YYYYMM`` values become the last day of
    that month), ``price`` is coerced to numeric, rows missing any of
    ``date`` / ``item`` / ``price`` are dropped and the result is sorted by
    date. Progress and row-loss diagnostics are written to the Streamlit
    sidebar.

    Returns:
        The cleaned frame; it may be empty, in which case an error is shown.
        If the file or a required column is missing, or any unexpected error
        occurs, the error is displayed and ``st.stop()`` is called.
    """
    try:
        if not CSV_PATH.exists():
            st.error(f"πΎ {CSV_PATH} νμΌμ μ°Ύμ μ μμ΅λλ€.")
            st.stop()
        # Read the CSV directly.
        df = pd.read_csv(CSV_PATH)
        st.sidebar.success(f"CSV λ°μ΄ν° λ‘λ μλ£: {len(df)}κ° ν")
        # Show the raw columns before standardization.
        st.sidebar.write("μλ³Έ λ°μ΄ν° 컬λΌ:", list(df.columns))
        before_std = len(df)
        df = _standardize_columns(df)
        after_std = len(df)
        if before_std != after_std:
            st.sidebar.warning(f"νμ€ν μ€ {before_std - after_std}κ° νμ΄ μ μΈλμμ΅λλ€.")
        st.sidebar.write("νμ€ν ν 컬λΌ:", list(df.columns))
        # Required columns.
        missing = {c for c in ["date", "item", "price"] if c not in df.columns}
        if missing:
            st.error(f"νμ μ»¬λΌ λλ½: {', '.join(missing)} β νμΌ μ»¬λΌλͺ μ νμΈνμΈμ.")
            st.stop()
        st.sidebar.write("λ μ§ μ»¬λΌ λ°μ΄ν° μν:", df["date"].head().tolist())
        before_date_convert = len(df)
        # Date conversion; also covers YYYYMM stored as integers.
        try:
            if pd.api.types.is_integer_dtype(df["date"]):
                df["date"] = df["date"].astype(str)
            # astype(str) and read_csv can yield a dedicated string dtype
            # instead of object, so accept both as text.
            if pd.api.types.is_object_dtype(df["date"]) or pd.api.types.is_string_dtype(df["date"]):
                if df["date"].str.match(r'^\d{6}$').all():
                    # Split year and month, then build the datetime.
                    df["year"] = df["date"].str[:4].astype(int)
                    df["month"] = df["date"].str[4:6].astype(int)
                    df["date"] = pd.to_datetime(dict(year=df["year"], month=df["month"], day=1))
                    # Move to the last day of the month.
                    df["date"] = df["date"] + pd.offsets.MonthEnd(0)
                    # Drop the temporary columns.
                    df.drop(columns=["year", "month"], inplace=True)
                else:
                    df["date"] = pd.to_datetime(df["date"], errors="coerce")
        except Exception as e:
            st.sidebar.warning(f"λ μ§ λ³ν μ€λ₯: {str(e)}")
            # Last resort: retry as YYYYMM, then let pandas infer the format.
            try:
                df["date"] = pd.to_datetime(df["date"].astype(str), format="%Y%m", errors="coerce")
                df["date"] = df["date"] + pd.offsets.MonthEnd(0)
            except (ValueError, TypeError):
                df["date"] = pd.to_datetime(df["date"], errors="coerce")
        st.sidebar.write("λ μ§ λ³ν ν μν:", df["date"].head().tolist())
        after_date_convert = df.dropna(subset=["date"]).shape[0]
        if before_date_convert != after_date_convert:
            st.sidebar.warning(f"λ μ§ λ³ν μ€ {before_date_convert - after_date_convert}κ° νμ΄ μ μΈλμμ΅λλ€.")
        # Coerce prices to numbers; unparseable values become NaN.
        df["price"] = pd.to_numeric(df["price"], errors="coerce")
        before_na_drop = len(df)
        df = df.dropna(subset=["date", "item", "price"])
        after_na_drop = len(df)
        if before_na_drop != after_na_drop:
            st.sidebar.warning(f"NA μ κ±° μ€ {before_na_drop - after_na_drop}κ° νμ΄ μ μΈλμμ΅λλ€.")
        # Reassign instead of sorting in place: df may be a dropna() result.
        df = df.sort_values("date")
        # Summary of the final data.
        if len(df) > 0:
            st.sidebar.write(f"μ΅μ’ λ°μ΄ν°: {len(df)}κ° ν")
            if pd.api.types.is_datetime64_dtype(df["date"]):
                st.sidebar.write(f"λ°μ΄ν° λ μ§ λ²μ: {df['date'].min().strftime('%Y-%m-%d')} ~ {df['date'].max().strftime('%Y-%m-%d')}")
            else:
                st.sidebar.write(f"λ°μ΄ν° λ μ§ λ²μ: λ μ§ νμ λ³ν μ€ν¨. νμ¬ νμ: {type(df['date'].iloc[0])}")
            st.sidebar.write(f"μ΄ νλͺ© μ: {df['item'].nunique()}")
            st.sidebar.write(f"νλͺ©λ³ νκ· λ°μ΄ν° μ: {len(df)/df['item'].nunique():.1f}κ°")
        else:
            st.error("μ ν¨ν λ°μ΄ν°κ° μμ΅λλ€!")
        return df
    except Exception as e:
        st.error(f"λ°μ΄ν° λ‘λ μ€ μ€λ₯ λ°μ: {str(e)}")
        import traceback
        st.code(traceback.format_exc())
        st.stop()
def get_items(df: pd.DataFrame):
    """Return the distinct values of the ``item`` column in ascending order."""
    names = list(df["item"].unique())
    names.sort()
    return names
def get_best_model_for_item(item):
    """Return the model entry registered for ``item`` in ``item_models``.

    Items without an entry get ``default_models``.
    """
    if item in item_models:
        return item_models[item]
    return default_models
def format_currency(value):
    """Format a number as a whole, comma-grouped amount plus the currency suffix.

    Missing or non-finite values are rendered as ``"N/A"``.
    """
    is_usable = not pd.isna(value) and np.isfinite(value)
    if is_usable:
        return f"{value:,.0f}μ"
    return "N/A"
# ------------------------------------------------- | |
# MODEL IMPLEMENTATIONS ---------------------------
# ------------------------------------------------- | |
def prepare_monthly_data(df):
    """Aggregate prices to one mean value per calendar month.

    Every date is first moved to the last day of its month, so the result is
    always indexed by month-end dates. Indexing by the last observed day
    instead would make the frequency conversion drop the final month whenever
    that day is not a month end. Months with no observations between the
    first and last month are forward-filled from the previous month.

    Args:
        df: Frame with a datetime ``date`` column and a numeric ``price``
            column. It is not modified.

    Returns:
        A frame with a single ``price`` column, indexed by month-end ``date``
        in ascending order.
    """
    monthly_df = df[['date', 'price']].copy()
    # Snap each observation to its month end (dates already there are kept).
    monthly_df['date'] = monthly_df['date'].dt.normalize() + pd.offsets.MonthEnd(0)
    monthly_df = monthly_df.groupby('date')['price'].mean().to_frame().sort_index()
    # Fill calendar gaps. An explicit offset object is used instead of the
    # deprecated 'M' frequency alias.
    if len(monthly_df) > 1:
        monthly_df = monthly_df.asfreq(pd.offsets.MonthEnd(), method='ffill')
    return monthly_df
def fit_sarima(df, order, seasonal_order, horizon_end):
    """Fit a SARIMA model on monthly mean prices and forecast to ``horizon_end``.

    Args:
        df: Frame with ``date`` and ``price`` columns for a single item.
        order: Non-seasonal order passed to ``SARIMAX``.
        seasonal_order: Seasonal order passed to ``SARIMAX``.
        horizon_end: Anything ``pd.Timestamp`` accepts; the forecast runs
            through the month containing this date.

    Returns:
        A Prophet-style frame with columns ``ds``, ``yhat``, ``yhat_lower``,
        ``yhat_upper``, ``yearly`` and ``trend``. Historical rows carry the
        observed monthly price in all three ``yhat*`` columns; forecast rows
        carry the predicted mean and its confidence interval. ``yearly`` is
        the mean multiplicative seasonal factor of the row's calendar month
        (0 when the decomposition is not possible) and ``trend`` is always 0.
        Returns ``None`` after showing the error if fitting or forecasting
        fails.
    """
    monthly_df = prepare_monthly_data(df)
    # Explicit offset instead of the deprecated 'M' frequency alias.
    month_end = pd.offsets.MonthEnd()
    try:
        results = SARIMAX(
            monthly_df['price'],
            order=order,
            seasonal_order=seasonal_order,
            enforce_stationarity=False,
            enforce_invertibility=False,
        ).fit(disp=False)

        # Number of months between the last observation and the horizon.
        last_date = monthly_df.index[-1]
        end_date = pd.Timestamp(horizon_end)
        periods = (end_date.year - last_date.year) * 12 + (end_date.month - last_date.month)
        if periods < 1:
            raise ValueError(
                f"horizon_end {end_date:%Y-%m} is not after the last observed month {last_date:%Y-%m}"
            )

        forecast = results.get_forecast(steps=periods)
        pred_ci = forecast.conf_int()
        future_dates = pd.date_range(
            start=last_date + pd.DateOffset(months=1), periods=periods, freq=month_end
        )

        # History followed by forecast on one continuous monthly axis.
        history = monthly_df['price'].to_numpy()
        fc_df_monthly = pd.DataFrame({
            'ds': pd.date_range(start=monthly_df.index[0], end=future_dates[-1], freq=month_end),
            'yhat': np.concatenate([history, forecast.predicted_mean.to_numpy()]),
            'yhat_lower': np.concatenate([history, pred_ci.iloc[:, 0].to_numpy()]),
            'yhat_upper': np.concatenate([history, pred_ci.iloc[:, 1].to_numpy()]),
        })

        # Prophet-compatible component columns.
        fc_df_monthly['yearly'] = 0.0
        fc_df_monthly['trend'] = 0
        try:
            seasonal = seasonal_decompose(
                monthly_df['price'], model='multiplicative', period=12
            ).seasonal
            # One mean factor per calendar month, computed once and mapped.
            month_means = seasonal.groupby(seasonal.index.month).mean()
            fc_df_monthly['yearly'] = fc_df_monthly['ds'].dt.month.map(month_means).fillna(0.0)
        except Exception:
            # Best effort: too little history or non-positive prices make the
            # decomposition impossible; the zero placeholders are kept.
            pass
        return fc_df_monthly
    except Exception as e:
        st.error(f"SARIMA λͺ¨λΈ μ€λ₯: {str(e)}")
        return None
def fit_ets(df, seasonal_type, horizon_end):
    """Fit a damped-trend ETS model with 12-month seasonality and forecast.

    Args:
        df: Frame with ``date`` and ``price`` columns for a single item.
        seasonal_type: ``'multiplicative'`` for multiplicative seasonality;
            any other value selects additive seasonality.
        horizon_end: Anything ``pd.Timestamp`` accepts; the forecast runs
            through the month containing this date.

    Returns:
        A Prophet-style frame with columns ``ds``, ``yhat``, ``yhat_lower``,
        ``yhat_upper``, ``yearly`` and ``trend``. Historical rows carry the
        observed monthly price in all three ``yhat*`` columns. The forecast
        band is ``yhat +/- 1.96 * std(residuals)``, an approximation with
        constant width. ``yearly`` is the mean seasonal component of the
        row's calendar month (0 when the decomposition is not possible) and
        ``trend`` is always 0. Returns ``None`` after showing the error if
        fitting or forecasting fails.
    """
    monthly_df = prepare_monthly_data(df)
    # Explicit offset instead of the deprecated 'M' frequency alias.
    month_end = pd.offsets.MonthEnd()
    seasonal_mode = 'mul' if seasonal_type == 'multiplicative' else 'add'
    try:
        results = ExponentialSmoothing(
            monthly_df['price'],
            trend='add',
            seasonal=seasonal_mode,
            seasonal_periods=12,
            damped_trend=True,  # current name of the deprecated `damped` keyword
        ).fit(optimized=True)

        # Number of months between the last observation and the horizon.
        last_date = monthly_df.index[-1]
        end_date = pd.Timestamp(horizon_end)
        periods = (end_date.year - last_date.year) * 12 + (end_date.month - last_date.month)
        if periods < 1:
            raise ValueError(
                f"horizon_end {end_date:%Y-%m} is not after the last observed month {last_date:%Y-%m}"
            )

        point = results.forecast(periods).to_numpy()
        future_dates = pd.date_range(
            start=last_date + pd.DateOffset(months=1), periods=periods, freq=month_end
        )
        # No interval is taken from the model; approximate one from the
        # spread of the in-sample residuals.
        margin = 1.96 * np.std(results.resid)

        # History followed by forecast on one continuous monthly axis.
        history = monthly_df['price'].to_numpy()
        fc_df_monthly = pd.DataFrame({
            'ds': pd.date_range(start=monthly_df.index[0], end=future_dates[-1], freq=month_end),
            'yhat': np.concatenate([history, point]),
            'yhat_lower': np.concatenate([history, point - margin]),
            'yhat_upper': np.concatenate([history, point + margin]),
        })

        # Prophet-compatible component columns.
        fc_df_monthly['yearly'] = 0.0
        fc_df_monthly['trend'] = 0
        try:
            seasonal = seasonal_decompose(
                monthly_df['price'], model=seasonal_type, period=12
            ).seasonal
            # One mean component per calendar month, computed once and mapped.
            month_means = seasonal.groupby(seasonal.index.month).mean()
            fc_df_monthly['yearly'] = fc_df_monthly['ds'].dt.month.map(month_means).fillna(0.0)
        except Exception:
            # Best effort: keep the zero placeholders when the series cannot
            # be decomposed.
            pass
        return fc_df_monthly
    except Exception as e:
        st.error(f"ETS λͺ¨λΈ μ€λ₯: {str(e)}")
        return None
def fit_holt(df, horizon_end):
    """Fit Holt's damped linear-trend model on monthly prices and forecast.

    Args:
        df: Frame with ``date`` and ``price`` columns for a single item.
        horizon_end: Anything ``pd.Timestamp`` accepts; the forecast runs
            through the month containing this date.

    Returns:
        A Prophet-style frame with columns ``ds``, ``yhat``, ``yhat_lower``,
        ``yhat_upper``, ``yearly`` and ``trend``. Historical rows carry the
        observed monthly price in all three ``yhat*`` columns. The forecast
        band is ``yhat +/- 1.96 * std(residuals)``. ``yearly`` is 0 and
        ``trend`` equals ``yhat`` because the model has no seasonal part.
        Returns ``None`` after showing the error if fitting or forecasting
        fails.
    """
    monthly_df = prepare_monthly_data(df)
    # Explicit offset instead of the deprecated 'M' frequency alias.
    month_end = pd.offsets.MonthEnd()
    try:
        # `damped_trend` is the current name of the deprecated `damped` keyword.
        results = Holt(monthly_df['price'], damped_trend=True).fit(optimized=True)

        # Number of months between the last observation and the horizon.
        last_date = monthly_df.index[-1]
        end_date = pd.Timestamp(horizon_end)
        periods = (end_date.year - last_date.year) * 12 + (end_date.month - last_date.month)
        if periods < 1:
            raise ValueError(
                f"horizon_end {end_date:%Y-%m} is not after the last observed month {last_date:%Y-%m}"
            )

        point = results.forecast(periods).to_numpy()
        future_dates = pd.date_range(
            start=last_date + pd.DateOffset(months=1), periods=periods, freq=month_end
        )
        # Approximate interval from the spread of the in-sample residuals.
        margin = 1.96 * np.std(results.resid)

        # History followed by forecast on one continuous monthly axis.
        history = monthly_df['price'].to_numpy()
        fc_df_monthly = pd.DataFrame({
            'ds': pd.date_range(start=monthly_df.index[0], end=future_dates[-1], freq=month_end),
            'yhat': np.concatenate([history, point]),
            'yhat_lower': np.concatenate([history, point - margin]),
            'yhat_upper': np.concatenate([history, point + margin]),
        })

        # Prophet-compatible component columns.
        fc_df_monthly['yearly'] = 0
        fc_df_monthly['trend'] = fc_df_monthly['yhat']
        return fc_df_monthly
    except Exception as e:
        st.error(f"Holt λͺ¨λΈ μ€λ₯: {str(e)}")
        return None
def fit_holt_winters(df, horizon_end):
    """Fit a Holt-Winters model (additive damped trend, multiplicative
    12-month seasonality) on monthly prices and forecast.

    Args:
        df: Frame with ``date`` and ``price`` columns for a single item.
        horizon_end: Anything ``pd.Timestamp`` accepts; the forecast runs
            through the month containing this date.

    Returns:
        A Prophet-style frame with columns ``ds``, ``yhat``, ``yhat_lower``,
        ``yhat_upper``, ``yearly`` and ``trend``. Historical rows carry the
        observed monthly price in all three ``yhat*`` columns. The forecast
        band is ``yhat +/- 1.96 * std(residuals)``. ``trend`` is ``yhat``
        divided by the mean fitted seasonal factor of the row's calendar
        month and ``yearly`` is the remainder, so ``trend + yearly == yhat``.
        Both stay 0 when the seasonal factors cannot be read from the fit.
        Returns ``None`` after showing the error if fitting or forecasting
        fails.
    """
    monthly_df = prepare_monthly_data(df)
    # Explicit offset instead of the deprecated 'M' frequency alias.
    month_end = pd.offsets.MonthEnd()
    try:
        results = ExponentialSmoothing(
            monthly_df['price'],
            trend='add',
            seasonal='mul',  # multiplicative seasonality, as chosen by the original author
            seasonal_periods=12,
            damped_trend=True,  # current name of the deprecated `damped` keyword
        ).fit(optimized=True)

        # Number of months between the last observation and the horizon.
        last_date = monthly_df.index[-1]
        end_date = pd.Timestamp(horizon_end)
        periods = (end_date.year - last_date.year) * 12 + (end_date.month - last_date.month)
        if periods < 1:
            raise ValueError(
                f"horizon_end {end_date:%Y-%m} is not after the last observed month {last_date:%Y-%m}"
            )

        point = results.forecast(periods).to_numpy()
        future_dates = pd.date_range(
            start=last_date + pd.DateOffset(months=1), periods=periods, freq=month_end
        )
        # Approximate interval from the spread of the in-sample residuals.
        margin = 1.96 * np.std(results.resid)

        # History followed by forecast on one continuous monthly axis.
        history = monthly_df['price'].to_numpy()
        fc_df_monthly = pd.DataFrame({
            'ds': pd.date_range(start=monthly_df.index[0], end=future_dates[-1], freq=month_end),
            'yhat': np.concatenate([history, point]),
            'yhat_lower': np.concatenate([history, point - margin]),
            'yhat_upper': np.concatenate([history, point + margin]),
        })

        # Prophet-compatible component columns.
        fc_df_monthly['yearly'] = 0.0
        fc_df_monthly['trend'] = 0.0
        try:
            # The fitted results expose per-observation seasonal factors as
            # `season`; the previous `seasonal_` lookup always failed and the
            # failure was silently swallowed. Factors are averaged per
            # calendar month so the lookup does not depend on which month
            # the series starts in.
            factors = pd.Series(np.asarray(results.season), index=monthly_df.index)
            by_month = factors.groupby(factors.index.month).mean()
            month_factor = fc_df_monthly['ds'].dt.month.map(by_month)
            usable = month_factor.notna() & (month_factor != 0)
            deseasonalized = fc_df_monthly.loc[usable, 'yhat'] / month_factor[usable]
            fc_df_monthly.loc[usable, 'trend'] = deseasonalized
            fc_df_monthly.loc[usable, 'yearly'] = fc_df_monthly.loc[usable, 'yhat'] - deseasonalized
        except Exception:
            # Best effort: keep the zero placeholders if the factors are
            # unavailable or do not line up with the history.
            pass
        return fc_df_monthly
    except Exception as e:
        st.error(f"Holt-Winters λͺ¨λΈ μ€λ₯: {str(e)}")
        return None
def fit_moving_average(df, window, horizon_end):
    """Forecast every future month as the mean of the last ``window`` months.

    Args:
        df: Frame with ``date`` and ``price`` columns for a single item.
        window: Number of trailing monthly observations to average.
        horizon_end: Anything ``pd.Timestamp`` accepts; the forecast runs
            through the month containing this date.

    Returns:
        A Prophet-style frame with columns ``ds``, ``yhat``, ``yhat_lower``,
        ``yhat_upper``, ``yearly`` and ``trend``. Historical rows carry the
        observed monthly price in all three ``yhat*`` columns; forecast rows
        carry the constant average with a band of ``+/- 1.96`` times the
        standard deviation of the averaged values. ``yearly`` is 0 and
        ``trend`` equals ``yhat``. Returns ``None`` after showing the error
        on failure.
    """
    monthly_df = prepare_monthly_data(df)
    # Explicit offset instead of the deprecated 'M' frequency alias.
    month_end = pd.offsets.MonthEnd()
    try:
        recent = monthly_df['price'].iloc[-window:]
        ma_value = recent.mean()

        # Number of months between the last observation and the horizon.
        last_date = monthly_df.index[-1]
        end_date = pd.Timestamp(horizon_end)
        periods = (end_date.year - last_date.year) * 12 + (end_date.month - last_date.month)
        if periods < 1:
            raise ValueError(
                f"horizon_end {end_date:%Y-%m} is not after the last observed month {last_date:%Y-%m}"
            )

        future_dates = pd.date_range(
            start=last_date + pd.DateOffset(months=1), periods=periods, freq=month_end
        )
        # Interval estimated from the spread of the averaged values.
        margin = 1.96 * recent.std()

        # History followed by the flat forecast on one monthly axis.
        history = monthly_df['price'].to_numpy()
        fc_df_monthly = pd.DataFrame({
            'ds': pd.date_range(start=monthly_df.index[0], end=future_dates[-1], freq=month_end),
            'yhat': np.concatenate([history, np.full(periods, ma_value)]),
            'yhat_lower': np.concatenate([history, np.full(periods, ma_value - margin)]),
            'yhat_upper': np.concatenate([history, np.full(periods, ma_value + margin)]),
        })

        # Prophet-compatible component columns.
        fc_df_monthly['yearly'] = 0
        fc_df_monthly['trend'] = fc_df_monthly['yhat']
        return fc_df_monthly
    except Exception as e:
        st.error(f"μ΄λ νκ· λͺ¨λΈ μ€λ₯: {str(e)}")
        return None
def fit_weighted_ma(df, window, horizon_end):
    """Forecast every future month as a linearly weighted mean of the last
    ``window`` months, with the most recent month weighted highest.

    When fewer than ``window`` monthly observations exist, the weights are
    built for the observations that are available instead of failing on a
    shape mismatch.

    Args:
        df: Frame with ``date`` and ``price`` columns for a single item.
        window: Number of trailing monthly observations to use.
        horizon_end: Anything ``pd.Timestamp`` accepts; the forecast runs
            through the month containing this date.

    Returns:
        A Prophet-style frame with columns ``ds``, ``yhat``, ``yhat_lower``,
        ``yhat_upper``, ``yearly`` and ``trend``. Historical rows carry the
        observed monthly price in all three ``yhat*`` columns; forecast rows
        carry the constant weighted average with a band of ``+/- 1.96`` times
        the standard deviation of the values used. ``yearly`` is 0 and
        ``trend`` equals ``yhat``. Returns ``None`` after showing the error
        on failure.
    """
    monthly_df = prepare_monthly_data(df)
    # Explicit offset instead of the deprecated 'M' frequency alias.
    month_end = pd.offsets.MonthEnd()
    try:
        recent = monthly_df['price'].iloc[-window:].to_numpy()
        # Weights 1..k normalized to sum to 1, sized by the rows available.
        weights = np.arange(1, len(recent) + 1)
        weights = weights / np.sum(weights)
        wma_value = np.sum(recent * weights)

        # Number of months between the last observation and the horizon.
        last_date = monthly_df.index[-1]
        end_date = pd.Timestamp(horizon_end)
        periods = (end_date.year - last_date.year) * 12 + (end_date.month - last_date.month)
        if periods < 1:
            raise ValueError(
                f"horizon_end {end_date:%Y-%m} is not after the last observed month {last_date:%Y-%m}"
            )

        future_dates = pd.date_range(
            start=last_date + pd.DateOffset(months=1), periods=periods, freq=month_end
        )
        # Interval estimated from the spread of the values used.
        margin = 1.96 * np.std(recent)

        # History followed by the flat forecast on one monthly axis.
        history = monthly_df['price'].to_numpy()
        fc_df_monthly = pd.DataFrame({
            'ds': pd.date_range(start=monthly_df.index[0], end=future_dates[-1], freq=month_end),
            'yhat': np.concatenate([history, np.full(periods, wma_value)]),
            'yhat_lower': np.concatenate([history, np.full(periods, wma_value - margin)]),
            'yhat_upper': np.concatenate([history, np.full(periods, wma_value + margin)]),
        })

        # Prophet-compatible component columns.
        fc_df_monthly['yearly'] = 0
        fc_df_monthly['trend'] = fc_df_monthly['yhat']
        return fc_df_monthly
    except Exception as e:
        st.error(f"κ°μ€ μ΄λ νκ· λͺ¨λΈ μ€λ₯: {str(e)}")
        return None
def fit_naive(df, horizon_end):
    """Forecast every future month as the last observed monthly price.

    Args:
        df: Frame with ``date`` and ``price`` columns for a single item.
        horizon_end: Anything ``pd.Timestamp`` accepts; the forecast runs
            through the month containing this date.

    Returns:
        A Prophet-style frame with columns ``ds``, ``yhat``, ``yhat_lower``,
        ``yhat_upper``, ``yearly`` and ``trend``. Historical rows carry the
        observed monthly price in all three ``yhat*`` columns; forecast rows
        repeat the last price with a band of ``+/- 1.96`` times the standard
        deviation of the last 12 months (or of the whole series when it is
        shorter). ``yearly`` is 0 and ``trend`` equals ``yhat``. Returns
        ``None`` after showing the error on failure.
    """
    monthly_df = prepare_monthly_data(df)
    # Explicit offset instead of the deprecated 'M' frequency alias.
    month_end = pd.offsets.MonthEnd()
    try:
        last_value = monthly_df['price'].iloc[-1]

        # Number of months between the last observation and the horizon.
        last_date = monthly_df.index[-1]
        end_date = pd.Timestamp(horizon_end)
        periods = (end_date.year - last_date.year) * 12 + (end_date.month - last_date.month)
        if periods < 1:
            raise ValueError(
                f"horizon_end {end_date:%Y-%m} is not after the last observed month {last_date:%Y-%m}"
            )

        future_dates = pd.date_range(
            start=last_date + pd.DateOffset(months=1), periods=periods, freq=month_end
        )
        # A trailing slice already covers the whole series when it has fewer
        # than 12 rows, so no length check is needed.
        margin = 1.96 * monthly_df['price'].iloc[-12:].std()

        # History followed by the flat forecast on one monthly axis.
        history = monthly_df['price'].to_numpy()
        fc_df_monthly = pd.DataFrame({
            'ds': pd.date_range(start=monthly_df.index[0], end=future_dates[-1], freq=month_end),
            'yhat': np.concatenate([history, np.full(periods, last_value)]),
            'yhat_lower': np.concatenate([history, np.full(periods, last_value - margin)]),
            'yhat_upper': np.concatenate([history, np.full(periods, last_value + margin)]),
        })

        # Prophet-compatible component columns.
        fc_df_monthly['yearly'] = 0
        fc_df_monthly['trend'] = fc_df_monthly['yhat']
        return fc_df_monthly
    except Exception as e:
        st.error(f"Naive λͺ¨λΈ μ€λ₯: {str(e)}")
        return None
def fit_seasonal_naive(df, horizon_end):
    """Forecast each future month as the latest observed price of that month.

    For a calendar month that never occurs in the history, the overall mean
    price is used instead. The band is +/-1.96 times the standard deviation of
    that month's observations (overall standard deviation when the month has
    fewer than two observations).

    Args:
        df: Raw price rows. They are passed to ``prepare_monthly_data``; the
            result is used as a date-indexed frame with a ``price`` column.
        horizon_end: Last date to forecast (anything ``pd.Timestamp`` accepts).

    Returns:
        DataFrame with ``ds``, ``yhat``, ``yhat_lower``, ``yhat_upper``,
        ``yearly`` and ``trend`` columns (history rows first, then forecast
        rows), or ``None`` when anything fails; the error is reported through
        ``st.error``.
    """
    monthly_df = prepare_monthly_data(df)
    try:
        prices = monthly_df['price']

        # Whole months between the last observation and the horizon.
        last_date = monthly_df.index[-1]
        end_date = pd.Timestamp(horizon_end)
        periods = (end_date.year - last_date.year) * 12 + (end_date.month - last_date.month)
        if periods <= 0:
            # Fail explicitly instead of tripping over an empty date range later.
            raise ValueError(
                f"horizon_end {end_date} is not after the last observation {last_date}"
            )

        # Offset object instead of the deprecated 'M' string alias.
        future_dates = pd.date_range(
            start=last_date + pd.DateOffset(months=1),
            periods=periods,
            freq=pd.offsets.MonthEnd(),
        )

        overall_mean = prices.mean()
        overall_std = prices.std()
        observed_months = np.asarray(monthly_df.index.month)

        # One (value, spread) pair per calendar month, computed once instead of
        # re-filtering the history for every future date.
        per_month = {}
        for month in range(1, 13):
            same_month = prices[observed_months == month]
            if len(same_month) > 0:
                value = same_month.iloc[-1]  # most recent occurrence of the month
                spread = same_month.std() if len(same_month) > 1 else overall_std
            else:
                value, spread = overall_mean, overall_std
            per_month[month] = (value, spread)

        forecast = np.array([per_month[ts.month][0] for ts in future_dates], dtype=float)
        band = 1.96 * np.array([per_month[ts.month][1] for ts in future_dates], dtype=float)

        # Stitch history and forecast positionally. Regenerating a calendar
        # range would fail with a length mismatch if the history skips a month.
        history = prices.to_numpy(dtype=float)
        history_dates = monthly_df.index + pd.offsets.MonthEnd(0)  # roll to month end
        fc_df_monthly = pd.DataFrame({
            'ds': history_dates.append(future_dates),
            'yhat': np.concatenate([history, forecast]),
            'yhat_lower': np.concatenate([history, forecast - band]),
            'yhat_upper': np.concatenate([history, forecast + band]),
        })

        # Prophet-compatible component columns: the whole signal is treated as
        # seasonal, there is no trend.
        fc_df_monthly['yearly'] = fc_df_monthly['yhat']
        fc_df_monthly['trend'] = 0
        return fc_df_monthly
    except Exception as e:
        st.error(f"Seasonal Naive λͺ¨λΈ μ€λ₯: {str(e)}")
        return None
def fit_fourier_lr(df, horizon_end):
    """Fit a linear trend plus two annual Fourier harmonics by least squares.

    The regressors are the month counter ``t`` and ``sin``/``cos`` of
    ``2*pi*t/12`` and ``4*pi*t/12``. The band is +/-1.96 times the in-sample
    root-mean-squared residual.

    Args:
        df: Raw price rows. They are passed to ``prepare_monthly_data``; the
            result is used as a date-indexed frame with a ``price`` column.
        horizon_end: Last date to forecast (anything ``pd.Timestamp`` accepts).

    Returns:
        DataFrame with ``ds``, ``yhat``, ``yhat_lower``, ``yhat_upper``,
        ``trend`` and ``yearly`` columns (history rows first, then forecast
        rows), or ``None`` when anything fails; the error is reported through
        ``st.error``.
    """
    from sklearn.linear_model import LinearRegression

    monthly_df = prepare_monthly_data(df)
    try:
        y = np.asarray(monthly_df['price'].values, dtype=float)
        n_obs = len(y)
        p = 12  # seasonal period in months (one year)

        def design(t):
            """Return the regressor matrix [t, sin1, cos1, sin2, cos2] for steps *t*."""
            return np.column_stack([
                t,
                np.sin(2 * np.pi * t / p),
                np.cos(2 * np.pi * t / p),
                np.sin(4 * np.pi * t / p),
                np.cos(4 * np.pi * t / p),
            ])

        # Whole months between the last observation and the horizon.
        last_date = monthly_df.index[-1]
        end_date = pd.Timestamp(horizon_end)
        periods = (end_date.year - last_date.year) * 12 + (end_date.month - last_date.month)
        if periods <= 0:
            # Fail explicitly instead of tripping over an empty date range later.
            raise ValueError(
                f"horizon_end {end_date} is not after the last observation {last_date}"
            )

        # Offset object instead of the deprecated 'M' string alias.
        future_dates = pd.date_range(
            start=last_date + pd.DateOffset(months=1),
            periods=periods,
            freq=pd.offsets.MonthEnd(),
        )

        # One design matrix for the whole timeline; its head trains the model,
        # its tail produces the forecast.
        t_all = np.arange(n_obs + periods)
        X_all = design(t_all)
        model = LinearRegression()
        model.fit(X_all[:n_obs], y)
        forecast = model.predict(X_all[n_obs:])

        # In-sample RMSE as the spread of the forecast band.
        residuals = y - model.predict(X_all[:n_obs])
        band = 1.96 * np.sqrt(np.mean(residuals ** 2))

        # Stitch history and forecast positionally. Regenerating a calendar
        # range would fail with a length mismatch if the history skips a month.
        history_dates = monthly_df.index + pd.offsets.MonthEnd(0)  # roll to month end
        fc_df_monthly = pd.DataFrame({
            'ds': history_dates.append(future_dates),
            'yhat': np.concatenate([y, forecast]),
            'yhat_lower': np.concatenate([y, forecast - band]),
            'yhat_upper': np.concatenate([y, forecast + band]),
        })

        # Prophet-compatible components taken from the fitted coefficients.
        fc_df_monthly['trend'] = model.coef_[0] * t_all + model.intercept_
        fc_df_monthly['yearly'] = np.dot(X_all[:, 1:], model.coef_[1:5])
        return fc_df_monthly
    except Exception as e:
        st.error(f"Fourier + LR λͺ¨λΈ μ€λ₯: {str(e)}")
        return None
def fit_linear_trend(df, horizon_end):
    """Fit a straight line through the monthly prices and extrapolate it.

    The band is +/-1.96 times the in-sample root-mean-squared residual.

    Args:
        df: Raw price rows. They are passed to ``prepare_monthly_data``; the
            result is used as a date-indexed frame with a ``price`` column.
        horizon_end: Last date to forecast (anything ``pd.Timestamp`` accepts).

    Returns:
        DataFrame with ``ds``, ``yhat``, ``yhat_lower``, ``yhat_upper``,
        ``yearly`` and ``trend`` columns (history rows first, then forecast
        rows), or ``None`` when anything fails; the error is reported through
        ``st.error``.
    """
    from sklearn.linear_model import LinearRegression

    monthly_df = prepare_monthly_data(df)
    try:
        y = np.asarray(monthly_df['price'].values, dtype=float)
        n_obs = len(y)

        # Whole months between the last observation and the horizon.
        last_date = monthly_df.index[-1]
        end_date = pd.Timestamp(horizon_end)
        periods = (end_date.year - last_date.year) * 12 + (end_date.month - last_date.month)
        if periods <= 0:
            # Fail explicitly instead of tripping over an empty date range later.
            raise ValueError(
                f"horizon_end {end_date} is not after the last observation {last_date}"
            )

        # Offset object instead of the deprecated 'M' string alias.
        future_dates = pd.date_range(
            start=last_date + pd.DateOffset(months=1),
            periods=periods,
            freq=pd.offsets.MonthEnd(),
        )

        # Month counter for history + forecast as a single column matrix.
        steps = np.arange(n_obs + periods).reshape(-1, 1)
        model = LinearRegression()
        model.fit(steps[:n_obs], y)
        forecast = model.predict(steps[n_obs:])

        # In-sample RMSE as the spread of the forecast band.
        residuals = y - model.predict(steps[:n_obs])
        band = 1.96 * np.sqrt(np.mean(residuals ** 2))

        # Stitch history and forecast positionally. Regenerating a calendar
        # range would fail with a length mismatch if the history skips a month.
        history_dates = monthly_df.index + pd.offsets.MonthEnd(0)  # roll to month end
        fc_df_monthly = pd.DataFrame({
            'ds': history_dates.append(future_dates),
            'yhat': np.concatenate([y, forecast]),
            'yhat_lower': np.concatenate([y, forecast - band]),
            'yhat_upper': np.concatenate([y, forecast + band]),
        })

        # Prophet-compatible component columns: no seasonality, trend == yhat.
        fc_df_monthly['yearly'] = 0
        fc_df_monthly['trend'] = fc_df_monthly['yhat']
        return fc_df_monthly
    except Exception as e:
        st.error(f"Linear Trend λͺ¨λΈ μ€λ₯: {str(e)}")
        return None
def fit_simple_exp_smoothing(df, horizon_end):
    """Fit statsmodels ``SimpleExpSmoothing`` and forecast up to *horizon_end*.

    The band is +/-1.96 times ``np.std`` of the fitted model's residuals.

    Args:
        df: Raw price rows. They are passed to ``prepare_monthly_data``; the
            result is used as a date-indexed frame with a ``price`` column.
        horizon_end: Last date to forecast (anything ``pd.Timestamp`` accepts).

    Returns:
        DataFrame with ``ds``, ``yhat``, ``yhat_lower``, ``yhat_upper``,
        ``yearly`` and ``trend`` columns (history rows first, then forecast
        rows), or ``None`` when anything fails; the error is reported through
        ``st.error``.
    """
    monthly_df = prepare_monthly_data(df)
    try:
        prices = monthly_df['price']
        results = SimpleExpSmoothing(prices).fit(optimized=True)

        # Whole months between the last observation and the horizon.
        last_date = monthly_df.index[-1]
        end_date = pd.Timestamp(horizon_end)
        periods = (end_date.year - last_date.year) * 12 + (end_date.month - last_date.month)
        if periods <= 0:
            # Fail explicitly instead of tripping over an empty date range later.
            raise ValueError(
                f"horizon_end {end_date} is not after the last observation {last_date}"
            )

        # np.asarray accepts both a Series and a plain array from forecast().
        forecast = np.asarray(results.forecast(periods), dtype=float)
        band = 1.96 * np.std(results.resid)

        # Offset object instead of the deprecated 'M' string alias.
        future_dates = pd.date_range(
            start=last_date + pd.DateOffset(months=1),
            periods=periods,
            freq=pd.offsets.MonthEnd(),
        )

        # Stitch history and forecast positionally. Regenerating a calendar
        # range would fail with a length mismatch if the history skips a month.
        history = prices.to_numpy(dtype=float)
        history_dates = monthly_df.index + pd.offsets.MonthEnd(0)  # roll to month end
        fc_df_monthly = pd.DataFrame({
            'ds': history_dates.append(future_dates),
            'yhat': np.concatenate([history, forecast]),
            'yhat_lower': np.concatenate([history, forecast - band]),
            'yhat_upper': np.concatenate([history, forecast + band]),
        })

        # Prophet-compatible component columns: no seasonality, level == yhat.
        fc_df_monthly['yearly'] = 0
        fc_df_monthly['trend'] = fc_df_monthly['yhat']
        return fc_df_monthly
    except Exception as e:
        st.error(f"Simple Exponential Smoothing λͺ¨λΈ μ€λ₯: {str(e)}")
        return None
def fit_optimal_model(df, item_name, horizon_end, model_type="primary"):
    """Run the model registered for *item_name* and return its forecast frame.

    Args:
        df: Price rows with ``date`` and ``price`` columns; rows missing either
            value are dropped on a copy before fitting.
        item_name: Item whose model entry is looked up with
            ``get_best_model_for_item``.
        horizon_end: Last date to forecast; forwarded to the fitting function.
        model_type: ``"primary"`` selects the ``model1``/``accuracy1`` entry,
            any other value selects ``model2``/``accuracy2``.

    Returns:
        Whatever the selected ``fit_*`` function returns.
    """
    clean = df.copy().dropna(subset=["date", "price"])

    model_info = get_best_model_for_item(item_name)
    rank = "1" if model_type == "primary" else "2"
    model_name = model_info["model" + rank]
    accuracy = model_info["accuracy" + rank]
    st.info(f"{item_name}μ μ΅μ νλ {model_name} λͺ¨λΈ μ μ© (μ νλ: {accuracy}%)")

    # Items flagged with special == "accuracy_drop" get an extra caution banner.
    if "special" in model_info and model_info["special"] == "accuracy_drop":
        st.warning(f"β οΈ {item_name}λ νΉμ μμ μ νλκ° κΈλ½ν μ μλ νλͺ©μ λλ€. μμΈ‘ κ²°κ³Όλ₯Ό μ£Όμ κΉκ² μ΄ν΄λ³΄μΈμ.")

    def sarima(order, seasonal_order):
        return lambda: fit_sarima(clean, order=order, seasonal_order=seasonal_order, horizon_end=horizon_end)

    # (substring that must appear, substring that must not appear, runner).
    # Order matters: the first matching rule wins, so "Holt-Winters" has to be
    # tried before the shorter "Holt".
    rules = (
        ("SARIMA(1,0,1)(1,0,1,12)", None, sarima((1, 0, 1), (1, 0, 1, 12))),
        ("SARIMA(1,1,1)(1,1,1,12)", None, sarima((1, 1, 1), (1, 1, 1, 12))),
        ("SARIMA(0,1,1)(0,1,1,12)", None, sarima((0, 1, 1), (0, 1, 1, 12))),
        ("ETS(Multiplicative)", None,
         lambda: fit_ets(clean, seasonal_type="multiplicative", horizon_end=horizon_end)),
        ("ETS(Additive)", None,
         lambda: fit_ets(clean, seasonal_type="additive", horizon_end=horizon_end)),
        ("Holt-Winters", None, lambda: fit_holt_winters(clean, horizon_end=horizon_end)),
        ("Holt", None, lambda: fit_holt(clean, horizon_end=horizon_end)),
        ("MovingAverage-6 m", None,
         lambda: fit_moving_average(clean, window=6, horizon_end=horizon_end)),
        ("WeightedMA-6 m", None,
         lambda: fit_weighted_ma(clean, window=6, horizon_end=horizon_end)),
        ("Naive", "Seasonal", lambda: fit_naive(clean, horizon_end=horizon_end)),
        ("SeasonalNaive", None, lambda: fit_seasonal_naive(clean, horizon_end=horizon_end)),
        ("Fourier + LR", None, lambda: fit_fourier_lr(clean, horizon_end=horizon_end)),
        ("LinearTrend", None, lambda: fit_linear_trend(clean, horizon_end=horizon_end)),
        ("SimpleExpSmoothing", None,
         lambda: fit_simple_exp_smoothing(clean, horizon_end=horizon_end)),
    )
    for required, forbidden, run in rules:
        if required in model_name and (forbidden is None or forbidden not in model_name):
            return run()

    # Unrecognised model name: warn and fall back to the default SARIMA setup.
    st.warning(f"μ μ μλ λͺ¨λΈ: {model_name}. κΈ°λ³Έ λͺ¨λΈ(SARIMA)μ μ¬μ©ν©λλ€.")
    return sarima((1, 0, 1), (1, 0, 1, 12))()
def fit_ensemble_model(df, item_name, horizon_end):
    """Blend the primary and backup forecasts of an item when they are close.

    Both models are fitted through ``fit_optimal_model``. If both succeed and
    their registered accuracies differ by at most 0.2 percentage points, the
    ``yhat``, ``yhat_lower`` and ``yhat_upper`` columns are combined with
    weights proportional to the accuracies; every other column is taken from
    the primary forecast. Otherwise the primary forecast is returned alone.

    Args:
        df: Price rows forwarded to ``fit_optimal_model``.
        item_name: Item whose model entry is looked up.
        horizon_end: Last date to forecast.

    Returns:
        The blended frame, the primary frame, or, when one model failed,
        whichever forecast is available (``None`` if both failed).
    """
    primary_fc = fit_optimal_model(df, item_name, horizon_end, model_type="primary")
    backup_fc = fit_optimal_model(df, item_name, horizon_end, model_type="backup")

    # With a failed model there is nothing to blend: return what is left.
    if primary_fc is None or backup_fc is None:
        return primary_fc if primary_fc is not None else backup_fc

    info = get_best_model_for_item(item_name)
    acc_primary = info["accuracy1"]
    acc_backup = info["accuracy2"]
    gap = abs(acc_primary - acc_backup)

    if not (gap <= 0.2):
        st.info(f"μ νλ μ°¨μ΄κ° {gap:.2f}%pλ‘ μ»€μ 1μ λͺ¨λΈλ§ μ¬μ©ν©λλ€.")
        return primary_fc

    st.success(f"λ λͺ¨λΈμ μ νλ μ°¨μ΄κ° {gap:.2f}%pλ‘ μμ μμλΈμ μνν©λλ€.")

    # Accuracy-proportional weights that sum to one.
    accuracy_sum = acc_primary + acc_backup
    weight_primary = acc_primary / accuracy_sum
    weight_backup = acc_backup / accuracy_sum

    # NOTE(review): the sum below aligns rows by index label, so it assumes
    # both forecasts cover the same rows - confirm for every model pair.
    blended = primary_fc.copy()
    for column in ('yhat', 'yhat_lower', 'yhat_upper'):
        blended[column] = weight_primary * primary_fc[column] + weight_backup * backup_fc[column]
    return blended
# -------------------------------------------------
# MAIN APP ----------------------------------------
# -------------------------------------------------
# Load the full price table; without data the app cannot do anything.
raw_df = load_data()
if not len(raw_df):
    st.error("λ°μ΄ν°κ° λΉμ΄ μμ΅λλ€. νμΌμ νμΈν΄μ£ΌμΈμ.")
    st.stop()

# Sidebar: item picker, today's date and the models registered for the item.
with st.sidebar:
    st.header("π νλͺ© μ ν")
    selected_item = st.selectbox("νλͺ©", get_items(raw_df))
    current_date = date.today()
    st.caption(f"μ€λ: {current_date}")

    model_info = get_best_model_for_item(selected_item)
    st.subheader("νλͺ©λ³ μ΅μ λͺ¨λΈ")
    st.markdown(f"**1μ λͺ¨λΈ:** {model_info['model1']} (μ νλ: {model_info['accuracy1']}%)")
    st.markdown(f"**2μ λͺ¨λΈ:** {model_info['model2']} (μ νλ: {model_info['accuracy2']}%)")

# Keep only the rows of the selected item.
item_df = raw_df.query("item == @selected_item").copy()
if item_df.empty:
    st.error("μ νν νλͺ© λ°μ΄ν° μμ")
    st.stop()

# Tell the user how much data the forecasts will be based on.
item_row_count = len(item_df)
if item_row_count >= 2:
    st.success(f"μ νν νλͺ© '{selected_item}'μ λν΄ {item_row_count}κ°μ λ°μ΄ν°κ° μμ΅λλ€.")
else:
    st.warning(f"μ νν νλͺ© '{selected_item}' λ°μ΄ν°κ° λ무 μ μ΅λλ€ (λ°μ΄ν° μ: {item_row_count}). μμΈ‘μ΄ λΆμ νν μ μμ΅λλ€.")
# -------------------------------------------------
# MACRO FORECAST 1996-2030 ------------------------
# -------------------------------------------------
st.header(f"π {selected_item} κ°κ²© μμΈ‘ λμ보λ")


def _plot_price_history(frame):
    """Fallback chart: draw the observed prices in *frame* as one line."""
    history_fig = go.Figure()
    history_fig.add_trace(go.Scatter(x=frame["date"], y=frame["price"], mode="lines", name="μ€μ κ°κ²©"))
    history_fig.update_layout(title=f"{selected_item} κ³Όκ±° κ°κ²©")
    st.plotly_chart(history_fig, use_container_width=True)


# Restrict the history to 1996 onward, or to the first observation if later.
try:
    macro_start_dt = pd.Timestamp("1996-01-01")
    first_observation = item_df["date"].min()
    if first_observation > macro_start_dt:
        macro_start_dt = first_observation
    macro_df = item_df.loc[item_df["date"] >= macro_start_dt].copy()
except Exception as e:
    st.error(f"λ μ§ νν°λ§ μ€λ₯: {str(e)}")
    macro_df = item_df.copy()  # fall back to the unfiltered item data

# Diagnostics: row counts, covered period and a preview of the data.
with st.expander("λ°μ΄ν° μ§λ¨"):
    st.write(f"- μ 체 λ°μ΄ν° μ: {len(item_df)}")
    st.write(f"- λΆμ λ°μ΄ν° μ: {len(macro_df)}")
    if len(macro_df) > 0:
        period_start = macro_df['date'].min().strftime('%Y-%m-%d')
        period_end = macro_df['date'].max().strftime('%Y-%m-%d')
        st.write(f"- κΈ°κ°: {period_start} ~ {period_end}")
        st.dataframe(macro_df.head())
    else:
        st.write("λ°μ΄ν°κ° μμ΅λλ€.")

if len(macro_df) < 2:
    st.warning(f"{selected_item}μ λν λ°μ΄ν°κ° μΆ©λΆνμ§ μμ΅λλ€. μ 체 κΈ°κ° λ°μ΄ν°λ₯Ό νμν©λλ€.")
    _plot_price_history(item_df)
else:
    try:
        # The checkbox value is reused by the short-term section further down.
        use_ensemble = st.checkbox("μμλΈ λͺ¨λΈ μ¬μ© (1μ + 2μ λͺ¨λΈ κ²°ν©)", value=False)
        with st.spinner("μ₯κΈ° μμΈ‘ λͺ¨λΈ μμ± μ€..."):
            macro_fit = fit_ensemble_model if use_ensemble else fit_optimal_model
            fc_macro = macro_fit(macro_df, selected_item, MACRO_END)

        if fc_macro is None:
            st.warning("μμΈ‘ λͺ¨λΈμ μμ±ν μ μμ΅λλ€.")
            _plot_price_history(macro_df)
        else:
            # Rows before the cutoff are drawn as history, later rows as forecast.
            cutoff_date = pd.Timestamp("2025-01-01")
            fig = go.Figure()

            historical_data = macro_df[macro_df["date"] < cutoff_date].copy()
            if not historical_data.empty:
                fig.add_trace(go.Scatter(
                    x=historical_data["date"],
                    y=historical_data["price"],
                    mode="lines",
                    name="μ€μ κ°κ²© (1996-2024)",
                    line={"color": "blue", "width": 2},
                ))

            forecast_data = fc_macro[fc_macro["ds"] >= cutoff_date].copy()
            if not forecast_data.empty:
                fig.add_trace(go.Scatter(
                    x=forecast_data["ds"],
                    y=forecast_data["yhat"],
                    mode="lines",
                    name="μμΈ‘ κ°κ²© (2025-2030)",
                    line={"color": "red", "width": 2, "dash": "dash"},
                ))
                # Band: an invisible upper line, then the lower line filled up to it.
                fig.add_trace(go.Scatter(
                    x=forecast_data["ds"],
                    y=forecast_data["yhat_upper"],
                    mode="lines",
                    line={"width": 0},
                    showlegend=False,
                ))
                fig.add_trace(go.Scatter(
                    x=forecast_data["ds"],
                    y=forecast_data["yhat_lower"],
                    mode="lines",
                    line={"width": 0},
                    fill="tonexty",
                    fillcolor="rgba(255, 0, 0, 0.1)",
                    name="95% μ λ’° ꡬκ°",
                ))

            # Start the y axis at zero so negative forecasts are not displayed.
            fig.update_yaxes(range=[0, None])
            fig.update_layout(
                title=f"{selected_item} μ₯κΈ° κ°κ²© μμΈ‘ (1996-2030)",
                xaxis_title="μ°λ",
                yaxis_title="κ°κ²© (μ)",
                legend={
                    "orientation": "h",
                    "yanchor": "bottom",
                    "y": 1.02,
                    "xanchor": "right",
                    "x": 1,
                },
            )
            st.plotly_chart(fig, use_container_width=True)

            # Year-end forecast metrics, each compared with the last observed price.
            try:
                latest_price = macro_df.iloc[-1]["price"]

                def get_yearly_prediction(year_end):
                    """Return (forecast, % change vs. latest price) nearest to Dec 31 of *year_end*."""
                    target_date = pd.Timestamp(f"{year_end}-12-31")
                    nearest_idx = (fc_macro["ds"] - target_date).abs().idxmin()
                    pred_value = fc_macro.loc[nearest_idx, "yhat"]
                    pct_change = (pred_value - latest_price) / latest_price * 100
                    return pred_value, pct_change

                def _show_year_metrics(years):
                    """Render one metric per year in a fresh row of three columns."""
                    for column, year in zip(st.columns(3), years):
                        pred_value, pct_change = get_yearly_prediction(year)
                        column.metric(f"{year}λ μμΈ‘κ°", format_currency(pred_value), f"{pct_change:+.1f}%")

                _show_year_metrics((2025, 2027, 2030))
                with st.expander("λ λ§μ μ°λλ³ μμΈ‘κ° λ³΄κΈ°"):
                    _show_year_metrics((2026, 2028, 2029))
            except Exception as e:
                st.error(f"μμΈ‘κ° κ³μ° μ€λ₯: {str(e)}")
    except Exception as e:
        st.error(f"μ₯κΈ° μμΈ‘ μ€λ₯ λ°μ: {str(e)}")
        import traceback
        st.code(traceback.format_exc())
        _plot_price_history(macro_df)
# -------------------------------------------------
# MICRO FORECAST 2024-2026 ------------------------
# -------------------------------------------------
st.subheader("π 2024β2026 λ¨κΈ° μμΈ‘ (μλ³)")

# Train the short-term model on data from 2021 onward, or from the first
# observation when the item starts later than that.
try:
    three_years_ago = pd.Timestamp("2021-01-01")
    if item_df["date"].min() > three_years_ago:
        three_years_ago = item_df["date"].min()
    micro_df = item_df[item_df["date"] >= three_years_ago].copy()
except Exception as e:
    st.error(f"λ¨κΈ° μμΈ‘ λ°μ΄ν° νν°λ§ μ€λ₯: {str(e)}")
    # Fall back to the 24 most recent rows.
    micro_df = item_df.sort_values("date").tail(24).copy()

if len(micro_df) < 2:
    st.warning(f"μ΅κ·Ό λ°μ΄ν°κ° μΆ©λΆνμ§ μμ΅λλ€.")
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=item_df["date"], y=item_df["price"], mode="lines", name="μ€μ κ°κ²©"))
    fig.update_layout(title=f"{selected_item} μ΅κ·Ό κ°κ²©")
    st.plotly_chart(fig, use_container_width=True)
else:
    try:
        with st.spinner("λ¨κΈ° μμΈ‘ λͺ¨λΈ μμ± μ€..."):
            # use_ensemble is the checkbox value set in the long-term section.
            if use_ensemble:
                fc_micro = fit_ensemble_model(micro_df, selected_item, MICRO_END)
            else:
                fc_micro = fit_optimal_model(micro_df, selected_item, MICRO_END)

        if fc_micro is not None:
            # Display window of the short-term view.
            start_date = pd.Timestamp("2024-01-01")
            end_date = pd.Timestamp("2026-12-31")

            # Monthly mean of the observed prices, dated with the first
            # observation of each month.
            monthly_historical = micro_df.copy()
            monthly_historical["year_month"] = monthly_historical["date"].dt.strftime("%Y-%m")
            monthly_historical = monthly_historical.groupby("year_month").agg({
                "date": "first",
                "price": "mean"
            }).reset_index(drop=True)
            monthly_historical = monthly_historical[
                (monthly_historical["date"] >= start_date) &
                (monthly_historical["date"] <= end_date)
            ]
            monthly_forecast = fc_micro[
                (fc_micro["ds"] >= start_date) &
                (fc_micro["ds"] <= end_date)
            ].copy()

            fig = go.Figure()

            # Observed 2024 prices.
            actual_2024 = monthly_historical[
                (monthly_historical["date"] >= pd.Timestamp("2024-01-01")) &
                (monthly_historical["date"] <= pd.Timestamp("2024-12-31"))
            ]
            if not actual_2024.empty:
                fig.add_trace(go.Scatter(
                    x=actual_2024["date"],
                    y=actual_2024["price"],
                    mode="lines+markers",
                    name="2024 μ€μ κ°κ²©",
                    line=dict(color="blue", width=2),
                    marker=dict(size=8)
                ))

            # Forecast rows after 2024.
            cutoff = pd.Timestamp("2024-12-31")
            future_data = monthly_forecast[monthly_forecast["ds"] > cutoff]
            if not future_data.empty:
                fig.add_trace(go.Scatter(
                    x=future_data["ds"],
                    y=future_data["yhat"],
                    mode="lines+markers",
                    name="2025-2026 μμΈ‘ κ°κ²©",
                    line=dict(color="red", width=2, dash="dash"),
                    marker=dict(size=8)
                ))
                # Band: an invisible upper line, then the lower line filled up to it.
                fig.add_trace(go.Scatter(
                    x=future_data["ds"],
                    y=future_data["yhat_upper"],
                    mode="lines",
                    line=dict(width=0),
                    showlegend=False
                ))
                fig.add_trace(go.Scatter(
                    x=future_data["ds"],
                    y=future_data["yhat_lower"],
                    mode="lines",
                    line=dict(width=0),
                    fill="tonexty",
                    fillcolor="rgba(255, 0, 0, 0.1)",
                    name="95% μ λ’° ꡬκ°"
                ))

            # Start the y axis at zero so negative forecasts are not displayed.
            fig.update_yaxes(range=[0, None])
            fig.update_layout(
                title=f"{selected_item} μλ³ λ¨κΈ° μμΈ‘ (2024-2026)",
                xaxis_title="μ",
                yaxis_title="κ°κ²© (μ)",
                xaxis=dict(
                    tickformat="%Y-%m",
                    dtick="M3",  # one tick every three months
                    tickangle=45
                ),
                legend=dict(
                    orientation="h",
                    yanchor="bottom",
                    y=1.02,
                    xanchor="right",
                    x=1
                )
            )
            st.plotly_chart(fig, use_container_width=True)

            # Table of the monthly forecasts after 2024.
            with st.expander("μλ³ μμΈ‘ κ°κ²© μμΈλ³΄κΈ°"):
                monthly_detail = monthly_forecast[monthly_forecast["ds"] > cutoff].copy()
                monthly_detail["λ μ§"] = monthly_detail["ds"].dt.strftime("%Yλ %mμ")
                monthly_detail["μμΈ‘κ°κ²©"] = monthly_detail["yhat"].apply(format_currency)
                monthly_detail["ννκ°"] = monthly_detail["yhat_lower"].apply(format_currency)
                monthly_detail["μνκ°"] = monthly_detail["yhat_upper"].apply(format_currency)
                st.dataframe(
                    monthly_detail[["λ μ§", "μμΈ‘κ°κ²©", "ννκ°", "μνκ°"]],
                    hide_index=True
                )

            def get_monthly_prediction(year, month):
                """Return (forecast, % change vs. latest price) for a calendar month.

                The row is selected by the year and month of ``ds``. Picking the
                row nearest to the 1st of the month would return the previous
                month whenever forecast rows are stamped at month end.
                Returns ``(None, None)`` when the month has no forecast row.
                """
                stamps = monthly_forecast["ds"]
                in_month = monthly_forecast[(stamps.dt.year == year) & (stamps.dt.month == month)]
                if in_month.empty:
                    return None, None
                pred_value = in_month["yhat"].iloc[-1]
                # Baseline: latest monthly mean in the window, else the last raw row.
                latest_price = monthly_historical.iloc[-1]["price"] if not monthly_historical.empty else micro_df.iloc[-1]["price"]
                pct_change = (pred_value - latest_price) / latest_price * 100
                return pred_value, pct_change

            def _show_month_metric(column, year, month):
                """Render the forecast metric for one month, or a placeholder."""
                label = f"{year}λ {month}μ"
                pred_value, pct_change = get_monthly_prediction(year, month)
                if pred_value is not None:
                    column.metric(label, format_currency(pred_value), f"{pct_change:+.1f}%")
                else:
                    column.metric(label, "λ°μ΄ν° μμ", "0%")

            # Headline months.
            st.subheader("μ£Όμ μλ³ μμΈ‘κ°")
            for column, (year, month) in zip(st.columns(3), ((2025, 6), (2025, 12), (2026, 12))):
                _show_month_metric(column, year, month)

            # Quarter-end months of both forecast years.
            with st.expander("λ λ§μ μλ³ μμΈ‘κ° λ³΄κΈ°"):
                for year in [2025, 2026]:
                    st.write(f"### {year}λ λΆκΈ°λ³ μμΈ‘κ°")
                    for column, month in zip(st.columns(4), (3, 6, 9, 12)):
                        _show_month_metric(column, year, month)
        else:
            st.warning("λ¨κΈ° μμΈ‘ λͺ¨λΈμ μμ±ν μ μμ΅λλ€.")
    except Exception as e:
        # Imported here because the only other import of traceback sits inside
        # the long-term section's error handler and may never have run.
        import traceback
        st.error(f"λ¨κΈ° μμΈ‘ μ€λ₯: {str(e)}")
        st.code(traceback.format_exc())
# -------------------------------------------------
# SEASONALITY & PATTERN ---------------------------
# -------------------------------------------------
# Only shown when the short-term section produced a forecast frame.
if 'fc_micro' in locals() and fc_micro is not None:
    with st.expander("π μμ¦λλ¦¬ν° & ν¨ν΄ μ€λͺ "):
        try:
            # A seasonal component exists when any value is non-zero. Testing
            # the sum instead would reject components whose values cancel out.
            has_seasonality = (
                "yearly" in fc_micro.columns
                and fc_micro["yearly"].fillna(0).ne(0).any()
            )
            if has_seasonality:
                # Mean seasonal component per calendar month.
                month_season = fc_micro.copy()
                month_season["month"] = month_season["ds"].dt.month
                month_seasonality = month_season.groupby("month")["yearly"].mean()

                month_names = ["1μ", "2μ", "3μ", "4μ", "5μ", "6μ", "7μ", "8μ", "9μ", "10μ", "11μ", "12μ"]
                # Label each bar from its own month number so the labels stay
                # correct when some calendar month is missing from the frame.
                bar_labels = [month_names[month - 1] for month in month_seasonality.index]

                fig = go.Figure()
                fig.add_trace(go.Bar(
                    x=bar_labels,
                    y=month_seasonality.values,
                    marker_color=['blue' if x >= 0 else 'red' for x in month_seasonality.values]
                ))
                fig.update_layout(
                    title=f"{selected_item} μλ³ κ³μ μ± ν¨ν΄",
                    xaxis_title="μ",
                    yaxis_title="μλμ κ°κ²© λ³λ",
                )
                st.plotly_chart(fig, use_container_width=True)

                # Peak month, low month and the spread between them.
                peak_month = month_seasonality.idxmax()
                low_month = month_seasonality.idxmin()
                seasonality_range = month_seasonality.max() - month_seasonality.min()
                st.markdown(
                    f"**μ°κ° νΌν¬ μ:** {month_names[peak_month-1]} \n"
                    f"**μ°κ° μ μ μ:** {month_names[low_month-1]} \n"
                    f"**μ°κ° λ³λν:** {seasonality_range:.1f}")

                # Classify the strength of the seasonality by the size of the
                # spread (thresholds are in the units of the "yearly" column).
                if abs(seasonality_range) > 30:
                    st.info(f"{selected_item}μ(λ) κ³μ μ±μ΄ λ§€μ° κ°ν νλͺ©μ λλ€. νΉμ λ¬μ κ°κ²©μ΄ ν¬κ² λ³λν μ μμ΅λλ€.")
                elif abs(seasonality_range) > 10:
                    st.info(f"{selected_item}μ(λ) κ³μ μ±μ΄ μ€κ° μ λμΈ νλͺ©μ λλ€.")
                else:
                    st.info(f"{selected_item}μ(λ) κ³μ μ±μ΄ μ½ν νλͺ©μ λλ€. μ°μ€ κ°κ²©μ΄ λΉκ΅μ μμ μ μ λλ€.")
        except Exception as e:
            st.error(f"κ³μ μ± λΆμ μ€λ₯: {str(e)}")
            st.info("μ΄ νλͺ©μ λν κ³μ μ± ν¨ν΄μ λΆμν μ μμ΅λλ€.")

# -------------------------------------------------
# FOOTER ------------------------------------------
# -------------------------------------------------
st.markdown("---")
st.caption("Β© 2025 νλͺ©λ³ κ°κ²© μμΈ‘ μμ€ν | λ°μ΄ν° λΆμ μλν")