"""Streamlit dashboard that forecasts per-item prices with Prophet.

The script loads a price table (Parquet preferred, CSV as fallback),
normalizes its column names to ``date`` / ``item`` / ``price``, and renders,
for the item picked in the sidebar: a long-horizon forecast, a short-horizon
forecast, the fitted seasonality components, a cross-item correlation heatmap
and a rolling standard-deviation chart.
"""
from datetime import date
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
import streamlit as st
from prophet import Prophet

# -------------------------------------------------
# CONFIG ------------------------------------------
# -------------------------------------------------
CSV_PATH = Path("price_data.csv")
PARQUET_PATH = Path("domae-202503.parquet")  # daily/monthly prices, 1996 through 2025-03
MACRO_START, MACRO_END = "1996-01-01", "2030-12-31"
MICRO_START, MICRO_END = "2020-01-01", "2026-12-31"
# Prophet raises ValueError when asked to fit fewer than two non-NaN rows.
MIN_FIT_ROWS = 2

st.set_page_config(page_title="품목별 가격 예측", page_icon="📈", layout="wide")

# -------------------------------------------------
# UTILITIES ---------------------------------------
# -------------------------------------------------
DATE_CANDIDATES = {"date", "ds", "ymd", "날짜", "prce_reg_mm", "etl_ldg_dt"}
ITEM_CANDIDATES = {"item", "품목", "code", "category", "pdlt_nm", "spcs_nm"}
PRICE_CANDIDATES = {"price", "y", "value", "가격", "avrg_prce"}


def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Rename date/item/price columns to ``date``, ``item``, ``price``.

    Column names are matched case-insensitively against the ``*_CANDIDATES``
    sets. The first item-like column becomes ``item`` and the next one becomes
    ``species``; the two are then merged into a composite ``"item-species"``
    label. When several columns map to the same canonical name, only the
    first one (in column order) is kept so that ``df["date"]`` etc. is always
    a single Series.

    Args:
        df: Raw table as read from disk. It is not modified.

    Returns:
        A new DataFrame with canonical column names where they could be
        identified. Missing canonical columns are left for the caller to
        detect.
    """
    col_map = {}
    for c in df.columns:
        lc = str(c).lower()
        if lc in DATE_CANDIDATES:
            col_map[c] = "date"
        elif lc in PRICE_CANDIDATES:
            col_map[c] = "price"
        elif lc in ITEM_CANDIDATES:
            col_map[c] = "item" if "item" not in col_map.values() else "species"
    df = df.rename(columns=col_map)

    # Several source columns can map to one canonical name (e.g. both
    # "prce_reg_mm" and "etl_ldg_dt" are date candidates). Duplicate labels
    # would make df["date"] a DataFrame, so keep only the first occurrence.
    duplicated = df.columns.duplicated()
    if duplicated.any():
        df = df.loc[:, ~duplicated].copy()

    # Fall back to a datetime index when no date column was recognised.
    if "date" not in df.columns and df.index.dtype.kind == "M":
        df = df.reset_index()
        df = df.rename(columns={df.columns[0]: "date"})

    if "date" in df.columns:
        col = df["date"]
        compact_candidate = (
            pd.api.types.is_object_dtype(col)
            or pd.api.types.is_string_dtype(col)
            # Integer YYYYMM / YYYYMMDD would otherwise be read as epoch
            # nanoseconds by pd.to_datetime.
            or pd.api.types.is_integer_dtype(col)
        )
        non_null = col.dropna()
        if compact_candidate and not non_null.empty:
            sample = str(non_null.iloc[0])
            if sample.isdigit() and len(sample) in (6, 8):
                # Only the YYYYMM prefix is parsed, so 8-digit values are
                # collapsed to the first day of their month.
                df["date"] = pd.to_datetime(
                    col.astype(str).str[:6], format="%Y%m", errors="coerce"
                )

    if {"item", "species"}.issubset(df.columns):
        df["item"] = (
            df["item"].astype(str).str.strip()
            + "-"
            + df["species"].astype(str).str.strip()
        )
        df = df.drop(columns=["species"])
    return df


@st.cache_data(show_spinner=False)
def load_data() -> pd.DataFrame:
    """Load and clean the price table.

    Reads ``PARQUET_PATH`` if it exists, otherwise ``CSV_PATH``. Shows an
    error and stops the Streamlit run when neither file exists or when any of
    the ``date`` / ``item`` / ``price`` columns cannot be identified.

    Returns:
        DataFrame sorted by ``date`` with unparseable dates, non-numeric
        prices and rows missing any of the three key fields removed.
    """
    if PARQUET_PATH.exists():
        df = pd.read_parquet(PARQUET_PATH)
    elif CSV_PATH.exists():
        df = pd.read_csv(CSV_PATH)
    else:
        st.error("💾 price_data.csv 또는 domae-202503.parquet 파일을 찾을 수 없습니다.")
        st.stop()

    df = _standardize_columns(df)
    missing = {c for c in ["date", "item", "price"] if c not in df.columns}
    if missing:
        st.error(f"필수 컬럼 누락: {', '.join(missing)} — 파일 컬럼명을 확인하세요.")
        st.stop()

    df["date"] = pd.to_datetime(df["date"], errors="coerce")
    # Non-numeric prices become NaN here and are dropped below, instead of
    # failing later inside the model fit or the rolling statistics.
    df["price"] = pd.to_numeric(df["price"], errors="coerce")
    df = df.dropna(subset=["date", "item", "price"])
    return df.sort_values("date")


@st.cache_data(show_spinner=False)
def get_items(df: pd.DataFrame):
    """Return the sorted list of distinct values in the ``item`` column."""
    return sorted(df["item"].unique())


@st.cache_data(show_spinner=False)
def fit_prophet(df: pd.DataFrame, horizon_end: str):
    """Fit a yearly-seasonal Prophet model and forecast daily to a horizon.

    Args:
        df: History with ``date`` and ``price`` columns.
        horizon_end: Last date to forecast, parseable by ``pd.Timestamp``.

    Returns:
        ``(model, forecast)`` where ``forecast`` covers the history dates plus
        one row per day up to ``horizon_end``. If the history already reaches
        past ``horizon_end`` no future rows are added.
    """
    m = Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False)
    m.fit(df.rename(columns={"date": "ds", "price": "y"}))
    # Clamp at zero: a negative period count is not a valid horizon.
    periods = max((pd.Timestamp(horizon_end) - df["date"].max()).days, 0)
    future = m.make_future_dataframe(periods=periods, freq="D")
    forecast = m.predict(future)
    return m, forecast


def _forecast_value_at(forecast: pd.DataFrame, when: str) -> float:
    """Return ``yhat`` from the forecast row whose ``ds`` is closest to ``when``.

    A nearest-date lookup is used instead of an exact match so that the value
    is still found when the forecast timestamps carry a time-of-day component
    or the horizon date itself is not present.
    """
    distance = (forecast["ds"] - pd.Timestamp(when)).abs()
    return float(forecast.loc[distance.idxmin(), "yhat"])


def _render_forecast(history: pd.DataFrame, horizon_end: str, chart_title: str,
                     metric_label: str, latest_price: float):
    """Fit, plot and summarize one forecast section.

    Draws the forecast line with the actual prices overlaid, then a metric
    showing the predicted price at ``horizon_end`` and its percent change
    relative to ``latest_price``.

    Returns:
        ``(model, forecast)``, or ``(None, None)`` when ``history`` has too
        few rows to fit a model (a warning is shown instead).
    """
    if len(history) < MIN_FIT_ROWS:
        st.warning(f"{chart_title}: 예측에 필요한 데이터가 부족합니다.")
        return None, None

    model, forecast = fit_prophet(history, horizon_end)
    fig = px.line(forecast, x="ds", y="yhat", title=chart_title)
    fig.add_scatter(x=history["date"], y=history["price"], mode="lines", name="Actual")
    st.plotly_chart(fig, use_container_width=True)

    predicted = _forecast_value_at(forecast, horizon_end)
    # A zero baseline has no meaningful percent change; show the value only.
    delta = None
    if latest_price:
        delta = f"{(predicted - latest_price) / latest_price * 100:+.1f}%"
    st.metric(metric_label, f"{predicted:,.0f}", delta)
    return model, forecast


# -------------------------------------------------
# LOAD DATA ---------------------------------------
# -------------------------------------------------
raw_df = load_data()

st.sidebar.header("🔍 품목 선택")
selected_item = st.sidebar.selectbox("품목", get_items(raw_df))
current_date = date.today()
st.sidebar.caption(f"오늘: {current_date}")

item_df = raw_df.query("item == @selected_item").copy()
if item_df.empty:
    st.error("선택한 품목 데이터 없음")
    st.stop()

# Most recent observed price; raw_df is sorted by date, so this is the last row.
latest_price = item_df.iloc[-1]["price"]

# -------------------------------------------------
# MACRO FORECAST 1996-2030 ------------------------
# -------------------------------------------------
st.header(f"📈 {selected_item} 가격 예측 대시보드")
macro_df = item_df[item_df["date"] >= MACRO_START]
m_macro, fc_macro = _render_forecast(
    macro_df, MACRO_END, "Macro Forecast 1996–2030", "2030 예측가", latest_price
)

# -------------------------------------------------
# MICRO FORECAST 2024-2026 ------------------------
# -------------------------------------------------
st.subheader("🔎 2024–2026 단기 예측")
micro_df = item_df[item_df["date"] >= MICRO_START]
m_micro, fc_micro = _render_forecast(
    micro_df, MICRO_END, "Micro Forecast 2024–2026", "2026 예측가", latest_price
)

# -------------------------------------------------
# SEASONALITY & PATTERN ---------------------------
# -------------------------------------------------
# Only available when the short-horizon model could be fitted.
if m_micro is not None:
    with st.expander("📆 시즈널리티 & 패턴 설명"):
        comp_fig = m_micro.plot_components(fc_micro)
        st.pyplot(comp_fig)
        # Close explicitly so figures do not accumulate across reruns.
        plt.close(comp_fig)

        # Average yearly-seasonality effect per calendar month.
        month_season = (
            fc_micro[["ds", "yearly"]]
            .assign(month=lambda d: d["ds"].dt.month)
            .groupby("month")["yearly"]
            .mean()
        )
        # Two trailing spaces before each newline make a Markdown line break.
        st.markdown(
            f"**연간 피크 월:** {int(month_season.idxmax())}월  \n"
            f"**연간 저점 월:** {int(month_season.idxmin())}월  \n"
            f"**연간 변동폭:** {month_season.max() - month_season.min():.1f}"
        )

# -------------------------------------------------
# CORRELATION HEATMAP -----------------------------
# -------------------------------------------------
st.subheader("🧮 품목 간 상관관계")
# Monthly mean price per item, one column per item, then pairwise correlation.
monthly_pivot = (
    raw_df.assign(month=lambda d: d["date"].dt.to_period("M"))
    .groupby(["month", "item"], as_index=False)["price"]
    .mean()
    .pivot(index="month", columns="item", values="price")
)
corr = monthly_pivot.corr()
fig, ax = plt.subplots(figsize=(12, 10))
# Hide the upper triangle (including the diagonal); the matrix is symmetric.
mask = np.triu(np.ones_like(corr, dtype=bool))
sns.heatmap(corr, mask=mask, cmap="RdBu_r", center=0, linewidths=.5, ax=ax)
st.pyplot(fig)
plt.close(fig)
st.info("빨간 영역: 가격 동조화 / 파란 영역: 대체재 가능성")

# -------------------------------------------------
# VOLATILITY --------------------------------------
# -------------------------------------------------
st.subheader("📊 30일 이동 표준편차 (가격 변동성)")
# NOTE(review): rolling(30) is a window of 30 observations, not 30 calendar
# days; the two coincide only when there is one row per day — confirm intent.
vol = (
    item_df.set_index("date")["price"]
    .rolling(30)
    .std()
    .dropna()
    .reset_index()
)
fig_vol = px.area(vol, x="date", y="price", title="Rolling 30D Std Dev")
st.plotly_chart(fig_vol, use_container_width=True)

st.caption("데이터: domae-202503.parquet · Prophet 예측 · Streamlit 대시보드")