NH-Prediction

Running

File size: 5,351 Bytes

1acd6e1
 
 
 
 
 
 
 
dc2be38
1acd6e1
dc2be38
 
 
 
 
4fb476c
 
 
 
 
dc2be38
 
 
1acd6e1
dc2be38
 
 
 
 
 
 
 
 
 
 
1acd6e1
 
 
 
 
 
 
4fb476c
1acd6e1
 
 
 
 
 
 
dc2be38
 
 
 
 
 
 
1acd6e1
dc2be38
1acd6e1
dc2be38
1acd6e1
dc2be38
1acd6e1
 
dc2be38
 
 
 
1acd6e1
dc2be38
 
4fb476c
 
1acd6e1
 
 
4fb476c
dc2be38
 
 
1acd6e1
dc2be38
 
4fb476c
 
 
1acd6e1
 
 
dc2be38
 
 
1acd6e1
dc2be38
 
1acd6e1
 
4fb476c
dc2be38
4fb476c
dc2be38
 
 
 
 
 
 
 
 
 
 
 
1acd6e1
dc2be38
1acd6e1
 
 
dc2be38
1acd6e1
dc2be38
 
 
 
1acd6e1
 
dc2be38

import streamlit as st
import pandas as pd
import numpy as np
from prophet import Prophet
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import date
from pathlib import Path

# -------------------------------------------------
# CONFIG ------------------------------------------
# -------------------------------------------------
# Candidate input files, resolved relative to the working directory.
CSV_PATH = Path("price_data.csv")
# Price data starting in 1996. NOTE(review): the file name suggests coverage
# through 2025-03 — confirm against the actual data.
PARQUET_PATH = Path("domae-202503.parquet")

# Date bounds (ISO strings) for the long-range "macro" forecast.
MACRO_START = "1996-01-01"
MACRO_END = "2030-12-31"

# Date bounds (ISO strings) for the short-range "micro" forecast.
MICRO_START = "2020-01-01"
MICRO_END = "2026-12-31"

# Set the page title, page icon, and the wide layout for the whole app.
st.set_page_config(page_title="품목별 가격 예측", page_icon="📈", layout="wide")

# -------------------------------------------------
# UTILITIES ---------------------------------------
# -------------------------------------------------
@st.cache_data(show_spinner=False)
def load_data() -> pd.DataFrame:
    """Read the price table and return it ordered by date.

    The Parquet file is tried first and the CSV file second. When neither
    exists, an error is shown in the app and the script run is stopped.

    Returns:
        The loaded frame with its ``date`` column converted to datetime and
        its rows sorted by that column.
    """
    frame = None
    if PARQUET_PATH.exists():
        frame = pd.read_parquet(PARQUET_PATH)
    elif CSV_PATH.exists():
        frame = pd.read_csv(CSV_PATH)

    if frame is None:
        st.error("데이터 파일을 찾을 수 없습니다. price_data.csv 또는 domae-202503.parquet" )
        st.stop()

    # Normalize the date column before ordering the rows chronologically.
    frame["date"] = pd.to_datetime(frame["date"])
    return frame.sort_values("date")

@st.cache_data(show_spinner=False)
def get_items(df: pd.DataFrame):
    """Return the distinct values of the ``item`` column in sorted order."""
    names = list(df["item"].unique())
    names.sort()
    return names

@st.cache_data(show_spinner=False)
def fit_prophet(df: pd.DataFrame, horizon_end: str):
    """Fit a Prophet model on a price history and forecast daily up to a horizon.

    Args:
        df: Frame with a datetime ``date`` column and a ``price`` column;
            they are renamed to Prophet's ``ds`` / ``y`` before fitting.
        horizon_end: Date string (parseable by ``pd.Timestamp``) marking the
            last day the forecast should reach.

    Returns:
        A ``(model, forecast)`` tuple: the fitted ``Prophet`` instance and the
        frame returned by ``model.predict``.
    """
    model = Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False)
    model.fit(df.rename(columns={"date": "ds", "price": "y"}))

    # Whole days from the last observation to the horizon. Clamp at zero so
    # data that already extends past ``horizon_end`` does not request a
    # negative number of future periods, which would raise.
    days_ahead = max((pd.Timestamp(horizon_end) - df["date"].max()).days, 0)
    future = model.make_future_dataframe(periods=days_ahead, freq="D")
    forecast = model.predict(future)
    return model, forecast

# -------------------------------------------------
# LOAD DATA ---------------------------------------
# -------------------------------------------------
raw_df = load_data()

# Sidebar: item picker plus today's date for reference.
with st.sidebar:
    st.header("🔍 품목 선택")
    selected_item = st.selectbox("품목", get_items(raw_df))
    current_date = date.today()
    st.caption(f"오늘: {current_date}")

# Work on a private copy of the chosen item's rows; bail out if there are none.
item_df = raw_df.query("item == @selected_item").copy()
if item_df.empty:
    st.error("선택한 품목 데이터 없음")
    st.stop()

# -------------------------------------------------
# PLOTS -------------------------------------------
# -------------------------------------------------
st.header(f"📈 {selected_item} 가격 예측 대시보드")

# Macro forecast 1996–2030: fit on every row from MACRO_START onward and
# overlay the actual prices on the predicted curve.
macro_df = item_df[item_df["date"] >= MACRO_START]
m_macro, fc_macro = fit_prophet(macro_df, MACRO_END)
fig_macro = px.line(fc_macro, x="ds", y="yhat", title="Macro Forecast 1996–2030")
fig_macro.add_scatter(x=macro_df["date"], y=macro_df["price"], mode="lines", name="Actual")
st.plotly_chart(fig_macro, use_container_width=True)

# Most recent observed price (rows are date-ordered); also used as the
# baseline for the percentage change shown in the metric below.
latest_price = macro_df.iloc[-1]["price"]
# Use the last forecast row at or before the horizon instead of requiring an
# exact timestamp match: an exact match yields an empty selection (and an
# IndexError) whenever no forecast row falls precisely on MACRO_END.
macro_pred = fc_macro.loc[fc_macro["ds"] <= pd.Timestamp(MACRO_END), "yhat"].iloc[-1]
macro_pct = (macro_pred - latest_price) / latest_price * 100
st.metric("2030 예측가", f"{macro_pred:,.0f}", f"{macro_pct:+.1f}%")

# Micro forecast 2024–2026
# NOTE(review): the titles say 2024–2026 while the model is fitted on rows
# from MICRO_START (2020-01-01) onward — confirm which range is intended.
st.subheader("🔎 2024–2026 단기 예측")
micro_df = item_df[item_df["date"] >= MICRO_START]

# Prophet cannot be fitted on fewer than two non-null observations. Items with
# little or no recent data would otherwise crash the whole page, so show a
# warning and skip the short-range forecast and its seasonality panel instead.
if micro_df["price"].notna().sum() < 2:
    st.warning(f"{MICRO_START} 이후 데이터가 부족하여 단기 예측을 표시할 수 없습니다.")
else:
    m_micro, fc_micro = fit_prophet(micro_df, MICRO_END)
    fig_micro = px.line(fc_micro, x="ds", y="yhat", title="Micro Forecast 2024–2026")
    fig_micro.add_scatter(x=micro_df["date"], y=micro_df["price"], mode="lines", name="Actual")
    st.plotly_chart(fig_micro, use_container_width=True)

    # Last forecast row at or before the horizon; an exact-match lookup raises
    # IndexError when no row falls precisely on MICRO_END.
    micro_pred = fc_micro.loc[fc_micro["ds"] <= pd.Timestamp(MICRO_END), "yhat"].iloc[-1]
    micro_pct = (micro_pred - latest_price) / latest_price * 100
    st.metric("2026 예측가", f"{micro_pred:,.0f}", f"{micro_pct:+.1f}%")

    # Seasonality components
    with st.expander("📆 시즈널리티 & 패턴 설명"):
        comp_fig = m_micro.plot_components(fc_micro)
        st.pyplot(comp_fig)
        # Release the figure once rendered; otherwise every rerun of the
        # script leaves another open figure behind in pyplot.
        plt.close(comp_fig)
        # Average the yearly seasonal component per calendar month to find
        # the peak month, the trough month and the spread between them.
        month_season = (fc_micro[["ds", "yearly"]]
                        .assign(month=lambda d: d.ds.dt.month)
                        .groupby("month")["yearly"].mean())
        st.markdown(
            f"**연간 피크 월:** {int(month_season.idxmax())}월\n\n"
            f"**연간 저점 월:** {int(month_season.idxmin())}월\n\n"
            f"**연간 변동폭:** {month_season.max() - month_season.min():.1f}")

# Correlation heatmap: correlate the monthly mean prices of all items.
st.subheader("🧮 품목 간 상관관계")
# One row per month, one column per item, values are that month's mean price.
monthly_pivot = (raw_df.assign(month=lambda d: d.date.dt.to_period("M"))
                        .groupby(["month", "item"], as_index=False)["price"].mean()
                        .pivot(index="month", columns="item", values="price"))

corr = monthly_pivot.corr()
# Hide the upper triangle (diagonal included) so each pair is drawn once.
mask = np.triu(np.ones_like(corr, dtype=bool))
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(corr, mask=mask, cmap="RdBu_r", center=0, linewidths=.5, ax=ax)
st.pyplot(fig)
# Figures created through pyplot stay registered until closed; close this one
# after rendering so reruns of the script do not pile up open figures.
plt.close(fig)

st.info("빨간 영역: 가격 동조화 / 파란 영역: 대체재 가능성.")

# Volatility chart: rolling standard deviation of the selected item's price.
st.subheader("📊 30일 이동 표준편차 (가격 변동성)")
price_by_date = item_df.set_index("date")["price"]
# NOTE(review): the window is 30 rows, which equals 30 days only when there is
# exactly one row per day — confirm against the data.
rolling_std = price_by_date.rolling(30).std()
# Drop the leading rows where the window is not yet full, then restore
# ``date`` as a regular column for plotting.
vol = rolling_std.dropna().reset_index()
fig_vol = px.area(vol, x="date", y="price", title="Rolling 30D Std Dev")
st.plotly_chart(fig_vol, use_container_width=True)

st.caption("데이터: domae-202503.parquet · Prophet 예측 · Streamlit 대시보드")