# Page-scrape residue (not part of the script): "Spaces: Running"
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
from prophet import Prophet | |
import plotly.express as px | |
import matplotlib.pyplot as plt | |
from datetime import date | |
from pathlib import Path | |
import matplotlib.font_manager as fm | |
import matplotlib as mpl | |
# ------------------------------------------------- | |
# CONFIG ------------------------------------------ | |
# ------------------------------------------------- | |
# Input CSV holding the price history (resolved relative to the working directory).
CSV_PATH = Path("2025-domae.csv")

# Date windows for the two forecasts: the *_START value filters the history,
# the *_END value is the forecast horizon passed to fit_prophet.
MACRO_START, MACRO_END = "1996-01-01", "2030-12-31"
MICRO_START, MICRO_END = "2020-01-01", "2026-12-31"

# Matplotlib font setup: use the first installed font whose name contains one
# of these keywords (presumably fonts that ship Hangul glyphs), otherwise fall
# back to DejaVu Sans.
_font_keywords = ("gothic", "gulim", "malgun", "nanum", "batang")
font_list = [
    font.name
    for font in fm.fontManager.ttflist
    if any(keyword in font.name.lower() for keyword in _font_keywords)
]
if not font_list:
    plt.rcParams["font.family"] = "DejaVu Sans"
else:
    font_name = font_list[0]
    plt.rcParams["font.family"] = font_name
    # Only changed when a matching font was found, as in the original setup.
    mpl.rcParams["axes.unicode_minus"] = False

# Issued before any other Streamlit call in this script.
st.set_page_config(page_title="νλͺ©λ³ κ°κ²© μμΈ‘", page_icon="π", layout="wide")
# ------------------------------------------------- | |
# UTILITIES --------------------------------------- | |
# ------------------------------------------------- | |
# Lower-cased source column names recognised for each canonical role.
DATE_CANDIDATES = {"date", "ds", "ymd", "λ μ§", "prce_reg_mm", "etl_ldg_dt"}
ITEM_CANDIDATES = {"item", "νλͺ©", "code", "category", "pdlt_nm", "spcs_nm"}
PRICE_CANDIDATES = {"price", "y", "value", "κ°κ²©", "avrg_prce"}


def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Rename recognised columns to ``date`` / ``item`` / ``price``.

    Column labels are matched case-insensitively against the candidate sets.
    The first item-like column becomes ``item`` and any later one becomes
    ``species``; the two are then merged into a single ``"item-species"``
    label. When several columns map to the same canonical name only the first
    is kept. Compact ``YYYYMM`` / ``YYYYMMDD`` dates are converted to
    month-start timestamps.

    Args:
        df: Raw frame, e.g. straight from ``pd.read_csv``. It is not modified.

    Returns:
        A new frame with standardized column names. Columns that match no
        candidate set are passed through unchanged.
    """
    col_map = {}
    for c in df.columns:
        # str() so that non-string labels (e.g. integer headers) do not crash.
        lc = str(c).lower()
        if lc in DATE_CANDIDATES:
            col_map[c] = "date"
        elif lc in PRICE_CANDIDATES:
            col_map[c] = "price"
        elif lc in ITEM_CANDIDATES:
            # First hit is the item, every later hit is treated as species.
            col_map[c] = "species" if "item" in col_map.values() else "item"
    df = df.rename(columns=col_map)

    # Several source columns may have mapped to one name: keep the first.
    if df.columns.duplicated().any():
        df = df.loc[:, ~df.columns.duplicated()].copy()

    # A datetime index stands in for a missing date column.
    if "date" not in df.columns and df.index.dtype.kind == "M":
        df = df.reset_index()
        df = df.rename(columns={df.columns[0]: "date"})

    # Compact YYYYMM / YYYYMMDD dates. read_csv parses such a column as
    # integers (or floats when values are missing); left alone, a later
    # pd.to_datetime would read those numbers as epoch offsets, so the check
    # is not limited to object dtype.
    if "date" in df.columns and not pd.api.types.is_datetime64_any_dtype(df["date"]):
        date_text = df["date"].astype(str).str.strip()
        if pd.api.types.is_float_dtype(df["date"]):
            date_text = date_text.str.replace(r"\.0$", "", regex=True)
        present = date_text[df["date"].notna()]
        if not present.empty:
            # Sample the first non-missing value instead of blindly row 0.
            sample = str(present.iloc[0])
            if sample.isdigit() and len(sample) in (6, 8):
                # Only the YYYYMM prefix is used, so 8-digit dates collapse
                # to the first day of their month.
                df["date"] = pd.to_datetime(
                    date_text.str[:6], format="%Y%m", errors="coerce"
                )

    # Merge item + species into one label.
    if {"item", "species"}.issubset(df.columns):
        df["item"] = (
            df["item"].astype(str).str.strip()
            + "-"
            + df["species"].astype(str).str.strip()
        )
        df = df.drop(columns=["species"])
    return df
def load_data() -> pd.DataFrame:
    """Load the price CSV and return a cleaned frame sorted by date.

    Reads ``CSV_PATH``, standardizes the column names, converts ``date`` to
    datetime and ``price`` to numbers, and drops rows missing any of
    ``date`` / ``item`` / ``price``. Progress and diagnostics are written to
    the Streamlit sidebar.

    Returns:
        The cleaned frame; it may be empty, in which case an error has
        already been shown.

    Side effects:
        Calls ``st.stop()`` when the file is missing, when a required column
        is absent, or when loading raises an exception (the traceback is
        displayed first).
    """
    try:
        if not CSV_PATH.exists():
            st.error(f"πΎ {CSV_PATH} νμΌμ μ°Ύμ μ μμ΅λλ€.")
            st.stop()
        st.sidebar.info(f"{CSV_PATH} νμΌμμ λ°μ΄ν°λ₯Ό λΆλ¬μ΅λλ€.")

        # Read the CSV; retry with cp949 when it is not valid UTF-8
        # (assumed to be the likely legacy encoding for this data — TODO confirm).
        try:
            df = pd.read_csv(CSV_PATH)
        except UnicodeDecodeError:
            df = pd.read_csv(CSV_PATH, encoding="cp949")
        st.sidebar.success(f"CSV λ°μ΄ν° λ‘λ μλ£: {len(df)}κ° ν")

        # Show the column names before and after standardization.
        st.sidebar.write("μλ³Έ λ°μ΄ν° 컬λΌ:", list(df.columns))
        df = _standardize_columns(df)
        st.sidebar.write("νμ€ν ν 컬λΌ:", list(df.columns))

        # A list keeps the reported order deterministic.
        missing = [c for c in ("date", "item", "price") if c not in df.columns]
        if missing:
            st.error(f"νμ μ»¬λΌ λλ½: {', '.join(missing)} β νμΌ μ»¬λΌλͺ μ νμΈνμΈμ.")
            st.stop()

        # Date conversion; unparseable values become NaT and are reported.
        before_date_convert = len(df)
        df["date"] = pd.to_datetime(df["date"], errors="coerce")
        after_date_convert = df.dropna(subset=["date"]).shape[0]
        if before_date_convert != after_date_convert:
            st.warning(f"λ μ§ λ³ν μ€ {before_date_convert - after_date_convert}κ° νμ΄ μ μΈλμμ΅λλ€.")

        # Prices read as text (e.g. "1,234") would break the later mean() and
        # model fit, so coerce them; unparseable values become NaN and are
        # dropped below.
        if not pd.api.types.is_numeric_dtype(df["price"]):
            price_text = df["price"].astype(str).str.replace(",", "", regex=False).str.strip()
            df["price"] = pd.to_numeric(price_text, errors="coerce")

        # Drop rows missing any required value.
        before_na_drop = len(df)
        df = df.dropna(subset=["date", "item", "price"])
        after_na_drop = len(df)
        if before_na_drop != after_na_drop:
            st.warning(f"NA μ κ±° μ€ {before_na_drop - after_na_drop}κ° νμ΄ μ μΈλμμ΅λλ€.")

        df = df.sort_values("date")

        # Report the covered date range and the number of distinct items.
        if len(df) > 0:
            st.sidebar.write(f"λ°μ΄ν° λ μ§ λ²μ: {df['date'].min().strftime('%Y-%m-%d')} ~ {df['date'].max().strftime('%Y-%m-%d')}")
            st.sidebar.write(f"μ΄ νλͺ© μ: {df['item'].nunique()}")
        else:
            st.error("μ ν¨ν λ°μ΄ν°κ° μμ΅λλ€!")
        return df
    except Exception as e:
        st.error(f"λ°μ΄ν° λ‘λ μ€ μ€λ₯ λ°μ: {str(e)}")
        # Show the full traceback to help diagnose the failure.
        import traceback
        st.code(traceback.format_exc())
        st.stop()
def get_items(df: pd.DataFrame):
    """Return the distinct values of the ``item`` column in ascending order."""
    distinct_items = list(df["item"].unique())
    distinct_items.sort()
    return distinct_items
def fit_prophet(df: pd.DataFrame, horizon_end: str):
    """Fit a Prophet model on one item's prices and forecast to ``horizon_end``.

    Args:
        df: Frame with ``date`` and ``price`` columns; it is not modified.
        horizon_end: Date string; the forecast extends at least this far
            (and always at least one day past the last observation).

    Returns:
        ``(model, forecast)`` on success, or ``(None, None)`` when fewer than
        two usable dates remain or when fitting/predicting raises (the
        problem is reported through Streamlit).
    """
    # One observation per date: discard incomplete rows, average duplicates.
    df = (
        df.dropna(subset=["date", "price"])
        .groupby("date")["price"]
        .mean()
        .reset_index()
    )
    if len(df) < 2:
        st.warning(f"λ°μ΄ν° ν¬μΈνΈκ° λΆμ‘±ν©λλ€. μμΈ‘μ μν΄μλ μ΅μ 2κ° μ΄μμ μ ν¨ λ°μ΄ν°κ° νμν©λλ€. (νμ¬ {len(df)}κ°)")
        return None, None

    # Prophet expects the columns to be named ds / y.
    training_frame = df.rename(columns={"date": "ds", "price": "y"})
    try:
        model = Prophet(
            yearly_seasonality=True,
            weekly_seasonality=False,
            daily_seasonality=False,
        )
        model.fit(training_frame)
        # Daily steps from the last observation up to the horizon (minimum 1).
        days_ahead = (pd.Timestamp(horizon_end) - df["date"].max()).days
        future = model.make_future_dataframe(periods=max(days_ahead, 1), freq="D")
        return model, model.predict(future)
    except Exception as e:
        st.error(f"Prophet λͺ¨λΈ μμ± μ€ μ€λ₯: {str(e)}")
        return None, None
# ------------------------------------------------- | |
# LOAD DATA --------------------------------------- | |
# ------------------------------------------------- | |
# Load the full data set once per script run; nothing can be shown without it.
raw_df = load_data()
if raw_df.empty:
    st.error("λ°μ΄ν°κ° λΉμ΄ μμ΅λλ€. νμΌμ νμΈν΄μ£ΌμΈμ.")
    st.stop()

# Sidebar: item picker and today's date.
st.sidebar.header("π νλͺ© μ ν")
selected_item = st.sidebar.selectbox("νλͺ©", get_items(raw_df))
current_date = date.today()
st.sidebar.caption(f"μ€λ: {current_date}")

# Rows for the chosen item only; copied so later steps never touch raw_df.
item_df = raw_df[raw_df["item"] == selected_item].copy()
if item_df.empty:
    st.error("μ νν νλͺ© λ°μ΄ν° μμ")
    st.stop()
# ------------------------------------------------- | |
# MACRO FORECAST 1996β2030 ------------------------ | |
# ------------------------------------------------- | |
st.header(f"π {selected_item} κ°κ²© μμΈ‘ λμ보λ")

# Select the history used for the long-range model.
try:
    macro_start_dt = pd.Timestamp(MACRO_START)
    # With fewer than 10 points after the configured start, begin at the
    # earliest available date instead.
    if (item_df["date"] >= macro_start_dt).sum() < 10:
        macro_start_dt = item_df["date"].min()
        st.info(f"μΆ©λΆν λ°μ΄ν°κ° μμ΄ μμ λ μ§λ₯Ό {macro_start_dt.strftime('%Y-%m-%d')}λ‘ μ‘°μ νμ΅λλ€.")
    macro_df = item_df[item_df["date"] >= macro_start_dt].copy()
except Exception as e:
    st.error(f"λ μ§ νν°λ§ μ€λ₯: {str(e)}")
    macro_df = item_df.copy()  # fall back to the unfiltered history

# Diagnostic summary of the data that feeds the model.
with st.expander("λ°μ΄ν° μ§λ¨"):
    st.write(f"- μ 체 λ°μ΄ν° μ: {len(item_df)}")
    st.write(f"- λΆμ λ°μ΄ν° μ: {len(macro_df)}")
    if len(macro_df) > 0:
        st.write(f"- κΈ°κ°: {macro_df['date'].min().strftime('%Y-%m-%d')} ~ {macro_df['date'].max().strftime('%Y-%m-%d')}")
        st.dataframe(macro_df.head())
    else:
        st.write("λ°μ΄ν°κ° μμ΅λλ€.")

# Every path that fails to produce a forecast ends with the plain history
# chart; it is drawn once, below, instead of being repeated per branch.
macro_history_only = True
if len(macro_df) < 2:
    st.warning(f"{selected_item}μ λν λ°μ΄ν°κ° μΆ©λΆνμ§ μμ΅λλ€. μ 체 κΈ°κ° λ°μ΄ν°λ₯Ό νμν©λλ€.")
else:
    try:
        with st.spinner("μ₯κΈ° μμΈ‘ λͺ¨λΈ μμ± μ€..."):
            m_macro, fc_macro = fit_prophet(macro_df, MACRO_END)
        if m_macro is not None and fc_macro is not None:
            fig_macro = px.line(fc_macro, x="ds", y="yhat", title="μ₯κΈ° μμΈ‘ (1996β2030)")
            fig_macro.add_scatter(x=macro_df["date"], y=macro_df["price"], mode="lines", name="μ€μ κ°κ²©")
            st.plotly_chart(fig_macro, use_container_width=True)

            # Last row of the history is the baseline (load_data sorts by date).
            latest_price = macro_df["price"].iloc[-1]
            # Forecast row closest to the end of the horizon.
            target_date = pd.Timestamp(MACRO_END)
            nearest_row = (fc_macro["ds"] - target_date).abs().idxmin()
            macro_pred = fc_macro.loc[nearest_row, "yhat"]
            # A zero or missing baseline has no meaningful percentage change;
            # show the prediction without a delta rather than inf/nan.
            if pd.notna(latest_price) and latest_price != 0:
                macro_pct = (macro_pred - latest_price) / latest_price * 100
                macro_delta = f"{macro_pct:+.1f}%"
            else:
                macro_delta = None
            st.metric("2030 μμΈ‘κ°", f"{macro_pred:,.0f}", macro_delta)
            macro_history_only = False
        else:
            st.warning("μμΈ‘ λͺ¨λΈμ μμ±ν μ μμ΅λλ€.")
    except Exception as e:
        st.error(f"μ₯κΈ° μμΈ‘ μ€λ₯ λ°μ: {str(e)}")

if macro_history_only:
    fig = px.line(item_df, x="date", y="price", title=f"{selected_item} κ³Όκ±° κ°κ²©")
    st.plotly_chart(fig, use_container_width=True)
# ------------------------------------------------- | |
# MICRO FORECAST 2024β2026 ------------------------ | |
# ------------------------------------------------- | |
st.subheader("π 2024β2026 λ¨κΈ° μμΈ‘")

# Defined up front so later sections can test them without probing the
# namespace; they stay None whenever no short-range model is produced.
m_micro = None
fc_micro = None

# Select the history used for the short-range model.
try:
    micro_start_dt = pd.Timestamp(MICRO_START)
    if (item_df["date"] >= micro_start_dt).sum() < 10:
        # Too few recent points: use the most recent 30% of rows (at least 2).
        n = max(2, int(len(item_df) * 0.3))
        micro_df = item_df.sort_values("date").tail(n).copy()
        st.info(f"μΆ©λΆν μ΅κ·Ό λ°μ΄ν°κ° μμ΄ μ΅κ·Ό {n}κ° λ°μ΄ν° ν¬μΈνΈλ§ μ¬μ©ν©λλ€.")
    else:
        micro_df = item_df[item_df["date"] >= micro_start_dt].copy()
except Exception as e:
    st.error(f"λ¨κΈ° μμΈ‘ λ°μ΄ν° νν°λ§ μ€λ₯: {str(e)}")
    # Fall back to the 10 most recent rows.
    micro_df = item_df.sort_values("date").tail(10).copy()

if len(micro_df) < 2:
    st.warning(f"{MICRO_START} μ΄ν λ°μ΄ν°κ° μΆ©λΆνμ§ μμ΅λλ€.")
    fig = px.line(item_df, x="date", y="price", title=f"{selected_item} μ΅κ·Ό κ°κ²©")
    st.plotly_chart(fig, use_container_width=True)
else:
    try:
        with st.spinner("λ¨κΈ° μμΈ‘ λͺ¨λΈ μμ± μ€..."):
            m_micro, fc_micro = fit_prophet(micro_df, MICRO_END)
        if m_micro is not None and fc_micro is not None:
            fig_micro = px.line(fc_micro, x="ds", y="yhat", title="λ¨κΈ° μμΈ‘ (2024β2026)")
            fig_micro.add_scatter(x=micro_df["date"], y=micro_df["price"], mode="lines", name="μ€μ κ°κ²©")
            st.plotly_chart(fig_micro, use_container_width=True)

            # Last row of the selected history is the baseline price.
            latest_price = micro_df["price"].iloc[-1]
            # Forecast row closest to the end of the horizon.
            target_date = pd.Timestamp(MICRO_END)
            nearest_row = (fc_micro["ds"] - target_date).abs().idxmin()
            micro_pred = fc_micro.loc[nearest_row, "yhat"]
            # A zero or missing baseline has no meaningful percentage change;
            # show the prediction without a delta rather than inf/nan.
            if pd.notna(latest_price) and latest_price != 0:
                micro_pct = (micro_pred - latest_price) / latest_price * 100
                micro_delta = f"{micro_pct:+.1f}%"
            else:
                micro_delta = None
            st.metric("2026 μμΈ‘κ°", f"{micro_pred:,.0f}", micro_delta)
        else:
            st.warning("λ¨κΈ° μμΈ‘ λͺ¨λΈμ μμ±ν μ μμ΅λλ€.")
    except Exception as e:
        st.error(f"λ¨κΈ° μμΈ‘ μ€λ₯: {str(e)}")
# ------------------------------------------------- | |
# SEASONALITY & PATTERN --------------------------- | |
# ------------------------------------------------- | |
with st.expander("π μμ¦λλ¦¬ν° & ν¨ν΄ μ€λͺ "):
    # The short-range model only exists when that section fitted one, so
    # check that the names are defined before using them.
    if (
        'm_micro' in locals() and m_micro is not None
        and 'fc_micro' in locals() and fc_micro is not None
    ):
        comp_fig = None
        try:
            comp_fig = m_micro.plot_components(fc_micro)
            st.pyplot(comp_fig)
            # Average yearly component per calendar month.
            month_season = (fc_micro[["ds", "yearly"]]
                            .assign(month=lambda d: d.ds.dt.month)
                            .groupby("month")["yearly"].mean())
            st.markdown(
                f"**μ°κ° νΌν¬ μ:** {int(month_season.idxmax())}μ \n"
                f"**μ°κ° μ μ μ:** {int(month_season.idxmin())}μ \n"
                f"**μ°κ° λ³λν:** {month_season.max() - month_season.min():.1f}")
        except Exception as e:
            st.error(f"μμ¦λλ¦¬ν° λΆμ μ€λ₯: {str(e)}")
        finally:
            # Release the figure; otherwise pyplot keeps it registered and
            # figures pile up each time the script reruns.
            if comp_fig is not None:
                plt.close(comp_fig)
    else:
        st.info("ν¨ν΄ λΆμμ μν μΆ©λΆν λ°μ΄ν°κ° μμ΅λλ€.")
# -------------------------------------------------
# FOOTER ------------------------------------------
# -------------------------------------------------
st.markdown("---")
st.caption("Β© 2025 νλͺ©λ³ κ°κ²© μμΈ‘ μμ€ν | λ°μ΄ν° λΆμ μλν")