NH-Prediction / app.py
yokoha's picture
Update app.py
0f95c64 verified
raw
history blame
17.9 kB
import streamlit as st
import pandas as pd
import numpy as np
from prophet import Prophet
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import date
from pathlib import Path
import matplotlib.font_manager as fm
import matplotlib as mpl
# -------------------------------------------------
# CONFIG ------------------------------------------
# -------------------------------------------------
# Input files, looked up relative to the working directory.
CSV_PATH = Path("price_data.csv")
PARQUET_PATH = Path("domae-202503.parquet")
# ISO date bounds of the long-term (macro) and short-term (micro) windows.
MACRO_START, MACRO_END = "1996-01-01", "2030-12-31"
MICRO_START, MICRO_END = "2020-01-01", "2026-12-31"

# Korean font setup for matplotlib figures.
# Keywords are tried from most to least specific: a bare "gothic" also matches
# Latin-only families (e.g. "Century Gothic"), which cannot render Hangul, so
# it is only used when no Korean-specific family name is installed.
_KOREAN_FONT_KEYWORDS = ("nanum", "malgun", "gulim", "batang", "gothic")
_installed_font_names = [f.name for f in fm.fontManager.ttflist]
font_list = [
    name
    for keyword in _KOREAN_FONT_KEYWORDS
    for name in _installed_font_names
    if keyword in name.lower()
]
if font_list:
    font_name = font_list[0]
    plt.rcParams["font.family"] = font_name
    # Render minus signs with the ASCII hyphen instead of U+2212.
    mpl.rcParams["axes.unicode_minus"] = False
else:
    # No candidate font installed: fall back to matplotlib's default family.
    plt.rcParams["font.family"] = "DejaVu Sans"

st.set_page_config(page_title="ํ’ˆ๋ชฉ๋ณ„ ๊ฐ€๊ฒฉ ์˜ˆ์ธก", page_icon="๐Ÿ“ˆ", layout="wide")
# -------------------------------------------------
# UTILITIES ---------------------------------------
# -------------------------------------------------
# Lower-cased source column names recognised as the date / item / price fields.
DATE_CANDIDATES = {"date", "ds", "ymd", "๋‚ ์งœ", "prce_reg_mm", "etl_ldg_dt"}
ITEM_CANDIDATES = {"item", "ํ’ˆ๋ชฉ", "code", "category", "pdlt_nm", "spcs_nm"}
PRICE_CANDIDATES = {"price", "y", "value", "๊ฐ€๊ฒฉ", "avrg_prce"}


def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with columns normalised to date/item/price.

    Steps, in order:

    1. Rename every column whose lower-cased label is a known candidate.
       The first item-like column becomes ``item``; later ones ``species``.
    2. If several columns end up with the same name, keep the first.
    3. If there is no ``date`` column but the index is datetime-typed,
       move the index into a ``date`` column.
    4. Convert compact ``YYYYMM`` / ``YYYYMMDD`` dates (stored as strings
       *or* integers) to month-start timestamps; unparseable values
       become ``NaT``.
    5. Merge ``item`` and ``species`` into a single ``"item-species"``
       label and drop ``species``.

    The input frame is never modified.
    """
    col_map = {}
    for c in df.columns:
        # Labels are not guaranteed to be strings (e.g. header-less files).
        lc = str(c).lower()
        if lc in DATE_CANDIDATES:
            col_map[c] = "date"
        elif lc in PRICE_CANDIDATES:
            col_map[c] = "price"
        elif lc in ITEM_CANDIDATES:
            # First hit is the item, every later hit is treated as species.
            col_map[c] = "species" if "item" in col_map.values() else "item"
    df = df.rename(columns=col_map)

    # Several source columns may map onto one target name: keep the first.
    # The explicit copy keeps later column assignments off a slice view.
    if df.columns.duplicated().any():
        df = df.loc[:, ~df.columns.duplicated()].copy()

    # Promote a datetime index to a regular "date" column.
    if "date" not in df.columns and df.index.dtype.kind == "M":
        df = df.reset_index()
        df = df.rename(columns={df.columns[0]: "date"})

    # Compact numeric dates. Integer columns are included on purpose: a
    # generic datetime parse would read 202503 as nanoseconds since epoch.
    if "date" in df.columns:
        dates = df["date"]
        may_be_compact = (
            pd.api.types.is_object_dtype(dates)
            or pd.api.types.is_string_dtype(dates)
            or pd.api.types.is_integer_dtype(dates)
        )
        present = dates.dropna()
        if may_be_compact and not present.empty:
            # Probe the first non-null value so a leading gap does not
            # disable the conversion for the whole column.
            sample = str(present.iloc[0])
            if sample.isdigit() and len(sample) in (6, 8):
                df["date"] = pd.to_datetime(
                    dates.astype(str).str[:6], format="%Y%m", errors="coerce"
                )

    # Fold the species into the item label.
    if {"item", "species"}.issubset(df.columns):
        df["item"] = (
            df["item"].astype(str).str.strip()
            + "-"
            + df["species"].astype(str).str.strip()
        )
        df = df.drop(columns=["species"])
    return df
@st.cache_data(show_spinner=False)
def load_data() -> pd.DataFrame:
    """Read the price table, normalise its schema and drop unusable rows.

    The Parquet file is preferred; the CSV file is the fallback. Progress
    and diagnostics are written to the Streamlit sidebar. The script is
    stopped via ``st.stop()`` when no input file exists, when a required
    column is missing after standardisation, or when any other exception
    is raised while loading.

    Returns:
        The cleaned frame sorted by ``date``. It may be empty when no row
        survives cleaning; an error is shown in that case.
    """
    required = ["date", "item", "price"]
    try:
        # Pick the data source.
        if PARQUET_PATH.exists():
            st.sidebar.info("Parquet ํŒŒ์ผ์—์„œ ๋ฐ์ดํ„ฐ๋ฅผ ๋ถˆ๋Ÿฌ์˜ต๋‹ˆ๋‹ค.")
            df = pd.read_parquet(PARQUET_PATH)
            st.sidebar.success(f"Parquet ๋ฐ์ดํ„ฐ ๋กœ๋“œ ์™„๋ฃŒ: {len(df)}๊ฐœ ํ–‰")
        elif CSV_PATH.exists():
            st.sidebar.info("CSV ํŒŒ์ผ์—์„œ ๋ฐ์ดํ„ฐ๋ฅผ ๋ถˆ๋Ÿฌ์˜ต๋‹ˆ๋‹ค.")
            df = pd.read_csv(CSV_PATH)
            st.sidebar.success(f"CSV ๋ฐ์ดํ„ฐ ๋กœ๋“œ ์™„๋ฃŒ: {len(df)}๊ฐœ ํ–‰")
        else:
            st.error("๐Ÿ’พ price_data.csv ๋˜๋Š” domae-202503.parquet ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
            st.stop()

        # Show the column names before and after standardisation.
        st.sidebar.write("์›๋ณธ ๋ฐ์ดํ„ฐ ์ปฌ๋Ÿผ:", list(df.columns))
        df = _standardize_columns(df)
        st.sidebar.write("ํ‘œ์ค€ํ™” ํ›„ ์ปฌ๋Ÿผ:", list(df.columns))

        missing = {name for name in required if name not in df.columns}
        if missing:
            st.error(f"ํ•„์ˆ˜ ์ปฌ๋Ÿผ ๋ˆ„๋ฝ: {', '.join(missing)} โ€” ํŒŒ์ผ ์ปฌ๋Ÿผ๋ช…์„ ํ™•์ธํ•˜์„ธ์š”.")
            st.stop()

        # Parse dates and report how many rows have no usable date.
        rows_total = len(df)
        df["date"] = pd.to_datetime(df["date"], errors="coerce")
        rows_dated = int(df["date"].notna().sum())
        if rows_total != rows_dated:
            st.warning(f"๋‚ ์งœ ๋ณ€ํ™˜ ์ค‘ {rows_total - rows_dated}๊ฐœ ํ–‰์ด ์ œ์™ธ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")

        # Drop rows missing any required field and report the loss.
        rows_before_drop = len(df)
        df = df.dropna(subset=required)
        rows_after_drop = len(df)
        if rows_before_drop != rows_after_drop:
            st.warning(f"NA ์ œ๊ฑฐ ์ค‘ {rows_before_drop - rows_after_drop}๊ฐœ ํ–‰์ด ์ œ์™ธ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")

        df = df.sort_values("date")

        # Summarise the date range and the number of distinct items.
        if len(df) > 0:
            first_day = df["date"].min().strftime("%Y-%m-%d")
            last_day = df["date"].max().strftime("%Y-%m-%d")
            st.sidebar.write(f"๋ฐ์ดํ„ฐ ๋‚ ์งœ ๋ฒ”์œ„: {first_day} ~ {last_day}")
            st.sidebar.write(f"์ด ํ’ˆ๋ชฉ ์ˆ˜: {df['item'].nunique()}")
        else:
            st.error("์œ ํšจํ•œ ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค!")
        return df
    except Exception as exc:
        st.error(f"๋ฐ์ดํ„ฐ ๋กœ๋“œ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(exc)}")
        st.stop()
@st.cache_data(show_spinner=False)
def get_items(df: pd.DataFrame):
    """Return the distinct values of the ``item`` column in sorted order."""
    distinct_items = df["item"].unique()
    return sorted(distinct_items)
@st.cache_data(show_spinner=False, ttl=3600)
def fit_prophet(df: pd.DataFrame, horizon_end: str):
    """Fit a Prophet model on a date/price frame and forecast to *horizon_end*.

    Rows with a missing date or price are dropped and prices sharing the
    same date are averaged before fitting. The forecast is daily and runs
    for at least one day past the last observation.

    Args:
        df: Frame with ``date`` and ``price`` columns.
        horizon_end: Date string marking the end of the forecast.

    Returns:
        ``(model, forecast)`` on success. ``(None, None)`` when fewer than
        two dated observations remain or when fitting/prediction raises; a
        Streamlit message is shown in both failure cases.
    """
    # One observation per date: average duplicated dates.
    observations = (
        df.copy()
        .dropna(subset=["date", "price"])
        .groupby("date")["price"]
        .mean()
        .reset_index()
    )

    n_points = len(observations)
    if n_points < 2:
        st.warning(f"๋ฐ์ดํ„ฐ ํฌ์ธํŠธ๊ฐ€ ๋ถ€์กฑํ•ฉ๋‹ˆ๋‹ค. ์˜ˆ์ธก์„ ์œ„ํ•ด์„œ๋Š” ์ตœ์†Œ 2๊ฐœ ์ด์ƒ์˜ ์œ ํšจ ๋ฐ์ดํ„ฐ๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค. (ํ˜„์žฌ {n_points}๊ฐœ)")
        return None, None

    # Prophet expects the columns to be named ds / y.
    history = observations.rename(columns={"date": "ds", "price": "y"})

    try:
        model = Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False)
        model.fit(history)

        # Days between the last observation and the horizon, at least 1.
        days_ahead = (pd.Timestamp(horizon_end) - observations["date"].max()).days
        future = model.make_future_dataframe(periods=max(days_ahead, 1), freq="D")

        forecast = model.predict(future)
    except Exception as exc:
        st.error(f"Prophet ๋ชจ๋ธ ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜: {str(exc)}")
        return None, None
    return model, forecast
# -------------------------------------------------
# LOAD DATA ---------------------------------------
# -------------------------------------------------
# Load the full data set; nothing below can run without at least one row.
raw_df = load_data()
if raw_df.empty:
    st.error("๋ฐ์ดํ„ฐ๊ฐ€ ๋น„์–ด ์žˆ์Šต๋‹ˆ๋‹ค. ํŒŒ์ผ์„ ํ™•์ธํ•ด์ฃผ์„ธ์š”.")
    st.stop()

# Sidebar: item picker plus today's date for reference.
st.sidebar.header("๐Ÿ” ํ’ˆ๋ชฉ ์„ ํƒ")
item_options = get_items(raw_df)
selected_item = st.sidebar.selectbox("ํ’ˆ๋ชฉ", item_options)
current_date = date.today()
st.sidebar.caption(f"์˜ค๋Š˜: {current_date}")

# Rows belonging to the chosen item (copied so later edits stay local).
item_df = raw_df[raw_df["item"] == selected_item].copy()
if item_df.empty:
    st.error("์„ ํƒํ•œ ํ’ˆ๋ชฉ ๋ฐ์ดํ„ฐ ์—†์Œ")
    st.stop()
# -------------------------------------------------
# MACRO FORECAST 1996-2030 ------------------------
# -------------------------------------------------
st.header(f"๐Ÿ“ˆ {selected_item} ๊ฐ€๊ฒฉ ์˜ˆ์ธก ๋Œ€์‹œ๋ณด๋“œ")


def _show_macro_history() -> None:
    """Fallback view: plot the selected item's full raw price history."""
    history_fig = px.line(item_df, x="date", y="price", title=f"{selected_item} ๊ณผ๊ฑฐ ๊ฐ€๊ฒฉ")
    st.plotly_chart(history_fig, use_container_width=True)


# Choose the training window. If fewer than 10 rows fall on or after
# MACRO_START, widen the window to start at the item's earliest date.
try:
    macro_start_dt = pd.Timestamp(MACRO_START)
    if len(item_df[item_df["date"] >= macro_start_dt]) < 10:
        macro_start_dt = item_df["date"].min()
        st.info(f"์ถฉ๋ถ„ํ•œ ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์–ด ์‹œ์ž‘ ๋‚ ์งœ๋ฅผ {macro_start_dt.strftime('%Y-%m-%d')}๋กœ ์กฐ์ •ํ–ˆ์Šต๋‹ˆ๋‹ค.")
    macro_df = item_df[item_df["date"] >= macro_start_dt].copy()
except Exception as e:
    st.error(f"๋‚ ์งœ ํ•„ํ„ฐ๋ง ์˜ค๋ฅ˜: {str(e)}")
    # Filtering failed: fall back to the unfiltered item data.
    macro_df = item_df.copy()

# Diagnostics: row counts, covered period and a preview of the data.
with st.expander("๋ฐ์ดํ„ฐ ์ง„๋‹จ"):
    st.write(f"- ์ „์ฒด ๋ฐ์ดํ„ฐ ์ˆ˜: {len(item_df)}")
    st.write(f"- ๋ถ„์„ ๋ฐ์ดํ„ฐ ์ˆ˜: {len(macro_df)}")
    if len(macro_df) > 0:
        st.write(f"- ๊ธฐ๊ฐ„: {macro_df['date'].min().strftime('%Y-%m-%d')} ~ {macro_df['date'].max().strftime('%Y-%m-%d')}")
        st.dataframe(macro_df.head())
    else:
        st.write("๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")

if len(macro_df) < 2:
    st.warning(f"{selected_item}์— ๋Œ€ํ•œ ๋ฐ์ดํ„ฐ๊ฐ€ ์ถฉ๋ถ„ํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค. ์ „์ฒด ๊ธฐ๊ฐ„ ๋ฐ์ดํ„ฐ๋ฅผ ํ‘œ์‹œํ•ฉ๋‹ˆ๋‹ค.")
    _show_macro_history()
else:
    try:
        with st.spinner("์žฅ๊ธฐ ์˜ˆ์ธก ๋ชจ๋ธ ์ƒ์„ฑ ์ค‘..."):
            m_macro, fc_macro = fit_prophet(macro_df, MACRO_END)
        if m_macro is not None and fc_macro is not None:
            fig_macro = px.line(fc_macro, x="ds", y="yhat", title="์žฅ๊ธฐ ์˜ˆ์ธก (1996โ€“2030)")
            fig_macro.add_scatter(x=macro_df["date"], y=macro_df["price"], mode="lines", name="์‹ค์ œ ๊ฐ€๊ฒฉ")
            st.plotly_chart(fig_macro, use_container_width=True)

            # Reference price: mean over the most recent date. The model is
            # fitted on per-date means, so a single arbitrary row from that
            # date would not be comparable with the forecast.
            latest_date = macro_df["date"].max()
            latest_price = macro_df.loc[macro_df["date"] == latest_date, "price"].mean()

            # Forecast value on the date closest to the end of the horizon.
            target_date = pd.Timestamp(MACRO_END)
            nearest_idx = (fc_macro["ds"] - target_date).abs().idxmin()
            macro_pred = fc_macro.loc[nearest_idx, "yhat"]

            # A relative change is undefined for a zero reference price.
            if latest_price != 0:
                macro_pct = (macro_pred - latest_price) / latest_price * 100
                macro_delta = f"{macro_pct:+.1f}%"
            else:
                macro_delta = None
            st.metric("2030 ์˜ˆ์ธก๊ฐ€", f"{macro_pred:,.0f}", macro_delta)
        else:
            st.warning("์˜ˆ์ธก ๋ชจ๋ธ์„ ์ƒ์„ฑํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
            _show_macro_history()
    except Exception as e:
        st.error(f"์žฅ๊ธฐ ์˜ˆ์ธก ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}")
        _show_macro_history()
# -------------------------------------------------
# MICRO FORECAST 2024-2026 ------------------------
# -------------------------------------------------
st.subheader("๐Ÿ”Ž 2024โ€“2026 ๋‹จ๊ธฐ ์˜ˆ์ธก")

# Defined up front so later sections can test them even when no short-term
# model could be fitted.
m_micro = None
fc_micro = None

# Choose the training window. If fewer than 10 rows fall on or after
# MICRO_START, use the most recent 30% of the item's rows (at least 2).
try:
    micro_start_dt = pd.Timestamp(MICRO_START)
    if len(item_df[item_df["date"] >= micro_start_dt]) < 10:
        n = max(2, int(len(item_df) * 0.3))
        micro_df = item_df.sort_values("date").tail(n).copy()
        st.info(f"์ถฉ๋ถ„ํ•œ ์ตœ๊ทผ ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์–ด ์ตœ๊ทผ {n}๊ฐœ ๋ฐ์ดํ„ฐ ํฌ์ธํŠธ๋งŒ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.")
    else:
        micro_df = item_df[item_df["date"] >= micro_start_dt].copy()
except Exception as e:
    st.error(f"๋‹จ๊ธฐ ์˜ˆ์ธก ๋ฐ์ดํ„ฐ ํ•„ํ„ฐ๋ง ์˜ค๋ฅ˜: {str(e)}")
    # Filtering failed: fall back to the 10 most recent rows.
    micro_df = item_df.sort_values("date").tail(10).copy()

if len(micro_df) < 2:
    st.warning(f"{MICRO_START} ์ดํ›„ ๋ฐ์ดํ„ฐ๊ฐ€ ์ถฉ๋ถ„ํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค.")
    fig = px.line(item_df, x="date", y="price", title=f"{selected_item} ์ตœ๊ทผ ๊ฐ€๊ฒฉ")
    st.plotly_chart(fig, use_container_width=True)
else:
    try:
        with st.spinner("๋‹จ๊ธฐ ์˜ˆ์ธก ๋ชจ๋ธ ์ƒ์„ฑ ์ค‘..."):
            m_micro, fc_micro = fit_prophet(micro_df, MICRO_END)
        if m_micro is not None and fc_micro is not None:
            fig_micro = px.line(fc_micro, x="ds", y="yhat", title="๋‹จ๊ธฐ ์˜ˆ์ธก (2024โ€“2026)")
            fig_micro.add_scatter(x=micro_df["date"], y=micro_df["price"], mode="lines", name="์‹ค์ œ ๊ฐ€๊ฒฉ")
            st.plotly_chart(fig_micro, use_container_width=True)

            # Reference price: mean over the most recent date. The model is
            # fitted on per-date means, so a single arbitrary row from that
            # date would not be comparable with the forecast.
            latest_date = micro_df["date"].max()
            latest_price = micro_df.loc[micro_df["date"] == latest_date, "price"].mean()

            # Forecast value on the date closest to the end of the horizon.
            target_date = pd.Timestamp(MICRO_END)
            nearest_idx = (fc_micro["ds"] - target_date).abs().idxmin()
            micro_pred = fc_micro.loc[nearest_idx, "yhat"]

            # A relative change is undefined for a zero reference price.
            if latest_price != 0:
                micro_pct = (micro_pred - latest_price) / latest_price * 100
                micro_delta = f"{micro_pct:+.1f}%"
            else:
                micro_delta = None
            st.metric("2026 ์˜ˆ์ธก๊ฐ€", f"{micro_pred:,.0f}", micro_delta)
        else:
            st.warning("๋‹จ๊ธฐ ์˜ˆ์ธก ๋ชจ๋ธ์„ ์ƒ์„ฑํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
    except Exception as e:
        st.error(f"๋‹จ๊ธฐ ์˜ˆ์ธก ์˜ค๋ฅ˜: {str(e)}")
# -------------------------------------------------
# SEASONALITY & PATTERN ---------------------------
# -------------------------------------------------
with st.expander("๐Ÿ“† ์‹œ์ฆˆ๋„๋ฆฌํ‹ฐ & ํŒจํ„ด ์„ค๋ช…"):
    # The short-term model may not exist (too little data or a failed fit),
    # so look the names up defensively instead of referencing them directly.
    season_model = globals().get("m_micro")
    season_forecast = globals().get("fc_micro")
    if season_model is not None and season_forecast is not None:
        try:
            comp_fig = season_model.plot_components(season_forecast)
            try:
                st.pyplot(comp_fig)
            finally:
                # Figures stay registered with pyplot until closed; without
                # this every rerun of the script would leak one figure.
                plt.close(comp_fig)

            # Average the yearly seasonal component per calendar month.
            month_season = (season_forecast[["ds", "yearly"]]
                            .assign(month=lambda d: d.ds.dt.month)
                            .groupby("month")["yearly"].mean())
            st.markdown(
                f"**์—ฐ๊ฐ„ ํ”ผํฌ ์›”:** {int(month_season.idxmax())}์›” \n"
                f"**์—ฐ๊ฐ„ ์ €์  ์›”:** {int(month_season.idxmin())}์›” \n"
                f"**์—ฐ๊ฐ„ ๋ณ€๋™ํญ:** {month_season.max() - month_season.min():.1f}")
        except Exception as e:
            st.error(f"์‹œ์ฆˆ๋„๋ฆฌํ‹ฐ ๋ถ„์„ ์˜ค๋ฅ˜: {str(e)}")
    else:
        st.info("ํŒจํ„ด ๋ถ„์„์„ ์œ„ํ•œ ์ถฉ๋ถ„ํ•œ ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
# -------------------------------------------------
# CORRELATION HEATMAP -----------------------------
# -------------------------------------------------
st.subheader("๐Ÿงฎ ํ’ˆ๋ชฉ ๊ฐ„ ์ƒ๊ด€๊ด€๊ณ„")
try:
    # Limit the matrix to the 30 items with the most rows, always including
    # the currently selected item.
    items_to_corr = raw_df["item"].value_counts().head(30).index.tolist()
    if selected_item not in items_to_corr and selected_item in raw_df["item"].unique():
        items_to_corr.append(selected_item)
    filtered_df = raw_df[raw_df["item"].isin(items_to_corr)]

    # Month x item table of mean prices.
    monthly_pivot = (filtered_df.assign(month=lambda d: d.date.dt.to_period("M"))
                     .groupby(["month", "item"], as_index=False)["price"].mean()
                     .pivot(index="month", columns="item", values="price"))

    # Drop items that are missing in 50% or more of the months.
    threshold = 0.5
    monthly_pivot = monthly_pivot.loc[:, monthly_pivot.isnull().mean() < threshold]

    if monthly_pivot.shape[1] > 1:  # at least two items are needed
        # Fill the remaining gaps forward, then backward. ffill()/bfill()
        # replace fillna(method=...), which newer pandas no longer accepts.
        monthly_pivot = monthly_pivot.ffill().bfill()
        corr = monthly_pivot.corr()

        fig, ax = plt.subplots(figsize=(12, 10))
        try:
            # Hide the upper triangle (and diagonal) of the symmetric matrix.
            mask = np.triu(np.ones_like(corr, dtype=bool))
            ax.set_title(f"{selected_item} ๊ด€๋ จ ์ƒ๊ด€๊ด€๊ณ„", fontsize=15)
            sns.heatmap(corr, mask=mask, annot=False, cmap="coolwarm", center=0,
                        square=True, linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)
            plt.setp(ax.get_xticklabels(), rotation=45, ha="right", fontsize=8)
            plt.setp(ax.get_yticklabels(), fontsize=8)

            # List the items most and least correlated with the selection.
            if selected_item in corr.columns:
                # Undefined correlations (e.g. constant series) are dropped
                # so they do not show up as "lowest" entries.
                item_corr = (corr[selected_item]
                             .drop(selected_item)
                             .dropna()
                             .sort_values(ascending=False))
                top_corr = item_corr.head(5)
                bottom_corr = item_corr.tail(5)
                col1, col2 = st.columns(2)
                with col1:
                    st.markdown(f"**{selected_item}์™€ ์ƒ๊ด€๊ด€๊ณ„ ๋†’์€ ํ’ˆ๋ชฉ**")
                    for item, val in top_corr.items():
                        st.write(f"{item}: {val:.2f}")
                with col2:
                    st.markdown(f"**{selected_item}์™€ ์ƒ๊ด€๊ด€๊ณ„ ๋‚ฎ์€ ํ’ˆ๋ชฉ**")
                    for item, val in bottom_corr.items():
                        st.write(f"{item}: {val:.2f}")
            st.pyplot(fig)
        finally:
            # Release the figure; pyplot keeps it registered otherwise and
            # every rerun of the script would leak one figure.
            plt.close(fig)
    else:
        st.info("์ƒ๊ด€๊ด€๊ณ„ ๋ถ„์„์„ ์œ„ํ•œ ์ถฉ๋ถ„ํ•œ ํ’ˆ๋ชฉ ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
except Exception as e:
    st.error(f"์ƒ๊ด€๊ด€๊ณ„ ๋ถ„์„ ์˜ค๋ฅ˜: {str(e)}")
    st.write("์˜ค๋ฅ˜ ์ƒ์„ธ ์ •๋ณด:", str(e))
# -------------------------------------------------
# FOOTER ------------------------------------------
# -------------------------------------------------
# Page footer: a horizontal rule followed by the caption line.
footer_caption = "ยฉ 2025 ํ’ˆ๋ชฉ๋ณ„ ๊ฐ€๊ฒฉ ์˜ˆ์ธก ์‹œ์Šคํ…œ | ๋ฐ์ดํ„ฐ ๋ถ„์„ ์ž๋™ํ™”"
st.markdown("---")
st.caption(footer_caption)