NH-Prediction

Running

App Files Files Community

yokoha committed on Apr 30

Commit

828f0f0

verified ·

1 Parent(s): 3268778

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -26

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import streamlit as st
 import pandas as pd
 import numpy as np
@@ -12,7 +13,7 @@ from pathlib import Path
 # CONFIG ------------------------------------------
 # -------------------------------------------------
 CSV_PATH = Path("price_data.csv")
-PARQUET_PATH = Path("domae-202503.parquet")  # 1996-2025-03 일간 가격
 MACRO_START, MACRO_END = "1996-01-01", "2030-12-31"
 MICRO_START, MICRO_END = "2020-01-01", "2026-12-31"
@@ -21,33 +22,46 @@ st.set_page_config(page_title="품목별 가격 예측", page_icon="📈", layou
 # -------------------------------------------------
 # UTILITIES ---------------------------------------
 # -------------------------------------------------
-DATE_CANDIDATES = {"date", "ds", "ymd", "날짜"}
-ITEM_CANDIDATES = {"item", "품목", "code", "category"}
-PRICE_CANDIDATES = {"price", "y", "value", "가격"}
 def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
-    """Rename date/item/price cols to date, item, price (in-place)."""
     col_map = {}
     for c in df.columns:
         lc = c.lower()
-        if lc in DATE_CANDIDATES:  # date
             col_map[c] = "date"
-        elif lc in ITEM_CANDIDATES:
-            col_map[c] = "item"
         elif lc in PRICE_CANDIDATES:
             col_map[c] = "price"
-    df.rename(columns=col_map, inplace=True)
-    # date might be index
-    if "date" not in df.columns:
-        if df.index.dtype.kind == "M":
-            df.reset_index(inplace=True)
-            df.rename(columns={df.columns[0]: "date"}, inplace=True)
     return df
 @st.cache_data(show_spinner=False)
 def load_data() -> pd.DataFrame:
-    """Load price data from Parquet if available, else CSV. Tries to infer column names."""
     if PARQUET_PATH.exists():
         df = pd.read_parquet(PARQUET_PATH)
     elif CSV_PATH.exists():
@@ -57,14 +71,13 @@ def load_data() -> pd.DataFrame:
         st.stop()
     df = _standardize_columns(df)
     missing = {c for c in ["date", "item", "price"] if c not in df.columns}
     if missing:
         st.error(f"필수 컬럼 누락: {', '.join(missing)} — 파일 컬럼명을 확인하세요.")
         st.stop()
     df["date"] = pd.to_datetime(df["date"], errors="coerce")
-    df.dropna(subset=["date", "item", "price"], inplace=True)
     df.sort_values("date", inplace=True)
     return df
@@ -134,21 +147,27 @@ with st.expander("📆 시즈널리티 & 패턴 설명"):
     comp_fig = m_micro.plot_components(fc_micro)
     st.pyplot(comp_fig)
-    month_season = (fc_micro[["ds", "yearly"]]
-                    .assign(month=lambda d: d.ds.dt.month)
-                    .groupby("month")["yearly"].mean())
     st.markdown(
         f"**연간 피크 월:** {int(month_season.idxmax())}월  \n"
         f"**연간 저점 월:** {int(month_season.idxmin())}월  \n"
-        f"**연간 변동폭:** {month_season.max() - month_season.min():.1f}")
 # -------------------------------------------------
 # CORRELATION HEATMAP -----------------------------
 # -------------------------------------------------
 st.subheader("🧮 품목 간 상관관계")
-monthly_pivot = (raw_df.assign(month=lambda d: d.date.dt.to_period("M"))
-                        .groupby(["month", "item"], as_index=False)["price"].mean()
-                        .pivot(index="month", columns="item", values="price"))
 corr = monthly_pivot.corr()
 fig, ax = plt.subplots(figsize=(12, 10))
@@ -162,8 +181,14 @@ st.info("빨간 영역: 가격 동조화 / 파란 영역: 대체재 가능성")
 # VOLATILITY --------------------------------------
 # -------------------------------------------------
 st.subheader("📊 30일 이동 표준편차 (가격 변동성)")
-vol = item_df.set_index("date")["price"].rolling(30).std().dropna().reset_index()
 fig_vol = px.area(vol, x="date", y="price", title="Rolling 30D Std Dev")
 st.plotly_chart(fig_vol, use_container_width=True)
-st.caption("데이터: domae-202503.parquet · Prophet 예측 · Streamlit 대시보드")

 import streamlit as st
 import pandas as pd
 import numpy as np
 # CONFIG ------------------------------------------
 # -------------------------------------------------
 CSV_PATH = Path("price_data.csv")
+PARQUET_PATH = Path("domae-202503.parquet")  # 1996-2025-03 일간/월간 가격
 MACRO_START, MACRO_END = "1996-01-01", "2030-12-31"
 MICRO_START, MICRO_END = "2020-01-01", "2026-12-31"
 # -------------------------------------------------
 # UTILITIES ---------------------------------------
 # -------------------------------------------------
DATE_CANDIDATES = {"date", "ds", "ymd", "날짜", "prce_reg_mm", "etl_ldg_dt"}
ITEM_CANDIDATES = {"item", "품목", "code", "category", "pdlt_nm", "spcs_nm"}
PRICE_CANDIDATES = {"price", "y", "value", "가격", "avrg_prce"}
# Labels this module assigns itself; each must end up on at most one column.
_CANONICAL_COLUMNS = ("date", "item", "price", "species")


def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* whose date/item/price columns use canonical names.

    Column labels are matched case-insensitively against DATE_CANDIDATES,
    PRICE_CANDIDATES and ITEM_CANDIDATES.  Only the first matching column of
    each kind is renamed; later matches keep their original label so the
    result never carries two columns with the same canonical name.  The
    second item-like column is treated as a species/variety and merged into
    ``item`` as ``"<item>-<species>"``.

    Args:
        df: Raw price table.  It is not modified.

    Returns:
        A new DataFrame.  ``date``, ``item`` and ``price`` are present only
        when a matching source column (or a datetime index, for ``date``)
        exists; the caller is expected to check for missing columns.
    """
    col_map = {}
    taken = set()
    for c in df.columns:
        lc = str(c).lower()  # column labels are not guaranteed to be strings
        if lc in DATE_CANDIDATES:
            target = "date"
        elif lc in PRICE_CANDIDATES:
            target = "price"
        elif lc in ITEM_CANDIDATES:
            target = "species" if "item" in taken else "item"
        else:
            continue
        # First match wins: a second date/price/species candidate would
        # otherwise create duplicate labels and make df["date"] a DataFrame.
        if target not in taken:
            col_map[c] = target
            taken.add(target)
    df = df.rename(columns=col_map)

    # An unrenamed column may literally be called "date"/"item"/... and
    # collide with a renamed one; keep the first occurrence of each.
    clash = df.columns.duplicated() & df.columns.isin(_CANONICAL_COLUMNS)
    if clash.any():
        df = df.loc[:, ~clash].copy()

    # The date may live in a datetime index rather than in a column.
    if "date" not in df.columns and df.index.dtype.kind == "M":
        df = df.reset_index()
        df = df.rename(columns={df.columns[0]: "date"})

    if "date" in df.columns:
        dates = df["date"]
        compact_capable = (
            pd.api.types.is_object_dtype(dates)
            or pd.api.types.is_string_dtype(dates)
            or pd.api.types.is_integer_dtype(dates)
        )
        if compact_capable:
            present = dates.dropna()
            # Probe the first non-null value; an empty column has no sample.
            sample = "" if present.empty else str(present.iloc[0])
            if sample.isdigit() and len(sample) in (6, 8):
                # Compact YYYYMM / YYYYMMDD values are normalised to the
                # first day of their month (the day part is discarded).
                df["date"] = pd.to_datetime(
                    dates.astype(str).str[:6], format="%Y%m", errors="coerce"
                )

    if {"item", "species"}.issubset(df.columns):
        df["item"] = (
            df["item"].astype(str).str.strip()
            + "-"
            + df["species"].astype(str).str.strip()
        )
        df = df.drop(columns=["species"])
    return df
 @st.cache_data(show_spinner=False)
 def load_data() -> pd.DataFrame:
     if PARQUET_PATH.exists():
         df = pd.read_parquet(PARQUET_PATH)
     elif CSV_PATH.exists():
         st.stop()
     df = _standardize_columns(df)
     missing = {c for c in ["date", "item", "price"] if c not in df.columns}
     if missing:
         st.error(f"필수 컬럼 누락: {', '.join(missing)} — 파일 컬럼명을 확인하세요.")
         st.stop()
     df["date"] = pd.to_datetime(df["date"], errors="coerce")
+    df = df.dropna(subset=["date", "item", "price"])
     df.sort_values("date", inplace=True)
     return df
     comp_fig = m_micro.plot_components(fc_micro)
     st.pyplot(comp_fig)
+    month_season = (
+        fc_micro[["ds", "yearly"]]
+        .assign(month=lambda d: d.ds.dt.month)
+        .groupby("month")["yearly"].mean()
+    )
     st.markdown(
         f"**연간 피크 월:** {int(month_season.idxmax())}월  \n"
         f"**연간 저점 월:** {int(month_season.idxmin())}월  \n"
+        f"**연간 변동폭:** {month_season.max() - month_season.min():.1f}"
+    )
 # -------------------------------------------------
 # CORRELATION HEATMAP -----------------------------
 # -------------------------------------------------
 st.subheader("🧮 품목 간 상관관계")
+monthly_pivot = (
+    raw_df.assign(month=lambda d: d.date.dt.to_period("M"))
+    .groupby(["month", "item"], as_index=False)["price"]
+    .mean()
+    .pivot(index="month", columns="item", values="price")
+)
 corr = monthly_pivot.corr()
 fig, ax = plt.subplots(figsize=(12, 10))
 # VOLATILITY --------------------------------------
 # -------------------------------------------------
 st.subheader("📊 30일 이동 표준편차 (가격 변동성)")
+vol = (
+    item_df.set_index("date")["price"]
+    .rolling(30)
+    .std()
+    .dropna()
+    .reset_index()
+)
 fig_vol = px.area(vol, x="date", y="price", title="Rolling 30D Std Dev")
 st.plotly_chart(fig_vol, use_container_width=True)
+st.caption("데이터: domae-202503.parquet · Prophet 예측 · Streamlit 대시보드")