NH-Prediction

Running

App Files Community

yokoha committed 30 days ago

Commit

dd6e62d

verified ·

1 Parent(s): c2cf641

Update app.py

Browse files

Files changed (1) hide show

app.py +120 -64

app.py CHANGED Viewed

@@ -53,7 +53,7 @@ def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
         df.rename(columns={df.columns[0]: "date"}, inplace=True)
     # ── convert YYYYMM string to datetime ──────────────────────────────
-    if "date" in df.columns and pd.api.types.is_object_dtype(df["date" ]):
         sample = str(df["date"].iloc[0])
         if sample.isdigit() and len(sample) in (6, 8):
             df["date"] = pd.to_datetime(df["date"].astype(str).str[:6], format="%Y%m", errors="coerce")
@@ -101,10 +101,26 @@ def get_items(df: pd.DataFrame):
 @st.cache_data(show_spinner=False)
 def fit_prophet(df: pd.DataFrame, horizon_end: str):
     m = Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False)
-    m.fit(df.rename(columns={"date": "ds", "price": "y"}))
-    periods = (pd.Timestamp(horizon_end) - df["date"].max()).days
     future = m.make_future_dataframe(periods=periods, freq="D")
     forecast = m.predict(future)
     return m, forecast
@@ -127,85 +143,125 @@ if item_df.empty:
 # MACRO FORECAST 1996‑2030 ------------------------
 # -------------------------------------------------
 st.header(f"📈 {selected_item} 가격 예측 대시보드")
-macro_df = item_df[item_df["date"] >= MACRO_START]
-m_macro, fc_macro = fit_prophet(macro_df, MACRO_END)
-fig_macro = px.line(fc_macro, x="ds", y="yhat", title="Macro Forecast 1996–2030")
-fig_macro.add_scatter(x=macro_df["date"], y=macro_df["price"], mode="lines", name="Actual")
-st.plotly_chart(fig_macro, use_container_width=True)
-latest_price = macro_df.iloc[-1]["price"]
-macro_pred = fc_macro.loc[fc_macro["ds"] == MACRO_END, "yhat"].iloc[0]
-macro_pct = (macro_pred - latest_price) / latest_price * 100
-st.metric("2030 예측가", f"{macro_pred:,.0f}", f"{macro_pct:+.1f}%")
 # -------------------------------------------------
 # MICRO FORECAST 2024‑2026 ------------------------
 # -------------------------------------------------
 st.subheader("🔎 2024–2026 단기 예측")
-micro_df = item_df[item_df["date"] >= MICRO_START]
-m_micro, fc_micro = fit_prophet(micro_df, MICRO_END)
-fig_micro = px.line(fc_micro, x="ds", y="yhat", title="Micro Forecast 2024–2026")
-fig_micro.add_scatter(x=micro_df["date"], y=micro_df["price"], mode="lines", name="Actual")
-st.plotly_chart(fig_micro, use_container_width=True)
-micro_pred = fc_micro.loc[fc_micro["ds"] == MICRO_END, "yhat"].iloc[0]
-micro_pct = (micro_pred - latest_price) / latest_price * 100
-st.metric("2026 예측가", f"{micro_pred:,.0f}", f"{micro_pct:+.1f}%")
 # -------------------------------------------------
 # SEASONALITY & PATTERN ---------------------------
 # -------------------------------------------------
 with st.expander("📆 시즈널리티 & 패턴 설명"):
-    comp_fig = m_micro.plot_components(fc_micro)
-    st.pyplot(comp_fig)
-    month_season = (fc_micro[["ds", "yearly"]]
-                    .assign(month=lambda d: d.ds.dt.month)
-                    .groupby("month")["yearly"].mean())
-    st.markdown(
-        f"**연간 피크 월:** {int(month_season.idxmax())}월  \n"
-        f"**연간 저점 월:** {int(month_season.idxmin())}월  \n"
-        f"**연간 변동폭:** {month_season.max() - month_season.min():.1f}")
-# -------------------------------------------------
-# CORRELATION HEATMAP -----------------------------
-# -------------------------------------------------
 # -------------------------------------------------
 # CORRELATION HEATMAP -----------------------------
 # -------------------------------------------------
 st.subheader("🧮 품목 간 상관관계")
-monthly_pivot = (raw_df.assign(month=lambda d: d.date.dt.to_period("M"))
-                        .groupby(["month", "item"], as_index=False)["price"].mean()
-                        .pivot(index="month", columns="item", values="price"))
-corr = monthly_pivot.corr()
-fig, ax = plt.subplots(figsize=(12, 10))
-mask = np.triu(np.ones_like(corr, dtype=bool))
-sns.heatmap(corr, mask=mask, annot=False, cmap="coolwarm", center=0,
-            square=True, linewidths=.5, cbar_kws={"shrink": .5})
-# Highlight correlations with selected item
-if selected_item in corr.columns:
-    item_corr = corr[selected_item].sort_values(ascending=False)
-    top_corr = item_corr.drop(selected_item).head(5)
-    bottom_corr = item_corr.drop(selected_item).tail(5)
-    col1, col2 = st.columns(2)
-    with col1:
-        st.markdown(f"**{selected_item}와 상관관계 높은 품목**")
-        for item, val in top_corr.items():
-            st.write(f"{item}: {val:.2f}")
-    with col2:
-        st.markdown(f"**{selected_item}와 상관관계 낮은 품목**")
-        for item, val in bottom_corr.items():
-            st.write(f"{item}: {val:.2f}")
-st.pyplot(fig)
 # -------------------------------------------------
 # FOOTER ------------------------------------------
 # -------------------------------------------------
 st.markdown("---")
-st.caption("© 2025 품목별 가격 예측 시스템 | 데이터 분석 자동화")

         df.rename(columns={df.columns[0]: "date"}, inplace=True)
     # ── convert YYYYMM string to datetime ──────────────────────────────
+    if "date" in df.columns and pd.api.types.is_object_dtype(df["date"]):  # Fixed typo here
         sample = str(df["date"].iloc[0])
         if sample.isdigit() and len(sample) in (6, 8):
             df["date"] = pd.to_datetime(df["date"].astype(str).str[:6], format="%Y%m", errors="coerce")
 @st.cache_data(show_spinner=False)
 def fit_prophet(df: pd.DataFrame, horizon_end: str):
+    # Make a copy and ensure we have data
+    df = df.copy()
+    df = df.dropna(subset=["date", "price"])
+    if len(df) < 2:
+        st.warning("데이터 포인트가 부족합니다. 예측을 위해서는 최소 2개 이상의 유효 데이터가 필요합니다.")
+        return None, None
+    # Convert to Prophet format
+    prophet_df = df.rename(columns={"date": "ds", "price": "y"})
+    # Fit the model
     m = Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False)
+    m.fit(prophet_df)
+    # Generate future dates
+    periods = max((pd.Timestamp(horizon_end) - df["date"].max()).days, 1)
     future = m.make_future_dataframe(periods=periods, freq="D")
+    # Make predictions
     forecast = m.predict(future)
     return m, forecast
 # MACRO FORECAST 1996‑2030 ------------------------
 # -------------------------------------------------
 st.header(f"📈 {selected_item} 가격 예측 대시보드")
+macro_df = item_df[item_df["date"] >= MACRO_START].copy()
+# Add diagnostic info
+with st.expander("데이터 진단"):
+    st.write(f"- 전체 데이터 수: {len(item_df)}")
+    st.write(f"- {MACRO_START} 이후 데이터 수: {len(macro_df)}")
+    st.write(f"- 기간: {macro_df['date'].min()} ~ {macro_df['date'].max()}")
+    st.write(macro_df.head())
+if len(macro_df) < 2:
+    st.warning(f"{MACRO_START} 이후 데이터가 충분하지 않습니다. 전체 기간 데이터를 표시합니다.")
+    fig = px.line(item_df, x="date", y="price", title=f"{selected_item} 과거 가격")
+    st.plotly_chart(fig, use_container_width=True)
+else:
+    try:
+        m_macro, fc_macro = fit_prophet(macro_df, MACRO_END)
+        if m_macro is not None and fc_macro is not None:
+            fig_macro = px.line(fc_macro, x="ds", y="yhat", title="Macro Forecast 1996–2030")
+            fig_macro.add_scatter(x=macro_df["date"], y=macro_df["price"], mode="lines", name="Actual")
+            st.plotly_chart(fig_macro, use_container_width=True)
+            latest_price = macro_df.iloc[-1]["price"]
+            macro_pred = fc_macro.loc[fc_macro["ds"] == MACRO_END, "yhat"].iloc[0]
+            macro_pct = (macro_pred - latest_price) / latest_price * 100
+            st.metric("2030 예측가", f"{macro_pred:,.0f}", f"{macro_pct:+.1f}%")
+        else:
+            st.warning("예측 모델을 생성할 수 없습니다.")
+            fig = px.line(item_df, x="date", y="price", title=f"{selected_item} 과거 가격")
+            st.plotly_chart(fig, use_container_width=True)
+    except Exception as e:
+        st.error(f"오류 발생: {str(e)}")
+        fig = px.line(item_df, x="date", y="price", title=f"{selected_item} 과거 가격")
+        st.plotly_chart(fig, use_container_width=True)
 # -------------------------------------------------
 # MICRO FORECAST 2024‑2026 ------------------------
 # -------------------------------------------------
 st.subheader("🔎 2024–2026 단기 예측")
+micro_df = item_df[item_df["date"] >= MICRO_START].copy()
+if len(micro_df) < 2:
+    st.warning(f"{MICRO_START} 이후 데이터가 충분하지 않습니다.")
+    fig = px.line(item_df, x="date", y="price", title=f"{selected_item} 최근 가격")
+    st.plotly_chart(fig, use_container_width=True)
+else:
+    try:
+        m_micro, fc_micro = fit_prophet(micro_df, MICRO_END)
+        if m_micro is not None and fc_micro is not None:
+            fig_micro = px.line(fc_micro, x="ds", y="yhat", title="Micro Forecast 2024–2026")
+            fig_micro.add_scatter(x=micro_df["date"], y=micro_df["price"], mode="lines", name="Actual")
+            st.plotly_chart(fig_micro, use_container_width=True)
+            latest_price = micro_df.iloc[-1]["price"]
+            micro_pred = fc_micro.loc[fc_micro["ds"] == MICRO_END, "yhat"].iloc[0]
+            micro_pct = (micro_pred - latest_price) / latest_price * 100
+            st.metric("2026 예측가", f"{micro_pred:,.0f}", f"{micro_pct:+.1f}%")
+        else:
+            st.warning("단기 예측 모델을 생성할 수 없습니다.")
+    except Exception as e:
+        st.error(f"단기 예측 오류: {str(e)}")
 # -------------------------------------------------
 # SEASONALITY & PATTERN ---------------------------
 # -------------------------------------------------
 with st.expander("📆 시즈널리티 & 패턴 설명"):
+    if 'm_micro' in locals() and m_micro is not None and 'fc_micro' in locals() and fc_micro is not None:
+        comp_fig = m_micro.plot_components(fc_micro)
+        st.pyplot(comp_fig)
+        month_season = (fc_micro[["ds", "yearly"]]
+                        .assign(month=lambda d: d.ds.dt.month)
+                        .groupby("month")["yearly"].mean())
+        st.markdown(
+            f"**연간 피크 월:** {int(month_season.idxmax())}월  \n"
+            f"**연간 저점 월:** {int(month_season.idxmin())}월  \n"
+            f"**연간 변동폭:** {month_season.max() - month_season.min():.1f}")
+    else:
+        st.info("패턴 분석을 위한 충분한 데이터가 없습니다.")
 # -------------------------------------------------
 # CORRELATION HEATMAP -----------------------------
 # -------------------------------------------------
 st.subheader("🧮 품목 간 상관관계")
+try:
+    monthly_pivot = (raw_df.assign(month=lambda d: d.date.dt.to_period("M"))
+                            .groupby(["month", "item"], as_index=False)["price"].mean()
+                            .pivot(index="month", columns="item", values="price"))
+    if monthly_pivot.shape[1] > 1:  # At least 2 items needed for correlation
+        corr = monthly_pivot.corr()
+        fig, ax = plt.subplots(figsize=(12, 10))
+        mask = np.triu(np.ones_like(corr, dtype=bool))
+        sns.heatmap(corr, mask=mask, annot=False, cmap="coolwarm", center=0,
+                    square=True, linewidths=.5, cbar_kws={"shrink": .5})
+        # Highlight correlations with selected item
+        if selected_item in corr.columns:
+            item_corr = corr[selected_item].sort_values(ascending=False)
+            top_corr = item_corr.drop(selected_item).head(5)
+            bottom_corr = item_corr.drop(selected_item).tail(5)
+            col1, col2 = st.columns(2)
+            with col1:
+                st.markdown(f"**{selected_item}와 상관관계 높은 품목**")
+                for item, val in top_corr.items():
+                    st.write(f"{item}: {val:.2f}")
+            with col2:
+                st.markdown(f"**{selected_item}와 상관관계 낮은 품목**")
+                for item, val in bottom_corr.items():
+                    st.write(f"{item}: {val:.2f}")
+        st.pyplot(fig)
+    else:
+        st.info("상관관계 분석을 위한 충분한 품목 데이터가 없습니다.")
+except Exception as e:
+    st.error(f"상관관계 분석 오류: {str(e)}")
 # -------------------------------------------------
 # FOOTER ------------------------------------------
 # -------------------------------------------------
 st.markdown("---")
+st.caption("© 2024 품목별 가격 예측 시스템 | 데이터 분석 자동화")