yokoha committed on
Commit
3268778
·
verified ·
1 Parent(s): d7ad409

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -18
app.py CHANGED
@@ -12,7 +12,7 @@ from pathlib import Path
12
  # CONFIG ------------------------------------------
13
  # -------------------------------------------------
14
  CSV_PATH = Path("price_data.csv")
15
- PARQUET_PATH = Path("domae-202503.parquet") # 1996โ€‘1993-03 ๊ฐ€๊ฒฉ ๋ฐ์ดํ„ฐ
16
  MACRO_START, MACRO_END = "1996-01-01", "2030-12-31"
17
  MICRO_START, MICRO_END = "2020-01-01", "2026-12-31"
18
 
@@ -21,18 +21,50 @@ st.set_page_config(page_title="ํ’ˆ๋ชฉ๋ณ„ ๊ฐ€๊ฒฉ ์˜ˆ์ธก", page_icon="๐Ÿ“ˆ", layou
21
  # -------------------------------------------------
22
  # UTILITIES ---------------------------------------
23
  # -------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  @st.cache_data(show_spinner=False)
25
  def load_data() -> pd.DataFrame:
26
- """Load price data from Parquet if available, else CSV."""
27
  if PARQUET_PATH.exists():
28
  df = pd.read_parquet(PARQUET_PATH)
29
  elif CSV_PATH.exists():
30
  df = pd.read_csv(CSV_PATH)
31
  else:
32
- st.error("๋ฐ์ดํ„ฐ ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. price_data.csv ๋˜๋Š” domae-202503.parquet" )
 
 
 
 
 
 
 
33
  st.stop()
34
- # ํ‘œ์ค€ํ™”
35
- df["date"] = pd.to_datetime(df["date"])
 
36
  df.sort_values("date", inplace=True)
37
  return df
38
 
@@ -44,7 +76,8 @@ def get_items(df: pd.DataFrame):
44
  def fit_prophet(df: pd.DataFrame, horizon_end: str):
45
  m = Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False)
46
  m.fit(df.rename(columns={"date": "ds", "price": "y"}))
47
- future = m.make_future_dataframe(periods=(pd.Timestamp(horizon_end) - df["date"].max()).days, freq="D")
 
48
  forecast = m.predict(future)
49
  return m, forecast
50
 
@@ -64,12 +97,11 @@ if item_df.empty:
64
  st.stop()
65
 
66
  # -------------------------------------------------
67
- # PLOTS -------------------------------------------
68
  # -------------------------------------------------
69
  st.header(f"๐Ÿ“ˆ {selected_item} ๊ฐ€๊ฒฉ ์˜ˆ์ธก ๋Œ€์‹œ๋ณด๋“œ")
70
-
71
- # Macro forecast 1996โ€“2030
72
  macro_df = item_df[item_df["date"] >= MACRO_START]
 
73
  m_macro, fc_macro = fit_prophet(macro_df, MACRO_END)
74
  fig_macro = px.line(fc_macro, x="ds", y="yhat", title="Macro Forecast 1996โ€“2030")
75
  fig_macro.add_scatter(x=macro_df["date"], y=macro_df["price"], mode="lines", name="Actual")
@@ -80,8 +112,11 @@ macro_pred = fc_macro.loc[fc_macro["ds"] == MACRO_END, "yhat"].iloc[0]
80
  macro_pct = (macro_pred - latest_price) / latest_price * 100
81
  st.metric("2030 ์˜ˆ์ธก๊ฐ€", f"{macro_pred:,.0f}", f"{macro_pct:+.1f}%")
82
 
83
- # Micro forecast 2024โ€“2026
 
 
84
  st.subheader("๐Ÿ”Ž 2024โ€“2026 ๋‹จ๊ธฐ ์˜ˆ์ธก")
 
85
  micro_df = item_df[item_df["date"] >= MICRO_START]
86
  m_micro, fc_micro = fit_prophet(micro_df, MICRO_END)
87
  fig_micro = px.line(fc_micro, x="ds", y="yhat", title="Micro Forecast 2024โ€“2026")
@@ -92,36 +127,43 @@ micro_pred = fc_micro.loc[fc_micro["ds"] == MICRO_END, "yhat"].iloc[0]
92
  micro_pct = (micro_pred - latest_price) / latest_price * 100
93
  st.metric("2026 ์˜ˆ์ธก๊ฐ€", f"{micro_pred:,.0f}", f"{micro_pct:+.1f}%")
94
 
95
- # Seasonality components
 
 
96
  with st.expander("๐Ÿ“† ์‹œ์ฆˆ๋„๋ฆฌํ‹ฐ & ํŒจํ„ด ์„ค๋ช…"):
97
  comp_fig = m_micro.plot_components(fc_micro)
98
  st.pyplot(comp_fig)
 
99
  month_season = (fc_micro[["ds", "yearly"]]
100
  .assign(month=lambda d: d.ds.dt.month)
101
  .groupby("month")["yearly"].mean())
102
  st.markdown(
103
- f"**์—ฐ๊ฐ„ ํ”ผํฌ ์›”:** {int(month_season.idxmax())}์›”\n\n"
104
- f"**์—ฐ๊ฐ„ ์ €์  ์›”:** {int(month_season.idxmin())}์›”\n\n"
105
  f"**์—ฐ๊ฐ„ ๋ณ€๋™ํญ:** {month_season.max() - month_season.min():.1f}")
106
 
107
- # Correlation heatmap
 
 
108
  st.subheader("๐Ÿงฎ ํ’ˆ๋ชฉ ๊ฐ„ ์ƒ๊ด€๊ด€๊ณ„")
109
  monthly_pivot = (raw_df.assign(month=lambda d: d.date.dt.to_period("M"))
110
  .groupby(["month", "item"], as_index=False)["price"].mean()
111
  .pivot(index="month", columns="item", values="price"))
112
 
113
  corr = monthly_pivot.corr()
114
- mask = np.triu(np.ones_like(corr, dtype=bool))
115
  fig, ax = plt.subplots(figsize=(12, 10))
 
116
  sns.heatmap(corr, mask=mask, cmap="RdBu_r", center=0, linewidths=.5, ax=ax)
117
  st.pyplot(fig)
118
 
119
- st.info("๋นจ๊ฐ„ ์˜์—ญ: ๊ฐ€๊ฒฉ ๋™์กฐํ™” / ํŒŒ๋ž€ ์˜์—ญ: ๋Œ€์ฒด์žฌ ๊ฐ€๋Šฅ์„ฑ.")
120
 
121
- # Volatility Chart
 
 
122
  st.subheader("๐Ÿ“Š 30์ผ ์ด๋™ ํ‘œ์ค€ํŽธ์ฐจ (๊ฐ€๊ฒฉ ๋ณ€๋™์„ฑ)")
123
  vol = item_df.set_index("date")["price"].rolling(30).std().dropna().reset_index()
124
  fig_vol = px.area(vol, x="date", y="price", title="Rolling 30D Std Dev")
125
  st.plotly_chart(fig_vol, use_container_width=True)
126
 
127
- st.caption("๋ฐ์ดํ„ฐ: domae-202503.parquet ยท Prophet ์˜ˆ์ธก ยท Streamlit ๋Œ€์‹œ๋ณด๋“œ")
 
12
  # CONFIG ------------------------------------------
13
  # -------------------------------------------------
14
  CSV_PATH = Path("price_data.csv")
15
+ PARQUET_PATH = Path("domae-202503.parquet") # 1996-2025-03 ์ผ๊ฐ„ ๊ฐ€๊ฒฉ
16
  MACRO_START, MACRO_END = "1996-01-01", "2030-12-31"
17
  MICRO_START, MICRO_END = "2020-01-01", "2026-12-31"
18
 
 
21
  # -------------------------------------------------
22
  # UTILITIES ---------------------------------------
23
  # -------------------------------------------------
24
+ DATE_CANDIDATES = {"date", "ds", "ymd", "๋‚ ์งœ"}
25
+ ITEM_CANDIDATES = {"item", "ํ’ˆ๋ชฉ", "code", "category"}
26
+ PRICE_CANDIDATES = {"price", "y", "value", "๊ฐ€๊ฒฉ"}
27
+
28
+ def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
29
+ """Rename date/item/price cols to date, item, price (in-place)."""
30
+ col_map = {}
31
+ for c in df.columns:
32
+ lc = c.lower()
33
+ if lc in DATE_CANDIDATES: # date
34
+ col_map[c] = "date"
35
+ elif lc in ITEM_CANDIDATES:
36
+ col_map[c] = "item"
37
+ elif lc in PRICE_CANDIDATES:
38
+ col_map[c] = "price"
39
+ df.rename(columns=col_map, inplace=True)
40
+
41
+ # date might be index
42
+ if "date" not in df.columns:
43
+ if df.index.dtype.kind == "M":
44
+ df.reset_index(inplace=True)
45
+ df.rename(columns={df.columns[0]: "date"}, inplace=True)
46
+ return df
47
+
@st.cache_data(show_spinner=False)
def load_data() -> pd.DataFrame:
    """Load the price table and normalise it to ``date``/``item``/``price``.

    ``PARQUET_PATH`` is preferred; ``CSV_PATH`` is the fallback. Column names
    are normalised through ``_standardize_columns``. When neither file exists,
    or a required column cannot be identified, an error is shown with
    ``st.error`` and the script is halted with ``st.stop()``.

    Returns:
        The frame with ``date`` parsed to datetimes (unparseable values
        become NaT), rows lacking a date, item or price removed, and rows
        sorted by ascending date.
    """
    if PARQUET_PATH.exists():
        df = pd.read_parquet(PARQUET_PATH)
    elif CSV_PATH.exists():
        df = pd.read_csv(CSV_PATH)
    else:
        st.error("💾 price_data.csv 또는 domae-202503.parquet 파일을 찾을 수 없습니다.")
        st.stop()

    df = _standardize_columns(df)

    # A list (not a set) keeps the reported column order stable between runs.
    missing = [c for c in ("date", "item", "price") if c not in df.columns]
    if missing:
        st.error(f"필수 컬럼 누락: {', '.join(missing)} — 파일 컬럼명을 확인하세요.")
        st.stop()

    # errors="coerce" turns malformed dates into NaT so dropna can discard them.
    df["date"] = pd.to_datetime(df["date"], errors="coerce")
    df.dropna(subset=["date", "item", "price"], inplace=True)
    df.sort_values("date", inplace=True)
    return df
70
 
 
def fit_prophet(df: pd.DataFrame, horizon_end: str):
    """Fit a yearly-seasonality Prophet model and forecast daily to *horizon_end*.

    Args:
        df: History with a ``date`` column and a ``price`` column; they are
            renamed to Prophet's ``ds``/``y`` for fitting.
        horizon_end: Date string (parsed with ``pd.Timestamp``) marking the
            last day to forecast.

    Returns:
        ``(model, forecast)`` -- the fitted ``Prophet`` instance and the frame
        returned by ``model.predict``.
    """
    m = Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False)
    m.fit(df.rename(columns={"date": "ds", "price": "y"}))
    # Clamp at zero: when the history already reaches past horizon_end the
    # day difference is negative, which is not a valid number of periods.
    periods = max((pd.Timestamp(horizon_end) - df["date"].max()).days, 0)
    future = m.make_future_dataframe(periods=periods, freq="D")
    forecast = m.predict(future)
    return m, forecast
83
 
 
97
  st.stop()
98
 
99
  # -------------------------------------------------
100
+ # MACRO FORECAST 1996-2030 ------------------------
101
  # -------------------------------------------------
102
  st.header(f"๐Ÿ“ˆ {selected_item} ๊ฐ€๊ฒฉ ์˜ˆ์ธก ๋Œ€์‹œ๋ณด๋“œ")
 
 
103
  macro_df = item_df[item_df["date"] >= MACRO_START]
104
+
105
  m_macro, fc_macro = fit_prophet(macro_df, MACRO_END)
106
  fig_macro = px.line(fc_macro, x="ds", y="yhat", title="Macro Forecast 1996โ€“2030")
107
  fig_macro.add_scatter(x=macro_df["date"], y=macro_df["price"], mode="lines", name="Actual")
 
112
  macro_pct = (macro_pred - latest_price) / latest_price * 100
113
  st.metric("2030 ์˜ˆ์ธก๊ฐ€", f"{macro_pred:,.0f}", f"{macro_pct:+.1f}%")
114
 
115
+ # -------------------------------------------------
116
+ # MICRO FORECAST 2024-2026 ------------------------
117
+ # -------------------------------------------------
118
  st.subheader("๐Ÿ”Ž 2024โ€“2026 ๋‹จ๊ธฐ ์˜ˆ์ธก")
119
+
120
  micro_df = item_df[item_df["date"] >= MICRO_START]
121
  m_micro, fc_micro = fit_prophet(micro_df, MICRO_END)
122
  fig_micro = px.line(fc_micro, x="ds", y="yhat", title="Micro Forecast 2024โ€“2026")
 
127
  micro_pct = (micro_pred - latest_price) / latest_price * 100
128
  st.metric("2026 ์˜ˆ์ธก๊ฐ€", f"{micro_pred:,.0f}", f"{micro_pct:+.1f}%")
129
 
130
+ # -------------------------------------------------
131
+ # SEASONALITY & PATTERN ---------------------------
132
+ # -------------------------------------------------
133
  with st.expander("๐Ÿ“† ์‹œ์ฆˆ๋„๋ฆฌํ‹ฐ & ํŒจํ„ด ์„ค๋ช…"):
134
  comp_fig = m_micro.plot_components(fc_micro)
135
  st.pyplot(comp_fig)
136
+
137
  month_season = (fc_micro[["ds", "yearly"]]
138
  .assign(month=lambda d: d.ds.dt.month)
139
  .groupby("month")["yearly"].mean())
140
  st.markdown(
141
+ f"**์—ฐ๊ฐ„ ํ”ผํฌ ์›”:** {int(month_season.idxmax())}์›” \n"
142
+ f"**์—ฐ๊ฐ„ ์ €์  ์›”:** {int(month_season.idxmin())}์›” \n"
143
  f"**์—ฐ๊ฐ„ ๋ณ€๋™ํญ:** {month_season.max() - month_season.min():.1f}")
144
 
145
+ # -------------------------------------------------
146
+ # CORRELATION HEATMAP -----------------------------
147
+ # -------------------------------------------------
148
  st.subheader("๐Ÿงฎ ํ’ˆ๋ชฉ ๊ฐ„ ์ƒ๊ด€๊ด€๊ณ„")
149
  monthly_pivot = (raw_df.assign(month=lambda d: d.date.dt.to_period("M"))
150
  .groupby(["month", "item"], as_index=False)["price"].mean()
151
  .pivot(index="month", columns="item", values="price"))
152
 
153
  corr = monthly_pivot.corr()
 
154
  fig, ax = plt.subplots(figsize=(12, 10))
155
+ mask = np.triu(np.ones_like(corr, dtype=bool))
156
  sns.heatmap(corr, mask=mask, cmap="RdBu_r", center=0, linewidths=.5, ax=ax)
157
  st.pyplot(fig)
158
 
159
+ st.info("๋นจ๊ฐ„ ์˜์—ญ: ๊ฐ€๊ฒฉ ๋™์กฐํ™” / ํŒŒ๋ž€ ์˜์—ญ: ๋Œ€์ฒด์žฌ ๊ฐ€๋Šฅ์„ฑ")
160
 
161
+ # -------------------------------------------------
162
+ # VOLATILITY --------------------------------------
163
+ # -------------------------------------------------
164
  st.subheader("๐Ÿ“Š 30์ผ ์ด๋™ ํ‘œ์ค€ํŽธ์ฐจ (๊ฐ€๊ฒฉ ๋ณ€๋™์„ฑ)")
165
  vol = item_df.set_index("date")["price"].rolling(30).std().dropna().reset_index()
166
  fig_vol = px.area(vol, x="date", y="price", title="Rolling 30D Std Dev")
167
  st.plotly_chart(fig_vol, use_container_width=True)
168
 
169
+ st.caption("๋ฐ์ดํ„ฐ: domae-202503.parquet ยท Prophet ์˜ˆ์ธก ยท Streamlit ๋Œ€์‹œ๋ณด๋“œ")