yokoha committed on
Commit
4a31bd8
·
verified ·
1 Parent(s): 828f0f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -39
app.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  import streamlit as st
3
  import pandas as pd
4
  import numpy as np
@@ -13,7 +12,7 @@ from pathlib import Path
13
  # CONFIG ------------------------------------------
14
  # -------------------------------------------------
15
  CSV_PATH = Path("price_data.csv")
16
- PARQUET_PATH = Path("domae-202503.parquet") # 1996-2025-03 ์ผ๊ฐ„/์›”๊ฐ„ ๊ฐ€๊ฒฉ
17
  MACRO_START, MACRO_END = "1996-01-01", "2030-12-31"
18
  MICRO_START, MICRO_END = "2020-01-01", "2026-12-31"
19
 
@@ -26,8 +25,9 @@ DATE_CANDIDATES = {"date", "ds", "ymd", "๋‚ ์งœ", "prce_reg_mm", "etl_ldg_dt"}
26
  ITEM_CANDIDATES = {"item", "ํ’ˆ๋ชฉ", "code", "category", "pdlt_nm", "spcs_nm"}
27
  PRICE_CANDIDATES = {"price", "y", "value", "๊ฐ€๊ฒฉ", "avrg_prce"}
28
 
 
29
  def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
30
- """Rename date/item/price columns to date, item, price. Create composite item if needed."""
31
  col_map = {}
32
  for c in df.columns:
33
  lc = c.lower()
@@ -36,32 +36,43 @@ def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
36
  elif lc in PRICE_CANDIDATES:
37
  col_map[c] = "price"
38
  elif lc in ITEM_CANDIDATES:
 
39
  if "item" not in col_map.values():
40
  col_map[c] = "item"
41
  else:
42
  col_map[c] = "species"
43
  df = df.rename(columns=col_map)
44
 
 
 
 
 
 
45
  if "date" not in df.columns and df.index.dtype.kind == "M":
46
  df.reset_index(inplace=True)
47
  df.rename(columns={df.columns[0]: "date"}, inplace=True)
48
 
49
- if "date" in df.columns and df["date"].dtype == object:
 
50
  sample = str(df["date"].iloc[0])
51
  if sample.isdigit() and len(sample) in (6, 8):
52
  df["date"] = pd.to_datetime(df["date"].astype(str).str[:6], format="%Y%m", errors="coerce")
53
 
 
54
  if "item" not in df.columns and {"pdlt_nm", "spcs_nm"}.issubset(df.columns):
55
  df["item"] = df["pdlt_nm"].str.strip() + "-" + df["spcs_nm"].str.strip()
56
 
 
57
  if {"item", "species"}.issubset(df.columns):
58
  df["item"] = df["item"].astype(str).str.strip() + "-" + df["species"].astype(str).str.strip()
59
  df.drop(columns=["species"], inplace=True)
60
 
61
  return df
62
 
 
63
  @st.cache_data(show_spinner=False)
64
  def load_data() -> pd.DataFrame:
 
65
  if PARQUET_PATH.exists():
66
  df = pd.read_parquet(PARQUET_PATH)
67
  elif CSV_PATH.exists():
@@ -71,6 +82,7 @@ def load_data() -> pd.DataFrame:
71
  st.stop()
72
 
73
  df = _standardize_columns(df)
 
74
  missing = {c for c in ["date", "item", "price"] if c not in df.columns}
75
  if missing:
76
  st.error(f"ํ•„์ˆ˜ ์ปฌ๋Ÿผ ๋ˆ„๋ฝ: {', '.join(missing)} โ€” ํŒŒ์ผ ์ปฌ๋Ÿผ๋ช…์„ ํ™•์ธํ•˜์„ธ์š”.")
@@ -81,10 +93,12 @@ def load_data() -> pd.DataFrame:
81
  df.sort_values("date", inplace=True)
82
  return df
83
 
 
84
  @st.cache_data(show_spinner=False)
85
  def get_items(df: pd.DataFrame):
86
  return sorted(df["item"].unique())
87
 
 
88
  @st.cache_data(show_spinner=False)
89
  def fit_prophet(df: pd.DataFrame, horizon_end: str):
90
  m = Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False)
@@ -110,7 +124,7 @@ if item_df.empty:
110
  st.stop()
111
 
112
  # -------------------------------------------------
113
- # MACRO FORECAST 1996-2030 ------------------------
114
  # -------------------------------------------------
115
  st.header(f"๐Ÿ“ˆ {selected_item} ๊ฐ€๊ฒฉ ์˜ˆ์ธก ๋Œ€์‹œ๋ณด๋“œ")
116
  macro_df = item_df[item_df["date"] >= MACRO_START]
@@ -126,7 +140,7 @@ macro_pct = (macro_pred - latest_price) / latest_price * 100
126
  st.metric("2030 ์˜ˆ์ธก๊ฐ€", f"{macro_pred:,.0f}", f"{macro_pct:+.1f}%")
127
 
128
  # -------------------------------------------------
129
- # MICRO FORECAST 2024-2026 ------------------------
130
  # -------------------------------------------------
131
  st.subheader("๐Ÿ”Ž 2024โ€“2026 ๋‹จ๊ธฐ ์˜ˆ์ธก")
132
 
@@ -147,48 +161,23 @@ with st.expander("๐Ÿ“† ์‹œ์ฆˆ๋„๋ฆฌํ‹ฐ & ํŒจํ„ด ์„ค๋ช…"):
147
  comp_fig = m_micro.plot_components(fc_micro)
148
  st.pyplot(comp_fig)
149
 
150
- month_season = (
151
- fc_micro[["ds", "yearly"]]
152
- .assign(month=lambda d: d.ds.dt.month)
153
- .groupby("month")["yearly"].mean()
154
- )
155
  st.markdown(
156
  f"**์—ฐ๊ฐ„ ํ”ผํฌ ์›”:** {int(month_season.idxmax())}์›” \n"
157
  f"**์—ฐ๊ฐ„ ์ €์  ์›”:** {int(month_season.idxmin())}์›” \n"
158
- f"**์—ฐ๊ฐ„ ๋ณ€๋™ํญ:** {month_season.max() - month_season.min():.1f}"
159
- )
160
 
161
  # -------------------------------------------------
162
  # CORRELATION HEATMAP -----------------------------
163
  # -------------------------------------------------
164
  st.subheader("๐Ÿงฎ ํ’ˆ๋ชฉ ๊ฐ„ ์ƒ๊ด€๊ด€๊ณ„")
165
- monthly_pivot = (
166
- raw_df.assign(month=lambda d: d.date.dt.to_period("M"))
167
- .groupby(["month", "item"], as_index=False)["price"]
168
- .mean()
169
- .pivot(index="month", columns="item", values="price")
170
- )
171
 
172
  corr = monthly_pivot.corr()
173
  fig, ax = plt.subplots(figsize=(12, 10))
174
  mask = np.triu(np.ones_like(corr, dtype=bool))
175
- sns.heatmap(corr, mask=mask, cmap="RdBu_r", center=0, linewidths=.5, ax=ax)
176
- st.pyplot(fig)
177
-
178
- st.info("๋นจ๊ฐ„ ์˜์—ญ: ๊ฐ€๊ฒฉ ๋™์กฐํ™” / ํŒŒ๋ž€ ์˜์—ญ: ๋Œ€์ฒด์žฌ ๊ฐ€๋Šฅ์„ฑ")
179
-
180
- # -------------------------------------------------
181
- # VOLATILITY --------------------------------------
182
- # -------------------------------------------------
183
- st.subheader("๐Ÿ“Š 30์ผ ์ด๋™ ํ‘œ์ค€ํŽธ์ฐจ (๊ฐ€๊ฒฉ ๋ณ€๋™์„ฑ)")
184
- vol = (
185
- item_df.set_index("date")["price"]
186
- .rolling(30)
187
- .std()
188
- .dropna()
189
- .reset_index()
190
- )
191
- fig_vol = px.area(vol, x="date", y="price", title="Rolling 30D Std Dev")
192
- st.plotly_chart(fig_vol, use_container_width=True)
193
-
194
- st.caption("๋ฐ์ดํ„ฐ: domae-202503.parquet ยท Prophet ์˜ˆ์ธก ยท Streamlit ๋Œ€์‹œ๋ณด๋“œ")
 
 
1
  import streamlit as st
2
  import pandas as pd
3
  import numpy as np
 
12
# -------------------------------------------------
# CONFIG ------------------------------------------
# -------------------------------------------------
CSV_PATH = Path("price_data.csv")
PARQUET_PATH = Path("domae-202503.parquet")  # wholesale prices, 1996 through 2025-03
MACRO_START, MACRO_END = "1996-01-01", "2030-12-31"  # long-horizon forecast window
MICRO_START, MICRO_END = "2020-01-01", "2026-12-31"  # short-horizon forecast window
18
 
 
25
  ITEM_CANDIDATES = {"item", "ํ’ˆ๋ชฉ", "code", "category", "pdlt_nm", "spcs_nm"}
26
  PRICE_CANDIDATES = {"price", "y", "value", "๊ฐ€๊ฒฉ", "avrg_prce"}
27
 
28
+
29
  def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
30
+ """Standardize column names to date/item/price and deduplicate."""
31
  col_map = {}
32
  for c in df.columns:
33
  lc = c.lower()
 
36
  elif lc in PRICE_CANDIDATES:
37
  col_map[c] = "price"
38
  elif lc in ITEM_CANDIDATES:
39
+ # first hit as item, second as species
40
  if "item" not in col_map.values():
41
  col_map[c] = "item"
42
  else:
43
  col_map[c] = "species"
44
  df = df.rename(columns=col_map)
45
 
46
+ # โ”€โ”€ handle duplicated columns after rename โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
47
+ if df.columns.duplicated().any():
48
+ df = df.loc[:, ~df.columns.duplicated()]
49
+
50
+ # โ”€โ”€ index datetime to column โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
51
  if "date" not in df.columns and df.index.dtype.kind == "M":
52
  df.reset_index(inplace=True)
53
  df.rename(columns={df.columns[0]: "date"}, inplace=True)
54
 
55
+ # โ”€โ”€ convert YYYYMM string to datetime โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
56
+ if "date" in df.columns and pd.api.types.is_object_dtype(df["date" ]):
57
  sample = str(df["date"].iloc[0])
58
  if sample.isdigit() and len(sample) in (6, 8):
59
  df["date"] = pd.to_datetime(df["date"].astype(str).str[:6], format="%Y%m", errors="coerce")
60
 
61
+ # โ”€โ”€ build item from pdlt_nm + spcs_nm if needed โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
62
  if "item" not in df.columns and {"pdlt_nm", "spcs_nm"}.issubset(df.columns):
63
  df["item"] = df["pdlt_nm"].str.strip() + "-" + df["spcs_nm"].str.strip()
64
 
65
+ # โ”€โ”€ merge item + species โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
66
  if {"item", "species"}.issubset(df.columns):
67
  df["item"] = df["item"].astype(str).str.strip() + "-" + df["species"].astype(str).str.strip()
68
  df.drop(columns=["species"], inplace=True)
69
 
70
  return df
71
 
72
+
73
  @st.cache_data(show_spinner=False)
74
  def load_data() -> pd.DataFrame:
75
+ """Load price data from Parquet if available, else CSV. Handle flexible schema."""
76
  if PARQUET_PATH.exists():
77
  df = pd.read_parquet(PARQUET_PATH)
78
  elif CSV_PATH.exists():
 
82
  st.stop()
83
 
84
  df = _standardize_columns(df)
85
+
86
  missing = {c for c in ["date", "item", "price"] if c not in df.columns}
87
  if missing:
88
  st.error(f"ํ•„์ˆ˜ ์ปฌ๋Ÿผ ๋ˆ„๋ฝ: {', '.join(missing)} โ€” ํŒŒ์ผ ์ปฌ๋Ÿผ๋ช…์„ ํ™•์ธํ•˜์„ธ์š”.")
 
93
  df.sort_values("date", inplace=True)
94
  return df
95
 
96
+
97
@st.cache_data(show_spinner=False)
def get_items(df: pd.DataFrame):
    """Return the distinct item labels in *df*, sorted alphabetically."""
    distinct = df["item"].drop_duplicates()
    return sorted(distinct)
100
 
101
+
102
  @st.cache_data(show_spinner=False)
103
  def fit_prophet(df: pd.DataFrame, horizon_end: str):
104
  m = Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False)
 
124
  st.stop()
125
 
126
  # -------------------------------------------------
127
+ # MACRO FORECAST 1996โ€‘2030 ------------------------
128
  # -------------------------------------------------
129
  st.header(f"๐Ÿ“ˆ {selected_item} ๊ฐ€๊ฒฉ ์˜ˆ์ธก ๋Œ€์‹œ๋ณด๋“œ")
130
  macro_df = item_df[item_df["date"] >= MACRO_START]
 
140
  st.metric("2030 ์˜ˆ์ธก๊ฐ€", f"{macro_pred:,.0f}", f"{macro_pct:+.1f}%")
141
 
142
  # -------------------------------------------------
143
+ # MICRO FORECAST 2024โ€‘2026 ------------------------
144
  # -------------------------------------------------
145
  st.subheader("๐Ÿ”Ž 2024โ€“2026 ๋‹จ๊ธฐ ์˜ˆ์ธก")
146
 
 
161
  comp_fig = m_micro.plot_components(fc_micro)
162
  st.pyplot(comp_fig)
163
 
164
+ month_season = (fc_micro[["ds", "yearly"]]
165
+ .assign(month=lambda d: d.ds.dt.month)
166
+ .groupby("month")["yearly"].mean())
 
 
167
  st.markdown(
168
  f"**์—ฐ๊ฐ„ ํ”ผํฌ ์›”:** {int(month_season.idxmax())}์›” \n"
169
  f"**์—ฐ๊ฐ„ ์ €์  ์›”:** {int(month_season.idxmin())}์›” \n"
170
+ f"**์—ฐ๊ฐ„ ๋ณ€๋™ํญ:** {month_season.max() - month_season.min():.1f}")
 
171
 
172
  # -------------------------------------------------
173
  # CORRELATION HEATMAP -----------------------------
174
  # -------------------------------------------------
175
  st.subheader("๐Ÿงฎ ํ’ˆ๋ชฉ ๊ฐ„ ์ƒ๊ด€๊ด€๊ณ„")
176
# Monthly mean price per item, reshaped to a wide (month x item) table
# so that the item-vs-item correlation matrix can be computed below.
monthly_pivot = (
    raw_df.assign(month=lambda d: d.date.dt.to_period("M"))
    .groupby(["month", "item"], as_index=False)["price"]
    .mean()
    .pivot(index="month", columns="item", values="price")
)
 
 
 
179
 
180
  corr = monthly_pivot.corr()
181
  fig, ax = plt.subplots(figsize=(12, 10))
182
  mask = np.triu(np.ones_like(corr, dtype=bool))
183
+ sns.heatmap(c