yokoha committed on
Commit
0f95c64
·
verified ·
1 Parent(s): dd6e62d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +196 -67
app.py CHANGED
@@ -7,15 +7,31 @@ import seaborn as sns
7
  import matplotlib.pyplot as plt
8
  from datetime import date
9
  from pathlib import Path
 
 
10
 
11
  # -------------------------------------------------
12
  # CONFIG ------------------------------------------
13
  # -------------------------------------------------
14
  CSV_PATH = Path("price_data.csv")
15
- PARQUET_PATH = Path("domae-202503.parquet") # 1996‑2025‑03 일간/월간 가격
16
  MACRO_START, MACRO_END = "1996-01-01", "2030-12-31"
17
  MICRO_START, MICRO_END = "2020-01-01", "2026-12-31"
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  st.set_page_config(page_title="품목별 가격 예측", page_icon="📈", layout="wide")
20
 
21
  # -------------------------------------------------
@@ -53,10 +69,11 @@ def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
53
  df.rename(columns={df.columns[0]: "date"}, inplace=True)
54
 
55
  # ── convert YYYYMM string to datetime ──────────────────────────────
56
- if "date" in df.columns and pd.api.types.is_object_dtype(df["date"]): # Fixed typo here
57
- sample = str(df["date"].iloc[0])
58
- if sample.isdigit() and len(sample) in (6, 8):
59
- df["date"] = pd.to_datetime(df["date"].astype(str).str[:6], format="%Y%m", errors="coerce")
 
60
 
61
  # ── build item from pdlt_nm + spcs_nm if needed ────────────────────
62
  if "item" not in df.columns and {"pdlt_nm", "spcs_nm"}.issubset(df.columns):
@@ -73,62 +90,105 @@ def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
73
  @st.cache_data(show_spinner=False)
74
  def load_data() -> pd.DataFrame:
75
  """Load price data from Parquet if available, else CSV. Handle flexible schema."""
76
- if PARQUET_PATH.exists():
77
- df = pd.read_parquet(PARQUET_PATH)
78
- elif CSV_PATH.exists():
79
- df = pd.read_csv(CSV_PATH)
80
- else:
81
- st.error("💾 price_data.csv 또는 domae-202503.parquet 파일을 찾을 수 없습니다.")
82
- st.stop()
83
-
84
- df = _standardize_columns(df)
85
-
86
- missing = {c for c in ["date", "item", "price"] if c not in df.columns}
87
- if missing:
88
- st.error(f"ํ•„์ˆ˜ ์ปฌ๋Ÿผ ๋ˆ„๋ฝ: {', '.join(missing)} โ€” ํŒŒ์ผ ์ปฌ๋Ÿผ๋ช…์„ ํ™•์ธํ•˜์„ธ์š”.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  st.stop()
90
 
91
- df["date"] = pd.to_datetime(df["date"], errors="coerce")
92
- df = df.dropna(subset=["date", "item", "price"])
93
- df.sort_values("date", inplace=True)
94
- return df
95
-
96
 
97
  @st.cache_data(show_spinner=False)
98
  def get_items(df: pd.DataFrame):
99
  return sorted(df["item"].unique())
100
 
101
 
102
- @st.cache_data(show_spinner=False)
103
  def fit_prophet(df: pd.DataFrame, horizon_end: str):
104
  # Make a copy and ensure we have data
105
  df = df.copy()
106
  df = df.dropna(subset=["date", "price"])
107
 
 
 
 
108
  if len(df) < 2:
109
- st.warning("๋ฐ์ดํ„ฐ ํฌ์ธํŠธ๊ฐ€ ๋ถ€์กฑํ•ฉ๋‹ˆ๋‹ค. ์˜ˆ์ธก์„ ์œ„ํ•ด์„œ๋Š” ์ตœ์†Œ 2๊ฐœ ์ด์ƒ์˜ ์œ ํšจ ๋ฐ์ดํ„ฐ๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค.")
110
  return None, None
111
 
112
  # Convert to Prophet format
113
  prophet_df = df.rename(columns={"date": "ds", "price": "y"})
114
 
115
- # Fit the model
116
- m = Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False)
117
- m.fit(prophet_df)
118
-
119
- # Generate future dates
120
- periods = max((pd.Timestamp(horizon_end) - df["date"].max()).days, 1)
121
- future = m.make_future_dataframe(periods=periods, freq="D")
122
-
123
- # Make predictions
124
- forecast = m.predict(future)
125
- return m, forecast
 
 
 
 
126
 
127
  # -------------------------------------------------
128
  # LOAD DATA ---------------------------------------
129
  # -------------------------------------------------
130
  raw_df = load_data()
131
 
 
 
 
 
132
  st.sidebar.header("🔍 품목 선택")
133
  selected_item = st.sidebar.selectbox("ํ’ˆ๋ชฉ", get_items(raw_df))
134
  current_date = date.today()
@@ -143,29 +203,50 @@ if item_df.empty:
143
  # MACRO FORECAST 1996‑2030 ------------------------
144
  # -------------------------------------------------
145
  st.header(f"📈 {selected_item} 가격 예측 대시보드")
146
- macro_df = item_df[item_df["date"] >= MACRO_START].copy()
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
  # Add diagnostic info
149
  with st.expander("๋ฐ์ดํ„ฐ ์ง„๋‹จ"):
150
  st.write(f"- ์ „์ฒด ๋ฐ์ดํ„ฐ ์ˆ˜: {len(item_df)}")
151
- st.write(f"- {MACRO_START} ์ดํ›„ ๋ฐ์ดํ„ฐ ์ˆ˜: {len(macro_df)}")
152
- st.write(f"- ๊ธฐ๊ฐ„: {macro_df['date'].min()} ~ {macro_df['date'].max()}")
153
- st.write(macro_df.head())
 
 
 
154
 
155
  if len(macro_df) < 2:
156
- st.warning(f"{MACRO_START} ์ดํ›„ ๋ฐ์ดํ„ฐ๊ฐ€ ์ถฉ๋ถ„ํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค. ์ „์ฒด ๊ธฐ๊ฐ„ ๋ฐ์ดํ„ฐ๋ฅผ ํ‘œ์‹œํ•ฉ๋‹ˆ๋‹ค.")
157
  fig = px.line(item_df, x="date", y="price", title=f"{selected_item} ๊ณผ๊ฑฐ ๊ฐ€๊ฒฉ")
158
  st.plotly_chart(fig, use_container_width=True)
159
  else:
160
  try:
161
- m_macro, fc_macro = fit_prophet(macro_df, MACRO_END)
 
 
162
  if m_macro is not None and fc_macro is not None:
163
- fig_macro = px.line(fc_macro, x="ds", y="yhat", title="Macro Forecast 1996โ€“2030")
164
- fig_macro.add_scatter(x=macro_df["date"], y=macro_df["price"], mode="lines", name="Actual")
165
  st.plotly_chart(fig_macro, use_container_width=True)
166
 
167
  latest_price = macro_df.iloc[-1]["price"]
168
- macro_pred = fc_macro.loc[fc_macro["ds"] == MACRO_END, "yhat"].iloc[0]
 
 
 
169
  macro_pct = (macro_pred - latest_price) / latest_price * 100
170
  st.metric("2030 ์˜ˆ์ธก๊ฐ€", f"{macro_pred:,.0f}", f"{macro_pct:+.1f}%")
171
  else:
@@ -173,7 +254,7 @@ else:
173
  fig = px.line(item_df, x="date", y="price", title=f"{selected_item} ๊ณผ๊ฑฐ ๊ฐ€๊ฒฉ")
174
  st.plotly_chart(fig, use_container_width=True)
175
  except Exception as e:
176
- st.error(f"์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}")
177
  fig = px.line(item_df, x="date", y="price", title=f"{selected_item} ๊ณผ๊ฑฐ ๊ฐ€๊ฒฉ")
178
  st.plotly_chart(fig, use_container_width=True)
179
 
@@ -182,21 +263,40 @@ else:
182
  # -------------------------------------------------
183
  st.subheader("🔎 2024–2026 단기 예측")
184
 
185
- micro_df = item_df[item_df["date"] >= MICRO_START].copy()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  if len(micro_df) < 2:
187
  st.warning(f"{MICRO_START} ์ดํ›„ ๋ฐ์ดํ„ฐ๊ฐ€ ์ถฉ๋ถ„ํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค.")
188
  fig = px.line(item_df, x="date", y="price", title=f"{selected_item} ์ตœ๊ทผ ๊ฐ€๊ฒฉ")
189
  st.plotly_chart(fig, use_container_width=True)
190
  else:
191
  try:
192
- m_micro, fc_micro = fit_prophet(micro_df, MICRO_END)
 
 
193
  if m_micro is not None and fc_micro is not None:
194
- fig_micro = px.line(fc_micro, x="ds", y="yhat", title="Micro Forecast 2024โ€“2026")
195
- fig_micro.add_scatter(x=micro_df["date"], y=micro_df["price"], mode="lines", name="Actual")
196
  st.plotly_chart(fig_micro, use_container_width=True)
197
 
198
  latest_price = micro_df.iloc[-1]["price"]
199
- micro_pred = fc_micro.loc[fc_micro["ds"] == MICRO_END, "yhat"].iloc[0]
 
 
200
  micro_pct = (micro_pred - latest_price) / latest_price * 100
201
  st.metric("2026 ์˜ˆ์ธก๊ฐ€", f"{micro_pred:,.0f}", f"{micro_pct:+.1f}%")
202
  else:
@@ -209,16 +309,19 @@ else:
209
  # -------------------------------------------------
210
  with st.expander("📆 시즈널리티 & 패턴 설명"):
211
  if 'm_micro' in locals() and m_micro is not None and 'fc_micro' in locals() and fc_micro is not None:
212
- comp_fig = m_micro.plot_components(fc_micro)
213
- st.pyplot(comp_fig)
214
-
215
- month_season = (fc_micro[["ds", "yearly"]]
216
- .assign(month=lambda d: d.ds.dt.month)
217
- .groupby("month")["yearly"].mean())
218
- st.markdown(
219
- f"**์—ฐ๊ฐ„ ํ”ผํฌ ์›”:** {int(month_season.idxmax())}์›” \n"
220
- f"**์—ฐ๊ฐ„ ์ €์  ์›”:** {int(month_season.idxmin())}์›” \n"
221
- f"**์—ฐ๊ฐ„ ๋ณ€๋™ํญ:** {month_season.max() - month_season.min():.1f}")
 
 
 
222
  else:
223
  st.info("ํŒจํ„ด ๋ถ„์„์„ ์œ„ํ•œ ์ถฉ๋ถ„ํ•œ ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
224
 
@@ -226,17 +329,42 @@ with st.expander("๐Ÿ“† ์‹œ์ฆˆ๋„๋ฆฌํ‹ฐ & ํŒจํ„ด ์„ค๋ช…"):
226
  # CORRELATION HEATMAP -----------------------------
227
  # -------------------------------------------------
228
  st.subheader("🧮 품목 간 상관관계")
229
- try:
230
- monthly_pivot = (raw_df.assign(month=lambda d: d.date.dt.to_period("M"))
231
- .groupby(["month", "item"], as_index=False)["price"].mean()
232
- .pivot(index="month", columns="item", values="price"))
233
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  if monthly_pivot.shape[1] > 1: # At least 2 items needed for correlation
 
 
 
 
235
  corr = monthly_pivot.corr()
 
 
236
  fig, ax = plt.subplots(figsize=(12, 10))
237
  mask = np.triu(np.ones_like(corr, dtype=bool))
238
- sns.heatmap(corr, mask=mask, annot=False, cmap="coolwarm", center=0,
 
 
 
 
239
  square=True, linewidths=.5, cbar_kws={"shrink": .5})
 
 
 
240
 
241
  # Highlight correlations with selected item
242
  if selected_item in corr.columns:
@@ -259,9 +387,10 @@ try:
259
  st.info("์ƒ๊ด€๊ด€๊ณ„ ๋ถ„์„์„ ์œ„ํ•œ ์ถฉ๋ถ„ํ•œ ํ’ˆ๋ชฉ ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
260
  except Exception as e:
261
  st.error(f"์ƒ๊ด€๊ด€๊ณ„ ๋ถ„์„ ์˜ค๋ฅ˜: {str(e)}")
 
262
 
263
  # -------------------------------------------------
264
  # FOOTER ------------------------------------------
265
  # -------------------------------------------------
266
  st.markdown("---")
267
- st.caption("© 2024 품목별 가격 예측 시스템 | 데이터 분석 자동화")
 
7
  import matplotlib.pyplot as plt
8
  from datetime import date
9
  from pathlib import Path
10
+ import matplotlib.font_manager as fm
11
+ import matplotlib as mpl
12
 
13
  # -------------------------------------------------
14
  # CONFIG ------------------------------------------
15
  # -------------------------------------------------
16
  CSV_PATH = Path("price_data.csv")
17
+ PARQUET_PATH = Path("domae-202503.parquet")
18
  MACRO_START, MACRO_END = "1996-01-01", "2030-12-31"
19
  MICRO_START, MICRO_END = "2020-01-01", "2026-12-31"
20
 
21
+ # ํ•œ๊ธ€ ํฐํŠธ ์„ค์ •
22
+ # 1. ์‹œ์Šคํ…œ์— ์„ค์น˜๋œ ํ•œ๊ธ€ ํฐํŠธ ์ฐพ๊ธฐ
23
+ font_list = [f.name for f in fm.fontManager.ttflist if 'gothic' in f.name.lower() or
24
+ 'gulim' in f.name.lower() or 'malgun' in f.name.lower() or
25
+ 'nanum' in f.name.lower() or 'batang' in f.name.lower()]
26
+
27
+ if font_list:
28
+ font_name = font_list[0]
29
+ plt.rcParams['font.family'] = font_name
30
+ mpl.rcParams['axes.unicode_minus'] = False
31
+ else:
32
+ # ํฐํŠธ๊ฐ€ ์—†์„ ๊ฒฝ์šฐ ๊ธฐ๋ณธ ํฐํŠธ ์„ค์ •
33
+ plt.rcParams['font.family'] = 'DejaVu Sans'
34
+
35
  st.set_page_config(page_title="품목별 가격 예측", page_icon="📈", layout="wide")
36
 
37
  # -------------------------------------------------
 
69
  df.rename(columns={df.columns[0]: "date"}, inplace=True)
70
 
71
  # ── convert YYYYMM string to datetime ──────────────────────────────
72
+ if "date" in df.columns and pd.api.types.is_object_dtype(df["date"]):
73
+ if len(df) > 0: # ๋ฐ์ดํ„ฐ๊ฐ€ ์žˆ๋Š”์ง€ ํ™•์ธ
74
+ sample = str(df["date"].iloc[0])
75
+ if sample.isdigit() and len(sample) in (6, 8):
76
+ df["date"] = pd.to_datetime(df["date"].astype(str).str[:6], format="%Y%m", errors="coerce")
77
 
78
  # ── build item from pdlt_nm + spcs_nm if needed ────────────────────
79
  if "item" not in df.columns and {"pdlt_nm", "spcs_nm"}.issubset(df.columns):
 
90
  @st.cache_data(show_spinner=False)
91
  def load_data() -> pd.DataFrame:
92
  """Load price data from Parquet if available, else CSV. Handle flexible schema."""
93
+ try:
94
+ if PARQUET_PATH.exists():
95
+ st.sidebar.info("Parquet ํŒŒ์ผ์—์„œ ๋ฐ์ดํ„ฐ๋ฅผ ๋ถˆ๋Ÿฌ์˜ต๋‹ˆ๋‹ค.")
96
+ df = pd.read_parquet(PARQUET_PATH)
97
+ st.sidebar.success(f"Parquet ๋ฐ์ดํ„ฐ ๋กœ๋“œ ์™„๋ฃŒ: {len(df)}๊ฐœ ํ–‰")
98
+ elif CSV_PATH.exists():
99
+ st.sidebar.info("CSV ํŒŒ์ผ์—์„œ ๋ฐ์ดํ„ฐ๋ฅผ ๋ถˆ๋Ÿฌ์˜ต๋‹ˆ๋‹ค.")
100
+ df = pd.read_csv(CSV_PATH)
101
+ st.sidebar.success(f"CSV ๋ฐ์ดํ„ฐ ๋กœ๋“œ ์™„๋ฃŒ: {len(df)}๊ฐœ ํ–‰")
102
+ else:
103
+ st.error("💾 price_data.csv 또는 domae-202503.parquet 파일을 찾을 수 없습니다.")
104
+ st.stop()
105
+
106
+ # ์›๋ณธ ๋ฐ์ดํ„ฐ ํ˜•ํƒœ ํ™•์ธ
107
+ st.sidebar.write("์›๋ณธ ๋ฐ์ดํ„ฐ ์ปฌ๋Ÿผ:", list(df.columns))
108
+
109
+ df = _standardize_columns(df)
110
+ st.sidebar.write("ํ‘œ์ค€ํ™” ํ›„ ์ปฌ๋Ÿผ:", list(df.columns))
111
+
112
+ missing = {c for c in ["date", "item", "price"] if c not in df.columns}
113
+ if missing:
114
+ st.error(f"ํ•„์ˆ˜ ์ปฌ๋Ÿผ ๋ˆ„๋ฝ: {', '.join(missing)} โ€” ํŒŒ์ผ ์ปฌ๋Ÿผ๋ช…์„ ํ™•์ธํ•˜์„ธ์š”.")
115
+ st.stop()
116
+
117
+ # ๋‚ ์งœ ๋ณ€ํ™˜ ์ „ํ›„ ๋ฐ์ดํ„ฐ ์ˆ˜ ํ™•์ธ
118
+ before_date_convert = len(df)
119
+ df["date"] = pd.to_datetime(df["date"], errors="coerce")
120
+ after_date_convert = df.dropna(subset=["date"]).shape[0]
121
+ if before_date_convert != after_date_convert:
122
+ st.warning(f"๋‚ ์งœ ๋ณ€ํ™˜ ์ค‘ {before_date_convert - after_date_convert}๊ฐœ ํ–‰์ด ์ œ์™ธ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
123
+
124
+ # NA ๋ฐ์ดํ„ฐ ์ฒ˜๋ฆฌ
125
+ before_na_drop = len(df)
126
+ df = df.dropna(subset=["date", "item", "price"])
127
+ after_na_drop = len(df)
128
+ if before_na_drop != after_na_drop:
129
+ st.warning(f"NA ์ œ๊ฑฐ ์ค‘ {before_na_drop - after_na_drop}๊ฐœ ํ–‰์ด ์ œ์™ธ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
130
+
131
+ df.sort_values("date", inplace=True)
132
+
133
+ # 데이터 날짜 범위 확인
134
+ if len(df) > 0:
135
+ st.sidebar.write(f"๋ฐ์ดํ„ฐ ๋‚ ์งœ ๋ฒ”์œ„: {df['date'].min().strftime('%Y-%m-%d')} ~ {df['date'].max().strftime('%Y-%m-%d')}")
136
+ st.sidebar.write(f"์ด ํ’ˆ๋ชฉ ์ˆ˜: {df['item'].nunique()}")
137
+ else:
138
+ st.error("์œ ํšจํ•œ ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค!")
139
+
140
+ return df
141
+ except Exception as e:
142
+ st.error(f"๋ฐ์ดํ„ฐ ๋กœ๋“œ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}")
143
  st.stop()
144
 
 
 
 
 
 
145
 
146
  @st.cache_data(show_spinner=False)
147
  def get_items(df: pd.DataFrame):
148
  return sorted(df["item"].unique())
149
 
150
 
151
+ @st.cache_data(show_spinner=False, ttl=3600)
152
  def fit_prophet(df: pd.DataFrame, horizon_end: str):
153
  # Make a copy and ensure we have data
154
  df = df.copy()
155
  df = df.dropna(subset=["date", "price"])
156
 
157
+ # ์ค‘๋ณต ๋‚ ์งœ ์ฒ˜๋ฆฌ - ๋™์ผ ๋‚ ์งœ์— ์—ฌ๋Ÿฌ ๊ฐ’์ด ์žˆ์œผ๋ฉด ํ‰๊ท ๊ฐ’ ์‚ฌ์šฉ
158
+ df = df.groupby("date")["price"].mean().reset_index()
159
+
160
  if len(df) < 2:
161
+ st.warning(f"๋ฐ์ดํ„ฐ ํฌ์ธํŠธ๊ฐ€ ๋ถ€์กฑํ•ฉ๋‹ˆ๋‹ค. ์˜ˆ์ธก์„ ์œ„ํ•ด์„œ๋Š” ์ตœ์†Œ 2๊ฐœ ์ด์ƒ์˜ ์œ ํšจ ๋ฐ์ดํ„ฐ๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค. (ํ˜„์žฌ {len(df)}๊ฐœ)")
162
  return None, None
163
 
164
  # Convert to Prophet format
165
  prophet_df = df.rename(columns={"date": "ds", "price": "y"})
166
 
167
+ try:
168
+ # Fit the model
169
+ m = Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False)
170
+ m.fit(prophet_df)
171
+
172
+ # Generate future dates
173
+ periods = max((pd.Timestamp(horizon_end) - df["date"].max()).days, 1)
174
+ future = m.make_future_dataframe(periods=periods, freq="D")
175
+
176
+ # Make predictions
177
+ forecast = m.predict(future)
178
+ return m, forecast
179
+ except Exception as e:
180
+ st.error(f"Prophet ๋ชจ๋ธ ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜: {str(e)}")
181
+ return None, None
182
 
183
  # -------------------------------------------------
184
  # LOAD DATA ---------------------------------------
185
  # -------------------------------------------------
186
  raw_df = load_data()
187
 
188
+ if len(raw_df) == 0:
189
+ st.error("๋ฐ์ดํ„ฐ๊ฐ€ ๋น„์–ด ์žˆ์Šต๋‹ˆ๋‹ค. ํŒŒ์ผ์„ ํ™•์ธํ•ด์ฃผ์„ธ์š”.")
190
+ st.stop()
191
+
192
  st.sidebar.header("🔍 품목 선택")
193
  selected_item = st.sidebar.selectbox("ํ’ˆ๋ชฉ", get_items(raw_df))
194
  current_date = date.today()
 
203
  # MACRO FORECAST 1996‑2030 ------------------------
204
  # -------------------------------------------------
205
  st.header(f"📈 {selected_item} 가격 예측 대시보드")
206
+
207
+ # ๋ฐ์ดํ„ฐ ํ•„ํ„ฐ๋ง ๋กœ์ง ๊ฐœ์„  - ์‹œ๊ฐ„ ๋ฒ”์œ„๋ฅผ ์กฐ์ •ํ•˜์—ฌ ๋” ๋งŽ์€ ๋ฐ์ดํ„ฐ ํฌํ•จ
208
+ try:
209
+ macro_start_dt = pd.Timestamp(MACRO_START)
210
+ # ๋ฐ์ดํ„ฐ๊ฐ€ ์ถฉ๋ถ„ํ•˜์ง€ ์•Š์œผ๋ฉด ์‹œ์ž‘ ๋‚ ์งœ๋ฅผ ์กฐ์ •
211
+ if len(item_df[item_df["date"] >= macro_start_dt]) < 10:
212
+ # ๊ฐ€์žฅ ์˜ค๋ž˜๋œ ๋‚ ์งœ๋ถ€ํ„ฐ ์‹œ์ž‘
213
+ macro_start_dt = item_df["date"].min()
214
+ st.info(f"์ถฉ๋ถ„ํ•œ ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์–ด ์‹œ์ž‘ ๋‚ ์งœ๋ฅผ {macro_start_dt.strftime('%Y-%m-%d')}๋กœ ์กฐ์ •ํ–ˆ์Šต๋‹ˆ๋‹ค.")
215
+
216
+ macro_df = item_df[item_df["date"] >= macro_start_dt].copy()
217
+ except Exception as e:
218
+ st.error(f"๋‚ ์งœ ํ•„ํ„ฐ๋ง ์˜ค๋ฅ˜: {str(e)}")
219
+ macro_df = item_df.copy() # ํ•„ํ„ฐ๋ง ์—†์ด ์ „์ฒด ๋ฐ์ดํ„ฐ ์‚ฌ์šฉ
220
 
221
  # Add diagnostic info
222
  with st.expander("๋ฐ์ดํ„ฐ ์ง„๋‹จ"):
223
  st.write(f"- ์ „์ฒด ๋ฐ์ดํ„ฐ ์ˆ˜: {len(item_df)}")
224
+ st.write(f"- ๋ถ„์„ ๋ฐ์ดํ„ฐ ์ˆ˜: {len(macro_df)}")
225
+ if len(macro_df) > 0:
226
+ st.write(f"- ๊ธฐ๊ฐ„: {macro_df['date'].min().strftime('%Y-%m-%d')} ~ {macro_df['date'].max().strftime('%Y-%m-%d')}")
227
+ st.dataframe(macro_df.head())
228
+ else:
229
+ st.write("๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
230
 
231
  if len(macro_df) < 2:
232
+ st.warning(f"{selected_item}์— ๋Œ€ํ•œ ๋ฐ์ดํ„ฐ๊ฐ€ ์ถฉ๋ถ„ํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค. ์ „์ฒด ๊ธฐ๊ฐ„ ๋ฐ์ดํ„ฐ๋ฅผ ํ‘œ์‹œํ•ฉ๋‹ˆ๋‹ค.")
233
  fig = px.line(item_df, x="date", y="price", title=f"{selected_item} ๊ณผ๊ฑฐ ๊ฐ€๊ฒฉ")
234
  st.plotly_chart(fig, use_container_width=True)
235
  else:
236
  try:
237
+ with st.spinner("์žฅ๊ธฐ ์˜ˆ์ธก ๋ชจ๋ธ ์ƒ์„ฑ ์ค‘..."):
238
+ m_macro, fc_macro = fit_prophet(macro_df, MACRO_END)
239
+
240
  if m_macro is not None and fc_macro is not None:
241
+ fig_macro = px.line(fc_macro, x="ds", y="yhat", title="์žฅ๊ธฐ ์˜ˆ์ธก (1996โ€“2030)")
242
+ fig_macro.add_scatter(x=macro_df["date"], y=macro_df["price"], mode="lines", name="์‹ค์ œ ๊ฐ€๊ฒฉ")
243
  st.plotly_chart(fig_macro, use_container_width=True)
244
 
245
  latest_price = macro_df.iloc[-1]["price"]
246
+ # 2030๋…„ ๋งˆ์ง€๋ง‰ ๋‚  ์ฐพ๊ธฐ
247
+ target_date = pd.Timestamp(MACRO_END)
248
+ close_dates = fc_macro.loc[(fc_macro["ds"] - target_date).abs().argsort()[:1], "ds"].values[0]
249
+ macro_pred = fc_macro.loc[fc_macro["ds"] == close_dates, "yhat"].iloc[0]
250
  macro_pct = (macro_pred - latest_price) / latest_price * 100
251
  st.metric("2030 ์˜ˆ์ธก๊ฐ€", f"{macro_pred:,.0f}", f"{macro_pct:+.1f}%")
252
  else:
 
254
  fig = px.line(item_df, x="date", y="price", title=f"{selected_item} ๊ณผ๊ฑฐ ๊ฐ€๊ฒฉ")
255
  st.plotly_chart(fig, use_container_width=True)
256
  except Exception as e:
257
+ st.error(f"์žฅ๊ธฐ ์˜ˆ์ธก ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}")
258
  fig = px.line(item_df, x="date", y="price", title=f"{selected_item} ๊ณผ๊ฑฐ ๊ฐ€๊ฒฉ")
259
  st.plotly_chart(fig, use_container_width=True)
260
 
 
263
  # -------------------------------------------------
264
  st.subheader("🔎 2024–2026 단기 예측")
265
 
266
+ # ๋ฐ์ดํ„ฐ ํ•„ํ„ฐ๋ง ๋กœ์ง ๊ฐœ์„ 
267
+ try:
268
+ micro_start_dt = pd.Timestamp(MICRO_START)
269
+ # ๋ฐ์ดํ„ฐ๊ฐ€ ์ถฉ๋ถ„ํ•˜์ง€ ์•Š์œผ๋ฉด ์‹œ์ž‘ ๋‚ ์งœ๋ฅผ ์กฐ์ •
270
+ if len(item_df[item_df["date"] >= micro_start_dt]) < 10:
271
+ # ์ตœ๊ทผ 30% ๋ฐ์ดํ„ฐ๋งŒ ์‚ฌ์šฉ
272
+ n = max(2, int(len(item_df) * 0.3))
273
+ micro_df = item_df.sort_values("date").tail(n).copy()
274
+ st.info(f"์ถฉ๋ถ„ํ•œ ์ตœ๊ทผ ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์–ด ์ตœ๊ทผ {n}๊ฐœ ๋ฐ์ดํ„ฐ ํฌ์ธํŠธ๋งŒ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.")
275
+ else:
276
+ micro_df = item_df[item_df["date"] >= micro_start_dt].copy()
277
+ except Exception as e:
278
+ st.error(f"๋‹จ๊ธฐ ์˜ˆ์ธก ๋ฐ์ดํ„ฐ ํ•„ํ„ฐ๋ง ์˜ค๋ฅ˜: {str(e)}")
279
+ # ์ตœ๊ทผ 10๊ฐœ ๋ฐ์ดํ„ฐ ํฌ์ธํŠธ ์‚ฌ์šฉ
280
+ micro_df = item_df.sort_values("date").tail(10).copy()
281
+
282
  if len(micro_df) < 2:
283
  st.warning(f"{MICRO_START} ์ดํ›„ ๋ฐ์ดํ„ฐ๊ฐ€ ์ถฉ๋ถ„ํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค.")
284
  fig = px.line(item_df, x="date", y="price", title=f"{selected_item} ์ตœ๊ทผ ๊ฐ€๊ฒฉ")
285
  st.plotly_chart(fig, use_container_width=True)
286
  else:
287
  try:
288
+ with st.spinner("๋‹จ๊ธฐ ์˜ˆ์ธก ๋ชจ๋ธ ์ƒ์„ฑ ์ค‘..."):
289
+ m_micro, fc_micro = fit_prophet(micro_df, MICRO_END)
290
+
291
  if m_micro is not None and fc_micro is not None:
292
+ fig_micro = px.line(fc_micro, x="ds", y="yhat", title="๋‹จ๊ธฐ ์˜ˆ์ธก (2024โ€“2026)")
293
+ fig_micro.add_scatter(x=micro_df["date"], y=micro_df["price"], mode="lines", name="์‹ค์ œ ๊ฐ€๊ฒฉ")
294
  st.plotly_chart(fig_micro, use_container_width=True)
295
 
296
  latest_price = micro_df.iloc[-1]["price"]
297
+ target_date = pd.Timestamp(MICRO_END)
298
+ close_dates = fc_micro.loc[(fc_micro["ds"] - target_date).abs().argsort()[:1], "ds"].values[0]
299
+ micro_pred = fc_micro.loc[fc_micro["ds"] == close_dates, "yhat"].iloc[0]
300
  micro_pct = (micro_pred - latest_price) / latest_price * 100
301
  st.metric("2026 ์˜ˆ์ธก๊ฐ€", f"{micro_pred:,.0f}", f"{micro_pct:+.1f}%")
302
  else:
 
309
  # -------------------------------------------------
310
  with st.expander("📆 시즈널리티 & 패턴 설명"):
311
  if 'm_micro' in locals() and m_micro is not None and 'fc_micro' in locals() and fc_micro is not None:
312
+ try:
313
+ comp_fig = m_micro.plot_components(fc_micro)
314
+ st.pyplot(comp_fig)
315
+
316
+ month_season = (fc_micro[["ds", "yearly"]]
317
+ .assign(month=lambda d: d.ds.dt.month)
318
+ .groupby("month")["yearly"].mean())
319
+ st.markdown(
320
+ f"**์—ฐ๊ฐ„ ํ”ผํฌ ์›”:** {int(month_season.idxmax())}์›” \n"
321
+ f"**์—ฐ๊ฐ„ ์ €์  ์›”:** {int(month_season.idxmin())}์›” \n"
322
+ f"**์—ฐ๊ฐ„ ๋ณ€๋™ํญ:** {month_season.max() - month_season.min():.1f}")
323
+ except Exception as e:
324
+ st.error(f"์‹œ์ฆˆ๋„๋ฆฌํ‹ฐ ๋ถ„์„ ์˜ค๋ฅ˜: {str(e)}")
325
  else:
326
  st.info("ํŒจํ„ด ๋ถ„์„์„ ์œ„ํ•œ ์ถฉ๋ถ„ํ•œ ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
327
 
 
329
  # CORRELATION HEATMAP -----------------------------
330
  # -------------------------------------------------
331
  st.subheader("🧮 품목 간 상관관계")
 
 
 
 
332
 
333
+ try:
334
+ # ๋„ˆ๋ฌด ๋งŽ์€ ํ’ˆ๋ชฉ์ด ์žˆ์œผ๋ฉด ์ƒ์œ„ N๊ฐœ๋งŒ ์„ ํƒ
335
+ items_to_corr = raw_df['item'].value_counts().head(30).index.tolist()
336
+ if selected_item not in items_to_corr and selected_item in raw_df['item'].unique():
337
+ items_to_corr.append(selected_item)
338
+
339
+ filtered_df = raw_df[raw_df['item'].isin(items_to_corr)]
340
+
341
+ monthly_pivot = (filtered_df.assign(month=lambda d: d.date.dt.to_period("M"))
342
+ .groupby(["month", "item"], as_index=False)["price"].mean()
343
+ .pivot(index="month", columns="item", values="price"))
344
+
345
+ # ๊ฒฐ์ธก์น˜๊ฐ€ ๋„ˆ๋ฌด ๋งŽ์€ ์—ด ์ œ๊ฑฐ
346
+ threshold = 0.5 # 50% ์ด์ƒ ๊ฒฐ์ธก์น˜๊ฐ€ ์žˆ๋Š” ์—ด ์ œ๊ฑฐ
347
+ monthly_pivot = monthly_pivot.loc[:, monthly_pivot.isnull().mean() < threshold]
348
+
349
  if monthly_pivot.shape[1] > 1: # At least 2 items needed for correlation
350
+ # ๊ฒฐ์ธก์น˜ ์ฒ˜๋ฆฌ
351
+ monthly_pivot = monthly_pivot.fillna(method='ffill').fillna(method='bfill')
352
+
353
+ # ์ƒ๊ด€๊ด€๊ณ„ ๊ณ„์‚ฐ
354
  corr = monthly_pivot.corr()
355
+
356
+ # ์‹œ๊ฐํ™”
357
  fig, ax = plt.subplots(figsize=(12, 10))
358
  mask = np.triu(np.ones_like(corr, dtype=bool))
359
+
360
+ # ์—ฌ๊ธฐ์„œ ํฐํŠธ ์„ค์ • ๋‹ค์‹œ ํ™•์ธ
361
+ plt.title(f"{selected_item} ๊ด€๋ จ ์ƒ๊ด€๊ด€๊ณ„", fontsize=15)
362
+
363
+ sns.heatmap(corr, mask=mask, annot=False, cmap="coolwarm", center=0,
364
  square=True, linewidths=.5, cbar_kws={"shrink": .5})
365
+
366
+ plt.xticks(rotation=45, ha='right', fontsize=8)
367
+ plt.yticks(fontsize=8)
368
 
369
  # Highlight correlations with selected item
370
  if selected_item in corr.columns:
 
387
  st.info("์ƒ๊ด€๊ด€๊ณ„ ๋ถ„์„์„ ์œ„ํ•œ ์ถฉ๋ถ„ํ•œ ํ’ˆ๋ชฉ ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
388
  except Exception as e:
389
  st.error(f"์ƒ๊ด€๊ด€๊ณ„ ๋ถ„์„ ์˜ค๋ฅ˜: {str(e)}")
390
+ st.write("์˜ค๋ฅ˜ ์ƒ์„ธ ์ •๋ณด:", str(e))
391
 
392
  # -------------------------------------------------
393
  # FOOTER ------------------------------------------
394
  # -------------------------------------------------
395
  st.markdown("---")
396
+ st.caption("© 2025 품목별 가격 예측 시스템 | 데이터 분석 자동화")