yokoha committed on
Commit
0b12519
·
verified ·
1 Parent(s): 0f95c64

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -84
app.py CHANGED
@@ -3,7 +3,6 @@ import pandas as pd
3
  import numpy as np
4
  from prophet import Prophet
5
  import plotly.express as px
6
- import seaborn as sns
7
  import matplotlib.pyplot as plt
8
  from datetime import date
9
  from pathlib import Path
@@ -13,13 +12,11 @@ import matplotlib as mpl
13
  # -------------------------------------------------
14
  # CONFIG ------------------------------------------
15
  # -------------------------------------------------
16
- CSV_PATH = Path("price_data.csv")
17
- PARQUET_PATH = Path("domae-202503.parquet")
18
  MACRO_START, MACRO_END = "1996-01-01", "2030-12-31"
19
  MICRO_START, MICRO_END = "2020-01-01", "2026-12-31"
20
 
21
  # ํ•œ๊ธ€ ํฐํŠธ ์„ค์ •
22
- # 1. ์‹œ์Šคํ…œ์— ์„ค์น˜๋œ ํ•œ๊ธ€ ํฐํŠธ ์ฐพ๊ธฐ
23
  font_list = [f.name for f in fm.fontManager.ttflist if 'gothic' in f.name.lower() or
24
  'gulim' in f.name.lower() or 'malgun' in f.name.lower() or
25
  'nanum' in f.name.lower() or 'batang' in f.name.lower()]
@@ -29,7 +26,6 @@ if font_list:
29
  plt.rcParams['font.family'] = font_name
30
  mpl.rcParams['axes.unicode_minus'] = False
31
  else:
32
- # ํฐํŠธ๊ฐ€ ์—†์„ ๊ฒฝ์šฐ ๊ธฐ๋ณธ ํฐํŠธ ์„ค์ •
33
  plt.rcParams['font.family'] = 'DejaVu Sans'
34
 
35
  st.set_page_config(page_title="품목별 가격 예측", page_icon="📈", layout="wide")
@@ -70,7 +66,7 @@ def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
70
 
71
  # โ”€โ”€ convert YYYYMM string to datetime โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
72
  if "date" in df.columns and pd.api.types.is_object_dtype(df["date"]):
73
- if len(df) > 0: # ๋ฐ์ดํ„ฐ๊ฐ€ ์žˆ๋Š”์ง€ ํ™•์ธ
74
  sample = str(df["date"].iloc[0])
75
  if sample.isdigit() and len(sample) in (6, 8):
76
  df["date"] = pd.to_datetime(df["date"].astype(str).str[:6], format="%Y%m", errors="coerce")
@@ -89,20 +85,18 @@ def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
89
 
90
  @st.cache_data(show_spinner=False)
91
  def load_data() -> pd.DataFrame:
92
- """Load price data from Parquet if available, else CSV. Handle flexible schema."""
93
  try:
94
- if PARQUET_PATH.exists():
95
- st.sidebar.info("Parquet ํŒŒ์ผ์—์„œ ๋ฐ์ดํ„ฐ๋ฅผ ๋ถˆ๋Ÿฌ์˜ต๋‹ˆ๋‹ค.")
96
- df = pd.read_parquet(PARQUET_PATH)
97
- st.sidebar.success(f"Parquet ๋ฐ์ดํ„ฐ ๋กœ๋“œ ์™„๋ฃŒ: {len(df)}๊ฐœ ํ–‰")
98
- elif CSV_PATH.exists():
99
- st.sidebar.info("CSV ํŒŒ์ผ์—์„œ ๋ฐ์ดํ„ฐ๋ฅผ ๋ถˆ๋Ÿฌ์˜ต๋‹ˆ๋‹ค.")
100
- df = pd.read_csv(CSV_PATH)
101
- st.sidebar.success(f"CSV ๋ฐ์ดํ„ฐ ๋กœ๋“œ ์™„๋ฃŒ: {len(df)}๊ฐœ ํ–‰")
102
- else:
103
- st.error("💾 price_data.csv 또는 domae-202503.parquet 파일을 찾을 수 없습니다.")
104
  st.stop()
105
-
 
 
 
 
 
 
106
  # ์›๋ณธ ๋ฐ์ดํ„ฐ ํ˜•ํƒœ ํ™•์ธ
107
  st.sidebar.write("์›๋ณธ ๋ฐ์ดํ„ฐ ์ปฌ๋Ÿผ:", list(df.columns))
108
 
@@ -114,7 +108,7 @@ def load_data() -> pd.DataFrame:
114
  st.error(f"ํ•„์ˆ˜ ์ปฌ๋Ÿผ ๋ˆ„๋ฝ: {', '.join(missing)} โ€” ํŒŒ์ผ ์ปฌ๋Ÿผ๋ช…์„ ํ™•์ธํ•˜์„ธ์š”.")
115
  st.stop()
116
 
117
- # ๋‚ ์งœ ๋ณ€ํ™˜ ์ „ํ›„ ๋ฐ์ดํ„ฐ ์ˆ˜ ํ™•์ธ
118
  before_date_convert = len(df)
119
  df["date"] = pd.to_datetime(df["date"], errors="coerce")
120
  after_date_convert = df.dropna(subset=["date"]).shape[0]
@@ -140,6 +134,9 @@ def load_data() -> pd.DataFrame:
140
  return df
141
  except Exception as e:
142
  st.error(f"๋ฐ์ดํ„ฐ ๋กœ๋“œ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}")
 
 
 
143
  st.stop()
144
 
145
 
@@ -204,7 +201,7 @@ if item_df.empty:
204
  # -------------------------------------------------
205
  st.header(f"📈 {selected_item} 가격 예측 대시보드")
206
 
207
- # ๋ฐ์ดํ„ฐ ํ•„ํ„ฐ๋ง ๋กœ์ง ๊ฐœ์„  - ์‹œ๊ฐ„ ๋ฒ”์œ„๋ฅผ ์กฐ์ •ํ•˜์—ฌ ๋” ๋งŽ์€ ๋ฐ์ดํ„ฐ ํฌํ•จ
208
  try:
209
  macro_start_dt = pd.Timestamp(MACRO_START)
210
  # ๋ฐ์ดํ„ฐ๊ฐ€ ์ถฉ๋ถ„ํ•˜์ง€ ์•Š์œผ๋ฉด ์‹œ์ž‘ ๋‚ ์งœ๋ฅผ ์กฐ์ •
@@ -325,70 +322,6 @@ with st.expander("๐Ÿ“† ์‹œ์ฆˆ๋„๋ฆฌํ‹ฐ & ํŒจํ„ด ์„ค๋ช…"):
325
  else:
326
  st.info("ํŒจํ„ด ๋ถ„์„์„ ์œ„ํ•œ ์ถฉ๋ถ„ํ•œ ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
327
 
328
- # -------------------------------------------------
329
- # CORRELATION HEATMAP -----------------------------
330
- # -------------------------------------------------
331
- st.subheader("🧮 품목 간 상관관계")
332
-
333
- try:
334
- # ๋„ˆ๋ฌด ๋งŽ์€ ํ’ˆ๋ชฉ์ด ์žˆ์œผ๋ฉด ์ƒ์œ„ N๊ฐœ๋งŒ ์„ ํƒ
335
- items_to_corr = raw_df['item'].value_counts().head(30).index.tolist()
336
- if selected_item not in items_to_corr and selected_item in raw_df['item'].unique():
337
- items_to_corr.append(selected_item)
338
-
339
- filtered_df = raw_df[raw_df['item'].isin(items_to_corr)]
340
-
341
- monthly_pivot = (filtered_df.assign(month=lambda d: d.date.dt.to_period("M"))
342
- .groupby(["month", "item"], as_index=False)["price"].mean()
343
- .pivot(index="month", columns="item", values="price"))
344
-
345
- # ๊ฒฐ์ธก์น˜๊ฐ€ ๋„ˆ๋ฌด ๋งŽ์€ ์—ด ์ œ๊ฑฐ
346
- threshold = 0.5 # 50% ์ด์ƒ ๊ฒฐ์ธก์น˜๊ฐ€ ์žˆ๋Š” ์—ด ์ œ๊ฑฐ
347
- monthly_pivot = monthly_pivot.loc[:, monthly_pivot.isnull().mean() < threshold]
348
-
349
- if monthly_pivot.shape[1] > 1: # At least 2 items needed for correlation
350
- # ๊ฒฐ์ธก์น˜ ์ฒ˜๋ฆฌ
351
- monthly_pivot = monthly_pivot.fillna(method='ffill').fillna(method='bfill')
352
-
353
- # ์ƒ๊ด€๊ด€๊ณ„ ๊ณ„์‚ฐ
354
- corr = monthly_pivot.corr()
355
-
356
- # ์‹œ๊ฐํ™”
357
- fig, ax = plt.subplots(figsize=(12, 10))
358
- mask = np.triu(np.ones_like(corr, dtype=bool))
359
-
360
- # ์—ฌ๊ธฐ์„œ ํฐํŠธ ์„ค์ • ๋‹ค์‹œ ํ™•์ธ
361
- plt.title(f"{selected_item} ๊ด€๋ จ ์ƒ๊ด€๊ด€๊ณ„", fontsize=15)
362
-
363
- sns.heatmap(corr, mask=mask, annot=False, cmap="coolwarm", center=0,
364
- square=True, linewidths=.5, cbar_kws={"shrink": .5})
365
-
366
- plt.xticks(rotation=45, ha='right', fontsize=8)
367
- plt.yticks(fontsize=8)
368
-
369
- # Highlight correlations with selected item
370
- if selected_item in corr.columns:
371
- item_corr = corr[selected_item].sort_values(ascending=False)
372
- top_corr = item_corr.drop(selected_item).head(5)
373
- bottom_corr = item_corr.drop(selected_item).tail(5)
374
-
375
- col1, col2 = st.columns(2)
376
- with col1:
377
- st.markdown(f"**{selected_item}์™€ ์ƒ๊ด€๊ด€๊ณ„ ๋†’์€ ํ’ˆ๋ชฉ**")
378
- for item, val in top_corr.items():
379
- st.write(f"{item}: {val:.2f}")
380
- with col2:
381
- st.markdown(f"**{selected_item}์™€ ์ƒ๊ด€๊ด€๊ณ„ ๋‚ฎ์€ ํ’ˆ๋ชฉ**")
382
- for item, val in bottom_corr.items():
383
- st.write(f"{item}: {val:.2f}")
384
-
385
- st.pyplot(fig)
386
- else:
387
- st.info("์ƒ๊ด€๊ด€๊ณ„ ๋ถ„์„์„ ์œ„ํ•œ ์ถฉ๋ถ„ํ•œ ํ’ˆ๋ชฉ ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
388
- except Exception as e:
389
- st.error(f"์ƒ๊ด€๊ด€๊ณ„ ๋ถ„์„ ์˜ค๋ฅ˜: {str(e)}")
390
- st.write("์˜ค๋ฅ˜ ์ƒ์„ธ ์ •๋ณด:", str(e))
391
-
392
  # -------------------------------------------------
393
  # FOOTER ------------------------------------------
394
  # -------------------------------------------------
 
3
  import numpy as np
4
  from prophet import Prophet
5
  import plotly.express as px
 
6
  import matplotlib.pyplot as plt
7
  from datetime import date
8
  from pathlib import Path
 
12
  # -------------------------------------------------
13
  # CONFIG ------------------------------------------
14
  # -------------------------------------------------
15
+ CSV_PATH = Path("2025-domae.csv") # ํŒŒ์ผ ๊ฒฝ๋กœ ์ˆ˜์ •
 
16
  MACRO_START, MACRO_END = "1996-01-01", "2030-12-31"
17
  MICRO_START, MICRO_END = "2020-01-01", "2026-12-31"
18
 
19
  # ํ•œ๊ธ€ ํฐํŠธ ์„ค์ •
 
20
  font_list = [f.name for f in fm.fontManager.ttflist if 'gothic' in f.name.lower() or
21
  'gulim' in f.name.lower() or 'malgun' in f.name.lower() or
22
  'nanum' in f.name.lower() or 'batang' in f.name.lower()]
 
26
  plt.rcParams['font.family'] = font_name
27
  mpl.rcParams['axes.unicode_minus'] = False
28
  else:
 
29
  plt.rcParams['font.family'] = 'DejaVu Sans'
30
 
31
  st.set_page_config(page_title="ํ’ˆ๋ชฉ๋ณ„ ๊ฐ€๊ฒฉ ์˜ˆ์ธก", page_icon="๐Ÿ“ˆ", layout="wide")
 
66
 
67
  # โ”€โ”€ convert YYYYMM string to datetime โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
68
  if "date" in df.columns and pd.api.types.is_object_dtype(df["date"]):
69
+ if len(df) > 0:
70
  sample = str(df["date"].iloc[0])
71
  if sample.isdigit() and len(sample) in (6, 8):
72
  df["date"] = pd.to_datetime(df["date"].astype(str).str[:6], format="%Y%m", errors="coerce")
 
85
 
86
  @st.cache_data(show_spinner=False)
87
  def load_data() -> pd.DataFrame:
88
+ """Load price data from CSV file."""
89
  try:
90
+ if not CSV_PATH.exists():
91
+ st.error(f"๐Ÿ’พ {CSV_PATH} ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
 
 
 
 
 
 
 
 
92
  st.stop()
93
+
94
+ st.sidebar.info(f"{CSV_PATH} ํŒŒ์ผ์—์„œ ๋ฐ์ดํ„ฐ๋ฅผ ๋ถˆ๋Ÿฌ์˜ต๋‹ˆ๋‹ค.")
95
+
96
+ # CSV ํŒŒ์ผ ์ง์ ‘ ๋กœ๋“œ
97
+ df = pd.read_csv(CSV_PATH)
98
+ st.sidebar.success(f"CSV ๋ฐ์ดํ„ฐ ๋กœ๋“œ ์™„๋ฃŒ: {len(df)}๊ฐœ ํ–‰")
99
+
100
  # ์›๋ณธ ๋ฐ์ดํ„ฐ ํ˜•ํƒœ ํ™•์ธ
101
  st.sidebar.write("์›๋ณธ ๋ฐ์ดํ„ฐ ์ปฌ๋Ÿผ:", list(df.columns))
102
 
 
108
  st.error(f"ํ•„์ˆ˜ ์ปฌ๋Ÿผ ๋ˆ„๋ฝ: {', '.join(missing)} โ€” ํŒŒ์ผ ์ปฌ๋Ÿผ๋ช…์„ ํ™•์ธํ•˜์„ธ์š”.")
109
  st.stop()
110
 
111
+ # ๋‚ ์งœ ๋ณ€ํ™˜
112
  before_date_convert = len(df)
113
  df["date"] = pd.to_datetime(df["date"], errors="coerce")
114
  after_date_convert = df.dropna(subset=["date"]).shape[0]
 
134
  return df
135
  except Exception as e:
136
  st.error(f"๋ฐ์ดํ„ฐ ๋กœ๋“œ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}")
137
+ # ์˜ค๋ฅ˜ ์ƒ์„ธ ์ •๋ณด ํ‘œ์‹œ
138
+ import traceback
139
+ st.code(traceback.format_exc())
140
  st.stop()
141
 
142
 
 
201
  # -------------------------------------------------
202
  st.header(f"๐Ÿ“ˆ {selected_item} ๊ฐ€๊ฒฉ ์˜ˆ์ธก ๋Œ€์‹œ๋ณด๋“œ")
203
 
204
+ # ๋ฐ์ดํ„ฐ ํ•„ํ„ฐ๋ง ๋กœ์ง ๊ฐœ์„ 
205
  try:
206
  macro_start_dt = pd.Timestamp(MACRO_START)
207
  # ๋ฐ์ดํ„ฐ๊ฐ€ ์ถฉ๋ถ„ํ•˜์ง€ ์•Š์œผ๋ฉด ์‹œ์ž‘ ๋‚ ์งœ๋ฅผ ์กฐ์ •
 
322
  else:
323
  st.info("ํŒจํ„ด ๋ถ„์„์„ ์œ„ํ•œ ์ถฉ๋ถ„ํ•œ ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
324
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
325
  # -------------------------------------------------
326
  # FOOTER ------------------------------------------
327
  # -------------------------------------------------