yokoha committed on
Commit
828f0f0
·
verified ·
1 Parent(s): 3268778

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -26
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import streamlit as st
2
  import pandas as pd
3
  import numpy as np
@@ -12,7 +13,7 @@ from pathlib import Path
12
  # CONFIG ------------------------------------------
13
  # -------------------------------------------------
14
  CSV_PATH = Path("price_data.csv")
15
- PARQUET_PATH = Path("domae-202503.parquet") # daily prices, 1996 through 2025-03
16
  MACRO_START, MACRO_END = "1996-01-01", "2030-12-31"
17
  MICRO_START, MICRO_END = "2020-01-01", "2026-12-31"
18
 
@@ -21,33 +22,46 @@ st.set_page_config(page_title="ํ’ˆ๋ชฉ๋ณ„ ๊ฐ€๊ฒฉ ์˜ˆ์ธก", page_icon="๐Ÿ“ˆ", layou
21
  # -------------------------------------------------
22
  # UTILITIES ---------------------------------------
23
  # -------------------------------------------------
24
- DATE_CANDIDATES = {"date", "ds", "ymd", "๋‚ ์งœ"}
25
- ITEM_CANDIDATES = {"item", "ํ’ˆ๋ชฉ", "code", "category"}
26
- PRICE_CANDIDATES = {"price", "y", "value", "๊ฐ€๊ฒฉ"}
27
 
28
  def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
29
- """Rename date/item/price cols to date, item, price (in-place)."""
30
  col_map = {}
31
  for c in df.columns:
32
  lc = c.lower()
33
- if lc in DATE_CANDIDATES: # date
34
  col_map[c] = "date"
35
- elif lc in ITEM_CANDIDATES:
36
- col_map[c] = "item"
37
  elif lc in PRICE_CANDIDATES:
38
  col_map[c] = "price"
39
- df.rename(columns=col_map, inplace=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
- # date might be index
42
- if "date" not in df.columns:
43
- if df.index.dtype.kind == "M":
44
- df.reset_index(inplace=True)
45
- df.rename(columns={df.columns[0]: "date"}, inplace=True)
46
  return df
47
 
48
  @st.cache_data(show_spinner=False)
49
  def load_data() -> pd.DataFrame:
50
- """Load price data from Parquet if available, else CSV. Tries to infer column names."""
51
  if PARQUET_PATH.exists():
52
  df = pd.read_parquet(PARQUET_PATH)
53
  elif CSV_PATH.exists():
@@ -57,14 +71,13 @@ def load_data() -> pd.DataFrame:
57
  st.stop()
58
 
59
  df = _standardize_columns(df)
60
-
61
  missing = {c for c in ["date", "item", "price"] if c not in df.columns}
62
  if missing:
63
  st.error(f"ํ•„์ˆ˜ ์ปฌ๋Ÿผ ๋ˆ„๋ฝ: {', '.join(missing)} โ€” ํŒŒ์ผ ์ปฌ๋Ÿผ๋ช…์„ ํ™•์ธํ•˜์„ธ์š”.")
64
  st.stop()
65
 
66
  df["date"] = pd.to_datetime(df["date"], errors="coerce")
67
- df.dropna(subset=["date", "item", "price"], inplace=True)
68
  df.sort_values("date", inplace=True)
69
  return df
70
 
@@ -134,21 +147,27 @@ with st.expander("๐Ÿ“† ์‹œ์ฆˆ๋„๋ฆฌํ‹ฐ & ํŒจํ„ด ์„ค๋ช…"):
134
  comp_fig = m_micro.plot_components(fc_micro)
135
  st.pyplot(comp_fig)
136
 
137
- month_season = (fc_micro[["ds", "yearly"]]
138
- .assign(month=lambda d: d.ds.dt.month)
139
- .groupby("month")["yearly"].mean())
 
 
140
  st.markdown(
141
  f"**์—ฐ๊ฐ„ ํ”ผํฌ ์›”:** {int(month_season.idxmax())}์›” \n"
142
  f"**์—ฐ๊ฐ„ ์ €์  ์›”:** {int(month_season.idxmin())}์›” \n"
143
- f"**์—ฐ๊ฐ„ ๋ณ€๋™ํญ:** {month_season.max() - month_season.min():.1f}")
 
144
 
145
  # -------------------------------------------------
146
  # CORRELATION HEATMAP -----------------------------
147
  # -------------------------------------------------
148
  st.subheader("๐Ÿงฎ ํ’ˆ๋ชฉ ๊ฐ„ ์ƒ๊ด€๊ด€๊ณ„")
149
- monthly_pivot = (raw_df.assign(month=lambda d: d.date.dt.to_period("M"))
150
- .groupby(["month", "item"], as_index=False)["price"].mean()
151
- .pivot(index="month", columns="item", values="price"))
 
 
 
152
 
153
  corr = monthly_pivot.corr()
154
  fig, ax = plt.subplots(figsize=(12, 10))
@@ -162,8 +181,14 @@ st.info("๋นจ๊ฐ„ ์˜์—ญ: ๊ฐ€๊ฒฉ ๋™์กฐํ™” / ํŒŒ๋ž€ ์˜์—ญ: ๋Œ€์ฒด์žฌ ๊ฐ€๋Šฅ์„ฑ")
162
  # VOLATILITY --------------------------------------
163
  # -------------------------------------------------
164
  st.subheader("๐Ÿ“Š 30์ผ ์ด๋™ ํ‘œ์ค€ํŽธ์ฐจ (๊ฐ€๊ฒฉ ๋ณ€๋™์„ฑ)")
165
- vol = item_df.set_index("date")["price"].rolling(30).std().dropna().reset_index()
 
 
 
 
 
 
166
  fig_vol = px.area(vol, x="date", y="price", title="Rolling 30D Std Dev")
167
  st.plotly_chart(fig_vol, use_container_width=True)
168
 
169
- st.caption("๋ฐ์ดํ„ฐ: domae-202503.parquet ยท Prophet ์˜ˆ์ธก ยท Streamlit ๋Œ€์‹œ๋ณด๋“œ")
 
1
+
2
  import streamlit as st
3
  import pandas as pd
4
  import numpy as np
 
13
  # CONFIG ------------------------------------------
14
  # -------------------------------------------------
15
  CSV_PATH = Path("price_data.csv")
16
+ PARQUET_PATH = Path("domae-202503.parquet") # daily/monthly prices, 1996 through 2025-03
17
  MACRO_START, MACRO_END = "1996-01-01", "2030-12-31"
18
  MICRO_START, MICRO_END = "2020-01-01", "2026-12-31"
19
 
 
22
  # -------------------------------------------------
23
  # UTILITIES ---------------------------------------
24
  # -------------------------------------------------
25
+ DATE_CANDIDATES = {"date", "ds", "ymd", "๋‚ ์งœ", "prce_reg_mm", "etl_ldg_dt"}
26
+ ITEM_CANDIDATES = {"item", "ํ’ˆ๋ชฉ", "code", "category", "pdlt_nm", "spcs_nm"}
27
+ PRICE_CANDIDATES = {"price", "y", "value", "๊ฐ€๊ฒฉ", "avrg_prce"}
28
 
29
  def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
30
+ """Rename date/item/price columns to date, item, price. Create composite item if needed."""
31
  col_map = {}
32
  for c in df.columns:
33
  lc = c.lower()
34
+ if lc in DATE_CANDIDATES:
35
  col_map[c] = "date"
 
 
36
  elif lc in PRICE_CANDIDATES:
37
  col_map[c] = "price"
38
+ elif lc in ITEM_CANDIDATES:
39
+ if "item" not in col_map.values():
40
+ col_map[c] = "item"
41
+ else:
42
+ col_map[c] = "species"
43
+ df = df.rename(columns=col_map)
44
+
45
+ if "date" not in df.columns and df.index.dtype.kind == "M":
46
+ df.reset_index(inplace=True)
47
+ df.rename(columns={df.columns[0]: "date"}, inplace=True)
48
+
49
+ if "date" in df.columns and df["date"].dtype == object:
50
+ sample = str(df["date"].iloc[0])
51
+ if sample.isdigit() and len(sample) in (6, 8):
52
+ df["date"] = pd.to_datetime(df["date"].astype(str).str[:6], format="%Y%m", errors="coerce")
53
+
54
+ if "item" not in df.columns and {"pdlt_nm", "spcs_nm"}.issubset(df.columns):
55
+ df["item"] = df["pdlt_nm"].str.strip() + "-" + df["spcs_nm"].str.strip()
56
+
57
+ if {"item", "species"}.issubset(df.columns):
58
+ df["item"] = df["item"].astype(str).str.strip() + "-" + df["species"].astype(str).str.strip()
59
+ df.drop(columns=["species"], inplace=True)
60
 
 
 
 
 
 
61
  return df
62
 
63
  @st.cache_data(show_spinner=False)
64
  def load_data() -> pd.DataFrame:
 
65
  if PARQUET_PATH.exists():
66
  df = pd.read_parquet(PARQUET_PATH)
67
  elif CSV_PATH.exists():
 
71
  st.stop()
72
 
73
  df = _standardize_columns(df)
 
74
  missing = {c for c in ["date", "item", "price"] if c not in df.columns}
75
  if missing:
76
  st.error(f"ํ•„์ˆ˜ ์ปฌ๋Ÿผ ๋ˆ„๋ฝ: {', '.join(missing)} โ€” ํŒŒ์ผ ์ปฌ๋Ÿผ๋ช…์„ ํ™•์ธํ•˜์„ธ์š”.")
77
  st.stop()
78
 
79
  df["date"] = pd.to_datetime(df["date"], errors="coerce")
80
+ df = df.dropna(subset=["date", "item", "price"])
81
  df.sort_values("date", inplace=True)
82
  return df
83
 
 
147
  comp_fig = m_micro.plot_components(fc_micro)
148
  st.pyplot(comp_fig)
149
 
150
+ month_season = (
151
+ fc_micro[["ds", "yearly"]]
152
+ .assign(month=lambda d: d.ds.dt.month)
153
+ .groupby("month")["yearly"].mean()
154
+ )
155
  st.markdown(
156
  f"**์—ฐ๊ฐ„ ํ”ผํฌ ์›”:** {int(month_season.idxmax())}์›” \n"
157
  f"**์—ฐ๊ฐ„ ์ €์  ์›”:** {int(month_season.idxmin())}์›” \n"
158
+ f"**์—ฐ๊ฐ„ ๋ณ€๋™ํญ:** {month_season.max() - month_season.min():.1f}"
159
+ )
160
 
161
  # -------------------------------------------------
162
  # CORRELATION HEATMAP -----------------------------
163
  # -------------------------------------------------
164
  st.subheader("๐Ÿงฎ ํ’ˆ๋ชฉ ๊ฐ„ ์ƒ๊ด€๊ด€๊ณ„")
165
+ monthly_pivot = (
166
+ raw_df.assign(month=lambda d: d.date.dt.to_period("M"))
167
+ .groupby(["month", "item"], as_index=False)["price"]
168
+ .mean()
169
+ .pivot(index="month", columns="item", values="price")
170
+ )
171
 
172
  corr = monthly_pivot.corr()
173
  fig, ax = plt.subplots(figsize=(12, 10))
 
181
  # VOLATILITY --------------------------------------
182
  # -------------------------------------------------
183
  st.subheader("๐Ÿ“Š 30์ผ ์ด๋™ ํ‘œ์ค€ํŽธ์ฐจ (๊ฐ€๊ฒฉ ๋ณ€๋™์„ฑ)")
184
+ vol = (
185
+ item_df.set_index("date")["price"]
186
+ .rolling(30)
187
+ .std()
188
+ .dropna()
189
+ .reset_index()
190
+ )
191
  fig_vol = px.area(vol, x="date", y="price", title="Rolling 30D Std Dev")
192
  st.plotly_chart(fig_vol, use_container_width=True)
193
 
194
+ st.caption("๋ฐ์ดํ„ฐ: domae-202503.parquet ยท Prophet ์˜ˆ์ธก ยท Streamlit ๋Œ€์‹œ๋ณด๋“œ")