Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -12,7 +12,7 @@ from pathlib import Path
|
|
12 |
# CONFIG ------------------------------------------
|
13 |
# -------------------------------------------------
|
14 |
CSV_PATH = Path("price_data.csv")
|
15 |
-
PARQUET_PATH = Path("domae-202503.parquet") # 1996
|
16 |
MACRO_START, MACRO_END = "1996-01-01", "2030-12-31"
|
17 |
MICRO_START, MICRO_END = "2020-01-01", "2026-12-31"
|
18 |
|
@@ -21,18 +21,50 @@ st.set_page_config(page_title="ํ๋ชฉ๋ณ ๊ฐ๊ฒฉ ์์ธก", page_icon="๐", layou
|
|
21 |
# -------------------------------------------------
|
22 |
# UTILITIES ---------------------------------------
|
23 |
# -------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
@st.cache_data(show_spinner=False)
|
25 |
def load_data() -> pd.DataFrame:
|
26 |
-
"""Load price data from Parquet if available, else CSV."""
|
27 |
if PARQUET_PATH.exists():
|
28 |
df = pd.read_parquet(PARQUET_PATH)
|
29 |
elif CSV_PATH.exists():
|
30 |
df = pd.read_csv(CSV_PATH)
|
31 |
else:
|
32 |
-
st.error("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
st.stop()
|
34 |
-
|
35 |
-
df["date"] = pd.to_datetime(df["date"])
|
|
|
36 |
df.sort_values("date", inplace=True)
|
37 |
return df
|
38 |
|
@@ -44,7 +76,8 @@ def get_items(df: pd.DataFrame):
|
|
44 |
def fit_prophet(df: pd.DataFrame, horizon_end: str):
|
45 |
m = Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False)
|
46 |
m.fit(df.rename(columns={"date": "ds", "price": "y"}))
|
47 |
-
|
|
|
48 |
forecast = m.predict(future)
|
49 |
return m, forecast
|
50 |
|
@@ -64,12 +97,11 @@ if item_df.empty:
|
|
64 |
st.stop()
|
65 |
|
66 |
# -------------------------------------------------
|
67 |
-
#
|
68 |
# -------------------------------------------------
|
69 |
st.header(f"๐ {selected_item} ๊ฐ๊ฒฉ ์์ธก ๋์๋ณด๋")
|
70 |
-
|
71 |
-
# Macro forecast 1996โ2030
|
72 |
macro_df = item_df[item_df["date"] >= MACRO_START]
|
|
|
73 |
m_macro, fc_macro = fit_prophet(macro_df, MACRO_END)
|
74 |
fig_macro = px.line(fc_macro, x="ds", y="yhat", title="Macro Forecast 1996โ2030")
|
75 |
fig_macro.add_scatter(x=macro_df["date"], y=macro_df["price"], mode="lines", name="Actual")
|
@@ -80,8 +112,11 @@ macro_pred = fc_macro.loc[fc_macro["ds"] == MACRO_END, "yhat"].iloc[0]
|
|
80 |
macro_pct = (macro_pred - latest_price) / latest_price * 100
|
81 |
st.metric("2030 ์์ธก๊ฐ", f"{macro_pred:,.0f}", f"{macro_pct:+.1f}%")
|
82 |
|
83 |
-
#
|
|
|
|
|
84 |
st.subheader("๐ 2024โ2026 ๋จ๊ธฐ ์์ธก")
|
|
|
85 |
micro_df = item_df[item_df["date"] >= MICRO_START]
|
86 |
m_micro, fc_micro = fit_prophet(micro_df, MICRO_END)
|
87 |
fig_micro = px.line(fc_micro, x="ds", y="yhat", title="Micro Forecast 2024โ2026")
|
@@ -92,36 +127,43 @@ micro_pred = fc_micro.loc[fc_micro["ds"] == MICRO_END, "yhat"].iloc[0]
|
|
92 |
micro_pct = (micro_pred - latest_price) / latest_price * 100
|
93 |
st.metric("2026 ์์ธก๊ฐ", f"{micro_pred:,.0f}", f"{micro_pct:+.1f}%")
|
94 |
|
95 |
-
#
|
|
|
|
|
96 |
with st.expander("๐ ์์ฆ๋๋ฆฌํฐ & ํจํด ์ค๋ช
"):
|
97 |
comp_fig = m_micro.plot_components(fc_micro)
|
98 |
st.pyplot(comp_fig)
|
|
|
99 |
month_season = (fc_micro[["ds", "yearly"]]
|
100 |
.assign(month=lambda d: d.ds.dt.month)
|
101 |
.groupby("month")["yearly"].mean())
|
102 |
st.markdown(
|
103 |
-
f"**์ฐ๊ฐ ํผํฌ ์:** {int(month_season.idxmax())}
|
104 |
-
f"**์ฐ๊ฐ ์ ์ ์:** {int(month_season.idxmin())}
|
105 |
f"**์ฐ๊ฐ ๋ณ๋ํญ:** {month_season.max() - month_season.min():.1f}")
|
106 |
|
107 |
-
#
|
|
|
|
|
108 |
st.subheader("๐งฎ ํ๋ชฉ ๊ฐ ์๊ด๊ด๊ณ")
|
109 |
monthly_pivot = (raw_df.assign(month=lambda d: d.date.dt.to_period("M"))
|
110 |
.groupby(["month", "item"], as_index=False)["price"].mean()
|
111 |
.pivot(index="month", columns="item", values="price"))
|
112 |
|
113 |
corr = monthly_pivot.corr()
|
114 |
-
mask = np.triu(np.ones_like(corr, dtype=bool))
|
115 |
fig, ax = plt.subplots(figsize=(12, 10))
|
|
|
116 |
sns.heatmap(corr, mask=mask, cmap="RdBu_r", center=0, linewidths=.5, ax=ax)
|
117 |
st.pyplot(fig)
|
118 |
|
119 |
-
st.info("๋นจ๊ฐ ์์ญ: ๊ฐ๊ฒฉ ๋์กฐํ / ํ๋ ์์ญ: ๋์ฒด์ฌ
|
120 |
|
121 |
-
#
|
|
|
|
|
122 |
st.subheader("๐ 30์ผ ์ด๋ ํ์คํธ์ฐจ (๊ฐ๊ฒฉ ๋ณ๋์ฑ)")
|
123 |
vol = item_df.set_index("date")["price"].rolling(30).std().dropna().reset_index()
|
124 |
fig_vol = px.area(vol, x="date", y="price", title="Rolling 30D Std Dev")
|
125 |
st.plotly_chart(fig_vol, use_container_width=True)
|
126 |
|
127 |
-
st.caption("๋ฐ์ดํฐ: domae-202503.parquet ยท Prophet ์์ธก ยท Streamlit ๋์๋ณด๋")
|
|
|
12 |
# CONFIG ------------------------------------------
|
13 |
# -------------------------------------------------
|
14 |
CSV_PATH = Path("price_data.csv")
|
15 |
+
PARQUET_PATH = Path("domae-202503.parquet") # 1996-2025-03 ์ผ๊ฐ ๊ฐ๊ฒฉ
|
16 |
MACRO_START, MACRO_END = "1996-01-01", "2030-12-31"
|
17 |
MICRO_START, MICRO_END = "2020-01-01", "2026-12-31"
|
18 |
|
|
|
21 |
# -------------------------------------------------
|
22 |
# UTILITIES ---------------------------------------
|
23 |
# -------------------------------------------------
|
24 |
+
# Lower-cased header names accepted for each canonical column.
# The Korean entries mean "date", "item" and "price" respectively.
DATE_CANDIDATES = {"date", "ds", "ymd", "날짜"}
ITEM_CANDIDATES = {"item", "품목", "code", "category"}
PRICE_CANDIDATES = {"price", "y", "value", "가격"}


def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Rename date/item/price columns to ``date``, ``item``, ``price`` in place.

    Column labels are matched case-insensitively (after stripping surrounding
    whitespace) against ``DATE_CANDIDATES``, ``ITEM_CANDIDATES`` and
    ``PRICE_CANDIDATES``. For each canonical name at most one column is
    renamed: a column that already carries the canonical name wins, otherwise
    the first matching column in column order does. Non-string labels are
    left untouched.

    If no ``date`` column results and the index is datetime-typed, the index
    values are moved into a new leading ``date`` column and the index is
    replaced by a default integer index.

    Args:
        df: Frame to normalise. It is mutated.

    Returns:
        The same ``df`` object, for call chaining.
    """
    targets = (
        ("date", DATE_CANDIDATES),
        ("item", ITEM_CANDIDATES),
        ("price", PRICE_CANDIDATES),
    )
    canonical = tuple(name for name, _ in targets)

    # Canonical names already present must not be duplicated by a rename;
    # duplicate labels would make df["date"] return a DataFrame.
    taken = {c for c in df.columns if isinstance(c, str) and c in canonical}

    col_map = {}
    for col in df.columns:
        if not isinstance(col, str):
            # Integer/tuple labels have no .lower(); they cannot be a match.
            continue
        key = col.strip().lower()
        for target, candidates in targets:
            if key in candidates:
                if target not in taken:
                    col_map[col] = target
                    taken.add(target)
                break
    df.rename(columns=col_map, inplace=True)

    # The date may live in the index rather than in a column.
    if "date" not in df.columns and df.index.dtype.kind == "M":
        # insert + drop avoids the name clash reset_index() can raise when
        # the index name equals an existing column label.
        df.insert(0, "date", df.index)
        df.reset_index(drop=True, inplace=True)
    return df
|
47 |
+
|
48 |
@st.cache_data(show_spinner=False)
def load_data() -> pd.DataFrame:
    """Load the price table and normalise it to ``date``/``item``/``price``.

    ``PARQUET_PATH`` is read if it exists, otherwise ``CSV_PATH``. Column
    names are normalised with ``_standardize_columns``. Rows whose date
    cannot be parsed, or whose date, item or price is missing, are dropped,
    and the result is sorted by date.

    If neither file exists, or a required column is still missing after
    normalisation, an error is shown in the app and the script run is halted
    via ``st.stop()``.

    Returns:
        The cleaned frame with at least ``date``, ``item`` and ``price``.
    """
    if PARQUET_PATH.exists():
        df = pd.read_parquet(PARQUET_PATH)
    elif CSV_PATH.exists():
        df = pd.read_csv(CSV_PATH)
    else:
        # Message: "price_data.csv or domae-202503.parquet could not be found."
        st.error("💾 price_data.csv 또는 domae-202503.parquet 파일을 찾을 수 없습니다.")
        st.stop()

    df = _standardize_columns(df)

    # A list (not a set) keeps the reported order stable between runs.
    missing = [c for c in ("date", "item", "price") if c not in df.columns]
    if missing:
        # Message: "Required columns missing: ... — check the file's column names."
        st.error(f"필수 컬럼 누락: {', '.join(missing)} — 파일 컬럼명을 확인하세요.")
        st.stop()

    # Unparseable dates become NaT and are removed together with other gaps.
    df["date"] = pd.to_datetime(df["date"], errors="coerce")
    df.dropna(subset=["date", "item", "price"], inplace=True)
    df.sort_values("date", inplace=True)
    return df
|
70 |
|
|
|
76 |
def fit_prophet(df: pd.DataFrame, horizon_end: str):
    """Fit a yearly-seasonal Prophet model and forecast daily to ``horizon_end``.

    Args:
        df: History with a ``date`` column and a ``price`` column.
        horizon_end: Last date to forecast, in any form ``pd.Timestamp``
            accepts (e.g. ``"2030-12-31"``).

    Returns:
        ``(model, forecast)``: the fitted ``Prophet`` instance and the frame
        returned by ``model.predict`` over the history plus future dates.
    """
    m = Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False)

    # Hand the model only the two columns it is fitted on, under the
    # ds/y names used by the original fit call.
    history = df[["date", "price"]].rename(columns={"date": "ds", "price": "y"})
    m.fit(history)

    # If the history already reaches past horizon_end the day difference is
    # negative; clamp to 0 so only the historical dates are predicted instead
    # of asking for a negative number of future periods.
    periods = max((pd.Timestamp(horizon_end) - df["date"].max()).days, 0)
    future = m.make_future_dataframe(periods=periods, freq="D")
    forecast = m.predict(future)
    return m, forecast
|
83 |
|
|
|
97 |
st.stop()
|
98 |
|
99 |
# -------------------------------------------------
|
100 |
+
# MACRO FORECAST 1996-2030 ------------------------
|
101 |
# -------------------------------------------------
|
102 |
st.header(f"๐ {selected_item} ๊ฐ๊ฒฉ ์์ธก ๋์๋ณด๋")
|
|
|
|
|
103 |
macro_df = item_df[item_df["date"] >= MACRO_START]
|
104 |
+
|
105 |
m_macro, fc_macro = fit_prophet(macro_df, MACRO_END)
|
106 |
fig_macro = px.line(fc_macro, x="ds", y="yhat", title="Macro Forecast 1996โ2030")
|
107 |
fig_macro.add_scatter(x=macro_df["date"], y=macro_df["price"], mode="lines", name="Actual")
|
|
|
112 |
macro_pct = (macro_pred - latest_price) / latest_price * 100
|
113 |
st.metric("2030 ์์ธก๊ฐ", f"{macro_pred:,.0f}", f"{macro_pct:+.1f}%")
|
114 |
|
115 |
+
# -------------------------------------------------
|
116 |
+
# MICRO FORECAST 2024-2026 ------------------------
|
117 |
+
# -------------------------------------------------
|
118 |
st.subheader("๐ 2024โ2026 ๋จ๊ธฐ ์์ธก")
|
119 |
+
|
120 |
micro_df = item_df[item_df["date"] >= MICRO_START]
|
121 |
m_micro, fc_micro = fit_prophet(micro_df, MICRO_END)
|
122 |
fig_micro = px.line(fc_micro, x="ds", y="yhat", title="Micro Forecast 2024โ2026")
|
|
|
127 |
micro_pct = (micro_pred - latest_price) / latest_price * 100
|
128 |
st.metric("2026 ์์ธก๊ฐ", f"{micro_pred:,.0f}", f"{micro_pct:+.1f}%")
|
129 |
|
130 |
+
# -------------------------------------------------
|
131 |
+
# SEASONALITY & PATTERN ---------------------------
|
132 |
+
# -------------------------------------------------
|
133 |
with st.expander("๐ ์์ฆ๋๋ฆฌํฐ & ํจํด ์ค๋ช
"):
|
134 |
comp_fig = m_micro.plot_components(fc_micro)
|
135 |
st.pyplot(comp_fig)
|
136 |
+
|
137 |
month_season = (fc_micro[["ds", "yearly"]]
|
138 |
.assign(month=lambda d: d.ds.dt.month)
|
139 |
.groupby("month")["yearly"].mean())
|
140 |
st.markdown(
|
141 |
+
f"**์ฐ๊ฐ ํผํฌ ์:** {int(month_season.idxmax())}์ \n"
|
142 |
+
f"**์ฐ๊ฐ ์ ์ ์:** {int(month_season.idxmin())}์ \n"
|
143 |
f"**์ฐ๊ฐ ๋ณ๋ํญ:** {month_season.max() - month_season.min():.1f}")
|
144 |
|
145 |
+
# -------------------------------------------------
|
146 |
+
# CORRELATION HEATMAP -----------------------------
|
147 |
+
# -------------------------------------------------
|
148 |
st.subheader("๐งฎ ํ๋ชฉ ๊ฐ ์๊ด๊ด๊ณ")
|
149 |
monthly_pivot = (raw_df.assign(month=lambda d: d.date.dt.to_period("M"))
|
150 |
.groupby(["month", "item"], as_index=False)["price"].mean()
|
151 |
.pivot(index="month", columns="item", values="price"))
|
152 |
|
153 |
corr = monthly_pivot.corr()
|
|
|
154 |
fig, ax = plt.subplots(figsize=(12, 10))
|
155 |
+
mask = np.triu(np.ones_like(corr, dtype=bool))
|
156 |
sns.heatmap(corr, mask=mask, cmap="RdBu_r", center=0, linewidths=.5, ax=ax)
|
157 |
st.pyplot(fig)
|
158 |
|
159 |
+
st.info("๋นจ๊ฐ ์์ญ: ๊ฐ๊ฒฉ ๋์กฐํ / ํ๋ ์์ญ: ๋์ฒด์ฌ ๊ฐ๋ฅ์ฑ")
|
160 |
|
161 |
+
# -------------------------------------------------
|
162 |
+
# VOLATILITY --------------------------------------
|
163 |
+
# -------------------------------------------------
|
164 |
st.subheader("๐ 30์ผ ์ด๋ ํ์คํธ์ฐจ (๊ฐ๊ฒฉ ๋ณ๋์ฑ)")
|
165 |
vol = item_df.set_index("date")["price"].rolling(30).std().dropna().reset_index()
|
166 |
fig_vol = px.area(vol, x="date", y="price", title="Rolling 30D Std Dev")
|
167 |
st.plotly_chart(fig_vol, use_container_width=True)
|
168 |
|
169 |
+
st.caption("๋ฐ์ดํฐ: domae-202503.parquet ยท Prophet ์์ธก ยท Streamlit ๋์๋ณด๋")
|