Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
|
2 |
import streamlit as st
|
3 |
import pandas as pd
|
4 |
import numpy as np
|
@@ -13,7 +12,7 @@ from pathlib import Path
|
|
13 |
# CONFIG ------------------------------------------
|
14 |
# -------------------------------------------------
|
15 |
CSV_PATH = Path("price_data.csv")
|
16 |
-
PARQUET_PATH = Path("domae-202503.parquet") # 1996
|
17 |
MACRO_START, MACRO_END = "1996-01-01", "2030-12-31"
|
18 |
MICRO_START, MICRO_END = "2020-01-01", "2026-12-31"
|
19 |
|
@@ -26,8 +25,9 @@ DATE_CANDIDATES = {"date", "ds", "ymd", "๋ ์ง", "prce_reg_mm", "etl_ldg_dt"}
|
|
26 |
ITEM_CANDIDATES = {"item", "ํ๋ชฉ", "code", "category", "pdlt_nm", "spcs_nm"}
|
27 |
PRICE_CANDIDATES = {"price", "y", "value", "๊ฐ๊ฒฉ", "avrg_prce"}
|
28 |
|
|
|
29 |
def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
|
30 |
-
"""
|
31 |
col_map = {}
|
32 |
for c in df.columns:
|
33 |
lc = c.lower()
|
@@ -36,32 +36,43 @@ def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
|
|
36 |
elif lc in PRICE_CANDIDATES:
|
37 |
col_map[c] = "price"
|
38 |
elif lc in ITEM_CANDIDATES:
|
|
|
39 |
if "item" not in col_map.values():
|
40 |
col_map[c] = "item"
|
41 |
else:
|
42 |
col_map[c] = "species"
|
43 |
df = df.rename(columns=col_map)
|
44 |
|
|
|
|
|
|
|
|
|
|
|
45 |
if "date" not in df.columns and df.index.dtype.kind == "M":
|
46 |
df.reset_index(inplace=True)
|
47 |
df.rename(columns={df.columns[0]: "date"}, inplace=True)
|
48 |
|
49 |
-
|
|
|
50 |
sample = str(df["date"].iloc[0])
|
51 |
if sample.isdigit() and len(sample) in (6, 8):
|
52 |
df["date"] = pd.to_datetime(df["date"].astype(str).str[:6], format="%Y%m", errors="coerce")
|
53 |
|
|
|
54 |
if "item" not in df.columns and {"pdlt_nm", "spcs_nm"}.issubset(df.columns):
|
55 |
df["item"] = df["pdlt_nm"].str.strip() + "-" + df["spcs_nm"].str.strip()
|
56 |
|
|
|
57 |
if {"item", "species"}.issubset(df.columns):
|
58 |
df["item"] = df["item"].astype(str).str.strip() + "-" + df["species"].astype(str).str.strip()
|
59 |
df.drop(columns=["species"], inplace=True)
|
60 |
|
61 |
return df
|
62 |
|
|
|
63 |
@st.cache_data(show_spinner=False)
|
64 |
def load_data() -> pd.DataFrame:
|
|
|
65 |
if PARQUET_PATH.exists():
|
66 |
df = pd.read_parquet(PARQUET_PATH)
|
67 |
elif CSV_PATH.exists():
|
@@ -71,6 +82,7 @@ def load_data() -> pd.DataFrame:
|
|
71 |
st.stop()
|
72 |
|
73 |
df = _standardize_columns(df)
|
|
|
74 |
missing = {c for c in ["date", "item", "price"] if c not in df.columns}
|
75 |
if missing:
|
76 |
st.error(f"ํ์ ์ปฌ๋ผ ๋๋ฝ: {', '.join(missing)} โ ํ์ผ ์ปฌ๋ผ๋ช
์ ํ์ธํ์ธ์.")
|
@@ -81,10 +93,12 @@ def load_data() -> pd.DataFrame:
|
|
81 |
df.sort_values("date", inplace=True)
|
82 |
return df
|
83 |
|
|
|
84 |
@st.cache_data(show_spinner=False)
|
85 |
def get_items(df: pd.DataFrame):
|
86 |
return sorted(df["item"].unique())
|
87 |
|
|
|
88 |
@st.cache_data(show_spinner=False)
|
89 |
def fit_prophet(df: pd.DataFrame, horizon_end: str):
|
90 |
m = Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False)
|
@@ -110,7 +124,7 @@ if item_df.empty:
|
|
110 |
st.stop()
|
111 |
|
112 |
# -------------------------------------------------
|
113 |
-
# MACRO FORECAST 1996
|
114 |
# -------------------------------------------------
|
115 |
st.header(f"๐ {selected_item} ๊ฐ๊ฒฉ ์์ธก ๋์๋ณด๋")
|
116 |
macro_df = item_df[item_df["date"] >= MACRO_START]
|
@@ -126,7 +140,7 @@ macro_pct = (macro_pred - latest_price) / latest_price * 100
|
|
126 |
st.metric("2030 ์์ธก๊ฐ", f"{macro_pred:,.0f}", f"{macro_pct:+.1f}%")
|
127 |
|
128 |
# -------------------------------------------------
|
129 |
-
# MICRO FORECAST 2024
|
130 |
# -------------------------------------------------
|
131 |
st.subheader("๐ 2024โ2026 ๋จ๊ธฐ ์์ธก")
|
132 |
|
@@ -147,48 +161,23 @@ with st.expander("๐ ์์ฆ๋๋ฆฌํฐ & ํจํด ์ค๋ช
"):
|
|
147 |
comp_fig = m_micro.plot_components(fc_micro)
|
148 |
st.pyplot(comp_fig)
|
149 |
|
150 |
-
month_season = (
|
151 |
-
|
152 |
-
|
153 |
-
.groupby("month")["yearly"].mean()
|
154 |
-
)
|
155 |
st.markdown(
|
156 |
f"**์ฐ๊ฐ ํผํฌ ์:** {int(month_season.idxmax())}์ \n"
|
157 |
f"**์ฐ๊ฐ ์ ์ ์:** {int(month_season.idxmin())}์ \n"
|
158 |
-
f"**์ฐ๊ฐ ๋ณ๋ํญ:** {month_season.max() - month_season.min():.1f}"
|
159 |
-
)
|
160 |
|
161 |
# -------------------------------------------------
|
162 |
# CORRELATION HEATMAP -----------------------------
|
163 |
# -------------------------------------------------
|
164 |
st.subheader("๐งฎ ํ๋ชฉ ๊ฐ ์๊ด๊ด๊ณ")
|
165 |
-
monthly_pivot = (
|
166 |
-
|
167 |
-
|
168 |
-
.mean()
|
169 |
-
.pivot(index="month", columns="item", values="price")
|
170 |
-
)
|
171 |
|
172 |
corr = monthly_pivot.corr()
|
173 |
fig, ax = plt.subplots(figsize=(12, 10))
|
174 |
mask = np.triu(np.ones_like(corr, dtype=bool))
|
175 |
-
sns.heatmap(
|
176 |
-
st.pyplot(fig)
|
177 |
-
|
178 |
-
st.info("๋นจ๊ฐ ์์ญ: ๊ฐ๊ฒฉ ๋์กฐํ / ํ๋ ์์ญ: ๋์ฒด์ฌ ๊ฐ๋ฅ์ฑ")
|
179 |
-
|
180 |
-
# -------------------------------------------------
|
181 |
-
# VOLATILITY --------------------------------------
|
182 |
-
# -------------------------------------------------
|
183 |
-
st.subheader("๐ 30์ผ ์ด๋ ํ์คํธ์ฐจ (๊ฐ๊ฒฉ ๋ณ๋์ฑ)")
|
184 |
-
vol = (
|
185 |
-
item_df.set_index("date")["price"]
|
186 |
-
.rolling(30)
|
187 |
-
.std()
|
188 |
-
.dropna()
|
189 |
-
.reset_index()
|
190 |
-
)
|
191 |
-
fig_vol = px.area(vol, x="date", y="price", title="Rolling 30D Std Dev")
|
192 |
-
st.plotly_chart(fig_vol, use_container_width=True)
|
193 |
-
|
194 |
-
st.caption("๋ฐ์ดํฐ: domae-202503.parquet ยท Prophet ์์ธก ยท Streamlit ๋์๋ณด๋")
|
|
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
|
|
12 |
# CONFIG ------------------------------------------
|
13 |
# -------------------------------------------------
|
14 |
CSV_PATH = Path("price_data.csv")
|
15 |
+
PARQUET_PATH = Path("domae-202503.parquet")  # 1996–2025-03 daily/monthly prices
|
16 |
MACRO_START, MACRO_END = "1996-01-01", "2030-12-31"
|
17 |
MICRO_START, MICRO_END = "2020-01-01", "2026-12-31"
|
18 |
|
|
|
25 |
ITEM_CANDIDATES = {"item", "ํ๋ชฉ", "code", "category", "pdlt_nm", "spcs_nm"}
|
26 |
PRICE_CANDIDATES = {"price", "y", "value", "๊ฐ๊ฒฉ", "avrg_prce"}
|
27 |
|
28 |
+
|
29 |
def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Standardize column names to ``date``/``item``/``price`` and deduplicate.

    Column labels are matched case-insensitively against the module-level
    ``DATE_CANDIDATES``, ``PRICE_CANDIDATES`` and ``ITEM_CANDIDATES`` sets.
    The first item-like column becomes ``item``; a later one becomes
    ``species`` and is folded into ``item`` as ``"<item>-<species>"``.

    Args:
        df: Raw frame as loaded from disk.  All work happens on the renamed
            copy, so the caller's frame is left untouched.

    Returns:
        A new frame with standardized labels.  Columns that match no
        candidate set keep their original label, so callers still have to
        check that ``date``, ``item`` and ``price`` are present.
    """
    col_map = {}
    for col in df.columns:
        # Labels are not guaranteed to be strings (integer labels are legal),
        # so go through str() before lower-casing instead of calling .lower()
        # on the label directly.
        key = str(col).lower()
        # NOTE(review): the ``date`` branch is elided in the visible diff
        # context; it is reconstructed from the dangling ``elif`` chain and
        # the DATE_CANDIDATES constant -- confirm against the full file.
        if key in DATE_CANDIDATES:
            col_map[col] = "date"
        elif key in PRICE_CANDIDATES:
            col_map[col] = "price"
        elif key in ITEM_CANDIDATES:
            # First hit is the item, any later hit is treated as the species.
            col_map[col] = "species" if "item" in col_map.values() else "item"
    df = df.rename(columns=col_map)

    # Several source columns can map onto the same standard name; keep only
    # the first of each.  .copy() detaches the slice so the assignments below
    # operate on an independent frame.
    duplicated = df.columns.duplicated()
    if duplicated.any():
        df = df.loc[:, ~duplicated].copy()

    # A datetime index stands in for a missing date column.
    if "date" not in df.columns and df.index.dtype.kind == "M":
        df = df.reset_index()
        df = df.rename(columns={df.columns[0]: "date"})

    # Convert YYYYMM / YYYYMMDD codes to month-start timestamps (the day part
    # of an 8-digit code is deliberately discarded by the [:6] slice).
    # Anything that is not already datetime is inspected, so integer and
    # pandas string-dtype columns are handled as well as object columns.
    if "date" in df.columns and not pd.api.types.is_datetime64_any_dtype(df["date"]):
        non_null = df["date"].dropna()
        # Probe the first *non-null* value; an empty or all-null column
        # yields "" and is left alone.
        sample = "" if non_null.empty else str(non_null.iloc[0])
        if sample.isdigit() and len(sample) in (6, 8):
            df["date"] = pd.to_datetime(
                df["date"].astype(str).str[:6], format="%Y%m", errors="coerce"
            )

    # Fallback for frames that still carry raw pdlt_nm/spcs_nm columns.  With
    # both names listed in ITEM_CANDIDATES the loop above already renames
    # them, so this only fires if those entries are removed from the set.
    if "item" not in df.columns and {"pdlt_nm", "spcs_nm"}.issubset(df.columns):
        df["item"] = df["pdlt_nm"].str.strip() + "-" + df["spcs_nm"].str.strip()

    # Fold the species into the item label and drop the helper column.
    if {"item", "species"}.issubset(df.columns):
        df["item"] = (
            df["item"].astype(str).str.strip()
            + "-"
            + df["species"].astype(str).str.strip()
        )
        df = df.drop(columns=["species"])

    return df
|
71 |
|
72 |
+
|
73 |
@st.cache_data(show_spinner=False)
|
74 |
def load_data() -> pd.DataFrame:
|
75 |
+
"""Load price data from Parquet if available, else CSV. Handle flexible schema."""
|
76 |
if PARQUET_PATH.exists():
|
77 |
df = pd.read_parquet(PARQUET_PATH)
|
78 |
elif CSV_PATH.exists():
|
|
|
82 |
st.stop()
|
83 |
|
84 |
df = _standardize_columns(df)
|
85 |
+
|
86 |
missing = {c for c in ["date", "item", "price"] if c not in df.columns}
|
87 |
if missing:
|
88 |
st.error(f"ํ์ ์ปฌ๋ผ ๋๋ฝ: {', '.join(missing)} โ ํ์ผ ์ปฌ๋ผ๋ช
์ ํ์ธํ์ธ์.")
|
|
|
93 |
df.sort_values("date", inplace=True)
|
94 |
return df
|
95 |
|
96 |
+
|
97 |
@st.cache_data(show_spinner=False)
def get_items(df: pd.DataFrame) -> list:
    """Return the distinct labels found in ``df["item"]``, sorted ascending.

    Missing values are dropped before sorting: NaN does not order
    consistently against other values, and a NaN mixed in with string labels
    makes ``sorted`` raise ``TypeError``.

    Args:
        df: Frame with an ``item`` column.

    Returns:
        Sorted list of the unique non-null item labels.
    """
    return sorted(df["item"].dropna().unique())
|
100 |
|
101 |
+
|
102 |
@st.cache_data(show_spinner=False)
|
103 |
def fit_prophet(df: pd.DataFrame, horizon_end: str):
|
104 |
m = Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False)
|
|
|
124 |
st.stop()
|
125 |
|
126 |
# -------------------------------------------------
|
127 |
+
# MACRO FORECAST 1996–2030 ------------------------
|
128 |
# -------------------------------------------------
|
129 |
st.header(f"๐ {selected_item} ๊ฐ๊ฒฉ ์์ธก ๋์๋ณด๋")
|
130 |
macro_df = item_df[item_df["date"] >= MACRO_START]
|
|
|
140 |
st.metric("2030 ์์ธก๊ฐ", f"{macro_pred:,.0f}", f"{macro_pct:+.1f}%")
|
141 |
|
142 |
# -------------------------------------------------
|
143 |
+
# MICRO FORECAST 2024–2026 ------------------------
|
144 |
# -------------------------------------------------
|
145 |
st.subheader("๐ 2024โ2026 ๋จ๊ธฐ ์์ธก")
|
146 |
|
|
|
161 |
comp_fig = m_micro.plot_components(fc_micro)
|
162 |
st.pyplot(comp_fig)
|
163 |
|
164 |
+
month_season = (fc_micro[["ds", "yearly"]]
|
165 |
+
.assign(month=lambda d: d.ds.dt.month)
|
166 |
+
.groupby("month")["yearly"].mean())
|
|
|
|
|
167 |
st.markdown(
|
168 |
f"**์ฐ๊ฐ ํผํฌ ์:** {int(month_season.idxmax())}์ \n"
|
169 |
f"**์ฐ๊ฐ ์ ์ ์:** {int(month_season.idxmin())}์ \n"
|
170 |
+
f"**์ฐ๊ฐ ๋ณ๋ํญ:** {month_season.max() - month_season.min():.1f}")
|
|
|
171 |
|
172 |
# -------------------------------------------------
|
173 |
# CORRELATION HEATMAP -----------------------------
|
174 |
# -------------------------------------------------
|
175 |
st.subheader("๐งฎ ํ๋ชฉ ๊ฐ ์๊ด๊ด๊ณ")
|
176 |
+
# Mean price per (calendar month, item), reshaped to one column per item.
_with_month = raw_df.assign(month=lambda frame: frame.date.dt.to_period("M"))
_monthly_mean = _with_month.groupby(["month", "item"], as_index=False)["price"].mean()
monthly_pivot = _monthly_mean.pivot(index="month", columns="item", values="price")

# Pairwise correlation between the items' monthly mean price series.
corr = monthly_pivot.corr()
fig, ax = plt.subplots(figsize=(12, 10))
# Boolean upper-triangle mask (diagonal included) with the same shape as corr.
mask = np.triu(np.ones_like(corr, dtype=bool))
|
183 |
+
sns.heatmap(c
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|