Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
@@ -12,7 +13,7 @@ from pathlib import Path
|
|
12 |
# CONFIG ------------------------------------------
|
13 |
# -------------------------------------------------
|
14 |
CSV_PATH = Path("price_data.csv")
|
15 |
-
PARQUET_PATH = Path("domae-202503.parquet") # 1996-2025-03
|
16 |
MACRO_START, MACRO_END = "1996-01-01", "2030-12-31"
|
17 |
MICRO_START, MICRO_END = "2020-01-01", "2026-12-31"
|
18 |
|
@@ -21,33 +22,46 @@ st.set_page_config(page_title="ํ๋ชฉ๋ณ ๊ฐ๊ฒฉ ์์ธก", page_icon="๐", layou
|
|
21 |
# -------------------------------------------------
|
22 |
# UTILITIES ---------------------------------------
|
23 |
# -------------------------------------------------
|
24 |
-
DATE_CANDIDATES = {"date", "ds", "ymd", "๋ ์ง"}
|
25 |
-
ITEM_CANDIDATES = {"item", "ํ๋ชฉ", "code", "category"}
|
26 |
-
PRICE_CANDIDATES = {"price", "y", "value", "๊ฐ๊ฒฉ"}
|
27 |
|
28 |
def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
|
29 |
-
"""Rename date/item/price
|
30 |
col_map = {}
|
31 |
for c in df.columns:
|
32 |
lc = c.lower()
|
33 |
-
if lc in DATE_CANDIDATES:
|
34 |
col_map[c] = "date"
|
35 |
-
elif lc in ITEM_CANDIDATES:
|
36 |
-
col_map[c] = "item"
|
37 |
elif lc in PRICE_CANDIDATES:
|
38 |
col_map[c] = "price"
|
39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
-
# date might be index
|
42 |
-
if "date" not in df.columns:
|
43 |
-
if df.index.dtype.kind == "M":
|
44 |
-
df.reset_index(inplace=True)
|
45 |
-
df.rename(columns={df.columns[0]: "date"}, inplace=True)
|
46 |
return df
|
47 |
|
48 |
@st.cache_data(show_spinner=False)
|
49 |
def load_data() -> pd.DataFrame:
|
50 |
-
"""Load price data from Parquet if available, else CSV. Tries to infer column names."""
|
51 |
if PARQUET_PATH.exists():
|
52 |
df = pd.read_parquet(PARQUET_PATH)
|
53 |
elif CSV_PATH.exists():
|
@@ -57,14 +71,13 @@ def load_data() -> pd.DataFrame:
|
|
57 |
st.stop()
|
58 |
|
59 |
df = _standardize_columns(df)
|
60 |
-
|
61 |
missing = {c for c in ["date", "item", "price"] if c not in df.columns}
|
62 |
if missing:
|
63 |
st.error(f"ํ์ ์ปฌ๋ผ ๋๋ฝ: {', '.join(missing)} โ ํ์ผ ์ปฌ๋ผ๋ช
์ ํ์ธํ์ธ์.")
|
64 |
st.stop()
|
65 |
|
66 |
df["date"] = pd.to_datetime(df["date"], errors="coerce")
|
67 |
-
df.dropna(subset=["date", "item", "price"]
|
68 |
df.sort_values("date", inplace=True)
|
69 |
return df
|
70 |
|
@@ -134,21 +147,27 @@ with st.expander("๐ ์์ฆ๋๋ฆฌํฐ & ํจํด ์ค๋ช
"):
|
|
134 |
comp_fig = m_micro.plot_components(fc_micro)
|
135 |
st.pyplot(comp_fig)
|
136 |
|
137 |
-
month_season = (
|
138 |
-
|
139 |
-
|
|
|
|
|
140 |
st.markdown(
|
141 |
f"**์ฐ๊ฐ ํผํฌ ์:** {int(month_season.idxmax())}์ \n"
|
142 |
f"**์ฐ๊ฐ ์ ์ ์:** {int(month_season.idxmin())}์ \n"
|
143 |
-
f"**์ฐ๊ฐ ๋ณ๋ํญ:** {month_season.max() - month_season.min():.1f}"
|
|
|
144 |
|
145 |
# -------------------------------------------------
|
146 |
# CORRELATION HEATMAP -----------------------------
|
147 |
# -------------------------------------------------
|
148 |
st.subheader("๐งฎ ํ๋ชฉ ๊ฐ ์๊ด๊ด๊ณ")
|
149 |
-
monthly_pivot = (
|
150 |
-
|
151 |
-
|
|
|
|
|
|
|
152 |
|
153 |
corr = monthly_pivot.corr()
|
154 |
fig, ax = plt.subplots(figsize=(12, 10))
|
@@ -162,8 +181,14 @@ st.info("๋นจ๊ฐ ์์ญ: ๊ฐ๊ฒฉ ๋์กฐํ / ํ๋ ์์ญ: ๋์ฒด์ฌ ๊ฐ๋ฅ์ฑ")
|
|
162 |
# VOLATILITY --------------------------------------
|
163 |
# -------------------------------------------------
|
164 |
st.subheader("๐ 30์ผ ์ด๋ ํ์คํธ์ฐจ (๊ฐ๊ฒฉ ๋ณ๋์ฑ)")
|
165 |
-
vol =
|
|
|
|
|
|
|
|
|
|
|
|
|
166 |
fig_vol = px.area(vol, x="date", y="price", title="Rolling 30D Std Dev")
|
167 |
st.plotly_chart(fig_vol, use_container_width=True)
|
168 |
|
169 |
-
st.caption("๋ฐ์ดํฐ: domae-202503.parquet ยท Prophet ์์ธก ยท Streamlit ๋์๋ณด๋")
|
|
|
1 |
+
|
2 |
import streamlit as st
|
3 |
import pandas as pd
|
4 |
import numpy as np
|
|
|
13 |
# CONFIG ------------------------------------------
|
14 |
# -------------------------------------------------
|
15 |
CSV_PATH = Path("price_data.csv")
|
16 |
+
PARQUET_PATH = Path("domae-202503.parquet")  # 1996 through 2025-03, daily/monthly prices
|
17 |
MACRO_START, MACRO_END = "1996-01-01", "2030-12-31"
|
18 |
MICRO_START, MICRO_END = "2020-01-01", "2026-12-31"
|
19 |
|
|
|
22 |
# -------------------------------------------------
|
23 |
# UTILITIES ---------------------------------------
|
24 |
# -------------------------------------------------
|
25 |
+
DATE_CANDIDATES = {"date", "ds", "ymd", "๋ ์ง", "prce_reg_mm", "etl_ldg_dt"}
|
26 |
+
ITEM_CANDIDATES = {"item", "ํ๋ชฉ", "code", "category", "pdlt_nm", "spcs_nm"}
|
27 |
+
PRICE_CANDIDATES = {"price", "y", "value", "๊ฐ๊ฒฉ", "avrg_prce"}
|
28 |
|
29 |
def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
|
30 |
+
"""Rename date/item/price columns to date, item, price. Create composite item if needed."""
|
31 |
col_map = {}
|
32 |
for c in df.columns:
|
33 |
lc = c.lower()
|
34 |
+
if lc in DATE_CANDIDATES:
|
35 |
col_map[c] = "date"
|
|
|
|
|
36 |
elif lc in PRICE_CANDIDATES:
|
37 |
col_map[c] = "price"
|
38 |
+
elif lc in ITEM_CANDIDATES:
|
39 |
+
if "item" not in col_map.values():
|
40 |
+
col_map[c] = "item"
|
41 |
+
else:
|
42 |
+
col_map[c] = "species"
|
43 |
+
df = df.rename(columns=col_map)
|
44 |
+
|
45 |
+
if "date" not in df.columns and df.index.dtype.kind == "M":
|
46 |
+
df.reset_index(inplace=True)
|
47 |
+
df.rename(columns={df.columns[0]: "date"}, inplace=True)
|
48 |
+
|
49 |
+
if "date" in df.columns and df["date"].dtype == object:
|
50 |
+
sample = str(df["date"].iloc[0])
|
51 |
+
if sample.isdigit() and len(sample) in (6, 8):
|
52 |
+
df["date"] = pd.to_datetime(df["date"].astype(str).str[:6], format="%Y%m", errors="coerce")
|
53 |
+
|
54 |
+
if "item" not in df.columns and {"pdlt_nm", "spcs_nm"}.issubset(df.columns):
|
55 |
+
df["item"] = df["pdlt_nm"].str.strip() + "-" + df["spcs_nm"].str.strip()
|
56 |
+
|
57 |
+
if {"item", "species"}.issubset(df.columns):
|
58 |
+
df["item"] = df["item"].astype(str).str.strip() + "-" + df["species"].astype(str).str.strip()
|
59 |
+
df.drop(columns=["species"], inplace=True)
|
60 |
|
|
|
|
|
|
|
|
|
|
|
61 |
return df
|
62 |
|
63 |
@st.cache_data(show_spinner=False)
|
64 |
def load_data() -> pd.DataFrame:
|
|
|
65 |
if PARQUET_PATH.exists():
|
66 |
df = pd.read_parquet(PARQUET_PATH)
|
67 |
elif CSV_PATH.exists():
|
|
|
71 |
st.stop()
|
72 |
|
73 |
df = _standardize_columns(df)
|
|
|
74 |
missing = {c for c in ["date", "item", "price"] if c not in df.columns}
|
75 |
if missing:
|
76 |
st.error(f"ํ์ ์ปฌ๋ผ ๋๋ฝ: {', '.join(missing)} โ ํ์ผ ์ปฌ๋ผ๋ช
์ ํ์ธํ์ธ์.")
|
77 |
st.stop()
|
78 |
|
79 |
df["date"] = pd.to_datetime(df["date"], errors="coerce")
|
80 |
+
df = df.dropna(subset=["date", "item", "price"])
|
81 |
df.sort_values("date", inplace=True)
|
82 |
return df
|
83 |
|
|
|
147 |
comp_fig = m_micro.plot_components(fc_micro)
|
148 |
st.pyplot(comp_fig)
|
149 |
|
150 |
+
month_season = (
|
151 |
+
fc_micro[["ds", "yearly"]]
|
152 |
+
.assign(month=lambda d: d.ds.dt.month)
|
153 |
+
.groupby("month")["yearly"].mean()
|
154 |
+
)
|
155 |
st.markdown(
|
156 |
f"**์ฐ๊ฐ ํผํฌ ์:** {int(month_season.idxmax())}์ \n"
|
157 |
f"**์ฐ๊ฐ ์ ์ ์:** {int(month_season.idxmin())}์ \n"
|
158 |
+
f"**์ฐ๊ฐ ๋ณ๋ํญ:** {month_season.max() - month_season.min():.1f}"
|
159 |
+
)
|
160 |
|
161 |
# -------------------------------------------------
|
162 |
# CORRELATION HEATMAP -----------------------------
|
163 |
# -------------------------------------------------
|
164 |
st.subheader("๐งฎ ํ๋ชฉ ๊ฐ ์๊ด๊ด๊ณ")
|
165 |
+
monthly_pivot = (
|
166 |
+
raw_df.assign(month=lambda d: d.date.dt.to_period("M"))
|
167 |
+
.groupby(["month", "item"], as_index=False)["price"]
|
168 |
+
.mean()
|
169 |
+
.pivot(index="month", columns="item", values="price")
|
170 |
+
)
|
171 |
|
172 |
corr = monthly_pivot.corr()
|
173 |
fig, ax = plt.subplots(figsize=(12, 10))
|
|
|
181 |
# VOLATILITY --------------------------------------
|
182 |
# -------------------------------------------------
|
183 |
st.subheader("๐ 30์ผ ์ด๋ ํ์คํธ์ฐจ (๊ฐ๊ฒฉ ๋ณ๋์ฑ)")
|
184 |
+
vol = (
|
185 |
+
item_df.set_index("date")["price"]
|
186 |
+
.rolling(30)
|
187 |
+
.std()
|
188 |
+
.dropna()
|
189 |
+
.reset_index()
|
190 |
+
)
|
191 |
fig_vol = px.area(vol, x="date", y="price", title="Rolling 30D Std Dev")
|
192 |
st.plotly_chart(fig_vol, use_container_width=True)
|
193 |
|
194 |
+
st.caption("๋ฐ์ดํฐ: domae-202503.parquet ยท Prophet ์์ธก ยท Streamlit ๋์๋ณด๋")
|