Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -148,16 +148,31 @@ def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
|
|
148 |
df.reset_index(inplace=True)
|
149 |
df.rename(columns={df.columns[0]: "date"}, inplace=True)
|
150 |
|
151 |
-
|
|
|
152 |
if "date" in df.columns and pd.api.types.is_object_dtype(df["date"]):
|
153 |
if len(df) > 0:
|
154 |
-
|
155 |
-
|
156 |
-
#
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
|
162 |
# ── build item from pdlt_nm + spcs_nm if needed ────────────────────
|
163 |
if "item" not in df.columns and {"pdlt_nm", "spcs_nm"}.issubset(df.columns):
|
@@ -170,6 +185,8 @@ def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
|
|
170 |
|
171 |
return df
|
172 |
|
|
|
|
|
173 |
@st.cache_data(show_spinner=False)
|
174 |
def load_data() -> pd.DataFrame:
|
175 |
"""Load price data from CSV file."""
|
@@ -178,46 +195,55 @@ def load_data() -> pd.DataFrame:
|
|
178 |
st.error(f"πΎ {CSV_PATH} νμΌμ μ°Ύμ μ μμ΅λλ€.")
|
179 |
st.stop()
|
180 |
|
181 |
-
st.sidebar.info(f"{CSV_PATH} νμΌμμ λ°μ΄ν°λ₯Ό λΆλ¬μ΅λλ€.")
|
182 |
-
|
183 |
# CSV 파일 직접 로드
|
184 |
df = pd.read_csv(CSV_PATH)
|
185 |
st.sidebar.success(f"CSV λ°μ΄ν° λ‘λ μλ£: {len(df)}κ° ν")
|
186 |
|
187 |
-
# 원본 데이터 형태 확인
|
188 |
st.sidebar.write("μλ³Έ λ°μ΄ν° 컬λΌ:", list(df.columns))
|
189 |
|
|
|
|
|
190 |
df = _standardize_columns(df)
|
|
|
|
|
|
|
|
|
|
|
191 |
st.sidebar.write("νμ€ν ν 컬λΌ:", list(df.columns))
|
192 |
|
|
|
193 |
missing = {c for c in ["date", "item", "price"] if c not in df.columns}
|
194 |
if missing:
|
195 |
st.error(f"νμ μ»¬λΌ λλ½: {', '.join(missing)} β νμΌ μ»¬λΌλͺ
μ νμΈνμΈμ.")
|
196 |
st.stop()
|
197 |
|
198 |
-
# 날짜 변환
|
199 |
before_date_convert = len(df)
|
200 |
df["date"] = pd.to_datetime(df["date"], errors="coerce")
|
201 |
after_date_convert = df.dropna(subset=["date"]).shape[0]
|
202 |
if before_date_convert != after_date_convert:
|
203 |
-
st.warning(f"λ μ§ λ³ν μ€ {before_date_convert - after_date_convert}κ° νμ΄ μ μΈλμμ΅λλ€.")
|
204 |
|
205 |
-
# κ°κ²© λ°μ΄ν°
|
206 |
df["price"] = pd.to_numeric(df["price"], errors="coerce")
|
207 |
|
208 |
-
# NA 데이터 처리
|
209 |
before_na_drop = len(df)
|
210 |
df = df.dropna(subset=["date", "item", "price"])
|
211 |
after_na_drop = len(df)
|
212 |
if before_na_drop != after_na_drop:
|
213 |
-
st.warning(f"NA μ κ±° μ€ {before_na_drop - after_na_drop}κ° νμ΄
|
214 |
|
|
|
215 |
df.sort_values("date", inplace=True)
|
216 |
|
217 |
-
# λ°μ΄ν°
|
218 |
if len(df) > 0:
|
|
|
219 |
st.sidebar.write(f"λ°μ΄ν° λ μ§ λ²μ: {df['date'].min().strftime('%Y-%m-%d')} ~ {df['date'].max().strftime('%Y-%m-%d')}")
|
220 |
st.sidebar.write(f"μ΄ νλͺ© μ: {df['item'].nunique()}")
|
|
|
221 |
else:
|
222 |
st.error("μ ν¨ν λ°μ΄ν°κ° μμ΅λλ€!")
|
223 |
|
@@ -228,6 +254,47 @@ def load_data() -> pd.DataFrame:
|
|
228 |
st.code(traceback.format_exc())
|
229 |
st.stop()
|
230 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
231 |
@st.cache_data(show_spinner=False)
|
232 |
def get_items(df: pd.DataFrame):
|
233 |
return sorted(df["item"].unique())
|
|
|
148 |
df.reset_index(inplace=True)
|
149 |
df.rename(columns={df.columns[0]: "date"}, inplace=True)
|
150 |
|
151 |
+
|
152 |
+
# ── convert YYYYMM string to datetime ──────────────────────────────────────
|
153 |
if "date" in df.columns and pd.api.types.is_object_dtype(df["date"]):
|
154 |
if len(df) > 0:
|
155 |
+
# 더 유연한 날짜 변환
|
156 |
+
try:
|
157 |
+
# 샘플 확인
|
158 |
+
sample = str(df["date"].iloc[0])
|
159 |
+
|
160 |
+
# YYYYMM 형식 (6자리)
|
161 |
+
if sample.isdigit() and len(sample) == 6:
|
162 |
+
df["date"] = pd.to_datetime(df["date"].astype(str), format="%Y%m", errors="coerce")
|
163 |
+
df["date"] = df["date"] + pd.offsets.MonthEnd(0) # ν΄λΉ μμ λ§μ§λ§ λ λ‘ μ€μ
|
164 |
+
|
165 |
+
# YYYYMMDD 형식 (8자리)
|
166 |
+
elif sample.isdigit() and len(sample) == 8:
|
167 |
+
df["date"] = pd.to_datetime(df["date"].astype(str), format="%Y%m%d", errors="coerce")
|
168 |
+
|
169 |
+
# 기타 형식은 자동 감지
|
170 |
+
else:
|
171 |
+
df["date"] = pd.to_datetime(df["date"], errors="coerce")
|
172 |
+
except:
|
173 |
+
# 실패 시 일반 변환 시도
|
174 |
+
df["date"] = pd.to_datetime(df["date"], errors="coerce")
|
175 |
+
|
176 |
|
177 |
# ── build item from pdlt_nm + spcs_nm if needed ────────────────────
|
178 |
if "item" not in df.columns and {"pdlt_nm", "spcs_nm"}.issubset(df.columns):
|
|
|
185 |
|
186 |
return df
|
187 |
|
188 |
+
|
189 |
+
|
190 |
@st.cache_data(show_spinner=False)
|
191 |
def load_data() -> pd.DataFrame:
|
192 |
"""Load price data from CSV file."""
|
|
|
195 |
st.error(f"πΎ {CSV_PATH} νμΌμ μ°Ύμ μ μμ΅λλ€.")
|
196 |
st.stop()
|
197 |
|
|
|
|
|
198 |
# CSV 파일 직접 로드
|
199 |
df = pd.read_csv(CSV_PATH)
|
200 |
st.sidebar.success(f"CSV λ°μ΄ν° λ‘λ μλ£: {len(df)}κ° ν")
|
201 |
|
202 |
+
# 데이터 표준화 전 원본 데이터 형태 확인
|
203 |
st.sidebar.write("μλ³Έ λ°μ΄ν° 컬λΌ:", list(df.columns))
|
204 |
|
205 |
+
# 표준화 전 상세 로그
|
206 |
+
before_std = len(df)
|
207 |
df = _standardize_columns(df)
|
208 |
+
after_std = len(df)
|
209 |
+
if before_std != after_std:
|
210 |
+
st.sidebar.warning(f"νμ€ν μ€ {before_std - after_std}κ° νμ΄ μ μΈλμμ΅λλ€.")
|
211 |
+
|
212 |
+
# 표준화 후 로그
|
213 |
st.sidebar.write("νμ€ν ν 컬λΌ:", list(df.columns))
|
214 |
|
215 |
+
# 필수 컬럼 확인
|
216 |
missing = {c for c in ["date", "item", "price"] if c not in df.columns}
|
217 |
if missing:
|
218 |
st.error(f"νμ μ»¬λΌ λλ½: {', '.join(missing)} β νμΌ μ»¬λΌλͺ
μ νμΈνμΈμ.")
|
219 |
st.stop()
|
220 |
|
221 |
+
# 날짜 변환 전후 데이터 수 확인
|
222 |
before_date_convert = len(df)
|
223 |
df["date"] = pd.to_datetime(df["date"], errors="coerce")
|
224 |
after_date_convert = df.dropna(subset=["date"]).shape[0]
|
225 |
if before_date_convert != after_date_convert:
|
226 |
+
st.sidebar.warning(f"λ μ§ λ³ν μ€ {before_date_convert - after_date_convert}κ° νμ΄ μ μΈλμμ΅λλ€.")
|
227 |
|
228 |
+
# 가격 데이터 숫자로 변환
|
229 |
df["price"] = pd.to_numeric(df["price"], errors="coerce")
|
230 |
|
231 |
+
# NA 데이터 처리 전후 수 확인
|
232 |
before_na_drop = len(df)
|
233 |
df = df.dropna(subset=["date", "item", "price"])
|
234 |
after_na_drop = len(df)
|
235 |
if before_na_drop != after_na_drop:
|
236 |
+
st.sidebar.warning(f"NA 제거 중 {before_na_drop - after_na_drop}개 행이 제외되었습니다.")
|
237 |
|
238 |
+
# 결과 정렬
|
239 |
df.sort_values("date", inplace=True)
|
240 |
|
241 |
+
# 데이터 정보 표시
|
242 |
if len(df) > 0:
|
243 |
+
st.sidebar.write(f"μ΅μ’
λ°μ΄ν°: {len(df)}κ° ν")
|
244 |
st.sidebar.write(f"λ°μ΄ν° λ μ§ λ²μ: {df['date'].min().strftime('%Y-%m-%d')} ~ {df['date'].max().strftime('%Y-%m-%d')}")
|
245 |
st.sidebar.write(f"μ΄ νλͺ© μ: {df['item'].nunique()}")
|
246 |
+
st.sidebar.write(f"νλͺ©λ³ νκ· λ°μ΄ν° μ: {len(df)/df['item'].nunique():.1f}κ°")
|
247 |
else:
|
248 |
st.error("μ ν¨ν λ°μ΄ν°κ° μμ΅λλ€!")
|
249 |
|
|
|
254 |
st.code(traceback.format_exc())
|
255 |
st.stop()
|
256 |
|
257 |
+
# 메인 코드에 다음 부분 추가 - 품목별 데이터 수 확인
|
258 |
+
item_df = raw_df.query("item == @selected_item").copy()
|
259 |
+
if item_df.empty:
|
260 |
+
st.error(f"μ νν νλͺ© '{selected_item}' λ°μ΄ν°κ° μμ΅λλ€.")
|
261 |
+
st.stop()
|
262 |
+
elif len(item_df) < 2:
|
263 |
+
st.warning(f"μ νν νλͺ© '{selected_item}' λ°μ΄ν°κ° λ무 μ μ΅λλ€ (λ°μ΄ν° μ: {len(item_df)}). μμΈ‘μ΄ λΆμ νν μ μμ΅λλ€.")
|
264 |
+
else:
|
265 |
+
st.success(f"μ νν νλͺ© '{selected_item}'μ λν΄ {len(item_df)}κ°μ λ°μ΄ν°κ° μμ΅λλ€.")
|
266 |
+
|
267 |
+
# 아래처럼 장기 예측 부분 수정
|
268 |
+
try:
|
269 |
+
# 데이터 필터링 로직 개선
|
270 |
+
macro_start_dt = pd.Timestamp("1996-01-01")
|
271 |
+
|
272 |
+
# 최소 데이터 수 확인
|
273 |
+
macro_df = item_df.copy() # μ 체 λ°μ΄ν° μ¬μ©
|
274 |
+
|
275 |
+
# 데이터가 매우 적은 경우 경고 표시
|
276 |
+
if len(macro_df) < 5:
|
277 |
+
st.warning(f"{selected_item}μ λν λ°μ΄ν°κ° λ§€μ° μ μ΅λλ€ (λ°μ΄ν° μ: {len(macro_df)}). μμΈ‘μ΄ λΆμ νν μ μμ΅λλ€.")
|
278 |
+
|
279 |
+
# 진단 정보 표시
|
280 |
+
with st.expander("λ°μ΄ν° μ§λ¨"):
|
281 |
+
st.write(f"- μ 체 λ°μ΄ν° μ: {len(item_df)}")
|
282 |
+
st.write(f"- λΆμ λ°μ΄ν° μ: {len(macro_df)}")
|
283 |
+
if len(macro_df) > 0:
|
284 |
+
st.write(f"- κΈ°κ°: {macro_df['date'].min().strftime('%Y-%m-%d')} ~ {macro_df['date'].max().strftime('%Y-%m-%d')}")
|
285 |
+
st.dataframe(macro_df.head())
|
286 |
+
else:
|
287 |
+
st.write("λ°μ΄ν°κ° μμ΅λλ€.")
|
288 |
+
|
289 |
+
# 데이터 필터링 조건 완화 - 최소 2개 이상이면 진행
|
290 |
+
if len(macro_df) >= 2:
|
291 |
+
# 기존 코드 (모델 학습 및 시각화)
|
292 |
+
with st.spinner("μ₯κΈ° μμΈ‘ λͺ¨λΈ μμ± μ€..."):
|
293 |
+
if use_ensemble:
|
294 |
+
fc_macro = fit_ensemble_model(macro_df, selected_item, MACRO_END)
|
295 |
+
else:
|
296 |
+
fc_macro = fit_optimal_model(macro_df, selected_item, MACRO_END)
|
297 |
+
|
298 |
@st.cache_data(show_spinner=False)
|
299 |
def get_items(df: pd.DataFrame):
|
300 |
return sorted(df["item"].unique())
|