Create preprocessing.py
Browse files- utils/preprocessing.py +23 -0
utils/preprocessing.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from sklearn.ensemble import IsolationForest
|
3 |
+
|
4 |
+
def clean_data(file):
    """Remove anomalous (outlier) rows from UMKM demand/supply data.

    Reads a CSV, parses the ``tanggal`` (date) column as datetimes, and
    fits an Isolation Forest on the ``demand`` and ``supply`` columns.
    Rows the model labels as outliers are dropped.

    Args:
        file: Anything ``pandas.read_csv`` accepts (a path or a file-like
            object). The CSV must contain the columns ``tanggal``,
            ``demand`` and ``supply``.

    Returns:
        A new DataFrame containing only the rows labelled as inliers,
        with ``tanggal`` converted to datetime. The original row index is
        kept (it is not reset). A CSV with a header but no data rows
        yields an empty DataFrame.

    Raises:
        KeyError: If any of the required columns is missing.
    """
    required_columns = ('tanggal', 'demand', 'supply')

    # Read the data.
    df = pd.read_csv(file)

    # Fail early with a message that names every missing column, rather
    # than an opaque lookup error part-way through the function.
    missing = [col for col in required_columns if col not in df.columns]
    if missing:
        raise KeyError(f"Missing required column(s): {', '.join(missing)}")

    # Convert the date column.
    df['tanggal'] = pd.to_datetime(df['tanggal'])

    # Nothing to fit on: the estimator rejects zero samples, so an empty
    # input is returned as-is.
    if df.empty:
        return df

    # Detect anomalies. fit_predict labels inliers as 1 and outliers as -1;
    # the fixed random_state keeps the result reproducible.
    clf = IsolationForest(contamination=0.05, random_state=42)
    labels = clf.fit_predict(df[['demand', 'supply']])

    # Keep only the inliers. A boolean mask is used instead of a temporary
    # 'anomaly' column so an input column of that name is not clobbered.
    return df[labels == 1].copy()
|