Callmebowoo-22 committed on
Commit
dfa098d
·
verified ·
1 Parent(s): 79636b6

Create preprocessing.py

Browse files
Files changed (1) hide show
  1. utils/preprocessing.py +23 -0
utils/preprocessing.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from sklearn.ensemble import IsolationForest
3
+
4
def clean_data(file, *, contamination=0.05, random_state=42) -> pd.DataFrame:
    """Load UMKM demand/supply data from a CSV and drop anomalous rows.

    The CSV must contain the columns ``tanggal`` (date), ``demand`` and
    ``supply``. An ``IsolationForest`` is fitted on ``demand`` and ``supply``
    and only the rows it labels as inliers (prediction ``1``) are kept.

    Args:
        file: Anything ``pandas.read_csv`` accepts (path or file-like object).
        contamination: Expected proportion of outliers, forwarded to
            ``IsolationForest``. Defaults to the original hard-coded 0.05.
        random_state: Seed forwarded to ``IsolationForest`` so repeated runs
            on the same data give the same result. Defaults to 42.

    Returns:
        A new DataFrame with the inlier rows only, ``tanggal`` converted to
        datetime, and the original row index preserved. No helper column is
        added, so every column present in the input is returned unchanged.
        If the CSV has a header but no data rows, the empty frame is returned.

    Raises:
        KeyError: If any of the required columns is missing from the CSV.
    """
    required = ("tanggal", "demand", "supply")

    df = pd.read_csv(file)

    # Fail early with one message naming every missing column, instead of a
    # bare KeyError from whichever column lookup happens to run first.
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f"Missing required column(s): {', '.join(missing)}")

    df["tanggal"] = pd.to_datetime(df["tanggal"])

    # IsolationForest cannot be fitted on zero samples; nothing to filter.
    if df.empty:
        return df

    clf = IsolationForest(contamination=contamination, random_state=random_state)
    labels = clf.fit_predict(df[["demand", "supply"]])

    # Filter with a mask rather than a temporary 'anomaly' column so that an
    # input column with that name is not overwritten and dropped.
    return df[labels == 1].copy()