import pandas as pd
from sklearn.ensemble import IsolationForest
def clean_data(file, *, contamination=0.05, random_state=42):
    """Load a demand/supply CSV and return only the rows not flagged as anomalies.

    The file is read with ``pd.read_csv``, the ``tanggal`` column is parsed
    to datetimes, and the rows are sorted chronologically with a fresh
    0..n-1 index. An ``IsolationForest`` is then fitted on the ``demand``
    and ``supply`` columns, and only the rows it labels ``1`` (inliers)
    are kept.

    Args:
        file: Anything ``pd.read_csv`` accepts (path, URL, or file-like
            object). The data must contain the columns ``tanggal``,
            ``demand`` and ``supply``.
        contamination: Forwarded to ``IsolationForest``; the expected
            proportion of outliers in the data. Defaults to ``0.05``.
        random_state: Forwarded to ``IsolationForest`` so that results are
            reproducible. Defaults to ``42``.

    Returns:
        A new ``DataFrame`` with the same columns as the input, sorted by
        ``tanggal``, containing only the inlier rows. The index keeps the
        positions the rows had after sorting, so it may have gaps where
        anomalies were removed. An input with no data rows is returned
        empty instead of failing inside the model.

    Raises:
        KeyError: If ``tanggal``, ``demand`` or ``supply`` is missing.
    """
    # Read and format the data.
    df = pd.read_csv(file)
    df['tanggal'] = pd.to_datetime(df['tanggal'])
    df = df.sort_values('tanggal').reset_index(drop=True)

    # Select the model inputs before the emptiness check so that a missing
    # column is still reported as a KeyError, even for a header-only file.
    features = df[['demand', 'supply']]

    # IsolationForest cannot be fitted on zero samples; with no rows there
    # is nothing to filter, so hand the (empty) frame back unchanged.
    if features.empty:
        return df

    # Detect anomalies.
    # NOTE(review): missing values in demand/supply are passed to the model
    # as-is; whether that raises depends on the scikit-learn version --
    # confirm the upstream data is complete.
    clf = IsolationForest(
        contamination=contamination,
        random_state=random_state,
    )
    labels = clf.fit_predict(features)

    # Filter with a boolean mask instead of a temporary 'anomali' column,
    # so an existing column of that name in the input is never clobbered.
    return df[labels == 1]