import pandas as pd
from sklearn.ensemble import IsolationForest


def clean_data(file, *, contamination=0.05, random_state=42):
    """Load a CSV, order it by date, and drop rows flagged as anomalous.

    The CSV is read with ``pandas.read_csv``; its ``tanggal`` (date) column
    is parsed to datetimes and the rows are sorted by it in ascending order
    with a fresh 0..n-1 index. An ``IsolationForest`` is then fitted on the
    ``demand`` and ``supply`` columns, and only the rows it labels as
    inliers (label ``1``) are kept.

    Args:
        file: Anything accepted by ``pandas.read_csv`` (path, URL, or
            file-like object).
        contamination: Passed through to ``IsolationForest``; the expected
            proportion of outliers in the data.
        random_state: Passed through to ``IsolationForest`` so that results
            are reproducible between runs.

    Returns:
        A DataFrame with the same columns as the input, containing only the
        inlier rows. The index keeps the positions assigned after sorting,
        so it may have gaps where anomalous rows were removed.

    Raises:
        KeyError: If the ``tanggal``, ``demand`` or ``supply`` column is
            missing from the CSV.
    """
    # Read and normalise the data: parse dates, then order chronologically.
    df = pd.read_csv(file)
    df['tanggal'] = pd.to_datetime(df['tanggal'])
    df = df.sort_values('tanggal').reset_index(drop=True)

    # Select the features before the empty check so a missing column still
    # raises KeyError even when the file has no data rows.
    features = df[['demand', 'supply']]
    if df.empty:
        # Nothing to fit the model on, hence nothing to discard.
        return df

    # NOTE(review): missing values in demand/supply are passed to the model
    # unchanged - confirm the installed scikit-learn version accepts NaN.
    # Detect anomalies: fit_predict labels inliers as 1 and outliers as -1.
    detector = IsolationForest(
        contamination=contamination,
        random_state=random_state,
    )
    labels = detector.fit_predict(features)

    # Filter with the label array directly instead of a temporary column,
    # so a pre-existing 'anomali' column in the input is left untouched.
    return df.loc[labels == 1]