Update app.py: validate required columns and coerce unparseable transaction dates
Browse files
app.py
CHANGED
@@ -11,9 +11,14 @@ with open("anomaly_detector_rf_model.pkl", "rb") as f:
|
|
11 |
|
12 |
# Function to preprocess and predict anomalies
|
13 |
def detect_anomalies(df):
|
|
|
|
|
|
|
|
|
|
|
14 |
df["log_amount"] = np.log1p(df["amount"])
|
15 |
df["amount_zscore"] = (df["amount"] - df["amount"].mean()) / df["amount"].std()
|
16 |
-
df["transaction_date"] = pd.to_datetime(df["transaction_date"])
|
17 |
df["day_of_week"] = df["transaction_date"].dt.dayofweek
|
18 |
df["hour"] = df["transaction_date"].dt.hour
|
19 |
df["is_weekend"] = df["day_of_week"].isin([5, 6]).astype(int)
|
@@ -22,8 +27,6 @@ def detect_anomalies(df):
|
|
22 |
df["is_anomalous"] = model.predict(df[features])
|
23 |
|
24 |
anomalies = df[df["is_anomalous"] == 1][["transaction_id", "amount", "merchant", "location", "transaction_date"]]
|
25 |
-
|
26 |
-
# Save anomalies to CSV
|
27 |
csv_path = "/tmp/anomalies.csv"
|
28 |
anomalies.to_csv(csv_path, index=False)
|
29 |
|
|
|
11 |
|
12 |
# Function to preprocess and predict anomalies
|
13 |
def detect_anomalies(df):
|
14 |
+
required_columns = {"transaction_id", "amount", "merchant", "location", "transaction_date"}
|
15 |
+
if not required_columns.issubset(df.columns):
|
16 |
+
missing = required_columns - set(df.columns)
|
17 |
+
raise ValueError(f"Missing required columns: {', '.join(missing)}")
|
18 |
+
|
19 |
df["log_amount"] = np.log1p(df["amount"])
|
20 |
df["amount_zscore"] = (df["amount"] - df["amount"].mean()) / df["amount"].std()
|
21 |
+
df["transaction_date"] = pd.to_datetime(df["transaction_date"], errors='coerce')
|
22 |
df["day_of_week"] = df["transaction_date"].dt.dayofweek
|
23 |
df["hour"] = df["transaction_date"].dt.hour
|
24 |
df["is_weekend"] = df["day_of_week"].isin([5, 6]).astype(int)
|
|
|
27 |
df["is_anomalous"] = model.predict(df[features])
|
28 |
|
29 |
anomalies = df[df["is_anomalous"] == 1][["transaction_id", "amount", "merchant", "location", "transaction_date"]]
|
|
|
|
|
30 |
csv_path = "/tmp/anomalies.csv"
|
31 |
anomalies.to_csv(csv_path, index=False)
|
32 |
|