Spaces:
Sleeping
Sleeping
Create train.py
Browse files
train.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
|
6 |
+
|
7 |
+
# Train a random-forest classifier on the transactions CSV and persist both the
# fitted model and the per-column label encoders under the "model/" directory.

# Load data
df = pd.read_csv("data/transactions.csv")

# Feature engineering: reduce the "HH:MM" time string to its hour component,
# then drop the raw time column and the check_id column so neither is used
# as a model feature.
df["hour"] = pd.to_datetime(df["time"], format="%H:%M").dt.hour
df.drop(columns=["check_id", "time"], inplace=True)

# Encode categorical variables. Each column gets its own fitted encoder, which
# is stored in `encoders` and saved next to the model at the end of the script.
# NOTE(review): LabelEncoder raises on values it was not fitted on; any code
# that reuses these encoders on new data needs to handle unseen ids.
categorical_cols = ["employee_id", "terminal_id"]
encoders = {}

for col in categorical_cols:
    enc = LabelEncoder()
    df[col] = enc.fit_transform(df[col])
    encoders[col] = enc

# Features and target
X = df.drop(columns=["suspicious"])
y = df["suspicious"]

# Train/test split (80/20, fixed seed so runs are reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out rows so the split above is actually used and a
# training run gives some signal about model quality.
accuracy = model.score(X_test, y_test)
print(f"Hold-out accuracy: {accuracy:.3f}")

# Save model and encoders. joblib.dump does not create missing parent
# directories, so make sure "model/" exists before writing into it.
os.makedirs("model", exist_ok=True)
joblib.dump(model, "model/model.pkl")
joblib.dump(encoders, "model/encoders.pkl")

print("Training complete. Model saved.")
|