kevalfst committed on
Commit
5c8d177
·
verified ·
1 Parent(s): 0f1b324

Create train.py

Browse files
Files changed (1) hide show
  1. train.py +38 -0
train.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Train a random-forest classifier on transaction data and persist it.

Pipeline, in order:

1. Read ``data/transactions.csv``.
2. Derive an integer ``hour`` feature from the ``time`` column and drop the
   ``check_id`` and ``time`` columns.
3. Label-encode ``employee_id`` and ``terminal_id``.
4. Fit a ``RandomForestClassifier`` to predict the ``suspicious`` column from
   every remaining column.
5. Write the fitted model to ``model/model.pkl`` and the fitted encoders to
   ``model/encoders.pkl``.
"""

from pathlib import Path

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load data
df = pd.read_csv("data/transactions.csv")

# Feature engineering: keep only the hour of day from the "HH:MM" time string.
df["hour"] = pd.to_datetime(df["time"], format="%H:%M").dt.hour
df = df.drop(columns=["check_id", "time"])

# Encode categorical variables. The fitted encoders are kept (and saved below)
# so the same value -> integer mapping can be re-applied later.
# NOTE(review): LabelEncoder raises on values it did not see during fit, so
# whatever loads encoders.pkl must handle new employee/terminal IDs - confirm.
categorical_cols = ["employee_id", "terminal_id"]
encoders = {}

for col in categorical_cols:
    enc = LabelEncoder()
    df[col] = enc.fit_transform(df[col])
    encoders[col] = enc

# Features and target: every remaining column except the label is a feature.
# NOTE(review): assumes any other columns in the CSV are already numeric -
# confirm against the data file.
X = df.drop(columns=["suspicious"])
y = df["suspicious"]

# Train/test split (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out split, which was previously created but never used.
# NOTE(review): plain accuracy is misleading if "suspicious" rows are rare;
# consider precision/recall - confirm class balance.
print(f"Held-out accuracy: {model.score(X_test, y_test):.3f}")

# Save model and encoders. joblib.dump does not create parent directories, so
# make sure the output directory exists first.
Path("model").mkdir(parents=True, exist_ok=True)
joblib.dump(model, "model/model.pkl")
joblib.dump(encoders, "model/encoders.pkl")

print("Training complete. Model saved.")