Rupesh386 committed on
Commit
ebb1d93
·
verified ·
1 Parent(s): 893a157

Upload build_features.py

Browse files
Files changed (1) hide show
  1. build_features.py +79 -0
build_features.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ Author : Rupesh Garsondiya
3
+ github : @Rupeshgarsondiya
4
+ Organization : L.J university
5
+
6
+ '''
7
+
8
+ # Feature Engineering
9
+
10
+ # import library
11
+
12
+ import pandas as pd
13
+ import numpy as np
14
+ import streamlit as st
15
+ from sklearn.preprocessing import OneHotEncoder,StandardScaler
16
+ from sklearn.model_selection import train_test_split
17
+ from sklearn.pipeline import Pipeline,make_pipeline
18
+ from sklearn.compose import ColumnTransformer
19
+
20
+
21
+ '''The FeatureEngineering class performs feature engineering on the dataset.'''
22
class FeatureEngineering:
    '''Load the user-behavior dataset, clean it, and build train/test features.'''

    # Default dataset location; pass ``data_path`` to the methods to override it.
    DATA_PATH = '/home/rupeshgarsondiya/workstation/lab/Project-1/Data/user_behavior_dataset.csv'

    def __init__(self):  # define constructor
        pass

    def cleandata(self, data_path=None):
        '''Read the raw CSV and return a cleaned DataFrame.

        Parameters
        ----------
        data_path : str or path-like, optional
            CSV file to load. Defaults to ``DATA_PATH``.

        Returns
        -------
        pandas.DataFrame
            The dataset without the ``User ID`` column, with shortened column
            names, app usage time converted from minutes to hours and data
            usage converted from MB to GB.
        '''
        path = self.DATA_PATH if data_path is None else data_path
        data = pd.read_csv(path)  # load dataset

        # The user id is only an identifier, so it is dropped.
        data = data.drop(columns='User ID')

        # Rename columns to short names.
        # NOTE(review): '(hours/Screen_timeday)' looks like a garbled
        # 'Screen_time(hours/day)'; it is kept as-is because other code may
        # already rely on this exact column name.
        data = data.rename(columns={
            'Device Model': 'P_Model',
            'Operating System': 'OS',
            'App Usage Time (min/day)': 'App_Time(hours/day)',
            'Screen On Time (hours/day)': '(hours/Screen_timeday)',
            'Battery Drain (mAh/day)': 'Battery_Drain(mAh/day)',
            'Number of Apps Installed': 'Installed_app',
            'Data Usage (MB/day)': 'Data_Usage(GB/day)',
        })

        # Convert app time from minutes to hours.
        data['App_Time(hours/day)'] = data['App_Time(hours/day)'] / 60

        # Convert data usage from MB to GB.
        data['Data_Usage(GB/day)'] = data['Data_Usage(GB/day)'] / 1024

        return data

    def get_clean_data(self, data_path=None, test_size=0.2, random_state=None):
        '''Split the cleaned data and one-hot encode its categorical columns.

        Parameters
        ----------
        data_path : str or path-like, optional
            CSV file to load. Defaults to ``DATA_PATH``.
        test_size : float, optional
            Passed to ``train_test_split`` as ``test_size`` (default 0.2).
        random_state : int, optional
            Seed for the split. ``None`` (default) gives a different split on
            every call.

        Returns
        -------
        tuple
            ``(x_train_t, x_test_t, y_train, y_test, pipeline)`` where the
            ``x_*_t`` values are the transformed feature matrices and
            ``pipeline`` is the preprocessing pipeline fitted on the training
            split.
        '''
        df = self.cleandata(data_path)
        print(df.head())  # quick preview of the cleaned data

        X = df.drop('User Behavior Class', axis=1)
        y = df['User Behavior Class']

        x_train, x_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state)

        categorical_col = ['P_Model', 'OS', 'Gender']
        # handle_unknown='ignore' keeps transform() from raising when the test
        # split contains a category that was absent from the training split.
        categorical_transform = OneHotEncoder(handle_unknown='ignore')

        # One-hot encode the categorical columns; all other columns pass
        # through unchanged.
        # NOTE(review): no standard scaling is applied to the numeric columns
        # although the original comment mentioned it - confirm whether a
        # StandardScaler step is wanted before adding one.
        preprocessor = ColumnTransformer(
            transformers=[
                ('cat', categorical_transform, categorical_col),
            ],
            remainder='passthrough')

        # Create the sklearn pipeline and fit it on the TRAINING split only.
        # Fitting on the test split as well would replace the training fit
        # and leak test information into the preprocessing.
        pipeline = Pipeline(steps=[('preprocessor', preprocessor)])
        pipeline.fit(x_train)
        x_train_t = pipeline.transform(x_train)
        x_test_t = pipeline.transform(x_test)

        return x_train_t, x_test_t, y_train, y_test, pipeline
76
+
77
+
78
+
79
+