Rupesh386 committed on
Commit
fc67400
·
verified ·
1 Parent(s): ac38607

Delete build_features.py

Browse files
Files changed (1) hide show
  1. build_features.py +0 -79
build_features.py DELETED
@@ -1,79 +0,0 @@
1
- '''
2
- Author : Rupesh Garsondiya
3
- github : @Rupeshgarsondiya
4
- Organization : L.J university
5
-
6
- '''
7
-
8
- # Feature Engineering
9
-
10
- # import library
11
-
12
- import pandas as pd
13
- import numpy as np
14
- import streamlit as st
15
- from sklearn.preprocessing import OneHotEncoder,StandardScaler
16
- from sklearn.model_selection import train_test_split
17
- from sklearn.pipeline import Pipeline,make_pipeline
18
- from sklearn.compose import ColumnTransformer
19
-
20
-
21
- '''create class FeatureEngineering is created to perform feature engineering on the dataset'''
22
class FeatureEngineering:
    '''Clean the user-behavior dataset and build its preprocessing pipeline.

    cleandata      -> loads the CSV, drops the id column, renames columns and
                      converts two of them to different units.
    get_clean_data -> splits the cleaned data into train/test parts and
                      one-hot encodes the categorical columns.
    '''

    # Dataset location used when the caller does not pass one explicitly.
    DEFAULT_DATA_PATH = '/home/rupeshgarsondiya/workstation/lab/Project-1/Data/user_behavior_dataset.csv'

    # Raw CSV header -> column name used by the rest of this class.
    # NOTE(review): '(hours/Screen_timeday)' looks like a scrambled
    # 'Screen_time(hours/day)'. It is kept unchanged because code outside this
    # class may look the column up by this exact name - confirm before fixing.
    _COLUMN_RENAMES = {
        'Device Model': 'P_Model',
        'Operating System': 'OS',
        'App Usage Time (min/day)': 'App_Time(hours/day)',
        'Screen On Time (hours/day)': '(hours/Screen_timeday)',
        'Battery Drain (mAh/day)': 'Battery_Drain(mAh/day)',
        'Number of Apps Installed': 'Installed_app',
        'Data Usage (MB/day)': 'Data_Usage(GB/day)',
    }

    # Columns that are one-hot encoded; every other column is passed through.
    _CATEGORICAL_COLUMNS = ['P_Model', 'OS', 'Gender']

    # Name of the label column that is separated from the features.
    _TARGET_COLUMN = 'User Behavior Class'

    def cleandata(self, data_path=None):
        '''Load the dataset and return it as a cleaned DataFrame.

        Parameters
        ----------
        data_path : str, path-like or file-like, optional
            Anything accepted by ``pandas.read_csv``. Defaults to
            ``DEFAULT_DATA_PATH`` when omitted.

        Returns
        -------
        pandas.DataFrame
            The data without the 'User ID' column, with renamed columns, app
            usage divided by 60 (minutes -> hours) and data usage divided by
            1024 (MB -> GB).

        Raises
        ------
        KeyError
            If 'User ID' or one of the converted columns is missing.
        '''
        source = self.DEFAULT_DATA_PATH if data_path is None else data_path
        data = pd.read_csv(source)

        # The user id is only an identifier, so it is dropped up front.
        data = data.drop(columns='User ID')
        data = data.rename(columns=self._COLUMN_RENAMES)

        # The renamed columns already advertise the target unit, so the values
        # have to be converted to match: minutes -> hours and MB -> GB.
        data['App_Time(hours/day)'] = data['App_Time(hours/day)'] / 60
        data['Data_Usage(GB/day)'] = data['Data_Usage(GB/day)'] / 1024

        return data

    def get_clean_data(self, data_path=None, test_size=0.2, random_state=None):
        '''Split the cleaned data and one-hot encode its categorical columns.

        Parameters
        ----------
        data_path : str, path-like or file-like, optional
            Forwarded to ``cleandata``.
        test_size : float, optional
            Forwarded to ``train_test_split``. Defaults to 0.2.
        random_state : int, optional
            Seed forwarded to ``train_test_split``. The default ``None`` keeps
            the split random on every call.

        Returns
        -------
        tuple
            ``(x_train_t, x_test_t, y_train, y_test, pipeline)`` where the
            ``*_t`` values are the transformed feature matrices and
            ``pipeline`` is the fitted preprocessing pipeline.
        '''
        # Reuse this instance instead of constructing a second one.
        df = self.cleandata(data_path)
        print(df.head())

        X = df.drop(self._TARGET_COLUMN, axis=1)
        y = df[self._TARGET_COLUMN]

        x_train, x_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state)

        # NOTE(review): an earlier version also created a StandardScaler for
        # 'Battery_Drain(mAh/day)' but never added it to the transformer.
        # Scaling is still not applied here so the returned features keep
        # their current values - confirm whether scaling was intended.
        preprocessor = ColumnTransformer(
            transformers=[
                # 'ignore' encodes a category that only shows up outside the
                # training split as all zeros instead of raising.
                ('cat', OneHotEncoder(handle_unknown='ignore'),
                 self._CATEGORICAL_COLUMNS),
            ],
            remainder='passthrough')

        pipeline = Pipeline(steps=[('preprocessor', preprocessor)])

        # Fit on the training split only. Fitting again on the test split
        # would discard what was learned from the training data and leak
        # test information into the preprocessing step.
        x_train_t = pipeline.fit_transform(x_train)
        x_test_t = pipeline.transform(x_test)

        return x_train_t, x_test_t, y_train, y_test, pipeline
76
-
77
-
78
-
79
-