sourav11295 committed on
Commit
2c4c15a
·
1 Parent(s): f17a5d3

Commit App.py

Browse files
Files changed (1) hide show
  1. App.py +148 -0
App.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+
5
+ from sklearn.preprocessing import LabelEncoder
6
+ from sklearn.pipeline import Pipeline
7
+ from sklearn.model_selection import GridSearchCV
8
+ from sklearn.model_selection import train_test_split
9
+ from sklearn.preprocessing import StandardScaler
10
+
11
+ from sklearn.linear_model import LinearRegression
12
+ from sklearn.svm import SVR
13
+ from sklearn.tree import DecisionTreeRegressor
14
+ from sklearn.ensemble import RandomForestRegressor
15
+
16
+ from sklearn.linear_model import LogisticRegression
17
+ from sklearn.neighbors import KNeighborsClassifier
18
+ from sklearn.svm import SVC
19
+ from sklearn.tree import DecisionTreeClassifier
20
+ from sklearn.ensemble import RandomForestClassifier
21
+
22
def read(file, dep, ord):
    """Load an uploaded CSV, encode its categorical columns and rank models.

    Args:
        file: The uploaded file. Either an object exposing the on-disk path
            as ``.name`` or the path itself.
        dep: Name of the dependent (target) column, as written in the file.
        ord: Ordinal column names. Either a comma-separated string (an empty
            string means "none") or an iterable of names. The parameter name
            shadows the builtin ``ord`` and is kept only for interface
            compatibility.

    Returns:
        A 7-tuple ``(original_head, transformed_head, result, text, cat,
        ordinal, nom)``: the first five rows of the raw and of the encoded
        frame, the ranking frame produced by ``classification`` or
        ``regression``, the task name (``"classification"`` or
        ``"regression"``), and the lists of categorical, label-encoded and
        one-hot-encoded column names.

    Raises:
        ValueError: If no file was supplied or no complete rows remain after
            dropping rows with missing values.
        KeyError: If ``dep`` or one of the ordinal names is not a column.
    """
    if file is None:
        raise ValueError("No file was uploaded")

    # Works for both a file wrapper (path in ``.name``) and a plain path.
    df = pd.read_csv(getattr(file, "name", file))

    # Tolerate stray whitespace typed around the column name.
    if dep not in df.columns:
        dep = dep.strip()
    if dep not in df.columns:
        raise KeyError(f"Dependent variable {dep!r} is not a column of the file")

    categorical_kinds = ("bool", "object")
    dep_type = str(df.dtypes[dep])
    is_classification = dep_type in categorical_kinds
    cat = [col for col in df.columns if str(df.dtypes[col]) in categorical_kinds]

    # The textbox delivers one string; split it into individual column names.
    if isinstance(ord, str):
        requested = [name.strip() for name in ord.split(",")]
    else:
        requested = list(ord or [])
    # Drop blanks and duplicates while keeping the order the user gave.
    ordinal = list(dict.fromkeys(name for name in requested if name))

    unknown = [name for name in ordinal if name not in df.columns]
    if unknown:
        raise KeyError(f"Ordinal variables not found in the file: {unknown}")

    # A categorical target must be label-encoded, never one-hot encoded,
    # otherwise the target column would disappear before model fitting.
    if is_classification and dep not in ordinal:
        ordinal.append(dep)

    # Copy so the encodings below never write into a view of ``df``.
    new_df = df.dropna(axis=0).copy()
    if new_df.empty:
        raise ValueError("No complete rows remain after dropping missing values")

    for col in ordinal:
        new_df[col] = LabelEncoder().fit_transform(new_df[col])

    # Remaining categorical columns are nominal; keep file order so the
    # resulting dummy columns are deterministic.
    nom = [col for col in cat if col not in ordinal]
    if nom:
        ohe_df = pd.get_dummies(new_df[nom], drop_first=True)
        new_df = pd.concat([new_df.drop(columns=nom), ohe_df], axis=1)

    if is_classification:
        text = "classification"
        result = classification(new_df, dep)
    else:
        text = "regression"
        result = regression(new_df, dep)

    return df.head(5), new_df.head(5), result, text, cat, ordinal, nom
54
+
55
def classification(df, dep):
    """Rank a fixed set of classifiers on ``df`` by cross-validated accuracy.

    Args:
        df: Fully numeric frame containing the features and the target.
        dep: Name of the target column in ``df``.

    Returns:
        A DataFrame with the columns ``params`` (as strings),
        ``rank_test_score`` and ``mean_test_score`` (multiplied by 100),
        sorted by ``rank_test_score``.
    """
    features = df.drop(columns=dep)
    target = df[dep]

    # Only the training part of the split is fed to the search.
    train_features, _, train_target, _ = train_test_split(features, target)

    # 'pass' is a placeholder step; each grid entry swaps in a real estimator.
    pipeline = Pipeline(
        steps=[('scale', StandardScaler()), ('classification', 'pass')]
    )

    candidates = (
        LogisticRegression(),
        RandomForestClassifier(),
        DecisionTreeClassifier(),
        SVC(),
        KNeighborsClassifier(n_neighbors=5),
    )
    grid = [{'classification': [estimator]} for estimator in candidates]

    searcher = GridSearchCV(
        pipeline, param_grid=grid, n_jobs=-1, scoring='accuracy'
    )
    searcher.fit(train_features, train_target)

    wanted = ['params', 'rank_test_score', 'mean_test_score']
    return (
        pd.DataFrame(searcher.cv_results_)[wanted]
        # Express the accuracy as a percentage.
        .assign(mean_test_score=lambda frame: frame['mean_test_score'] * 100)
        .astype({'params': str})
        .sort_values('rank_test_score')
    )
93
+
94
def regression(df, dep):
    """Rank a fixed set of regressors on ``df`` by cross-validated MAPE.

    Args:
        df: Fully numeric frame containing the features and the target.
        dep: Name of the target column in ``df``.

    Returns:
        A DataFrame with the columns ``params`` (as strings),
        ``rank_test_score`` and ``mean_test_score``, sorted by
        ``rank_test_score``. The score is reported as
        ``(mean_test_score + 1) * 100``.
    """
    features = df.drop(columns=dep)
    target = df[dep]

    # Only the training part of the split is fed to the search.
    train_features, _, train_target, _ = train_test_split(features, target)

    # 'pass' is a placeholder step; each grid entry swaps in a real estimator.
    pipeline = Pipeline(
        steps=[('scale', StandardScaler()), ('regression', 'pass')]
    )

    candidates = (
        LinearRegression(),
        RandomForestRegressor(),
        DecisionTreeRegressor(),
        SVR(),
    )
    grid = [{'regression': [estimator]} for estimator in candidates]

    searcher = GridSearchCV(
        pipeline,
        param_grid=grid,
        cv=5,
        n_jobs=-1,
        scoring='neg_mean_absolute_percentage_error',
    )
    searcher.fit(train_features, train_target)

    wanted = ['params', 'rank_test_score', 'mean_test_score']
    return (
        pd.DataFrame(searcher.cv_results_)[wanted]
        # The scorer is a negated error, so adding 1 turns it into a
        # "1 - error" figure before scaling to a percentage.
        .assign(
            mean_test_score=lambda frame: (frame['mean_test_score'] + 1) * 100
        )
        .astype({'params': str})
        .sort_values('rank_test_score')
    )
129
+
130
+
131
# Gradio front end: upload a CSV, name the target column and (optionally) the
# ordinal columns, then show the encoded data and the model ranking.
# NOTE(review): the source this was recovered from had lost its indentation;
# the nesting below is a reconstruction - confirm the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("Model Recommendation App **Upload** file to see the output.")
    with gr.Column():
        # Inputs, side by side.
        with gr.Row():
            file = gr.File(label="Upload File(Comma Separated)")
            dep = gr.Textbox(label="Dependent Variable(Variable as in the file)")
            ord = gr.Textbox(label="Ordinal Variables(Separate with a comma)")
        submit = gr.Button("Submit")

        # Textual summary of what `read` decided.
        text = gr.Text(label="Suitable Algorithm")
        other1 = gr.Text(label="Categorical Variables")
        other2 = gr.Text(label="LabelEncoded Variables")
        other3 = gr.Text(label="OneHotEncoded Variables")

        # Previews of the raw and the encoded data.
        # NOTE(review): `overflow_row_behaviour` may not be accepted by newer
        # Gradio releases - confirm against the pinned Gradio version.
        with gr.Row():
            org = gr.DataFrame(overflow_row_behaviour="paginate", label="Original Data")
            converted = gr.DataFrame(overflow_row_behaviour="paginate", label="Transformed Data")
        result = gr.DataFrame(label="Result")

    # Output order must match the tuple returned by `read`.
    submit.click(
        fn=read,
        inputs=[file, dep, ord],
        outputs=[org, converted, result, text, other1, other2, other3],
    )

demo.launch()