# Source: Hugging Face Space "EuroPython2022/Model-Recommendation"
# Space status when captured: Build error
# File size: 4,836 Bytes

import gradio as gr
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

def read(file, dep, ord):
    """Load an uploaded CSV, encode its categorical columns and rank models.

    Args:
        file: Uploaded file object; only its ``name`` attribute (a path) is
            read.
        dep: Name of the dependent (target) column, exactly as in the file.
        ord: Comma-separated names of the columns to label-encode. An empty
            string means "none". An already-split list/tuple of names is
            accepted as well.

    Returns:
        A 7-tuple ``(original_head, transformed_head, result, task,
        categorical, label_encoded, one_hot_encoded)`` where the first two
        items are the first five rows before/after the transformation,
        ``result`` is the ranking table produced by ``classification`` or
        ``regression``, ``task`` is ``"classification"`` or ``"regression"``
        and the last three items are lists of column names.

    Raises:
        KeyError: If ``dep`` or one of the ordinal names is not a column of
            the uploaded file.
        ValueError: If no rows remain after dropping rows with missing values.
    """
    categorical_dtypes = ('bool', 'object')

    df = pd.read_csv(file.name)
    if dep not in df.columns:
        raise KeyError(f"Dependent variable {dep!r} is not a column of the uploaded file")

    dep_is_categorical = str(df.dtypes[dep]) in categorical_dtypes
    cat = [col for col in df.columns.values if str(df.dtypes[col]) in categorical_dtypes]

    # The textbox delivers one comma-separated string; split it into clean,
    # de-duplicated column names instead of treating the raw string as a
    # single column name.
    if ord is None:
        requested = []
    elif isinstance(ord, str):
        requested = ord.split(",")
    else:
        requested = [str(name) for name in ord]
    ordinal = list(dict.fromkeys(name.strip() for name in requested if name.strip()))

    unknown = [name for name in ordinal if name not in df.columns]
    if unknown:
        raise KeyError(f"Ordinal variables not found in the uploaded file: {unknown}")

    # A categorical target must stay a single label-encoded column; if it were
    # one-hot encoded it would vanish from the frame and the model search
    # could no longer find it.
    if dep_is_categorical and dep not in ordinal:
        ordinal.append(dep)

    # Work on an explicit copy so the assignments below never touch ``df``.
    new_df = df.dropna(axis=0).copy()
    if new_df.empty:
        raise ValueError("No rows left after dropping rows with missing values")

    if ordinal:
        new_df[ordinal] = new_df[ordinal].apply(
            lambda col: LabelEncoder().fit_transform(col)
        )

    # Remaining categorical columns are treated as nominal; keep file order so
    # the transformed frame is deterministic.
    nominal = [col for col in cat if col not in ordinal]
    if nominal:
        ohe_df = pd.get_dummies(new_df[nominal], drop_first=True)
        new_df = pd.concat([new_df.drop(columns=nominal), ohe_df], axis=1)

    if dep_is_categorical:
        text = "classification"
        result = classification(new_df, dep)
    else:
        text = "regression"
        result = regression(new_df, dep)
    return df.head(5), new_df.head(5), result, text, cat, ordinal, nominal
    
def classification(df, dep):
    """Rank a fixed set of classifiers on ``df`` by cross-validated accuracy.

    Args:
        df: Fully numeric DataFrame containing the features and the target.
        dep: Name of the target column in ``df``.

    Returns:
        DataFrame with the columns ``params`` (string form of the evaluated
        estimator), ``rank_test_score`` and ``mean_test_score`` (mean accuracy
        in percent), sorted by rank.
    """
    X = df.drop(dep, axis=1)
    y = df[dep]

    # Only the training portion takes part in the search; the held-out part
    # of the split is not used by this function.
    X_train, _, y_train, _ = train_test_split(X, y)

    # 'passthrough' is the placeholder scikit-learn documents for a step that
    # is swapped out later; the grid below replaces it with each candidate.
    pipe = Pipeline(steps=[('scale', StandardScaler()), ('classification', 'passthrough')])

    candidates = [
        LogisticRegression(),
        RandomForestClassifier(),
        DecisionTreeClassifier(),
        SVC(),
        KNeighborsClassifier(n_neighbors=5),
    ]
    parameters = [{'classification': [model]} for model in candidates]

    # cv is spelled out so this search uses the same fold count as regression().
    search = GridSearchCV(pipe, param_grid=parameters, cv=5, n_jobs=-1, scoring='accuracy')
    search.fit(X_train, y_train)

    columns = ['params', 'rank_test_score', 'mean_test_score']
    result = pd.DataFrame(search.cv_results_)[columns].copy()

    # Report accuracy as a percentage and stringify the estimator objects.
    result['mean_test_score'] = result['mean_test_score'] * 100
    result = result.astype({'params': str})

    return result.sort_values('rank_test_score')

def regression(df, dep):
    """Rank a fixed set of regressors on ``df`` by cross-validated MAPE.

    Args:
        df: Fully numeric DataFrame containing the features and the target.
        dep: Name of the target column in ``df``.

    Returns:
        DataFrame with the columns ``params`` (string form of the evaluated
        estimator), ``rank_test_score`` and ``mean_test_score``, sorted by
        rank. The score is ``(1 - MAPE) * 100``, so higher is better.
    """
    X = df.drop(dep, axis=1)
    y = df[dep]

    # Only the training portion takes part in the search; the held-out part
    # of the split is not used by this function.
    X_train, _, y_train, _ = train_test_split(X, y)

    # 'passthrough' is the placeholder scikit-learn documents for a step that
    # is swapped out later; the grid below replaces it with each candidate.
    pipe = Pipeline(steps=[('scale', StandardScaler()), ('regression', 'passthrough')])

    candidates = [
        LinearRegression(),
        RandomForestRegressor(),
        DecisionTreeRegressor(),
        SVR(),
    ]
    parameters = [{'regression': [model]} for model in candidates]

    search = GridSearchCV(
        pipe,
        param_grid=parameters,
        cv=5,
        n_jobs=-1,
        scoring='neg_mean_absolute_percentage_error',
    )
    search.fit(X_train, y_train)

    columns = ['params', 'rank_test_score', 'mean_test_score']
    result = pd.DataFrame(search.cv_results_)[columns].copy()

    # The scorer yields -MAPE; adding 1 and scaling by 100 turns it into an
    # accuracy-like percentage.
    result['mean_test_score'] = (result['mean_test_score'] + 1) * 100
    result = result.astype({'params': str})

    return result.sort_values('rank_test_score')
    

# Gradio UI: one row of inputs, three text summaries, two data previews and
# the ranking table. Component handles use descriptive names so that the
# builtin ``ord`` is not shadowed at module level.
with gr.Blocks() as demo:
    gr.Markdown("Model Recommendation App **Upload** file to see the output.")
    with gr.Column():
        with gr.Row():
            file_input = gr.File(label="Upload File(Comma Separated)")
            dep_input = gr.Textbox(label="Dependent Variable(Variable as in the file)")
            ordinal_input = gr.Textbox(label="Ordinal Variables(Separate with a comma)")
            submit = gr.Button("Submit")
        task_output = gr.Text(label="Suitable Algorithm")
        categorical_output = gr.Text(label="Categorical Variables")
        label_encoded_output = gr.Text(label="LabelEncoded Variables")
        one_hot_output = gr.Text(label="OneHotEncoded Variables")
        with gr.Row():
            org = gr.DataFrame(overflow_row_behaviour="paginate", label="Original Data")
            converted = gr.DataFrame(overflow_row_behaviour="paginate", label="Transformed Data")
        result_output = gr.DataFrame(label="Result")
    # The outputs are listed in the same order as the tuple returned by read().
    submit.click(
        fn=read,
        inputs=[file_input, dep_input, ordinal_input],
        outputs=[
            org,
            converted,
            result_output,
            task_output,
            categorical_output,
            label_encoded_output,
            one_hot_output,
        ],
    )
demo.launch()