File size: 2,753 Bytes
4a8f985
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7ca5e1f
4a8f985
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import numpy as np
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error


st.title(":red[Welcome to My ML Project]")


@st.cache_resource
def _fit_bill_model(csv_path="tips.csv", test_size=0.15, random_state=30):
    """Fit the preprocessing steps and the KNN regressor, cached across reruns.

    Streamlit re-executes this script from the top on every widget
    interaction.  Caching keeps the CSV read, the encoder/scaler fitting and
    the model training from being repeated on each rerun.  The returned
    objects are shared by every rerun and session, so callers must treat
    them as read-only.  The cache is keyed on the arguments only: an edited
    CSV is picked up after the cache is cleared or the server restarts.

    Args:
        csv_path: CSV file with a ``total_bill`` target column plus the
            feature columns.
        test_size: Fraction of rows held out for evaluation.
        random_state: Seed for the train/test split.

    Returns:
        A dict holding the fitted ``encoder``, ``scaler`` and ``regression``
        model, the untransformed numeric test columns (``X_test_num``) and
        the mean squared error on the held-out rows (``test_mse``).
    """
    features = pd.read_csv(csv_path)
    target = features.pop("total_bill")

    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=test_size, random_state=random_state
    )

    # Column roles are decided once, on the training split, and reused for
    # the test split so both go through identical preprocessing.
    cat_cols = list(X_train.select_dtypes("object").columns)
    num_cols = list(X_train.select_dtypes("number").columns)

    encoder = OneHotEncoder(sparse_output=False).fit(X_train[cat_cols])
    scaler = StandardScaler().fit(X_train[num_cols])

    def _transform(frame):
        """Encode + scale *frame*; one-hot columns first, then numeric ones."""
        encoded = pd.DataFrame(
            encoder.transform(frame[cat_cols]),
            columns=encoder.get_feature_names_out(),
        )
        scaled = pd.DataFrame(
            scaler.transform(frame[num_cols]),
            columns=num_cols,
        )
        # Both frames carry a fresh 0..n-1 index, so the column-wise concat
        # lines rows up positionally.
        return pd.concat([encoded, scaled], axis=1)

    # Fit on a DataFrame so the model records feature names and accepts
    # DataFrame queries with the same columns later on.
    regression = KNeighborsRegressor()
    regression.fit(_transform(X_train), y_train)

    test_mse = mean_squared_error(y_test, regression.predict(_transform(X_test)))

    return {
        "encoder": encoder,
        "scaler": scaler,
        "regression": regression,
        "X_test_num": X_test[num_cols],
        "test_mse": test_mse,
    }


_fitted = _fit_bill_model()

# Module-level names used by the prediction form further down the script.
encoder = _fitted["encoder"]
scaler = _fitted["scaler"]
regression = _fitted["regression"]
X_test_num = _fitted["X_test_num"]
# Held-out mean squared error; kept for inspection, not rendered in the app.
test_mse = _fitted["test_mse"]

# Application: collect one customer's details and predict the total bill.


def _category_options(column, preferred):
    """Return the selectable values for one categorical input column.

    The options are reconciled with what the fitted ``encoder`` learned for
    ``column``: values from ``preferred`` the encoder knows keep their order,
    then any remaining learned categories are appended.  This way every
    category present in the training data can be chosen, and no option is
    offered that ``encoder.transform`` would reject as unknown.

    Args:
        column: Name of a column the encoder was fitted on.
        preferred: Values to list first, in display order.

    Returns:
        A list of category values for a select box.
    """
    position = list(encoder.feature_names_in_).index(column)
    learned = list(encoder.categories_[position])
    options = [value for value in preferred if value in learned]
    options.extend(value for value in learned if value not in options)
    return options


# Tips cannot be negative; the default stays at 0.0.
tip = st.number_input("Enter Customer Tip", min_value=0.0)

sex = _category_options("sex", ["Female", "Male"])
select_sex = st.selectbox("Select Customer Gender", sex)

smoker = _category_options("smoker", ["No", "Yes"])
select_smoker = st.selectbox("Select Customer Smoker or not", smoker)

day = _category_options("day", ["Sun", "Sat", "Fri"])
select_day = st.selectbox("Select Day", day)

time_options = _category_options("time", ["Dinner", "Lunch"])
select_time = st.selectbox("Select Time", time_options)

# NOTE(review): `size` is assumed to be a whole-number head count of at
# least one, as in the standard tips dataset -- confirm against tips.csv.
size = st.number_input("Enter Size", min_value=1, step=1)

if st.button("Predict total bill"):
    query_point = pd.DataFrame(
        [
            {
                "tip": tip,
                "sex": select_sex,
                "smoker": select_smoker,
                "day": select_day,
                "time": select_time,
                "size": size,
            }
        ]
    )

    # Pick columns by the names the transformers were fitted on rather than
    # by dtype, so the column order always matches what was fitted.
    cat_cols = list(encoder.feature_names_in_)
    num_cols = list(scaler.feature_names_in_)

    cat_query_point_trans = pd.DataFrame(
        encoder.transform(query_point[cat_cols]),
        columns=encoder.get_feature_names_out(),
    )
    num_query_point_trans = pd.DataFrame(
        scaler.transform(query_point[num_cols]),
        columns=num_cols,
    )

    # Same layout as the training matrix: one-hot columns, then numeric ones.
    final_query_point = pd.concat(
        [cat_query_point_trans, num_query_point_trans], axis=1
    )

    st.write(regression.predict(final_query_point)[0])