File size: 2,837 Bytes
87d5aaa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import numpy as np
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
st.title(":red[Welcome to my ML Project]")


@st.cache_resource
def _train_bill_model(csv_path="tips.csv", test_size=0.15, random_state=30):
    """Fit the preprocessing steps and the KNN regressor that predict ``total_bill``.

    Streamlit re-executes this script from the top on every widget
    interaction. Caching this function keeps the CSV load and the model fit
    from being repeated on each rerun; the cached objects are reused as long
    as the arguments are unchanged.

    Args:
        csv_path: Path of the CSV file to train on. It must contain a
            ``total_bill`` column, which is used as the regression target.
        test_size: Fraction of rows held out for evaluation.
        random_state: Seed passed to ``train_test_split``.

    Returns:
        A tuple ``(encoder, scaler, regression, X_test_num, test_mse)``:
        the fitted ``OneHotEncoder``, the fitted ``StandardScaler``, the
        fitted ``KNeighborsRegressor``, the numeric columns of the held-out
        split (unscaled), and the mean squared error on the held-out split.
    """
    df = pd.read_csv(csv_path)
    y = df.pop("total_bill")  # target; every remaining column is a feature
    X_train, X_test, y_train, y_test = train_test_split(
        df, y, test_size=test_size, random_state=random_state
    )

    # Fit the encoder on object-dtype columns and the scaler on numeric
    # columns, using the training split only.
    train_cat = X_train.select_dtypes("object")
    train_num = X_train.select_dtypes("number")
    encoder = OneHotEncoder(sparse_output=False)
    scaler = StandardScaler()
    train_cat_frame = pd.DataFrame(
        encoder.fit_transform(train_cat),
        columns=encoder.get_feature_names_out(),
    )
    train_num_frame = pd.DataFrame(
        scaler.fit_transform(train_num),
        columns=train_num.columns,
    )
    # Column order (encoded categoricals first, then scaled numerics) is the
    # layout the regressor is fitted on; prediction frames must match it.
    train_features = pd.concat([train_cat_frame, train_num_frame], axis=1)

    # Apply the already-fitted transformers to the held-out split.
    X_test_num = X_test.select_dtypes("number")
    X_test_cat = X_test.select_dtypes("object")
    test_num_frame = pd.DataFrame(
        scaler.transform(X_test_num),
        columns=X_test_num.columns,
    )
    test_cat_frame = pd.DataFrame(
        encoder.transform(X_test_cat),
        columns=encoder.get_feature_names_out(),
    )
    test_features = pd.concat([test_cat_frame, test_num_frame], axis=1)

    regression = KNeighborsRegressor()
    regression.fit(train_features, y_train)
    test_mse = mean_squared_error(y_test, regression.predict(test_features))
    return encoder, scaler, regression, X_test_num, test_mse


# These module-level names are read by the prediction code further down.
encoder, scaler, regression, X_test_num, test_mse = _train_bill_model()

# The metric used to be computed and thrown away; show it in the app instead.
st.write(f"Hold-out mean squared error: {test_mse:.2f}")
# --- Inputs describing the customer whose total bill should be predicted ---

# A tip is a monetary amount, so negative values are rejected by the widget.
# The default stays at 0.0, as before.
tip = st.number_input("Enter Customer Tip", min_value=0.0)

# For each selectbox the first option in the list is the default selection,
# so the option order is left unchanged.
sex = ["Female", "Male"]
select_sex = st.selectbox("Select Customer Gender", sex)

smoker = ["No", "Yes"]
select_smoker = st.selectbox("Select Customer Smoker or not", smoker)

day = ["Sun", "Sat", "Fri", "Thur"]
select_day = st.selectbox("select day", day)

time = ["Dinner", "Lunch"]
select_time = st.selectbox("Select time", time)

# Party size is a whole number of at least one person. Integer min_value and
# step make number_input return an int, instead of a float that defaults to
# 0.00 and accepts fractional or negative sizes.
size = st.number_input("Enter size", min_value=1, step=1)
if st.button("Predict Total Bill"):

    def fun(query_point):
        """Return the regressor's prediction for the first row of *query_point*."""
        return regression.predict(query_point)[0]

    # One-row frame built from the values currently entered in the widgets.
    entered_values = {
        "tip": tip,
        "sex": select_sex,
        "smoker": select_smoker,
        "day": select_day,
        "time": select_time,
        "size": size,
    }
    query_point = pd.DataFrame([entered_values])

    # Split by dtype and push each part through the transformer that was
    # fitted earlier in the script (encoder for object columns, scaler for
    # numeric columns).
    text_columns = query_point.select_dtypes("object")
    numeric_columns = query_point.select_dtypes("number")
    encoded_values = encoder.transform(text_columns)
    scaled_values = scaler.transform(numeric_columns)

    encoded_frame = pd.DataFrame(
        encoded_values,
        columns=encoder.get_feature_names_out(),
    )
    # The scaled columns are labelled with the numeric column names of the
    # held-out split.
    scaled_frame = pd.DataFrame(scaled_values, columns=X_test_num.columns)

    # Encoded categoricals first, then scaled numerics.
    final_query_point = pd.concat([encoded_frame, scaled_frame], axis=1)

    st.write(fun(final_query_point))