# lakshmi082024's picture
# Update app.py
# 7ca5e1f verified
import numpy as np
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
st.title(":red[Welcome to My ML Project]")

# --- Model training ---------------------------------------------------------
# Fit a k-nearest-neighbours regressor that predicts `total_bill` from the
# remaining columns of tips.csv. The fitted `encoder`, `scaler` and
# `regression` objects are reused further down to score user input.
df = pd.read_csv("tips.csv")
y = df.pop("total_bill")  # target; pop() also removes it from the features
x = df

# Hold out 15% of the rows for evaluation; fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.15, random_state=30
)

# Split features by kind. Categorical columns are taken as "everything that
# is not numeric" rather than dtype == object, so the selection does not
# depend on which string dtype pandas assigned to the text columns.
numerical_data = X_train.select_dtypes(include="number")
cat_data = X_train.select_dtypes(exclude="number")

# Fit the transformers on the training rows only, then reuse them unchanged
# on the test rows so no information from the hold-out set leaks in.
encoder = OneHotEncoder(sparse_output=False)
X_train_cat = pd.DataFrame(
    encoder.fit_transform(cat_data),
    columns=encoder.get_feature_names_out(),
)
scaler = StandardScaler()
res = scaler.fit_transform(numerical_data)
X_train_num = pd.DataFrame(res, columns=numerical_data.columns)

# Column layout expected by the model: one-hot columns first, then the
# scaled numeric columns. Both frames carry a fresh 0..n-1 index, so the
# column-wise concat lines rows up positionally.
Final_X_train_data = pd.concat([X_train_cat, X_train_num], axis=1)

# Apply the already-fitted transformers to the hold-out rows.
X_test_num = X_test.select_dtypes(include="number")
X_test_cat = X_test.select_dtypes(exclude="number")
X_test_num_trans = scaler.transform(X_test_num)
res1 = pd.DataFrame(X_test_num_trans, columns=X_test_num.columns)
X_test_cat_trans = encoder.transform(X_test_cat)
res2 = pd.DataFrame(X_test_cat_trans, columns=encoder.get_feature_names_out())
Final_X_test = pd.concat([res2, res1], axis=1)

regression = KNeighborsRegressor()
regression.fit(Final_X_train_data, y_train)
y_pred = regression.predict(Final_X_test)

# Hold-out error. Previously computed and thrown away; keep it in a name so
# it can be inspected or displayed.
test_mse = mean_squared_error(y_test, y_pred)
# Application
# Collect one customer's details, run them through the encoder and scaler
# fitted on the training data, and show the predicted total bill.
tip = st.number_input("Enter Customer Tip", min_value=0.0)  # no negative tips
sex = ["Female", "Male"]
select_sex = st.selectbox("Select Customer Gender", sex)
smoker = ["No", "Yes"]
select_smoker = st.selectbox("Select Customer Smoker or not", smoker)
# "Thur" was missing from this list, so Thursday could never be selected.
# NOTE(review): assumes tips.csv labels Thursday as "Thur" (as the standard
# tips dataset does) - confirm; the encoder is created without
# handle_unknown, so a label it never saw makes encoder.transform raise.
day = ["Sun", "Sat", "Fri", "Thur"]
select_day = st.selectbox("Select Day", day)
time_options = ["Dinner", "Lunch"]
select_time = st.selectbox("Select Time", time_options)
# Party size is a whole number of at least one person.
size = st.number_input("Enter Size", min_value=1, step=1)

if st.button("Predict total bill"):
    query_point = pd.DataFrame(
        [
            {
                "tip": tip,
                "sex": select_sex,
                "smoker": select_smoker,
                "day": select_day,
                "time": select_time,
                "size": size,
            }
        ]
    )

    # Select columns by the names each transformer was fitted on instead of
    # by dtype: this guarantees the same columns in the same order as during
    # training, whatever dtypes pandas infers for a one-row frame.
    cat_query_point = query_point[list(encoder.feature_names_in_)]
    num_query_point = query_point[list(scaler.feature_names_in_)]

    cat_query_point_trans = pd.DataFrame(
        encoder.transform(cat_query_point),
        columns=encoder.get_feature_names_out(),
    )
    num_query_point_trans = pd.DataFrame(
        scaler.transform(num_query_point),
        columns=num_query_point.columns,
    )

    # Same layout as the training matrix: one-hot columns, then numeric ones.
    final_query_point = pd.concat(
        [cat_query_point_trans, num_query_point_trans], axis=1
    )

    # predict() returns an array with one entry per row; there is one row.
    st.write(regression.predict(final_query_point)[0])