# Streamlit app: predicts a restaurant bill total from the tip and party
# details, using a k-nearest-neighbours regressor trained on tips.csv.

import numpy as np
import pandas as pd
import streamlit as st
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier

DATA_PATH = "tips.csv"
TARGET_COLUMN = "total_bill"


def _encode_features(frame, encoder, scaler, cat_cols, num_cols):
    """Return the model-ready feature frame for *frame*.

    Categorical columns are one-hot encoded and numeric columns are scaled
    with the already-fitted *encoder* / *scaler*. The result is laid out as
    [one-hot columns, scaled numeric columns], which is the layout the
    regressor is fitted on.

    Args:
        frame: DataFrame containing at least ``cat_cols`` and ``num_cols``.
        encoder: fitted ``OneHotEncoder``.
        scaler: fitted ``StandardScaler``.
        cat_cols: categorical column names, in fit order.
        num_cols: numeric column names, in fit order.

    Returns:
        A DataFrame with a fresh positional index.
    """
    cat_part = pd.DataFrame(
        encoder.transform(frame[cat_cols]),
        columns=encoder.get_feature_names_out(),
    )
    num_part = pd.DataFrame(scaler.transform(frame[num_cols]), columns=num_cols)
    # Both parts carry a fresh RangeIndex, so the concat lines rows up by position.
    return pd.concat([cat_part, num_part], axis=1)


@st.cache_resource
def _train(csv_path):
    """Fit the preprocessing steps and the KNN regressor on *csv_path*.

    Cached so that Streamlit's rerun-on-every-interaction does not re-read the
    CSV and refit the model each time. The cache is keyed on the path only, so
    the app must be restarted (or the cache cleared) after the file changes.

    Returns:
        ``(encoder, scaler, regression, cat_cols, num_cols, test_mse)`` where
        ``test_mse`` is the mean squared error on the 15% hold-out split.
    """
    df = pd.read_csv(csv_path)
    y = df.pop(TARGET_COLUMN)
    X_train, X_test, y_train, y_test = train_test_split(
        df, y, test_size=0.15, random_state=30
    )

    # Decide the column split once, on the training data, and reuse these
    # lists everywhere instead of re-inferring dtypes for each new frame.
    cat_cols = list(X_train.select_dtypes("object").columns)
    num_cols = list(X_train.select_dtypes("number").columns)

    # handle_unknown="ignore": a category absent from the training split is
    # encoded as all zeros instead of raising at transform time.
    encoder = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
    encoder.fit(X_train[cat_cols])
    scaler = StandardScaler()
    scaler.fit(X_train[num_cols])

    regression = KNeighborsRegressor()
    regression.fit(
        _encode_features(X_train, encoder, scaler, cat_cols, num_cols), y_train
    )

    y_pred = regression.predict(
        _encode_features(X_test, encoder, scaler, cat_cols, num_cols)
    )
    test_mse = mean_squared_error(y_test, y_pred)
    return encoder, scaler, regression, cat_cols, num_cols, test_mse


st.title(":red[Welcome to My ML Project]")

encoder, scaler, regression, cat_cols, num_cols, test_mse = _train(DATA_PATH)
st.caption(f"Hold-out mean squared error: {test_mse:.2f}")

# Categories the encoder actually saw, keyed by column name.
_learned_categories = {
    col: [c for c in cats if isinstance(c, str)]
    for col, cats in zip(cat_cols, encoder.categories_)
}


def _options(preferred, column):
    """Return *preferred* followed by any learned category of *column* it lacks.

    Keeps the hand-picked ordering (and therefore the default selection) while
    ensuring every category present in the training data can be chosen.
    """
    extras = [c for c in _learned_categories.get(column, []) if c not in preferred]
    return preferred + extras


def fun(query_point):
    """Return the predicted total bill for a single encoded query row."""
    return regression.predict(query_point)[0]


# Application
tip = st.number_input("Enter Customer Tip", min_value=0.0)
select_sex = st.selectbox(
    "Select Customer Gender", _options(["Female", "Male"], "sex")
)
select_smoker = st.selectbox(
    "Select Customer Smoker or not", _options(["No", "Yes"], "smoker")
)
select_day = st.selectbox("Select Day", _options(["Sun", "Sat", "Fri"], "day"))
select_time = st.selectbox("Select Time", _options(["Dinner", "Lunch"], "time"))
# Party size is a head count: whole numbers, at least one.
size = st.number_input("Enter Size", min_value=1, step=1)

if st.button("Predict total bill"):
    query_point = pd.DataFrame(
        [
            {
                "tip": tip,
                "sex": select_sex,
                "smoker": select_smoker,
                "day": select_day,
                "time": select_time,
                "size": size,
            }
        ]
    )
    final_query_point = _encode_features(
        query_point, encoder, scaler, cat_cols, num_cols
    )
    st.write(fun(final_query_point))