"""Streamlit app that predicts a restaurant bill from the ``tips.csv`` data.

The script one-hot encodes the object-typed columns, standardises the numeric
columns, fits a k-nearest-neighbours regressor with ``total_bill`` as the
target, and then exposes a small form whose values are pushed through the
same fitted transformers to obtain a prediction.
"""

import numpy as np
import pandas as pd
import streamlit as st
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.tree import DecisionTreeRegressor


def _prepare_features(frame, encoder, scaler):
    """Return the model-ready feature matrix for *frame*.

    Object-typed columns are passed through the already-fitted ``encoder`` and
    numeric columns through the already-fitted ``scaler``.  Both results are
    rebuilt as DataFrames with a fresh positional index so that they line up
    row-for-row when concatenated, regardless of the index carried by *frame*
    (e.g. the shuffled index produced by ``train_test_split``).

    Args:
        frame: Raw feature rows (training split, test split or a query row).
        encoder: Fitted ``OneHotEncoder`` for the object-typed columns.
        scaler: Fitted ``StandardScaler`` for the numeric columns.

    Returns:
        DataFrame with the encoded categorical columns first, followed by the
        scaled numeric columns.
    """
    categorical = frame.select_dtypes("object")
    numerical = frame.select_dtypes("number")
    encoded = pd.DataFrame(
        encoder.transform(categorical),
        columns=encoder.get_feature_names_out(),
    )
    scaled = pd.DataFrame(
        scaler.transform(numerical),
        columns=numerical.columns,
    )
    return pd.concat([encoded, scaled], axis=1)


st.title(":red[Welcome to my ML Project]")

# NOTE(review): Streamlit re-executes this script on every widget interaction,
# so the CSV is re-read and the model re-fitted each time.  Consider wrapping
# the training section in a cached function (e.g. ``st.cache_resource``) once
# the minimum supported Streamlit version is confirmed.
df = pd.read_csv("tips.csv")
y = df.pop("total_bill")  # target; ``df`` now holds only the feature columns
x = df

X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.15, random_state=30
)

# Fit the transformers on the training split only, so that no statistics from
# the test split leak into the preprocessing.
encoder = OneHotEncoder(sparse_output=False)
encoder.fit(X_train.select_dtypes("object"))
scaler = StandardScaler()
scaler.fit(X_train.select_dtypes("number"))

Final_X_train_data = _prepare_features(X_train, encoder, scaler)
Final_X_test = _prepare_features(X_test, encoder, scaler)

regression = KNeighborsRegressor()
regression.fit(Final_X_train_data, y_train)

# Hold-out error, computed as a sanity check; it is not rendered in the UI.
y_pred = regression.predict(Final_X_test)
test_mse = mean_squared_error(y_test, y_pred)

# --- Input form -----------------------------------------------------------
tip = st.number_input("Enter Customer Tip")

sex = ["Female", "Male"]
select_sex = st.selectbox("Select Customer Gender", sex)

smoker = ["No", "Yes"]
select_smoker = st.selectbox("Select Customer Smoker or not", smoker)

day = ["Sun", "Sat", "Fri", "Thur"]
select_day = st.selectbox("select day", day)

time = ["Dinner", "Lunch"]
select_time = st.selectbox("Select time", time)

size = st.number_input("Enter size")

if st.button("Predict Total Bill"):
    # Key order mirrors the feature order used above (tip, sex, smoker, day,
    # time, size) so the encoder and scaler see columns in the order they
    # were fitted on.
    query_point = pd.DataFrame(
        [
            {
                "tip": tip,
                "sex": select_sex,
                "smoker": select_smoker,
                "day": select_day,
                "time": select_time,
                "size": size,
            }
        ]
    )
    final_query_point = _prepare_features(query_point, encoder, scaler)
    # ``predict`` returns one value per row; the query has exactly one row.
    st.write(regression.predict(final_query_point)[0])