# Spaces: Sleeping  (appears to be the hosting page's status banner captured with the source; not part of the script)
import numpy as np | |
import streamlit as st | |
import pandas as pd | |
from sklearn.model_selection import train_test_split | |
from sklearn.preprocessing import OneHotEncoder, StandardScaler | |
from sklearn.model_selection import train_test_split | |
from sklearn.tree import DecisionTreeClassifier | |
from sklearn.neighbors import KNeighborsRegressor | |
from sklearn.metrics import mean_squared_error | |
st.title(":red[Welcome to My ML Project]")

# ---------------------------------------------------------------------------
# Model training
#
# Loads tips.csv, holds out 15% of the rows, one-hot encodes the object
# columns, standardises the numeric columns, and fits a k-nearest-neighbours
# regressor that predicts "total_bill" from the remaining columns.
#
# NOTE(review): Streamlit re-executes the script on every widget interaction,
# so the CSV is re-read and the model re-fit each time. Consider caching this
# section if the dataset grows.
# ---------------------------------------------------------------------------
df = pd.read_csv("tips.csv")
y = df.pop("total_bill")  # pop() removes the target, so df now holds features only
x = df
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.15, random_state=30
)

# Fit the transformers on the training split only; the test split and the
# user's query are transformed with the statistics learned here.
numerical_data = X_train.select_dtypes("number")
cat_data = X_train.select_dtypes("object")

encoder = OneHotEncoder(sparse_output=False)
X_train_cat = pd.DataFrame(
    encoder.fit_transform(cat_data),
    columns=encoder.get_feature_names_out(),
    index=X_train.index,  # keep row labels aligned with y_train
)

scaler = StandardScaler()
X_train_num = pd.DataFrame(
    scaler.fit_transform(numerical_data),
    columns=numerical_data.columns,
    index=X_train.index,
)

# Column order (encoded categoricals first, then scaled numerics) is what the
# model is fit on; every later frame passed to predict() must match it.
Final_X_train_data = pd.concat([X_train_cat, X_train_num], axis=1)

# Apply the already-fitted transformers to the held-out rows.
X_test_num = X_test.select_dtypes("number")
X_test_cat = X_test.select_dtypes("object")
X_test_num_trans = pd.DataFrame(
    scaler.transform(X_test_num),
    columns=X_test_num.columns,
    index=X_test.index,
)
X_test_cat_trans = pd.DataFrame(
    encoder.transform(X_test_cat),
    columns=encoder.get_feature_names_out(),
    index=X_test.index,
)
Final_X_test = pd.concat([X_test_cat_trans, X_test_num_trans], axis=1)

regression = KNeighborsRegressor()
regression.fit(Final_X_train_data, y_train)

# Keep the hold-out error in a name instead of discarding the call's result.
y_pred = regression.predict(Final_X_test)
test_mse = mean_squared_error(y_test, y_pred)
# Application
#
# Collects one customer's details, pushes them through the fitted encoder and
# scaler, and shows the regressor's predicted total bill.

# Build each dropdown from the categories the fitted encoder actually learned,
# so every value seen in training is selectable (the previous hard-coded day
# list offered only "Sun", "Sat" and "Fri") and no choice can be unknown to
# encoder.transform().
category_options = {
    column: values.tolist()
    for column, values in zip(encoder.feature_names_in_, encoder.categories_)
}
sex = category_options["sex"]
smoker = category_options["smoker"]
day = category_options["day"]
time_options = category_options["time"]

# min_value=0.0 leaves the default at 0.0 but rejects negative amounts.
tip = st.number_input("Enter Customer Tip", min_value=0.0)
select_sex = st.selectbox("Select Customer Gender", sex)
select_smoker = st.selectbox("Select Customer Smoker or not", smoker)
select_day = st.selectbox("Select Day", day)
select_time = st.selectbox("Select Time", time_options)
size = st.number_input("Enter Size", min_value=0.0)

if st.button("Predict total bill"):
    query_point = pd.DataFrame(
        [
            {
                "tip": tip,
                "sex": select_sex,
                "smoker": select_smoker,
                "day": select_day,
                "time": select_time,
                "size": size,
            }
        ]
    )

    # Select columns by the names each transformer was fit on rather than by
    # dtype inference, so the order always matches what they expect.
    cat_query_point = query_point[list(encoder.feature_names_in_)]
    num_query_point = query_point[list(scaler.feature_names_in_)]

    cat_query_point_trans = pd.DataFrame(
        encoder.transform(cat_query_point),
        columns=encoder.get_feature_names_out(),
    )
    num_query_point_trans = pd.DataFrame(
        scaler.transform(num_query_point),
        columns=num_query_point.columns,
    )

    # Same layout as the training frame: encoded categoricals, then numerics.
    final_query_point = pd.concat(
        [cat_query_point_trans, num_query_point_trans], axis=1
    )

    # predict() returns one value per row; the query has exactly one row.
    st.write(regression.predict(final_query_point)[0])