Spaces:

BlendMMM
/

Mastercard

Sleeping

App Files Files Community

Mastercard / Model_Results_Pretrained.py

BlendMMM

Upload 81 files

94bbd2b verified over 1 year ago

raw

history blame

13.8 kB

	import streamlit as st
	import plotly.express as px
	import numpy as np
	import plotly.graph_objects as go
	from sklearn.metrics import r2_score
	from collections import OrderedDict
	import pickle
	import json
	import streamlit as st
	import plotly.express as px
	import numpy as np
	import plotly.graph_objects as go
	from sklearn.metrics import r2_score
	import pickle
	import json
	import pandas as pd
	import statsmodels.api as sm
	from sklearn.metrics import mean_absolute_percentage_error
	import sys
	import os
	from utilities import (set_header,
	initialize_data,
	load_local_css,
	create_channel_summary,
	create_contribution_pie,
	create_contribuion_stacked_plot,
	create_channel_spends_sales_plot,
	format_numbers,
	channel_name_formating,
	load_authenticator)
	import seaborn as sns
	import matplotlib.pyplot as plt
	import sweetviz as sv
	import tempfile

	# NOTE(review): nested indentation is missing in this listing; the block
	# structure described in these comments is read from the syntax only.

	# Swap sys.stdout for a freshly opened 'temp_stdout.txt', close it at once
	# and restore the original stream. Nothing is written in between, so the
	# only effect visible here is that the file is created/truncated.
	original_stdout = sys.stdout
	sys.stdout = open('temp_stdout.txt', 'w')
	sys.stdout.close()
	sys.stdout = original_stdout

	# Page chrome: wide layout, project stylesheet and shared header.
	st.set_page_config(layout='wide')
	load_local_css('styles.css')
	set_header()

	# Write every session_state entry back to itself, skipping the auth-related
	# keys and any key starting with 'FormSubmitter'.
	# NOTE(review): presumably the usual Streamlit trick for keeping widget
	# state alive across pages - confirm.
	for k, v in st.session_state.items():
	if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'):
	st.session_state[k] = v

	# Reuse the authenticator stored in session_state, or build one if absent.
	authenticator = st.session_state.get('authenticator')
	if authenticator is None:
	authenticator = load_authenticator()

	# The tuple returned by login() is unpacked but the page gates on the
	# 'authentication_status' value read from session_state instead.
	name, authentication_status, username = authenticator.login('Login', 'main')
	auth_status = st.session_state.get('authentication_status')

	# Authenticated path; the matching 'elif auth_status == False' comes after
	# the page body.
	if auth_status == True:
	is_state_initiaized = st.session_state.get('initialized',False)
	if not is_state_initiaized:
	# Placeholder: 'a' is not used anywhere in the visible code.
	a=1


	def plot_residual_predicted(actual, predicted, df_):
	    """Scatter standardized residuals against the predicted values.

	    Args:
	        actual: Observed target values.
	        predicted: Model predictions in the same order as ``actual``; either
	            a pandas Series or a plain array-like.
	        df_: DataFrame handed to ``px.scatter``. It is modified in place:
	            the columns ``Residuals`` and ``StdResidual`` are (over)written,
	            so pass a copy if the caller's frame must stay untouched.

	    Returns:
	        The Plotly figure, with a dashed line at 0 and solid lines at +/-2.
	    """
	    # pd.Series() gives a bare array/list a fresh RangeIndex, and the
	    # subtraction below aligns by label. Against a date-indexed target that
	    # yields all-NaN residuals, so reuse the index of ``actual`` to subtract
	    # such inputs positionally. A Series keeps its own index and is still
	    # aligned by label, exactly as before.
	    aligned_prediction = pd.Series(predicted)
	    if (
	        not isinstance(predicted, pd.Series)
	        and isinstance(actual, pd.Series)
	        and len(aligned_prediction) == len(actual)
	    ):
	        aligned_prediction.index = actual.index

	    df_['Residuals'] = actual - aligned_prediction
	    residuals = df_['Residuals']
	    df_['StdResidual'] = (residuals - residuals.mean()) / residuals.std()

	    # The caller's ``predicted`` object is passed through unchanged as x.
	    fig = px.scatter(
	        df_,
	        x=predicted,
	        y='StdResidual',
	        opacity=0.5,
	        color_discrete_sequence=["#11B6BD"],
	    )

	    # Reference lines: zero residual and the +/-2 standard-deviation band.
	    fig.add_hline(y=0, line_dash="dash", line_color="darkorange")
	    fig.add_hline(y=2, line_color="red")
	    fig.add_hline(y=-2, line_color="red")

	    fig.update_xaxes(title='Predicted')
	    fig.update_yaxes(title='Standardized Residuals (Actual - Predicted)')

	    # Fixed size so this chart lines up with the QQ plot next to it.
	    fig.update_layout(title='Residuals over Predicted Values', autosize=False, width=600, height=400)

	    return fig

	def residual_distribution(actual, predicted):
	    """Draw a histogram with KDE overlay of the residuals.

	    Args:
	        actual: Observed target values.
	        predicted: Model predictions in the same order as ``actual``; either
	            a pandas Series or a plain array-like.

	    Returns:
	        The ``matplotlib.pyplot`` module, whose current figure (created
	        here) holds the plot.
	    """
	    # pd.Series() gives a bare array/list a fresh RangeIndex, and the
	    # subtraction aligns by label. Against a date-indexed target that yields
	    # all-NaN residuals, so reuse the index of ``actual`` for such inputs.
	    # A Series keeps its own index and is aligned by label, as before.
	    aligned_prediction = pd.Series(predicted)
	    if (
	        not isinstance(predicted, pd.Series)
	        and isinstance(actual, pd.Series)
	        and len(aligned_prediction) == len(actual)
	    ):
	        aligned_prediction.index = actual.index

	    residuals = actual - aligned_prediction

	    sns.set(style="whitegrid")
	    plt.figure(figsize=(6, 4))
	    sns.histplot(residuals, kde=True, color="#11B6BD")

	    plt.title(' Distribution of Residuals')
	    plt.xlabel('Residuals')
	    # NOTE(review): the label says density, but histplot is called without
	    # a stat= argument - confirm which one is intended.
	    plt.ylabel('Probability Density')

	    return plt


	def qqplot(actual, predicted):
	    """Build a Plotly QQ plot of the standardized residuals.

	    Args:
	        actual: Observed target values.
	        predicted: Model predictions in the same order as ``actual``; either
	            a pandas Series or a plain array-like.

	    Returns:
	        A Plotly figure with the quantile markers and a red y = x line.
	    """
	    # pd.Series() gives a bare array/list a fresh RangeIndex, and the
	    # subtraction aligns by label. Against a date-indexed target that yields
	    # all-NaN residuals, so reuse the index of ``actual`` for such inputs.
	    # A Series keeps its own index and is aligned by label, as before.
	    aligned_prediction = pd.Series(predicted)
	    if (
	        not isinstance(predicted, pd.Series)
	        and isinstance(actual, pd.Series)
	        and len(aligned_prediction) == len(actual)
	    ):
	        aligned_prediction.index = actual.index

	    residuals = pd.Series(actual - aligned_prediction)
	    standardized = (residuals - residuals.mean()) / residuals.std()

	    # Build the probability plot once and read both quantile sets from it.
	    prob_plot = sm.ProbPlot(standardized)
	    theoretical = prob_plot.theoretical_quantiles
	    sample = prob_plot.sample_quantiles

	    fig = go.Figure()
	    fig.add_trace(go.Scatter(
	        x=theoretical,
	        y=sample,
	        mode='markers',
	        marker=dict(size=5, color="#11B6BD"),
	        name='QQ Plot',
	    ))

	    # 45-degree reference line. It always covers [-2, 2] as before and is
	    # widened when the theoretical quantiles reach further out, so the
	    # line no longer stops short of the outer points.
	    line_low, line_high = -2, 2
	    if len(theoretical):
	        line_low = min(line_low, float(np.min(theoretical)))
	        line_high = max(line_high, float(np.max(theoretical)))
	    fig.add_trace(go.Scatter(
	        x=[line_low, line_high],
	        y=[line_low, line_high],
	        mode='lines',
	        line=dict(color='red'),
	        name=' ',
	    ))

	    # Same fixed size as the residual scatter it is displayed next to.
	    fig.update_layout(title='QQ Plot of Residuals', title_x=0.5, autosize=False, width=600, height=400,
	                      xaxis_title='Theoretical Quantiles', yaxis_title='Sample Quantiles')

	    return fig


	def plot_actual_vs_predicted(date, y, predicted_values, model):
	    """Plot actual against predicted values and compute fit metrics.

	    Args:
	        date: X-axis values for both lines.
	        y: Observed target values.
	        predicted_values: Model predictions matching ``y``.
	        model: Fitted model object; only its ``df_model`` attribute is read,
	            as the number of predictors for adjusted R-squared.

	    Returns:
	        A ``(metrics_table, fig)`` tuple. ``metrics_table`` is a DataFrame
	        with columns ``Metric`` and ``Value`` holding MAPE (in percent),
	        R-squared and adjusted R-squared; ``fig`` is the Plotly figure.
	        Adjusted R-squared is NaN when the sample has too few rows for it
	        to be defined.
	    """
	    fig = go.Figure()
	    fig.add_trace(go.Scatter(x=date, y=y, mode='lines', name='Actual', line=dict(color='blue')))
	    fig.add_trace(go.Scatter(x=date, y=predicted_values, mode='lines', name='Predicted', line=dict(color='orange')))

	    # MAPE, expressed as a percentage.
	    mape = mean_absolute_percentage_error(y, predicted_values)*100

	    # R-squared from residual and total sums of squares.
	    rss = np.sum((y - predicted_values) ** 2)
	    tss = np.sum((y - np.mean(y)) ** 2)
	    r_squared = 1 - (rss / tss)

	    num_predictors = model.df_model
	    num_samples = len(y)

	    # Adjusted R-squared needs n - p - 1 > 0. With too few rows the old
	    # expression divided by zero or by a negative number, so report NaN
	    # instead of crashing or showing a meaningless value.
	    residual_dof = num_samples - num_predictors - 1
	    if residual_dof > 0:
	        adj_r_squared = 1 - ((1 - r_squared) * ((num_samples - 1) / residual_dof))
	    else:
	        adj_r_squared = float('nan')

	    metrics_table = pd.DataFrame({
	        'Metric': ['MAPE', 'R-squared', 'AdjR-squared'],
	        'Value': [mape, r_squared, adj_r_squared]})

	    fig.update_layout(
	        xaxis=dict(title='Date'),
	        yaxis=dict(title='Value'),
	        title=f'MAPE : {mape:.2f}%, AdjR2: {adj_r_squared:.2f}',
	        xaxis_tickangle=-30
	    )

	    return metrics_table,fig




	# # Perform linear regression
	# model = sm.OLS(y, X).fit()
	# Main page body, reached on the authenticated path. A checkbox picks
	# between two pre-trained models; each branch loads its pickled model and
	# train/test CSVs, offers a Sweetviz EDA report download, then shows the
	# model summary, fit metrics, actual-vs-predicted charts and residual plots.
	# NOTE(review): nested indentation is missing in this listing, so the exact
	# extent of the 'with'/'if' bodies below cannot be confirmed from here.
	eda_columns=st.columns(3)
	with eda_columns[0]:
	tactic=st.checkbox('Tactic Level Model')
	# ---- Tactic-level model branch ----
	if tactic:
	# NOTE(review): pickle.load runs code embedded in the file; the .pkl must
	# come from a trusted source.
	with open('mastercard_mmm_model.pkl', 'rb') as file:
	model = pickle.load(file)
	train=pd.read_csv('train_mastercard.csv')
	test=pd.read_csv('test_mastercard.csv')
	# This branch indexes both frames by the parsed 'Date' column.
	train['Date']=pd.to_datetime(train['Date'])
	test['Date']=pd.to_datetime(test['Date'])
	train.set_index('Date',inplace=True)
	test.set_index('Date',inplace=True)
	# Rows with missing values are dropped from the test set only.
	test.dropna(inplace=True)
	# Target is 'total_approved_accounts_revenue'; every other column is a
	# feature, and a constant column is added for the intercept.
	X_train=train.drop(["total_approved_accounts_revenue"],axis=1)
	y_train=train['total_approved_accounts_revenue']
	X_test=test.drop(["total_approved_accounts_revenue"],axis=1)
	X_train=sm.add_constant(X_train)
	X_test=sm.add_constant(X_test)
	y_test=test['total_approved_accounts_revenue']

	# sys.stdout.close()
	# sys.stdout = original_stdout

	# st.set_page_config(layout='wide')
	# load_local_css('styles.css')
	# set_header()

	# Raw channel data used only for the EDA report.
	channel_data=pd.read_excel("Channel_wise_imp_click_spends_new.xlsx",sheet_name='Sheet3')
	target_column='Total Approved Accounts - Revenue'


	with eda_columns[1]:
	if st.button('Generate EDA Report'):
	# Runs Sweetviz on the data with the given target feature, writes the
	# report as report.html into a new temporary directory and returns
	# that path. The temporary directory is not removed in the code shown.
	def generate_report_with_target(channel_data, target_feature):
	report = sv.analyze([channel_data, "Dataset"], target_feat=target_feature,verbose=False)
	temp_dir = tempfile.mkdtemp()
	report_path = os.path.join(temp_dir, "report.html")
	report.show_html(filepath=report_path, open_browser=False) # Generate the report as an HTML file
	return report_path

	report_file = generate_report_with_target(channel_data, target_column)

	# Offer the generated file for download, or warn if it is missing.
	if os.path.exists(report_file):
	with open(report_file, 'rb') as f:
	st.download_button(
	label="Download EDA Report",
	data=f.read(),
	file_name="report.html",
	mime="text/html"
	)
	else:
	st.warning("Report generation failed. Unable to find the report file.")


	st.title('Analysis of Result')

	st.write(model.summary(yname='Revenue'))

	# Metrics and charts for train and test; the same fitted model is passed
	# for both calls.
	metrics_table_train,fig_train= plot_actual_vs_predicted(X_train.index, y_train, model.predict(X_train), model)
	metrics_table_test,fig_test= plot_actual_vs_predicted(X_test.index, y_test, model.predict(X_test), model)

	# Reshape each metrics table into a single row labelled by its split, then
	# stack the two rows and round to 2 decimals.
	metrics_table_train=metrics_table_train.set_index('Metric').transpose()
	metrics_table_train.index=['Train']
	metrics_table_test=metrics_table_test.set_index('Metric').transpose()
	metrics_table_test.index=['test']
	metrics_table=np.round(pd.concat([metrics_table_train,metrics_table_test]),2)

	st.markdown('Result Overview')
	# metrics_table is already rounded above; it is rounded again here.
	st.dataframe(np.round(metrics_table,2),use_container_width=True)

	st.subheader('Actual vs Predicted Plot Train')

	st.plotly_chart(fig_train,use_container_width=True)
	st.subheader('Actual vs Predicted Plot Test')
	st.plotly_chart(fig_test,use_container_width=True)

	st.markdown('## Residual Analysis')
	columns=st.columns(2)
	# plot_residual_predicted adds columns to the frame it receives, so it is
	# given a copy and X_train stays usable for the later predict() calls.
	Xtrain1=X_train.copy()
	with columns[0]:
	fig=plot_residual_predicted(y_train,model.predict(Xtrain1),Xtrain1)
	st.plotly_chart(fig)

	with columns[1]:
	st.empty()
	fig = qqplot(y_train,model.predict(X_train))
	st.plotly_chart(fig)

	with columns[0]:
	fig=residual_distribution(y_train,model.predict(X_train))
	st.pyplot(fig)
	# ---- Channel-level model branch (checkbox not ticked) ----
	# Same flow as the tactic branch with the '*_channel' files. Differences
	# visible here: no Date index is set (that code is commented out), the
	# Sweetviz call omits verbose=False, and the report file is opened without
	# the os.path.exists check.
	else:
	# NOTE(review): pickle.load runs code embedded in the file; the .pkl must
	# come from a trusted source.
	with open('mastercard_mmm_model_channel.pkl', 'rb') as file:
	model = pickle.load(file)
	train=pd.read_csv('train_mastercard_channel.csv')
	test=pd.read_csv('test_mastercard_channel.csv')
	# train['Date']=pd.to_datetime(train['Date'])
	# test['Date']=pd.to_datetime(test['Date'])
	# train.set_index('Date',inplace=True)
	# test.set_index('Date',inplace=True)
	# Rows with missing values are dropped from the test set only.
	test.dropna(inplace=True)
	X_train=train.drop(["total_approved_accounts_revenue"],axis=1)
	y_train=train['total_approved_accounts_revenue']
	X_test=test.drop(["total_approved_accounts_revenue"],axis=1)
	X_train=sm.add_constant(X_train)
	X_test=sm.add_constant(X_test)
	y_test=test['total_approved_accounts_revenue']



	# Raw channel data used only for the EDA report.
	channel_data=pd.read_excel("Channel_wise_imp_click_spends_new.xlsx",sheet_name='Sheet3')
	target_column='Total Approved Accounts - Revenue'
	with eda_columns[1]:
	if st.button('Generate EDA Report'):
	# Runs Sweetviz on the data with the given target feature, writes the
	# report as report.html into a new temporary directory and returns
	# that path. The temporary directory is not removed in the code shown.
	def generate_report_with_target(channel_data, target_feature):
	report = sv.analyze([channel_data, "Dataset"], target_feat=target_feature)
	temp_dir = tempfile.mkdtemp()
	report_path = os.path.join(temp_dir, "report.html")
	report.show_html(filepath=report_path, open_browser=False) # Generate the report as an HTML file
	return report_path

	report_file = generate_report_with_target(channel_data, target_column)

	# Provide a link to download the generated report
	with open(report_file, 'rb') as f:
	st.download_button(
	label="Download EDA Report",
	data=f.read(),
	file_name="report.html",
	mime="text/html"
	)


	st.title('Analysis of Result')

	st.write(model.summary(yname='Revenue'))

	# Metrics and charts for train and test; the x-axis here is the frames'
	# index as read from CSV, since no Date index is set in this branch.
	metrics_table_train,fig_train= plot_actual_vs_predicted(X_train.index, y_train, model.predict(X_train), model)
	metrics_table_test,fig_test= plot_actual_vs_predicted(X_test.index, y_test, model.predict(X_test), model)

	# Reshape each metrics table into a single row labelled by its split, then
	# stack the two rows and round to 2 decimals.
	metrics_table_train=metrics_table_train.set_index('Metric').transpose()
	metrics_table_train.index=['Train']
	metrics_table_test=metrics_table_test.set_index('Metric').transpose()
	metrics_table_test.index=['test']
	metrics_table=np.round(pd.concat([metrics_table_train,metrics_table_test]),2)

	st.markdown('Result Overview')
	# metrics_table is already rounded above; it is rounded again here.
	st.dataframe(np.round(metrics_table,2),use_container_width=True)

	st.subheader('Actual vs Predicted Plot Train')

	st.plotly_chart(fig_train,use_container_width=True)
	st.subheader('Actual vs Predicted Plot Test')
	st.plotly_chart(fig_test,use_container_width=True)

	st.markdown('## Residual Analysis')
	columns=st.columns(2)
	# plot_residual_predicted adds columns to the frame it receives, so it is
	# given a copy and X_train stays usable for the later predict() calls.
	Xtrain1=X_train.copy()
	with columns[0]:
	fig=plot_residual_predicted(y_train,model.predict(Xtrain1),Xtrain1)
	st.plotly_chart(fig)

	with columns[1]:
	st.empty()
	fig = qqplot(y_train,model.predict(X_train))
	st.plotly_chart(fig)

	with columns[0]:
	fig=residual_distribution(y_train,model.predict(X_train))
	st.pyplot(fig)

	# Counterpart of the earlier 'if auth_status == True': an explicit False
	# means a login attempt was rejected. A status of None shows no error.
	elif auth_status == False:
	st.error('Username/Password is incorrect')

	# For anyone not logged in, show the forgot-password widget.
	if auth_status != True:
	try:
	username_forgot_pw, email_forgot_password, random_password = authenticator.forgot_password('Forgot password')
	if username_forgot_pw:
	# NOTE(review): only a success message is shown; no code visible here
	# delivers random_password to email_forgot_password.
	st.success('New password sent securely')
	# Random password to be transferred to user securely
	elif username_forgot_pw == False:
	st.error('Username not found')
	# Any exception from the widget is shown to the user as an error message.
	except Exception as e:
	st.error(e)