Spaces:
Running
Running
| import streamlit as st | |
| import plotly.express as px | |
| import numpy as np | |
| import plotly.graph_objects as go | |
| from sklearn.metrics import r2_score | |
| from collections import OrderedDict | |
| import pickle | |
| import json | |
| import streamlit as st | |
| import plotly.express as px | |
| import numpy as np | |
| import plotly.graph_objects as go | |
| from sklearn.metrics import r2_score | |
| import pickle | |
| import json | |
| import pandas as pd | |
| import statsmodels.api as sm | |
| from sklearn.metrics import mean_absolute_percentage_error | |
| import sys | |
| import os | |
| from utilities import (set_header, | |
| initialize_data, | |
| load_local_css, | |
| create_channel_summary, | |
| create_contribution_pie, | |
| create_contribuion_stacked_plot, | |
| create_channel_spends_sales_plot, | |
| format_numbers, | |
| channel_name_formating, | |
| load_authenticator) | |
| import seaborn as sns | |
| import matplotlib.pyplot as plt | |
| import sweetviz as sv | |
| import tempfile | |
# Point stdout at a scratch file for an instant, close that file, and put the
# real stream back. Net effect: 'temp_stdout.txt' is created/truncated empty
# and sys.stdout ends up unchanged.
original_stdout = sys.stdout
scratch_stream = open('temp_stdout.txt', 'w')
sys.stdout = scratch_stream
scratch_stream.close()
sys.stdout = original_stdout

# Page chrome: wide layout, local stylesheet, shared header.
st.set_page_config(layout='wide')
load_local_css('styles.css')
set_header()

# Re-assign every session-state entry to itself, except the auth widget keys
# and form-submitter keys.
# NOTE(review): presumably this keeps widget-backed state alive across page
# switches -- confirm against the other pages of the app.
_SKIPPED_STATE_KEYS = ['logout', 'login', 'config']
for state_key, state_value in st.session_state.items():
    if state_key in _SKIPPED_STATE_KEYS or state_key.startswith('FormSubmitter'):
        continue
    st.session_state[state_key] = state_value

# Reuse the authenticator stored in the session when there is one; otherwise
# build a fresh one.
cached_authenticator = st.session_state.get('authenticator')
authenticator = (
    load_authenticator() if cached_authenticator is None else cached_authenticator
)

# Render the login widget. The returned tuple is kept for reference only; the
# status used below is read back from session state.
name, authentication_status, username = authenticator.login('Login', 'main')
auth_status = st.session_state.get('authentication_status')
# Start of the authenticated branch: taken only when the stored
# authentication status compares equal to True.
# NOTE(review): indentation is lost in this listing; presumably everything up
# to the matching `elif auth_status == False:` belongs to this branch.
| if auth_status == True: | |
# Reads the 'initialized' flag from session state, defaulting to False.
| is_state_initiaized = st.session_state.get('initialized',False) | |
# NOTE(review): `a` is not read anywhere else in the visible code, so this
# looks like a leftover placeholder -- confirm before removing.
| if not is_state_initiaized: | |
| a=1 | |
def plot_residual_predicted(actual, predicted, df_):
    """Scatter the standardized residuals against the predicted values.

    Args:
        actual: Observed target values (array-like, one per row of ``df_``).
        predicted: Model predictions (array-like, same length as ``actual``).
        df_: DataFrame with one row per observation. It is modified in
            place: ``Residuals`` and ``StdResidual`` columns are added, so
            pass a copy when the caller's frame must stay untouched.

    Returns:
        A plotly figure with the standardized residuals on the y axis, a
        dashed zero line and solid reference lines at +/-2.
    """
    # Subtract by position, not by pandas index: wrapping a plain array in
    # pd.Series gives it a RangeIndex, which would not line up with a
    # date-indexed `actual` and would silently turn every residual into NaN.
    predicted_values = np.asarray(predicted, dtype=float).ravel()
    residuals = np.asarray(actual, dtype=float).ravel() - predicted_values

    df_['Residuals'] = residuals
    df_['StdResidual'] = (
        (df_['Residuals'] - df_['Residuals'].mean()) / df_['Residuals'].std()
    )

    fig = px.scatter(
        df_,
        x=predicted_values,
        y='StdResidual',
        opacity=0.5,
        color_discrete_sequence=["#11B6BD"],
    )

    # Zero line plus the conventional +/-2 standard-deviation guides.
    fig.add_hline(y=0, line_dash="dash", line_color="darkorange")
    fig.add_hline(y=2, line_color="red")
    fig.add_hline(y=-2, line_color="red")

    fig.update_xaxes(title='Predicted')
    fig.update_yaxes(title='Standardized Residuals (Actual - Predicted)')

    # Fixed size so this chart matches the QQ plot rendered next to it.
    fig.update_layout(
        title='Residuals over Predicted Values',
        autosize=False,
        width=600,
        height=400,
    )
    return fig
def residual_distribution(actual, predicted):
    """Draw a histogram with KDE overlay of the residuals.

    Args:
        actual: Observed target values (array-like).
        predicted: Model predictions (array-like, same length as ``actual``).

    Returns:
        The ``matplotlib.pyplot`` module, with the newly created figure as
        the current figure. The figure is not closed here.
    """
    # Positional subtraction: pd.Series(predicted) on a plain array would get
    # a RangeIndex and mis-align with a date-indexed `actual` (all NaN).
    residuals = (
        np.asarray(actual, dtype=float).ravel()
        - np.asarray(predicted, dtype=float).ravel()
    )

    sns.set(style="whitegrid")
    plt.figure(figsize=(6, 4))
    # stat="density" makes the bars match the 'Probability Density' axis
    # label; histplot's default stat draws raw counts.
    sns.histplot(residuals, kde=True, stat="density", color="#11B6BD")
    plt.title(' Distribution of Residuals')
    plt.xlabel('Residuals')
    plt.ylabel('Probability Density')
    return plt
def qqplot(actual, predicted):
    """Build a QQ plot of the standardized residuals.

    Args:
        actual: Observed target values (array-like).
        predicted: Model predictions (array-like, same length as ``actual``).

    Returns:
        A plotly figure with sample quantiles plotted against theoretical
        quantiles, plus a red 45-degree reference line.
    """
    # Positional subtraction: pd.Series(predicted) on a plain array would get
    # a RangeIndex and mis-align with a date-indexed `actual` (all NaN).
    residuals = pd.Series(
        np.asarray(actual, dtype=float).ravel()
        - np.asarray(predicted, dtype=float).ravel()
    )
    standardized = (residuals - residuals.mean()) / residuals.std()

    # Build the probability plot once and read both quantile arrays from it.
    prob_plot = sm.ProbPlot(standardized.to_numpy())
    theoretical = np.asarray(prob_plot.theoretical_quantiles, dtype=float)
    sample = np.asarray(prob_plot.sample_quantiles, dtype=float)

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=theoretical,
        y=sample,
        mode='markers',
        marker=dict(size=5, color="#11B6BD"),
        name='QQ Plot',
    ))

    # 45-degree reference line. It always covers [-2, 2] as before, and is
    # widened when the plotted quantiles reach beyond that range.
    plotted = np.concatenate([theoretical, sample])
    plotted = plotted[np.isfinite(plotted)]
    line_min = min(-2.0, float(plotted.min())) if plotted.size else -2.0
    line_max = max(2.0, float(plotted.max())) if plotted.size else 2.0
    fig.add_trace(go.Scatter(
        x=[line_min, line_max],
        y=[line_min, line_max],
        mode='lines',
        line=dict(color='red'),
        name=' ',
    ))

    fig.update_layout(
        title='QQ Plot of Residuals',
        title_x=0.5,
        autosize=False,
        width=600,
        height=400,
        xaxis_title='Theoretical Quantiles',
        yaxis_title='Sample Quantiles',
    )
    return fig
def plot_actual_vs_predicted(date, y, predicted_values, model):
    """Plot actual vs. predicted series and compute fit metrics.

    Args:
        date: X-axis values for both traces.
        y: Observed target values (array-like).
        predicted_values: Model predictions (array-like, same length as ``y``).
        model: Object exposing ``df_model``, used as the number of
            predictors in the adjusted R-squared formula.

    Returns:
        Tuple ``(metrics_table, fig)``: a DataFrame with ``Metric``/``Value``
        columns holding MAPE (in percent), R-squared and adjusted R-squared,
        and the plotly figure. R-squared and adjusted R-squared are NaN when
        they are undefined (zero total variance, or no residual degrees of
        freedom).
    """
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=date, y=y, mode='lines', name='Actual', line=dict(color='blue')))
    fig.add_trace(go.Scatter(
        x=date, y=predicted_values, mode='lines', name='Predicted',
        line=dict(color='orange')))

    mape = mean_absolute_percentage_error(y, predicted_values) * 100

    # Work on plain arrays so the sums of squares are positional, the same
    # way the MAPE above is, instead of depending on pandas index alignment.
    observed = np.asarray(y, dtype=float).ravel()
    fitted = np.asarray(predicted_values, dtype=float).ravel()

    rss = float(np.sum((observed - fitted) ** 2))
    tss = float(np.sum((observed - np.mean(observed)) ** 2))
    # A constant target has no variance to explain, so R-squared is undefined.
    r_squared = 1 - (rss / tss) if tss > 0 else float('nan')

    num_predictors = model.df_model
    num_samples = len(observed)

    # Adjusted R-squared needs at least one residual degree of freedom;
    # otherwise the ratio divides by zero or flips sign.
    residual_dof = num_samples - num_predictors - 1
    if residual_dof > 0:
        adj_r_squared = 1 - ((1 - r_squared) * ((num_samples - 1) / residual_dof))
    else:
        adj_r_squared = float('nan')

    metrics_table = pd.DataFrame({
        'Metric': ['MAPE', 'R-squared', 'AdjR-squared'],
        'Value': [mape, r_squared, adj_r_squared]})

    fig.update_layout(
        xaxis=dict(title='Date'),
        yaxis=dict(title='Value'),
        title=f'MAPE : {mape:.2f}%, AdjR2: {adj_r_squared:.2f}',
        xaxis_tickangle=-30
    )
    return metrics_table, fig
| # # Perform linear regression | |
| # model = sm.OLS(y, X).fit() | |
# Model analysis page body.
# NOTE(review): indentation is lost in this listing; this section appears to
# sit inside the `if auth_status == True:` branch, so re-indent it one level
# when splicing it back in.
#
# The tactic-level and channel-level views run the same flow; they differ
# only in which artifacts are loaded and whether the CSVs are re-indexed by
# 'Date'. Both views therefore share the helpers below.

_TARGET_FIELD = 'total_approved_accounts_revenue'


def _load_model_and_splits(model_path, train_path, test_path, use_date_index):
    """Load a pickled model plus its train/test CSVs.

    Returns ``(model, X_train, y_train, X_test, y_test)``; both design
    matrices go through ``sm.add_constant``.
    """
    # SECURITY: pickle.load can execute arbitrary code from the file. Only
    # load model artifacts that ship with the app; never user-supplied files.
    with open(model_path, 'rb') as file:
        model = pickle.load(file)

    train = pd.read_csv(train_path)
    test = pd.read_csv(test_path)
    if use_date_index:
        for frame in (train, test):
            frame['Date'] = pd.to_datetime(frame['Date'])
            frame.set_index('Date', inplace=True)
    # Only the test split has its incomplete rows dropped.
    test.dropna(inplace=True)

    X_train = sm.add_constant(train.drop([_TARGET_FIELD], axis=1))
    y_train = train[_TARGET_FIELD]
    X_test = sm.add_constant(test.drop([_TARGET_FIELD], axis=1))
    y_test = test[_TARGET_FIELD]
    return model, X_train, y_train, X_test, y_test


def _build_eda_report(channel_data, target_feature):
    """Return the sweetviz HTML report as bytes, or None if no file was written."""
    # NOTE(review): one of the two original branches passed `verbose=False`
    # to sv.analyze and the other did not; the plain call is used for both.
    # Confirm against the installed sweetviz version whether a verbosity
    # option is wanted here.
    report = sv.analyze([channel_data, "Dataset"], target_feat=target_feature)
    # The temporary directory is removed again once the bytes are read, so
    # repeated clicks do not pile up report files on disk.
    with tempfile.TemporaryDirectory() as temp_dir:
        report_path = os.path.join(temp_dir, "report.html")
        report.show_html(filepath=report_path, open_browser=False)
        if not os.path.exists(report_path):
            return None
        with open(report_path, 'rb') as f:
            return f.read()


def _metrics_row(metrics_table, row_label):
    """Turn a Metric/Value table into a single wide row labelled ``row_label``."""
    row = metrics_table.set_index('Metric').transpose()
    row.index = [row_label]
    return row


def _render_model_results(model, X_train, y_train, X_test, y_test):
    """Render the summary, metrics, actual-vs-predicted and residual charts."""
    st.title('Analysis of Result')
    st.write(model.summary(yname='Revenue'))

    # Predict once per split and reuse the result for every chart.
    train_predictions = model.predict(X_train)
    test_predictions = model.predict(X_test)

    metrics_table_train, fig_train = plot_actual_vs_predicted(
        X_train.index, y_train, train_predictions, model)
    metrics_table_test, fig_test = plot_actual_vs_predicted(
        X_test.index, y_test, test_predictions, model)

    metrics_table = np.round(
        pd.concat([
            _metrics_row(metrics_table_train, 'Train'),
            _metrics_row(metrics_table_test, 'test'),
        ]),
        2,
    )
    st.markdown('Result Overview')
    st.dataframe(metrics_table, use_container_width=True)

    st.subheader('Actual vs Predicted Plot Train')
    st.plotly_chart(fig_train, use_container_width=True)
    st.subheader('Actual vs Predicted Plot Test')
    st.plotly_chart(fig_test, use_container_width=True)

    st.markdown('## Residual Analysis')
    columns = st.columns(2)
    with columns[0]:
        # plot_residual_predicted adds columns to the frame it is given, so
        # hand it a copy instead of the design matrix itself.
        st.plotly_chart(
            plot_residual_predicted(y_train, train_predictions, X_train.copy()))
    with columns[1]:
        st.empty()
        st.plotly_chart(qqplot(y_train, train_predictions))
    with columns[0]:
        st.pyplot(residual_distribution(y_train, train_predictions))


eda_columns = st.columns(3)
with eda_columns[0]:
    tactic = st.checkbox('Tactic Level Model')

if tactic:
    artifact_paths = (
        'mastercard_mmm_model.pkl',
        'train_mastercard.csv',
        'test_mastercard.csv',
    )
else:
    artifact_paths = (
        'mastercard_mmm_model_channel.pkl',
        'train_mastercard_channel.csv',
        'test_mastercard_channel.csv',
    )

# Only the tactic-level CSVs are re-indexed by their 'Date' column.
model, X_train, y_train, X_test, y_test = _load_model_and_splits(
    *artifact_paths, use_date_index=tactic)

channel_data = pd.read_excel(
    "Channel_wise_imp_click_spends_new.xlsx", sheet_name='Sheet3')
target_column = 'Total Approved Accounts - Revenue'

with eda_columns[1]:
    if st.button('Generate EDA Report'):
        report_bytes = _build_eda_report(channel_data, target_column)
        if report_bytes is None:
            st.warning("Report generation failed. Unable to find the report file.")
        else:
            st.download_button(
                label="Download EDA Report",
                data=report_bytes,
                file_name="report.html",
                mime="text/html"
            )

_render_model_results(model, X_train, y_train, X_test, y_test)
# Rejected login: taken only when the stored status compares equal to False.
# A missing/None status does not match and shows no error here.
| elif auth_status == False: | |
| st.error('Username/Password is incorrect') | |
# Offer the "forgot password" widget to anyone who is not logged in.
if auth_status != True:
    try:
        forgot_password_result = authenticator.forgot_password('Forgot password')
        username_forgot_pw, email_forgot_password, random_password = forgot_password_result
        if username_forgot_pw:
            # NOTE(review): nothing in the visible code delivers
            # `random_password` to `email_forgot_password`; the message below
            # is shown regardless. The original comment marks the secure
            # transfer as still to be done.
            st.success('New password sent securely')
        else:
            # Only an explicit False means "unknown user"; any other falsy
            # value shows nothing.
            if username_forgot_pw == False:
                st.error('Username not found')
    except Exception as exc:
        # Surface any failure from the widget to the user instead of
        # breaking the page.
        st.error(exc)