# Streamlit page: contribution overview and diagnostics for the saved tuned
# models of the current project.
#
# NOTE(review): the source of this file had lost its line breaks and
# indentation. The structure below was reconstructed from syntax; in particular
# the page body is assumed to live under the ``username`` check -- confirm
# against version control.
import os
import pickle
import re
import sqlite3
import sys
import tempfile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import statsmodels.api as sm
import streamlit as st
from sklearn.metrics import mean_absolute_percentage_error, r2_score
from sklearn.preprocessing import MinMaxScaler

from Data_prep_functions import plot_actual_vs_predicted
from utilities import load_local_css, project_selection, set_header, update_db

# Contribution columns that are folded into the "base" contribution.
_TUNING_CONTR_COLS = (
    "Week_number_contr",
    "Trend_contr",
    "sine_wave_contr",
    "cosine_wave_contr",
)


def _normalise_name(name):
    """Return *name* lower-cased with the column-name character replacements applied."""
    return (
        name.lower()
        .replace(".", "_")
        .replace("@", "_")
        .replace(" ", "_")
        .replace("-", "")
        .replace(":", "")
        .replace("__", "_")
    )


def plot_residual_predicted(actual, predicted, df_):
    """Build a scatter plot of standardized residuals against predictions.

    Args:
        actual: Observed target values.
        predicted: Predicted target values.
        df_: DataFrame that receives the ``Residuals`` and ``StdResidual``
            columns. It is modified in place, so pass a copy.

    Returns:
        A Plotly figure with reference lines at 0 and +/-2.
    """
    df_["Residuals"] = actual - pd.Series(predicted)
    df_["StdResidual"] = (df_["Residuals"] - df_["Residuals"].mean()) / df_[
        "Residuals"
    ].std()

    fig = px.scatter(
        df_,
        x=predicted,
        y="StdResidual",
        opacity=0.5,
        color_discrete_sequence=["#11B6BD"],
    )

    fig.add_hline(y=0, line_dash="dash", line_color="darkorange")
    fig.add_hline(y=2, line_color="red")
    fig.add_hline(y=-2, line_color="red")

    fig.update_xaxes(title="Predicted")
    fig.update_yaxes(title="Standardized Residuals (Actual - Predicted)")

    fig.update_layout(
        title="Residuals over Predicted Values",
        autosize=False,
        width=600,
        height=400,
    )
    return fig


def residual_distribution(actual, predicted):
    """Draw a histogram (with KDE) of the residuals on a new Matplotlib figure.

    Returns:
        The ``matplotlib.pyplot`` module, whose current figure holds the plot.
    """
    residuals = actual - pd.Series(predicted)

    sns.set(style="whitegrid")
    plt.figure(figsize=(6, 4))
    sns.histplot(residuals, kde=True, color="#11B6BD")
    plt.title(" Distribution of Residuals")
    plt.xlabel("Residuals")
    plt.ylabel("Probability Density")
    return plt


def qqplot(actual, predicted):
    """Build a QQ plot of the standardized residuals with a -2..2 diagonal."""
    residuals = pd.Series(actual - pd.Series(predicted))
    standardized = (residuals - residuals.mean()) / residuals.std()

    # Build the probability plot once and read both quantile sets from it.
    prob_plot = sm.ProbPlot(standardized)

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=prob_plot.theoretical_quantiles,
            y=prob_plot.sample_quantiles,
            mode="markers",
            marker=dict(size=5, color="#11B6BD"),
            name="QQ Plot",
        )
    )

    # Reference diagonal; the end points are fixed rather than data driven.
    fig.add_trace(
        go.Scatter(
            x=[-2, 2],
            y=[-2, 2],
            mode="lines",
            line=dict(color="red"),
            name=" ",
        )
    )

    fig.update_layout(
        title="QQ Plot of Residuals",
        title_x=0.5,
        autosize=False,
        width=600,
        height=400,
        xaxis_title="Theoretical Quantiles",
        yaxis_title="Sample Quantiles",
    )
    return fig


def get_random_effects(media_data, panel_col, mdf):
    """Collect the first random-effect value of every panel in *media_data*.

    Args:
        media_data: DataFrame containing the ``panel_col`` column.
        panel_col: Name of the panel column.
        mdf: Fitted model exposing ``random_effects`` keyed by panel value.

    Returns:
        DataFrame with the columns ``panel_col`` and ``random_effect``, one
        row per unique panel value.
    """
    markets = list(media_data[panel_col].unique())
    intercepts = [mdf.random_effects[market].values[0] for market in markets]
    return pd.DataFrame(
        {panel_col: markets, "random_effect": intercepts},
        columns=[panel_col, "random_effect"],
    )


def mdf_predict(X_df, mdf, random_eff_df):
    """Return a copy of *X_df* with a ``pred`` column (fixed + random effect).

    Relies on the module-level ``panel_col`` to join the random effects.
    """
    X = pd.merge(
        X_df.copy(),
        random_eff_df[[panel_col, "random_effect"]],
        on=panel_col,
        how="left",
    )
    X["pred_fixed_effect"] = mdf.predict(X)
    X["pred"] = X["pred_fixed_effect"] + X["random_effect"]
    return X.drop(columns=["pred_fixed_effect", "random_effect"])


def metrics_df_panel(model_dict, is_panel):
    """Summarise every tuned model as one row of fit metrics.

    Args:
        model_dict: Mapping whose keys contain the target after ``"__"`` and
            whose values hold ``Model_object``, ``feature_set``,
            ``X_train_tuned`` and ``X_test_tuned``.
        is_panel: When true, predictions add the per-panel random effects
            (uses the module-level ``media_data`` and ``panel_col``).

    Returns:
        DataFrame with the columns Model, R2, ADJR2, Train Mape, Test Mape,
        Summary and Model_object; the numeric columns are rounded to 2 places.
    """
    columns = [
        "Model",
        "R2",
        "ADJR2",
        "Train Mape",
        "Test Mape",
        "Summary",
        "Model_object",
    ]
    numeric_cols = ["R2", "ADJR2", "Train Mape", "Test Mape"]

    rows = []
    for key, entry in model_dict.items():
        target = key.split("__")[1]
        model = entry["Model_object"]
        feature_set = entry["feature_set"]
        x_train = entry["X_train_tuned"]
        x_test = entry["X_test_tuned"]
        y = x_train[target]
        ytest = x_test[target]

        if is_panel:
            random_df = get_random_effects(media_data, panel_col, model)
            pred = mdf_predict(x_train, model, random_df)["pred"]
            predtest = mdf_predict(x_test, model, random_df)["pred"]
        else:
            pred = model.predict(x_train[feature_set])
            predtest = model.predict(x_test[feature_set])

        r2 = r2_score(y, pred)
        adj_r2 = 1 - (1 - r2) * (len(y) - 1) / (len(y) - len(feature_set) - 1)
        rows.append(
            {
                "Model": target,
                "R2": r2,
                "ADJR2": adj_r2,
                "Train Mape": mean_absolute_percentage_error(y, pred),
                "Test Mape": mean_absolute_percentage_error(ytest, predtest),
                "Summary": model.summary(),
                "Model_object": model,
            }
        )

    metrics_df = pd.DataFrame(rows, columns=columns)
    # Cast explicitly: rounding silently skips object-dtype columns.
    metrics_df[numeric_cols] = metrics_df[numeric_cols].astype(float).round(2)
    return metrics_df


def map_channel(transformed_var, channel_dict):
    """Return the channel whose raw variables occur in *transformed_var*.

    Falls back to *transformed_var* itself when no channel matches.
    """
    for key, value_list in channel_dict.items():
        if any(raw_var in transformed_var for raw_var in value_list):
            return key
    return transformed_var


def _channel_shares(contribution_frame, target, rename_channel):
    """Turn per-row ``*contr*`` columns into percentage shares per channel."""
    shares = contribution_frame.filter(regex="contr").sum(axis=0).reset_index()
    shares.columns = ["Channel", target]
    shares["Channel"] = shares["Channel"].apply(rename_channel)
    # Several variables can map to one channel; keep a single row per channel
    # so the outer merge and the charts see exactly one value each.
    shares = shares.groupby("Channel", as_index=False, sort=False)[target].sum()
    shares[target] = 100 * shares[target] / shares[target].sum()
    # Assign the result instead of a chained ``inplace`` replace, which does
    # not reliably write back to the frame.
    shares["Channel"] = shares["Channel"].replace("base_contr", "base")
    return shares


def contributions_nonpanel(model_dict):
    """Compute percentage contributions per channel for non-panel models.

    Constant, flag, tuning and exogenous contributions are summed into the
    ``base`` channel. Also writes the last per-row contribution frame to
    ``Test/smr_x_train_contribution.csv``.

    Returns:
        DataFrame with a ``Channel`` column and one column per target.
    """
    # NOTE: pickle is only safe on files written by this application.
    with open(
        os.path.join(st.session_state["project_path"], "channel_groups.pkl"), "rb"
    ) as f:
        channels = pickle.load(f)

    exog_vars = [
        _normalise_name(var)
        for var in st.session_state["bin_dict"].get("Exogenous", [])
    ]

    contribution_df = pd.DataFrame(columns=["Channel"])
    for key, entry in model_dict.items():
        model = entry["Model_object"]
        target = key.split("__")[1]
        x_train = entry["X_train_tuned"]
        params = pd.Series(model.params)

        contributions = x_train.copy()
        contributions["pred"] = model.predict(x_train[entry["feature_set"]])
        for feature, coef in params.items():
            if feature != "const":
                contributions[str(feature) + "_contr"] = (
                    coef * contributions[feature]
                )
            else:
                contributions["const"] = coef

        contr_cols = list(contributions.filter(regex="contr").columns)
        const_cols = ["const"] if "const" in params.index else []
        flag_cols = [c for c in contr_cols if "_flag" in c]
        tuning_cols = [c for c in contr_cols if c in _TUNING_CONTR_COLS]
        exog_cols = [c for c in contr_cols if any(var in c for var in exog_vars)]
        # De-duplicate: a column that is both a flag and exogenous must only
        # be counted once in the base.
        base_cols = list(
            dict.fromkeys(const_cols + flag_cols + tuning_cols + exog_cols)
        )

        contributions["base_contr"] = contributions[base_cols].sum(axis=1)
        contributions.drop(columns=base_cols, inplace=True)

        # Debug dump kept from the original; make sure the folder exists.
        os.makedirs("Test", exist_ok=True)
        contributions.to_csv("Test/smr_x_train_contribution.csv", index=False)

        contri_df = _channel_shares(
            contributions, target, lambda name: map_channel(name, channels)
        )
        contribution_df = pd.merge(
            contribution_df, contri_df, on="Channel", how="outer"
        )
    return contribution_df


def contributions_panel(model_dict):
    """Compute percentage contributions per channel for panel models.

    The panel effect (fixed intercept + random effect) and the tuning
    contributions are summed into the ``base`` channel. Uses the module-level
    ``panel_col``.

    Returns:
        DataFrame with a ``Channel`` column and one column per target.
    """
    media_data = st.session_state["media_data"]

    contribution_df = pd.DataFrame(columns=["Channel"])
    for key, entry in model_dict.items():
        model = entry["Model_object"]
        target = key.split("__")[1]

        random_eff_df = get_random_effects(media_data, panel_col, model)
        random_eff_df["fixed_effect"] = model.fe_params["Intercept"]
        random_eff_df["panel_effect"] = (
            random_eff_df["random_effect"] + random_eff_df["fixed_effect"]
        )

        contributions = mdf_predict(
            entry["X_train_tuned"].copy(), model, random_eff_df
        )
        contributions = pd.merge(
            contributions,
            random_eff_df[[panel_col, "panel_effect"]],
            on=panel_col,
            how="left",
        )

        # The first fixed-effect parameter is skipped, as in the original
        # (presumably the intercept, which is already in ``panel_effect``).
        for feature, coef in pd.Series(model.fe_params).iloc[1:].items():
            contributions[str(feature) + "_contr"] = coef * contributions[feature]

        contr_cols = contributions.filter(regex="contr").columns
        base_cols = ["panel_effect"] + [
            c for c in contr_cols if c in _TUNING_CONTR_COLS
        ]
        contributions["base_contr"] = contributions[base_cols].sum(axis=1)
        contributions.drop(columns=base_cols, inplace=True)

        contri_df = _channel_shares(
            contributions,
            target,
            lambda name: re.split("(_impres|_clicks)", name)[0],
        )
        contribution_df = pd.merge(
            contribution_df, contri_df, on="Channel", how="outer"
        )
    return contribution_df


def format_display(inp):
    """Return *inp* title-cased with underscores shown as spaces."""
    return inp.title().replace("_", " ").strip()


def _collect_contributions(contribution_df, contribution_selections):
    """Gather the chart inputs shared by the bar and waterfall plots.

    Returns:
        ``(ordered_channels, per_channel, base_by_selection)`` where
        ``ordered_channels`` lists the non-base channels by descending mean
        contribution, ``per_channel[name][i]`` is the contribution of ``name``
        for the i-th selection and ``base_by_selection`` maps each selection
        to its base contribution.
    """
    channel_names = contribution_df["Channel"].tolist()
    per_channel = {
        name: [] for name in channel_names if name not in ("const", "base")
    }
    base_by_selection = {}

    for selection in contribution_selections:
        # A channel missing from one model is NaN after the outer merge;
        # treat it as contributing 0 so labels and sorting stay finite.
        values = np.nan_to_num(
            contribution_df[selection].values.astype(float), nan=0.0
        )
        base_sales = 0
        for channel_name, value in zip(channel_names, values):
            if channel_name in per_channel:
                per_channel[channel_name].append(value)
            elif channel_name == "base":
                base_sales = value
        base_by_selection[selection] = base_sales

    def _neg_mean(name):
        collected = per_channel[name]
        return -np.mean(collected) if collected else 0.0

    ordered_channels = sorted(per_channel, key=_neg_mean)
    return ordered_channels, per_channel, base_by_selection


def _percent_labels(values):
    """Format *values* as whole-number percentage strings."""
    return [f"{val}%" for val in np.round(values, 0).astype(int)]


def create_grouped_bar_plot(contribution_df, contribution_selections):
    """Build a grouped bar chart of contributions, one trace per selection.

    "Base Sales" comes first, followed by the channels in descending order of
    mean contribution. The y-axis ends 20% above the tallest bar.
    """
    ordered, per_channel, base_by_selection = _collect_contributions(
        contribution_df, contribution_selections
    )
    display_names = ["Base Sales"] + ordered

    traces = []
    max_value = 0
    for idx, selection in enumerate(contribution_selections):
        values = [base_by_selection[selection]] + [
            per_channel[name][idx] for name in ordered
        ]
        max_value = max(max_value, max(values))
        traces.append(
            go.Bar(
                x=display_names,
                y=values,
                name=selection,
                text=_percent_labels(values),
                textposition="outside",
            )
        )

    layout = go.Layout(
        title="Metrics Contribution by Channel",
        xaxis=dict(title="Channel Name"),
        yaxis=dict(title="Metrics Contribution", range=[0, max_value * 1.2]),
        barmode="group",
        plot_bgcolor="white",
    )
    return go.Figure(data=traces, layout=layout)


def preprocess_and_plot(contribution_df, contribution_selections):
    """Build a waterfall chart of contributions, one trace per selection.

    Uses the same channel ordering as ``create_grouped_bar_plot``; the y-axis
    is fixed to the range 0..120.
    """
    ordered, per_channel, base_by_selection = _collect_contributions(
        contribution_df, contribution_selections
    )
    display_names = ["Base Sales"] + ordered

    fig = go.Figure()
    for idx, selection in enumerate(contribution_selections):
        values = [base_by_selection[selection]] + [
            per_channel[name][idx] for name in ordered
        ]
        fig.add_trace(
            go.Waterfall(
                orientation="v",
                measure=["relative"] * len(values),
                x=display_names,
                text=_percent_labels(values),
                textposition="outside",
                y=values,
                increasing={"marker": {"color": "green"}},
                decreasing={"marker": {"color": "red"}},
                totals={"marker": {"color": "blue"}},
                name=selection,
            )
        )

    fig.update_layout(
        title="Metrics Contribution by Channel",
        xaxis={"title": "Channel Name"},
        yaxis=dict(title="Metrics Contribution", range=[0, 100 * 1.2]),
    )
    return fig


def selection_change():
    """``st.data_editor`` callback: remember the edited row and its values."""
    edited_rows: dict = st.session_state.project_selection["edited_rows"]
    if not edited_rows:
        # Nothing was edited, so there is no row to select.
        return
    st.session_state["selected_row_index_gd_table"] = next(iter(edited_rows))
    st.session_state["gd_table"] = st.session_state["gd_table"].assign(
        selected=False
    )
    st.session_state["gd_table"].update(
        pd.DataFrame.from_dict(edited_rows, orient="index")
    )


sys.setrecursionlimit(10**6)

# Net effect of the next four lines: "temp_stdout.txt" is created/truncated
# and sys.stdout ends up pointing at the original stream again.
original_stdout = sys.stdout
sys.stdout = open("temp_stdout.txt", "w")
sys.stdout.close()
sys.stdout = original_stdout

st.set_page_config(layout="wide")
load_local_css("styles.css")
set_header()

# Default the session keys this page reads (marked TODO in the original).
if "username" not in st.session_state:
    st.session_state["username"] = None

if "project_name" not in st.session_state:
    st.session_state["project_name"] = None

if "project_dct" not in st.session_state:
    project_selection()
    st.stop()

try:
    # NOTE: pickle is only safe on files written by this application.
    with open(
        os.path.join(st.session_state["project_path"], "data_import.pkl"), "rb"
    ) as f:
        data = pickle.load(f)
    st.session_state["bin_dict"] = data["bin_dict"]
except Exception:
    # Page-level boundary: any failure to load the project data stops the page.
    st.warning('Save atleast one tuned model to proceed')
    st.stop()

st.session_state["bin_dict"]["Panel Level 1"] = st.session_state["bin_dict"].get(
    "Panel Level 1", []
)

if "gd_table" not in st.session_state:
    st.session_state["gd_table"] = pd.DataFrame()

if "username" in st.session_state and st.session_state["username"] is not None:
    # NOTE(review): the connection and cursor are not referenced again on this
    # page; confirm whether they are still needed.
    conn = sqlite3.connect(
        r"DB/User.db", check_same_thread=False
    )  # connection with sql db
    c = conn.cursor()

    if not os.path.exists(
        os.path.join(st.session_state["project_path"], "tuned_model.pkl")
    ):
        st.error("Please save a tuned model")
        st.stop()

    # Restore the keys saved by the model-tuning page when they are missing.
    saved_tuning_state = st.session_state["project_dct"]["model_tuning"].get(
        "session_state_saved", []
    )
    if saved_tuning_state != []:
        for key in ["used_response_metrics", "media_data", "bin_dict"]:
            if key not in st.session_state:
                st.session_state[key] = saved_tuning_state[key]

    media_data = st.session_state["media_data"]

    # A non-empty "Panel Level 1" entry switches the page to panel mode.
    is_panel = len(st.session_state["bin_dict"]["Panel Level 1"]) > 0
    panel_col = (
        _normalise_name(st.session_state["bin_dict"]["Panel Level 1"][0])
        if is_panel
        else None
    )
    date_col = "date"

    with open(
        os.path.join(st.session_state["project_path"], "final_df_transformed.pkl"),
        "rb",
    ) as f:
        data = pickle.load(f)
        transformed_data = data["final_df_transformed"]

    # "data_import.pkl" was already loaded above; loading it again here would
    # only overwrite the 'Panel Level 1' default set on bin_dict.

    with open(
        os.path.join(st.session_state["project_path"], "tuned_model.pkl"), "rb"
    ) as file:
        tuned_model_dict = pickle.load(file)

    feature_set_dct = {
        key.split("__")[1]: key_dict["feature_set"]
        for key, key_dict in tuned_model_dict.items()
    }
    # TODO (from the original): the part above should be modified so that the
    # feature set is fetched from the saved model.

    if "contribution_df" not in st.session_state:
        st.session_state["contribution_df"] = None

    metrics_table = metrics_df_panel(tuned_model_dict, is_panel)

    cols1 = st.columns([2, 1])
    with cols1[0]:
        st.markdown(f"**Welcome {st.session_state['username']}**")
    with cols1[1]:
        st.markdown(f"**Current Project: {st.session_state['project_name']}**")

    st.title("AI Model Results")

    st.header('Contribution Overview')

    options = [
        opt.lower()
        .replace(" ", "_")
        .replace("-", "")
        .replace(":", "")
        .replace("__", "_")
        for opt in st.session_state["used_response_metrics"]
    ]

    saved_options = st.session_state["project_dct"]["saved_model_results"].get(
        "selected_options"
    )
    if saved_options is None:
        saved_options = [options[-1]]
    # Build a filtered list instead of removing items while iterating, which
    # skipped elements and mutated the saved selection.
    # NOTE(review): default_options is not passed to the multiselect below;
    # the widget keeps using options[0] as in the original.
    default_options = [opt for opt in saved_options if opt in options]

    contribution_selections = st.multiselect(
        "Select the Response Metrics to compare contributions",
        options,
        default=options[0],
        format_func=format_display,
    )

    if is_panel:
        st.session_state["contribution_df"] = contributions_panel(tuned_model_dict)
    else:
        st.session_state["contribution_df"] = contributions_nonpanel(
            tuned_model_dict
        )

    # Grouped bar chart
    st.plotly_chart(
        create_grouped_bar_plot(
            st.session_state["contribution_df"], contribution_selections
        ),
        use_container_width=True,
    )

    ############################################ Waterfall Chart ############################################
    st.plotly_chart(
        preprocess_and_plot(
            st.session_state["contribution_df"], contribution_selections
        ),
        use_container_width=True,
    )

    st.header("Analysis of Models Result")

    # Copy before adding a column so the write does not target a slice of
    # metrics_table.
    gd_table = metrics_table.iloc[:, :-2].copy()
    gd_table["selected"] = False

    # sprint8 - by default always select 1st row
    target_column = gd_table.at[0, "Model"]

    if "selected_row_index_gd_table" not in st.session_state:
        st.session_state["selected_row_index_gd_table"] = None

    # sprint8 - always refreshed; guarding on 'gd_table' kept a stale table.
    st.session_state["gd_table"] = gd_table

    with st.container():
        table = st.data_editor(
            st.session_state["gd_table"],
            hide_index=True,
            on_change=selection_change,
            key="project_selection",
            use_container_width=True,
        )

    selected_idx = st.session_state["selected_row_index_gd_table"]
    # Ignore a remembered index that no longer exists in the current table.
    if selected_idx is not None and selected_idx in st.session_state["gd_table"].index:
        target_column = st.session_state["gd_table"].at[selected_idx, "Model"]

    selected_metrics = metrics_table[metrics_table["Model"] == target_column]
    model = selected_metrics["Model_object"].iloc[0]
    target = selected_metrics["Model"].iloc[0]

    st.header("Model Summary")
    st.write(model.summary())

    sel_dict = next(
        entry
        for key, entry in tuned_model_dict.items()
        if key.split("__")[1] == target
    )

    feature_set = sel_dict["feature_set"]
    X_train = sel_dict["X_train_tuned"]
    y_train = X_train[target]
    X_test = sel_dict["X_test_tuned"]
    y_test = X_test[target]

    if is_panel:
        random_effects = get_random_effects(media_data, panel_col, model)
        pred = mdf_predict(X_train, model, random_effects)["pred"]
        predtest = mdf_predict(X_test, model, random_effects)["pred"]
    else:
        pred = model.predict(X_train[feature_set])
        predtest = model.predict(X_test[feature_set])

    metrics_table_train, _, fig_train = plot_actual_vs_predicted(
        X_train[date_col],
        y_train,
        pred,
        model,
        target_column=target_column,
        flag=None,
        repeat_all_years=False,
        is_panel=is_panel,
    )

    metrics_table_test, _, fig_test = plot_actual_vs_predicted(
        X_test[date_col],
        y_test,
        predtest,
        model,
        target_column=target_column,
        flag=None,
        repeat_all_years=False,
        is_panel=is_panel,
    )

    metrics_table_train = metrics_table_train.set_index("Metric").transpose()
    metrics_table_train.index = ["Train"]
    metrics_table_test = metrics_table_test.set_index("Metric").transpose()
    metrics_table_test.index = ["Test"]
    overview_table = np.round(
        pd.concat([metrics_table_train, metrics_table_test]), 2
    )

    st.markdown("Result Overview")
    st.dataframe(overview_table, use_container_width=True)

    st.subheader("Actual vs Predicted Plot Train")
    st.plotly_chart(fig_train, use_container_width=True)

    st.subheader("Actual vs Predicted Plot Test")
    st.plotly_chart(fig_test, use_container_width=True)

    st.markdown("## Residual Analysis")
    columns = st.columns(2)

    # plot_residual_predicted adds columns to the frame it receives.
    Xtrain1 = X_train.copy()
    with columns[0]:
        fig = plot_residual_predicted(y_train, pred, Xtrain1)
        st.plotly_chart(fig)

    with columns[1]:
        st.empty()
        fig = qqplot(y_train, pred)
        st.plotly_chart(fig)

    with columns[0]:
        fig = residual_distribution(y_train, pred)
        st.pyplot(fig)

    update_db("6_AI_Model_Result.py")