File size: 8,794 Bytes
94bbd2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8f35613
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94bbd2b
8f35613
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
673066e
8f35613
 
94bbd2b
 
 
8f35613
94bbd2b
8f35613
 
94bbd2b
8f35613
94bbd2b
8f35613
 
 
94bbd2b
 
8f35613
 
 
 
 
 
94bbd2b
8f35613
 
 
94bbd2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import pickle
import statsmodels.api as sm
import numpy as np
from sklearn.metrics import mean_absolute_error, r2_score,mean_absolute_percentage_error  
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from statsmodels.stats.outliers_influence import variance_inflation_factor
from plotly.subplots import make_subplots

# Switch off Streamlit's 'deprecation.showPyplotGlobalUse' option at import time.
# NOTE(review): presumably this silences the warning shown when st.pyplot() is
# given the global pyplot state instead of an explicit figure (residual_distribution
# below returns the pyplot module) -- confirm the option still exists in the
# Streamlit version being deployed.
st.set_option('deprecation.showPyplotGlobalUse', False)
from datetime import datetime
import seaborn as sns

def calculate_discount(promo_price_series, non_promo_price_series, *, window_size=4, min_discount=5):
    """Derive a base price and promo discount percentages from two price series.

    The base price is the rolling mean of the non-promo price. The raw
    discount is how far the promo price sits below that base price, expressed
    in percent. Raw discounts smaller than ``min_discount`` are replaced by 0
    in the final series.

    Args:
        promo_price_series: pandas Series of promo prices.
        non_promo_price_series: pandas Series of non-promo prices.
        window_size: Number of observations in the rolling mean used for the
            base price. Defaults to 4 (the original hard-coded value).
        min_discount: Smallest raw discount, in percent, that is kept in the
            final series. Defaults to 5 (the original hard-coded value).

    Returns:
        Tuple ``(base_price, discount_raw_series, discount_final_series)``.
        The first ``window_size - 1`` base prices are NaN because the rolling
        window is not yet full; the raw discount is NaN there and the final
        discount is 0 (a NaN never satisfies the ``>=`` test).
    """
    base_price = non_promo_price_series.rolling(window=window_size).mean()

    # Percentage by which the promo price undercuts the base price.
    discount_raw_series = (1 - promo_price_series / base_price) * 100

    # Keep only discounts that reach the threshold; everything else becomes 0.
    discount_final_series = discount_raw_series.where(discount_raw_series >= min_discount, 0)

    return base_price, discount_raw_series, discount_final_series


def create_dual_axis_line_chart(date_series, promo_price_series, non_promo_price_series, base_price_series, discount_series):
    """Build a Plotly figure with price lines on the left axis and discount on the right.

    Args:
        date_series: x-values shared by every line.
        promo_price_series: y-values for the 'Promo Price' line.
        non_promo_price_series: y-values for the 'Non-Promo Price' line.
        base_price_series: y-values for the 'Base Price' line.
        discount_series: y-values for the 'Discount' line (secondary axis).

    Returns:
        A ``go.Figure`` holding the four scatter traces and the dual-axis layout.
    """
    # (y-values, legend label, axis id) for each line, in legend order.
    line_specs = (
        (promo_price_series, 'Promo Price', 'y1'),
        (non_promo_price_series, 'Non-Promo Price', 'y1'),
        (base_price_series, 'Base Price', 'y1'),
        (discount_series, 'Discount', 'y2'),
    )
    traces = [
        go.Scatter(x=date_series, y=values, name=label, yaxis=axis_id)
        for values, label, axis_id in line_specs
    ]

    price_axis = {'title': 'Price', 'side': 'left'}
    # The discount axis is drawn over the price axis, on the right, without its own grid.
    discount_axis = {
        'title': 'Discount',
        'side': 'right',
        'overlaying': 'y',
        'showgrid': False,
    }

    layout = go.Layout(
        title='Price and Discount Over Time',
        yaxis=price_axis,
        yaxis2=discount_axis,
        xaxis={'title': 'Date'},
    )

    return go.Figure(data=traces, layout=layout)


def to_percentage(value):
    """Format a fraction as a percentage string with one decimal, e.g. 0.5 -> '50.0%'."""
    scaled = value * 100
    return '{:.1f}%'.format(scaled)

def plot_actual_vs_predicted(date, y, predicted_values, model, target_column=None, flag=None, repeat_all_years=False, is_panel=False):
    """Plot actual vs. predicted values over time and compute fit metrics.

    Args:
        date: Date of each observation (x-axis values).
        y: Actual target values.
        predicted_values: Model predictions for the same observations.
        model: Fitted model object; only ``model.fe_params`` is read, and its
            length is used as the predictor count in the adjusted R-squared.
        target_column: Title for the primary y-axis.
        flag: Optional ``(start_date, end_date)`` pair. When truthy, a 0/1
            indicator line named 'Flag' is drawn on a secondary y-axis.
            Each bound may be anything ``pd.Timestamp`` accepts.
        repeat_all_years: When True the indicator is 1 for every date whose
            ISO week number falls inside the ISO-week window of the flag,
            regardless of year (a window that crosses New Year wraps around).
            When False it is 1 only for dates inside the inclusive
            ``[start_date, end_date]`` range.
        is_panel: When True, actual and predicted values are summed per date
            before plotting. The metrics are still computed on the
            un-aggregated ``y`` and ``predicted_values``.

    Returns:
        Tuple ``(metrics_table, line_values, fig)``: a DataFrame with MAPE,
        R-squared and adjusted R-squared; the 0/1 flag list (one entry per
        element of ``date``, empty when no flag is given); and the Plotly
        figure.
    """
    # A secondary y-axis is only set up when a flag was supplied.
    if flag is not None:
        fig = make_subplots(specs=[[{"secondary_y": True}]])
    else:
        fig = go.Figure()

    if is_panel:
        df = pd.DataFrame()
        df['date'] = date
        df['Actual'] = y
        df['Predicted'] = predicted_values
        df_agg = df.groupby('date').agg({'Actual': 'sum', 'Predicted': 'sum'}).reset_index()
        df_agg.columns = ['date', 'Actual', 'Predicted']
        assert len(df_agg) == pd.Series(date).nunique()
        plot_dates, plot_actual, plot_predicted = df_agg['date'], df_agg['Actual'], df_agg['Predicted']
    else:
        plot_dates, plot_actual, plot_predicted = date, y, predicted_values

    fig.add_trace(go.Scatter(x=plot_dates, y=plot_actual, mode='lines', name='Actual', line=dict(color='#08083B')))
    fig.add_trace(go.Scatter(x=plot_dates, y=plot_predicted, mode='lines', name='Predicted', line=dict(color='#11B6BD')))

    line_values = []
    if flag:
        flag_start = pd.Timestamp(flag[0])
        flag_end = pd.Timestamp(flag[1])

        if repeat_all_years:
            # Use the same (ISO) week numbering for the flag bounds and for the
            # data; mixing strftime("%U") with Timestamp.week shifts the window.
            start_week, end_week = flag_start.week, flag_end.week
            if start_week <= end_week:
                def in_flag(ts):
                    return start_week <= ts.week <= end_week
            else:
                # Window crosses New Year, e.g. week 51 through week 2.
                def in_flag(ts):
                    return ts.week >= start_week or ts.week <= end_week
        else:
            def in_flag(ts):
                return flag_start <= ts <= flag_end

        line_values = [1 if in_flag(pd.Timestamp(x)) else 0 for x in pd.Series(date)]

        # Flags live on the secondary axis with height 1 so they do not distort the primary scale.
        fig.add_trace(
            go.Scatter(x=date, y=line_values, mode='lines', name='Flag', line=dict(color='#FF5733')),
            secondary_y=True,
        )

    # Calculate MAPE
    mape = mean_absolute_percentage_error(y, predicted_values)

    # Adjusted R-squared, with len(model.fe_params) as the number of predictors.
    r2 = r2_score(y, predicted_values)
    adjr2 = 1 - (1 - r2) * (len(y) - 1) / (len(y) - len(model.fe_params) - 1)

    # Create a table to display the metrics
    metrics_table = pd.DataFrame({
        'Metric': ['MAPE', 'R-squared', 'AdjR-squared'],
        'Value': [mape, r2, adjr2]
    })

    fig.update_layout(
        xaxis=dict(title='Date'),
        yaxis=dict(title=target_column),
        xaxis_tickangle=-30
    )
    fig.add_annotation(
        text=f"MAPE: {mape*100:0.1f}%,  Adjr2: {adjr2 *100:.1f}%",
        xref="paper",
        yref="paper",
        x=0.95,  # Position is in paper coordinates; y > 1 places the text above the plot area
        y=1.2,
        showarrow=False,
    )
    return metrics_table, line_values, fig

def plot_residual_predicted(actual, predicted, df):
    """Scatter the standardized residuals against the predicted values.

    Args:
        actual: Observed target values.
        predicted: Model predictions; used to compute the residuals and as
            the x-values of the scatter.
        df: DataFrame that is copied and extended with 'Residuals' and
            'StdResidual' columns; the caller's frame is not modified.

    Returns:
        A 600x400 Plotly figure with a dashed line at 0 and solid red lines
        at +2 and -2 standardized residuals.
    """
    plot_df = df.copy()

    # NOTE(review): this subtraction (and the column assignment) align on the
    # pandas index; if `actual`/`df` do not carry a default 0..n-1 index while
    # `predicted` is a plain array, rows may pair up unexpectedly -- verify callers.
    plot_df['Residuals'] = actual - pd.Series(predicted)
    resid = plot_df['Residuals']
    plot_df['StdResidual'] = (resid - resid.mean()) / resid.std()

    fig = px.scatter(
        plot_df,
        x=predicted,
        y='StdResidual',
        opacity=0.5,
        color_discrete_sequence=["#11B6BD"],
    )

    # Reference lines: zero, and the +/-2 standard-deviation band.
    reference_lines = (
        dict(y=0, line_dash="dash", line_color="darkorange"),
        dict(y=2, line_color="red"),
        dict(y=-2, line_color="red"),
    )
    for line_kwargs in reference_lines:
        fig.add_hline(**line_kwargs)

    fig.update_xaxes(title='Predicted')
    fig.update_yaxes(title='Standardized Residuals (Actual - Predicted)')

    # Fixed size so this figure lines up with the other residual plots.
    fig.update_layout(title='2.3.1 Residuals over Predicted Values', autosize=False, width=600, height=400)

    return fig

def residual_distribution(actual, predicted):
    """Draw a histogram with a KDE overlay of the residuals on a new pyplot figure.

    Args:
        actual: Observed target values.
        predicted: Model predictions.

    Returns:
        The ``matplotlib.pyplot`` module itself (not a Figure object), with
        the freshly drawn 6x4 figure as its current figure.
    """
    resid = actual - pd.Series(predicted)

    sns.set(style="whitegrid")
    plt.figure(figsize=(6, 4))
    sns.histplot(resid, kde=True, color="#11B6BD")

    # NOTE(review): the y-axis is labelled as a density, but histplot is called
    # without an explicit `stat`; confirm the label matches what is plotted.
    plt.ylabel('Probability Density')
    plt.xlabel('Residuals')
    plt.title('2.3.3 Distribution of Residuals')

    return plt


def qqplot(actual, predicted):
    """Build a Plotly QQ plot of the standardized residuals.

    The residuals ``actual - predicted`` are standardized by their own mean
    and standard deviation; their sample quantiles are plotted against the
    theoretical quantiles reported by ``sm.ProbPlot``.

    Args:
        actual: Observed target values.
        predicted: Model predictions.

    Returns:
        A 600x400 Plotly figure with the quantile markers and a red
        reference line from (-2, -2) to (2, 2).
    """
    # NOTE(review): the subtraction aligns on the pandas index; if `actual`
    # has a non-default index and `predicted` is a plain array, verify that
    # rows still pair up as intended.
    residuals = actual - pd.Series(predicted)
    standardized = (residuals - residuals.mean()) / residuals.std()

    # Build the probability plot once and read both quantile arrays from it.
    prob_plot = sm.ProbPlot(standardized)

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=prob_plot.theoretical_quantiles,
                             y=prob_plot.sample_quantiles,
                             mode='markers',
                             marker=dict(size=5, color="#11B6BD"),
                             name='QQ Plot'))

    # 45-degree reference line; its extent is fixed at [-2, 2] rather than
    # derived from the data range.
    diagonal_line = go.Scatter(
        x=[-2, 2],
        y=[-2, 2],
        mode='lines',
        line=dict(color='red'),
        name=' '
    )
    fig.add_trace(diagonal_line)

    # Customize the layout
    fig.update_layout(title='2.3.2 QQ Plot of Residuals', title_x=0.5, autosize=False, width=600, height=400,
                      xaxis_title='Theoretical Quantiles', yaxis_title='Sample Quantiles')

    return fig