BlendMMM committed on
Commit 7275079 · verified · 1 Parent(s): afc7982

Delete pages/2_Transformations.py

Files changed (1)
pages/2_Transformations.py +0 -522
pages/2_Transformations.py DELETED
@@ -1,522 +0,0 @@
# Importing necessary libraries
import streamlit as st

st.set_page_config(
    page_title="Transformations",
    page_icon=":shark:",
    layout="wide",
    initial_sidebar_state="collapsed",
)

import pickle
import numpy as np
import pandas as pd
from utilities import set_header, load_local_css
import streamlit_authenticator as stauth
import yaml
from yaml import SafeLoader

load_local_css("styles.css")
set_header()

# Persist session state across page switches
for k, v in st.session_state.items():
    if k not in ["logout", "login", "config"] and not k.startswith(
        "FormSubmitter"
    ):
        st.session_state[k] = v

# Check for authentication status
with open("config.yaml") as file:
    config = yaml.load(file, Loader=SafeLoader)
st.session_state["config"] = config
authenticator = stauth.Authenticate(
    config["credentials"],
    config["cookie"]["name"],
    config["cookie"]["key"],
    config["cookie"]["expiry_days"],
    config["preauthorized"],
)
st.session_state["authenticator"] = authenticator
name, authentication_status, username = authenticator.login("Login", "main")
auth_status = st.session_state.get("authentication_status")

if auth_status:
    authenticator.logout("Logout", "main")
    is_state_initialized = st.session_state.get("initialized", False)

    if not is_state_initialized:
        if "session_name" not in st.session_state:
            st.session_state["session_name"] = None

    # Deserialize and load the objects from the pickle file
    with open("data_import.pkl", "rb") as f:
        data = pickle.load(f)

    # Accessing the loaded objects
    final_df_loaded = data["final_df"]
    bin_dict_loaded = data["bin_dict"]

    # Initialize session state
    if "transformed_columns_dict" not in st.session_state:
        st.session_state["transformed_columns_dict"] = {}  # Default empty dictionary

    if "final_df" not in st.session_state:
        st.session_state["final_df"] = final_df_loaded  # Default as original dataframe

    if "summary_string" not in st.session_state:
        st.session_state["summary_string"] = None  # Default as None

    # Extract original columns for specified categories
    original_columns = {
        category: bin_dict_loaded[category]
        for category in ["Media", "Internal", "Exogenous"]
        if category in bin_dict_loaded
    }

    # Retrieve panel columns
    panel_1 = bin_dict_loaded.get("Panel Level 1")
    panel_2 = bin_dict_loaded.get("Panel Level 2")

    # # For testing on non panel level
    # final_df_loaded = final_df_loaded.drop("Panel_1", axis=1)
    # final_df_loaded = final_df_loaded.groupby("date").mean().reset_index()
    # panel_1 = None

    # Apply transformations on panel level
    st.write("")
    if panel_1:
        panel = panel_1 + panel_2 if panel_2 else panel_1
    else:
        panel = []

    # Function to build transformation widgets
    def transformation_widgets(category, transform_params, date_granularity):
        # Transformation options
        transformation_options = {
            "Media": ["Lag", "Moving Average", "Saturation", "Power", "Adstock"],
            "Internal": ["Lead", "Lag", "Moving Average"],
            "Exogenous": ["Lead", "Lag", "Moving Average"],
        }

        with st.expander(f"{category} Transformations"):
            # Let users select which transformations to apply
            transformations_to_apply = st.multiselect(
                "Select transformations to apply",
                options=transformation_options[category],
                default=[],
                key=f"transformation_{category}",
            )

            # Determine the number of transformations to put in each column
            transformations_per_column = (
                len(transformations_to_apply) // 2 + len(transformations_to_apply) % 2
            )

            # Create two columns
            col1, col2 = st.columns(2)

            # Assign transformations to each column
            transformations_col1 = transformations_to_apply[:transformations_per_column]
            transformations_col2 = transformations_to_apply[transformations_per_column:]

            # Define a helper function to create widgets for each transformation
            def create_transformation_widgets(column, transformations):
                with column:
                    for transformation in transformations:
                        # Conditionally create widgets for selected transformations
                        if transformation == "Lead":
                            st.markdown(f"**Lead ({date_granularity})**")
                            lead = st.slider(
                                "Lead periods",
                                1,
                                10,
                                (1, 2),
                                1,
                                key=f"lead_{category}",
                                label_visibility="collapsed",
                            )
                            start = lead[0]
                            end = lead[1]
                            step = 1
                            transform_params[category]["Lead"] = np.arange(
                                start, end + step, step
                            )

                        if transformation == "Lag":
                            st.markdown(f"**Lag ({date_granularity})**")
                            lag = st.slider(
                                "Lag periods",
                                1,
                                10,
                                (1, 2),
                                1,
                                key=f"lag_{category}",
                                label_visibility="collapsed",
                            )
                            start = lag[0]
                            end = lag[1]
                            step = 1
                            transform_params[category]["Lag"] = np.arange(
                                start, end + step, step
                            )

                        if transformation == "Moving Average":
                            st.markdown(f"**Moving Average ({date_granularity})**")
                            window = st.slider(
                                "Window size for Moving Average",
                                1,
                                10,
                                (1, 2),
                                1,
                                key=f"ma_{category}",
                                label_visibility="collapsed",
                            )
                            start = window[0]
                            end = window[1]
                            step = 1
                            transform_params[category]["Moving Average"] = np.arange(
                                start, end + step, step
                            )

                        if transformation == "Saturation":
                            st.markdown("**Saturation (%)**")
                            saturation_point = st.slider(
                                "Saturation Percentage",
                                0,
                                100,
                                (10, 20),
                                10,
                                key=f"sat_{category}",
                                label_visibility="collapsed",
                            )
                            start = saturation_point[0]
                            end = saturation_point[1]
                            step = 10
                            transform_params[category]["Saturation"] = np.arange(
                                start, end + step, step
                            )

                        if transformation == "Power":
                            st.markdown("**Power**")
                            power = st.slider(
                                "Power",
                                0,
                                10,
                                (2, 4),
                                1,
                                key=f"power_{category}",
                                label_visibility="collapsed",
                            )
                            start = power[0]
                            end = power[1]
                            step = 1
                            transform_params[category]["Power"] = np.arange(
                                start, end + step, step
                            )

                        if transformation == "Adstock":
                            st.markdown("**Adstock**")
                            rate = st.slider(
                                f"Factor ({category})",
                                0.0,
                                1.0,
                                (0.5, 0.7),
                                0.05,
                                key=f"adstock_{category}",
                                label_visibility="collapsed",
                            )
                            start = rate[0]
                            end = rate[1]
                            step = 0.05
                            adstock_range = [
                                round(a, 3) for a in np.arange(start, end + step, step)
                            ]
                            transform_params[category]["Adstock"] = adstock_range

            # Create widgets in each column
            create_transformation_widgets(col1, transformations_col1)
            create_transformation_widgets(col2, transformations_col2)

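    # For illustration (hypothetical selection): choosing "Lag" over the range
    # (1, 2) and "Adstock" over (0.5, 0.6) for Media would leave transform_params
    # as roughly {"Media": {"Lag": array([1, 2]), "Adstock": [0.5, 0.55, 0.6]}, ...}
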
    # Function to apply Lag transformation
    def apply_lag(df, lag):
        return df.shift(lag)

    # Function to apply Lead transformation
    def apply_lead(df, lead):
        return df.shift(-lead)

    # Function to apply Moving Average transformation
    def apply_moving_average(df, window_size):
        return df.rolling(window=window_size).mean()

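    # Quick sanity check of the shift-based helpers (illustrative values only):
    #   apply_lag(pd.Series([1, 2, 3]), 1)            -> [NaN, 1.0, 2.0]
    #   apply_lead(pd.Series([1, 2, 3]), 1)           -> [2.0, 3.0, NaN]
    #   apply_moving_average(pd.Series([1, 2, 3]), 2) -> [NaN, 1.5, 2.5]
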
    # Function to apply Saturation transformation
    def apply_saturation(df, saturation_percent_100):
        # Convert saturation percentage from 100-based to fraction
        saturation_percent = saturation_percent_100 / 100.0

        # Calculate saturation point and steepness
        column_max = df.max()
        column_min = df.min()
        saturation_point = (column_min + column_max) / 2

        numerator = np.log(
            (1 / (saturation_percent if saturation_percent != 1 else 1 - 1e-9)) - 1
        )
        denominator = np.log(saturation_point / max(column_max, 1e-9))

        # Avoid division by zero while preserving the sign of the denominator
        # (clamping with max() alone would flip negative denominators)
        if abs(denominator) < 1e-9:
            denominator = -1e-9
        steepness = numerator / denominator

        # Apply the saturation transformation
        transformed_series = df.apply(
            lambda x: (1 / (1 + (saturation_point / x) ** steepness)) * x
        )

        return transformed_series

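    # As parameterised above, the logistic multiplier
    # 1 / (1 + (saturation_point / x) ** steepness) works out to 0.5 at the
    # midpoint of the column's range and to saturation_percent at the column
    # maximum, so a higher saturation percentage dampens the top of the curve less.
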
    # Function to apply Power transformation
    def apply_power(df, power):
        return df**power

    # Function to apply Adstock transformation
    def apply_adstock(df, factor):
        x = 0
        # Use the walrus operator to update x iteratively with the Adstock formula
        adstock_var = [x := x * factor + v for v in df]
        ans = pd.Series(adstock_var, index=df.index)
        return ans

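    # Worked example of the geometric decay (illustrative values): with a
    # carryover factor of 0.5, a single burst of 100 decays by half each period:
    #   apply_adstock(pd.Series([100, 0, 0, 0]), 0.5) -> [100.0, 50.0, 25.0, 12.5]
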
    # Function to generate transformed column names
    @st.cache_resource(show_spinner=False)
    def generate_transformed_columns(original_columns, transform_params):
        transformed_columns, summary = {}, {}

        for category, columns in original_columns.items():
            for column in columns:
                transformed_columns[column] = []
                # List to hold transformation details for the current column
                summary_details = []

                if category in transform_params:
                    for transformation, values in transform_params[category].items():
                        # Generate transformed column names for each value
                        for value in values:
                            transformed_name = f"{column}@{transformation}_{value}"
                            transformed_columns[column].append(transformed_name)

                        # Format the values list as a string with commas and "and" before the last item
                        if len(values) > 1:
                            formatted_values = (
                                ", ".join(map(str, values[:-1])) + " and " + str(values[-1])
                            )
                        else:
                            formatted_values = str(values[0])

                        # Add transformation details
                        summary_details.append(f"{transformation} ({formatted_values})")

                # Only add to summary if there are transformation details for the column
                if summary_details:
                    formatted_summary = "⮕ ".join(summary_details)
                    # Use <strong> tags to make the column name bold
                    summary[column] = f"<strong>{column}</strong>: {formatted_summary}"

        # Generate a comprehensive summary string for all columns
        summary_items = [
            f"{idx + 1}. {details}" for idx, details in enumerate(summary.values())
        ]

        summary_string = "\n".join(summary_items)

        return transformed_columns, summary_string

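    # Naming convention produced above (for a hypothetical column "tv_spend"):
    # applying Adstock over [0.5, 0.55] yields "tv_spend@Adstock_0.5" and
    # "tv_spend@Adstock_0.55", with "@" separating the source column from the
    # transformation name and its parameter value.
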
    # Function to apply transformations to DataFrame slices based on specified categories and parameters
    @st.cache_resource(show_spinner=False)
    def apply_category_transformations(df, bin_dict, transform_params, panel):
        # Dictionary for function mapping
        transformation_functions = {
            "Lead": apply_lead,
            "Lag": apply_lag,
            "Moving Average": apply_moving_average,
            "Saturation": apply_saturation,
            "Power": apply_power,
            "Adstock": apply_adstock,
        }

        # Initialize category_df as an empty DataFrame
        category_df = pd.DataFrame()

        # Iterate through each category specified in transform_params
        for category in ["Media", "Internal", "Exogenous"]:
            if (
                category not in transform_params
                or category not in bin_dict
                or not transform_params[category]
            ):
                continue  # Skip categories without transformations

            # Slice the DataFrame based on the columns specified in bin_dict for the current category
            df_slice = df[bin_dict[category] + panel]

            # Iterate through each transformation and its parameters for the current category
            for transformation, parameters in transform_params[category].items():
                transformation_function = transformation_functions[transformation]

                # Check if there is panel data to group by
                if len(panel) > 0:
                    # Apply the transformation to each group
                    category_df = pd.concat(
                        [
                            df_slice.groupby(panel)
                            .transform(transformation_function, p)
                            .add_suffix(f"@{transformation}_{p}")
                            for p in parameters
                        ],
                        axis=1,
                    )

                    # Replace all NaN or null values in category_df with 0
                    category_df.fillna(0, inplace=True)

                    # Update df_slice
                    df_slice = pd.concat(
                        [df[panel], category_df],
                        axis=1,
                    )

                else:
                    for p in parameters:
                        # Apply the transformation function to each column
                        temp_df = df_slice.apply(
                            lambda x: transformation_function(x, p), axis=0
                        ).rename(lambda x: f"{x}@{transformation}_{p}", axis="columns")
                        # Concatenate the transformed DataFrame slice to the category DataFrame
                        category_df = pd.concat([category_df, temp_df], axis=1)

                    # Replace all NaN or null values in category_df with 0
                    category_df.fillna(0, inplace=True)

                    # Update df_slice
                    df_slice = pd.concat(
                        [df[panel], category_df],
                        axis=1,
                    )

        # If transformations were applied, concatenate category_df with the original DataFrame
        if not category_df.empty:
            final_df = pd.concat([df, category_df], axis=1)
        else:
            # If no transformations were applied, use the original DataFrame
            final_df = df

        return final_df

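    # Note on chaining: because df_slice is rebuilt from category_df after each
    # transformation, a second selected transformation is applied on top of the
    # columns produced by the first (e.g. Adstock of a lagged column), not on
    # the raw series.
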
    # Function to infer the granularity of the date column in a DataFrame
    @st.cache_resource(show_spinner=False)
    def infer_date_granularity(df):
        # Find the most common difference between consecutive dates
        common_freq = pd.Series(df["date"].unique()).diff().dt.days.dropna().mode()[0]

        # Map the most common difference to a granularity
        if common_freq == 1:
            return "daily"
        elif common_freq == 7:
            return "weekly"
        elif 28 <= common_freq <= 31:
            return "monthly"
        else:
            return "irregular"

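    # Example: a date column sampled every 7 days has a modal difference of 7,
    # so infer_date_granularity returns "weekly".
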
    #########################################################################################################################################################
    # User input for transformations
    #########################################################################################################################################################

    # Infer date granularity
    date_granularity = infer_date_granularity(final_df_loaded)

    # Initialize the main dictionary to store the transformation parameters for each category
    transform_params = {"Media": {}, "Internal": {}, "Exogenous": {}}

    # User input for transformations
    st.markdown("### Select Transformations to Apply")
    for category in ["Media", "Internal", "Exogenous"]:
        # Skip Internal
        if category == "Internal":
            continue

        transformation_widgets(category, transform_params, date_granularity)

    #########################################################################################################################################################
    # Apply transformations
    #########################################################################################################################################################

    # Apply category-based transformations to the DataFrame
    if st.button("Accept and Proceed", use_container_width=True):
        with st.spinner("Applying transformations..."):
            final_df = apply_category_transformations(
                final_df_loaded, bin_dict_loaded, transform_params, panel
            )

            # Generate a dictionary mapping original column names to lists of transformed column names
            transformed_columns_dict, summary_string = generate_transformed_columns(
                original_columns, transform_params
            )

            # Store the transformed DataFrame, column mapping, and summary in session state
            st.session_state["final_df"] = final_df
            st.session_state["transformed_columns_dict"] = transformed_columns_dict
            st.session_state["summary_string"] = summary_string

    #########################################################################################################################################################
    # Display the transformed DataFrame and summary
    #########################################################################################################################################################

    # Display the transformed DataFrame in the Streamlit app
    st.markdown("### Transformed DataFrame")
    st.dataframe(st.session_state["final_df"], hide_index=True)

    # Total rows and columns
    total_rows, total_columns = st.session_state["final_df"].shape
    st.markdown(
        f"<p style='text-align: justify;'>The transformed DataFrame contains <strong>{total_rows}</strong> rows and <strong>{total_columns}</strong> columns.</p>",
        unsafe_allow_html=True,
    )

    # Display the summary of transformations as markdown
    if st.session_state["summary_string"]:
        with st.expander("Summary of Transformations"):
            st.markdown("### Summary of Transformations")
            st.markdown(st.session_state["summary_string"], unsafe_allow_html=True)

    @st.cache_resource(show_spinner=False)
    def save_to_pickle(file_path, final_df):
        # Open the file in write-binary mode and dump the objects
        with open(file_path, "wb") as f:
            pickle.dump({"final_df_transformed": final_df}, f)
        # Data is now saved to file

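    # Reading the saved object back later mirrors the data_import.pkl load above:
    #   with open("final_df_transformed.pkl", "rb") as f:
    #       final_df_transformed = pickle.load(f)["final_df_transformed"]
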
    if st.button("Accept and Save", use_container_width=True):
        save_to_pickle("final_df_transformed.pkl", st.session_state["final_df"])
        st.toast("💾 Saved Successfully!")