Spaces:

DavMelchi
/

db_query

Running

File size: 7,491 Bytes

a64569a

import pandas as pd


class GsmAnalysis:
    hf_rate_coef = {
        10: 1.1,
        20: 1.2,
        40: 1.4,
        60: 1.6,
        70: 1.7,
        80: 1.8,
        99: 2.0,
        100: 1.4,
    }
    erlangB_table = {
        1: 0.0204,
        2: 0.2234,
        3: 0.6022,
        4: 1.092,
        5: 1.657,
        6: 2.276,
        7: 2.935,
        8: 3.627,
        9: 4.345,
        10: 5.084,
        11: 5.841,
        12: 6.614,
        13: 7.401,
        14: 8.2,
        15: 9.009,
        16: 9.828,
        17: 10.66,
        18: 11.49,
        19: 12.33,
        20: 13.18,
        21: 14.04,
        22: 14.9,
        23: 15.76,
        24: 16.63,
        25: 17.5,
        26: 18.38,
        27: 19.26,
        28: 20.15,
        29: 21.04,
        30: 21.93,
        31: 22.83,
        32: 23.72,
        33: 24.63,
        34: 25.53,
        35: 26.43,
        36: 27.34,
        37: 28.25,
        38: 29.17,
        39: 30.08,
        40: 31,
        41: 31.91,
        42: 32.84,
        43: 33.76,
        44: 34.68,
        45: 35.61,
        46: 36.53,
        47: 37.46,
        48: 38.39,
        49: 39.32,
        50: 40.25,
        51: 41.19,
        52: 42.12,
        53: 43.06,
        54: 44,
        55: 44.93,
        56: 45.88,
        57: 46.81,
        58: 47.75,
        59: 48.7,
        60: 49.64,
        61: 50.59,
        62: 51.53,
        63: 52.48,
        64: 53.43,
        65: 54.38,
        66: 55.32,
        67: 56.27,
        68: 57.22,
        69: 58.18,
        70: 59.13,
        71: 60.08,
        72: 61.04,
        73: 61.99,
        74: 62.94,
        75: 63.9,
        76: 64.86,
        77: 65.81,
        78: 66.77,
        79: 67.73,
        80: 68.69,
        81: 69.64,
        82: 70.61,
        83: 71.57,
        84: 72.53,
        85: 73.49,
        86: 74.45,
        87: 75.41,
        88: 76.38,
        89: 77.34,
        90: 78.3,
        91: 79.27,
        92: 80.23,
        93: 81.2,
        94: 82.16,
        95: 83.13,
        96: 84.09,
        97: 85.06,
        98: 86.03,
        99: 87,
        100: 87.97,
        101: 88.94,
        102: 89.91,
        103: 90.88,
        104: 91.85,
        105: 92.82,
        106: 93.79,
        107: 94.76,
        108: 95.73,
        109: 96.71,
        110: 97.68,
        111: 98.65,
        112: 99.63,
        113: 100.6,
        114: 101.57,
        115: 102.54,
        116: 103.52,
        117: 104.49,
        118: 105.47,
        119: 106.44,
        120: 107.42,
        121: 108.4,
        122: 109.37,
        123: 110.35,
        124: 111.32,
        125: 112.3,
        126: 113.28,
        127: 114.25,
        128: 115.23,
        129: 116.21,
        130: 117.19,
        131: 118.17,
        132: 119.15,
        133: 120.12,
        134: 121.1,
        135: 122.08,
        136: 123.07,
        137: 124.04,
        138: 125.02,
        139: 126.01341,
        140: 127.00918,
        141: 127.96752,
        142: 128.98152,
        143: 129.92152,
        144: 130.88534,
        145: 131.96461,
        146: 132.89897,
        147: 133.86373,
        148: 134.82569,
        149: 135.76295,
        150: 136.82988,
        151: 137.79,
        152: 138.77,
        153: 139.75,
        154: 140.74,
        155: 141.72,
        156: 142.7,
        157: 143.69,
        158: 144.67,
        159: 145.66,
        160: 146.64,
        161: 147.63,
        162: 148.61,
        163: 149.6,
        164: 150.58,
        165: 151.57,
        166: 152.55,
        167: 153.54,
        168: 154.53,
        169: 155.51,
        170: 156.5,
        171: 157.48,
        172: 158.47,
        173: 159.46,
        174: 160.44,
        175: 161.43,
        176: 162.42,
        177: 163.41,
        178: 164.39,
        179: 165.38,
        180: 166.37,
        181: 167.36,
        182: 168.35,
        183: 169.33,
        184: 170.32,
        185: 171.31,
        186: 172.3,
        187: 173.29,
        188: 174.28,
        189: 175.27,
        190: 176.26,
        191: 177.25,
        192: 178.24,
        193: 179.23,
        194: 180.22,
        195: 181.21,
        196: 182.2,
        197: 183.19,
        198: 184.18,
        199: 185.17,
        200: 186.16,
    }


def kpi_naming_cleaning(df: pd.DataFrame) -> pd.DataFrame:
    """
    Clean KPI column names by replacing special characters and standardizing format.

    Args:
        df: DataFrame with KPI column names to clean

    Returns:
        DataFrame with cleaned column names
    """
    name_df = df.copy()
    name_df.columns = name_df.columns.str.replace("[ /(),-.']", "_", regex=True)
    name_df.columns = name_df.columns.str.replace("___", "_")
    name_df.columns = name_df.columns.str.replace("__", "_")
    name_df.columns = name_df.columns.str.replace("%", "perc")
    name_df.columns = name_df.columns.str.rstrip("_")
    return name_df


def create_daily_date(df: pd.DataFrame) -> pd.DataFrame:
    """
    Create a daily date column from PERIOD_START_TIME and drop unnecessary columns.

    Args:
        df: DataFrame containing PERIOD_START_TIME column

    Returns:
        DataFrame with new date column and unnecessary columns removed
    """
    date_df = df.copy()
    date_df[["mois", "jour", "annee"]] = date_df["PERIOD_START_TIME"].str.split(
        ".", expand=True
    )
    date_df["date"] = date_df["annee"] + "-" + date_df["mois"] + "-" + date_df["jour"]
    # Remove unnecessary columns
    date_df = date_df.drop(["annee", "mois", "jour", "PERIOD_START_TIME"], axis=1)
    return date_df


def create_hourly_date(df: pd.DataFrame):
    date_df = df
    date_df[["date_t", "hour"]] = date_df["PERIOD_START_TIME"].str.split(
        " ", expand=True
    )
    date_df[["mois", "jour", "annee"]] = date_df["date_t"].str.split(".", expand=True)
    date_df["datetime"] = (
        date_df["annee"]
        + "-"
        + date_df["mois"]
        + "-"
        + date_df["jour"]
        + " "
        + date_df["hour"]
    )

    date_df["date"] = date_df["annee"] + "-" + date_df["mois"] + "-" + date_df["jour"]

    # Remove columns 'année' and 'mois'
    date_df = date_df.drop(
        ["annee", "mois", "jour", "date_t", "PERIOD_START_TIME"], axis=1
    )
    return date_df


def create_dfs_per_kpi(
    df: pd.DataFrame = None,
    pivot_date_column: str = "date",
    pivot_name_column: str = "BTS_name",
    kpi_columns_from: int = None,
) -> pd.DataFrame:
    """
    Create pivoted DataFrames for each KPI and perform analysis.

    Args:
        df: DataFrame containing KPI data
    Returns:
        DataFrame with combined analysis results
    """
    kpi_columns = df.columns[kpi_columns_from:]
    # print(kpi_columns)
    pivoted_kpi_dfs = {}

    # Loop through each KPI and create pivoted DataFrames
    for kpi in kpi_columns:
        temp_df = df[[pivot_date_column, pivot_name_column, kpi]].copy()
        # remove duplicates
        temp_df = temp_df.drop_duplicates(
            subset=[pivot_name_column, pivot_date_column], keep="first"
        )
        temp_df = temp_df.reset_index()
        # Pivot the dataframe
        pivot_df = temp_df.pivot(
            index=pivot_name_column, columns=pivot_date_column, values=kpi
        )
        # print(pivot_df)
        pivot_df.columns = pd.MultiIndex.from_product([[kpi], pivot_df.columns])
        pivot_df.columns.names = ["KPI", "Date"]

        # Store in dictionary with KPI name as key
        pivoted_kpi_dfs[kpi] = pivot_df

    return pivoted_kpi_dfs