Spaces:

DavMelchi
/

db_query

Sleeping

App Files Files Community

DavMelchi committed on May 24

Commit

d1de5db

1 Parent(s): 5a8534e

Lte capacity V2

Browse files

Files changed (4) hide show

apps/kpi_analysis/lte_capacity.py +45 -6
process_kpi/process_lte_capacity.py +84 -12
utils/convert_to_excel.py +31 -9
utils/kpi_analysis_utils.py +13 -8

apps/kpi_analysis/lte_capacity.py CHANGED Viewed

@@ -34,6 +34,7 @@ with file2:
 # Parameters
 param_col1, param_col2 = st.columns(2)
 param_col3, param_col4 = st.columns(2)
 with param_col1:
     num_last_days = st.number_input(
@@ -55,12 +56,23 @@ with param_col4:
         "PRB usage threshold (%)", value=80.0, min_value=0.0, max_value=100.0
     )
-prb_diff_between_cells = st.number_input(
-    "Maximum PRB usage difference between cells (%)",
-    value=20.0,
-    min_value=0.0,
-    max_value=100.0,
-)
 if uploaded_dump is not None and uploaded_bh_report is not None:
     if st.button("Analyze Data", type="primary"):
@@ -73,6 +85,7 @@ if uploaded_dump is not None and uploaded_bh_report is not None:
                 availability_threshold=availability_threshold,
                 prb_usage_threshold=prb_usage_threshold,
                 prb_diff_between_cells_threshold=prb_diff_between_cells,
             )
         if results is not None:
             bh_report: pd.DataFrame = results[0]
@@ -205,3 +218,29 @@ if uploaded_dump is not None and uploaded_bh_report is not None:
                     texttemplate="%{value}", textfont_size=15, textposition="outside"
                 )
                 st.plotly_chart(fig, use_container_width=True)

 # Parameters
 param_col1, param_col2 = st.columns(2)
 param_col3, param_col4 = st.columns(2)
+param_col5, param_col6 = st.columns(2)
 with param_col1:
     num_last_days = st.number_input(
         "PRB usage threshold (%)", value=80.0, min_value=0.0, max_value=100.0
     )
+with param_col5:
+    prb_diff_between_cells = st.number_input(
+        "Maximum PRB usage difference between cells (%)",
+        value=20.0,
+        min_value=0.0,
+        max_value=100.0,
+    )
+with param_col6:
+    #     DL PRB Util p TTI Lev_10
+    # E-UTRAN Avg PRB usage per TTI DL
+    main_prb_to_use = st.selectbox(
+        "Main PRB to use",
+        ["DL PRB Util p TTI Lev_10", "E-UTRAN Avg PRB usage per TTI DL"],
+        index=1,
+    )
 if uploaded_dump is not None and uploaded_bh_report is not None:
     if st.button("Analyze Data", type="primary"):
                 availability_threshold=availability_threshold,
                 prb_usage_threshold=prb_usage_threshold,
                 prb_diff_between_cells_threshold=prb_diff_between_cells,
+                main_prb_to_use=main_prb_to_use,
             )
         if results is not None:
             bh_report: pd.DataFrame = results[0]
                     texttemplate="%{value}", textfont_size=15, textposition="outside"
                 )
                 st.plotly_chart(fig, use_container_width=True)
+            # Create a scatter_map plot from the code, Longitude, Latitude and final_comments columns
+            st.markdown("***")
+            st.markdown(":blue[**Final comments distribution**]")
+            map_df = lte_analysis_df[
+                ["code", "Longitude", "Latitude", "final_comments"]
+            ].dropna(subset=["code", "Longitude", "Latitude", "final_comments"])
+            # Add a constant size column equal to 20
+            map_df["size"] = 20
+            fig = px.scatter_map(
+                map_df,
+                lat="Latitude",
+                lon="Longitude",
+                color="final_comments",
+                size="size",
+                zoom=10,
+                height=600,
+                title="Final comments distribution",
+                hover_data={
+                    "code": True,
+                    "final_comments": True,
+                },
+                hover_name="code",
+            )
+            fig.update_layout(mapbox_style="open-street-map")
+            st.plotly_chart(fig, use_container_width=True)

process_kpi/process_lte_capacity.py CHANGED Viewed

@@ -17,6 +17,8 @@ LTE_ANALYSIS_COLUMNS = [
     "code_sector",
     "Region",
     "site_config_band",
     "LNCEL_name_l800",
     "LNCEL_name_l1800",
     "LNCEL_name_l2300",
@@ -27,6 +29,26 @@ LTE_ANALYSIS_COLUMNS = [
     "avg_prb_usage_bh_l2300",
     "avg_prb_usage_bh_l2600",
     "avg_prb_usage_bh_l1800s",
     "num_congested_cells",
     "num_cells",
     "num_cell_with_kpi",
@@ -42,6 +64,8 @@ LTE_DATABASE_COLUMNS = [
     "Region",
     "site_config_band",
     "final_name",
 ]
 KPI_COLUMNS = [
@@ -50,11 +74,17 @@ KPI_COLUMNS = [
     "Cell_Avail_excl_BLU",
     "E_UTRAN_Avg_PRB_usage_per_TTI_DL",
     "DL_PRB_Util_p_TTI_Lev_10",
 ]
 PRB_COLUMNS = [
     "LNCEL_name",
     "avg_prb_usage_bh",
-    # "avg_prb_usage_bh_lev_10",
 ]
@@ -221,7 +251,9 @@ def lte_analysis_logic(
 def dfs_per_band_cell(df: pd.DataFrame) -> pd.DataFrame:
     # Base DataFrame with unique codes, Region, and site_config_band
-    all_codes_df = df[["code", "Region", "site_config_band"]].drop_duplicates()
     # Configuration for sector groups and their respective LNCEL patterns and column suffixes
     # Format: { "group_key": [(lncel_name_pattern_part, column_suffix), ...] }
@@ -265,11 +297,23 @@ def dfs_per_band_cell(df: pd.DataFrame) -> pd.DataFrame:
             # Select relevant columns and rename them for the merge
             # This avoids pandas automatically adding _x, _y suffixes and then needing to rename them
             df_to_merge = filtered_band_df[
-                ["code", "LNCEL_name", "avg_prb_usage_bh"]
             ].rename(
                 columns={
                     "LNCEL_name": f"LNCEL_name_{column_suffix}",
                     "avg_prb_usage_bh": f"avg_prb_usage_bh_{column_suffix}",
                 }
             )
@@ -282,6 +326,7 @@ def dfs_per_band_cell(df: pd.DataFrame) -> pd.DataFrame:
     # Concatenate all the processed sector DataFrames
     all_sectors_dfs = pd.concat(all_processed_sectors_dfs, axis=0, ignore_index=True)
     return all_sectors_dfs
@@ -311,6 +356,7 @@ def lte_bh_dfs_per_kpi(
     prb_usage_threshold: int = 80,
     prb_diff_between_cells_threshold: int = 20,
     number_of_threshold_days: int = 3,
 ) -> pd.DataFrame:
     # print(df.columns)
@@ -326,22 +372,46 @@ def lte_bh_dfs_per_kpi(
         days=number_of_kpi_days,
         availability_threshold=availability_threshold,
     )
-    # prb_usage_df = analyze_prb_usage(
-    #     df=pivoted_kpi_dfs["E_UTRAN_Avg_PRB_usage_per_TTI_DL"],
-    #     number_of_kpi_days=number_of_kpi_days,
-    #     prb_usage_threshold=prb_usage_threshold,
-    #     analysis_type="BH",
-    #     number_of_threshold_days=number_of_threshold_days,
-    # )
     prb_lev10_usage_df = analyze_prb_usage(
         df=pivoted_kpi_dfs["DL_PRB_Util_p_TTI_Lev_10"],
         number_of_kpi_days=number_of_kpi_days,
         prb_usage_threshold=prb_usage_threshold,
         analysis_type="BH",
         number_of_threshold_days=number_of_threshold_days,
     )
-    bh_kpi_df = pd.concat([cell_availability_df, prb_lev10_usage_df], axis=1)
     bh_kpi_df = bh_kpi_df.reset_index()
     prb_df = bh_kpi_df[PRB_COLUMNS]
@@ -382,6 +452,7 @@ def process_lte_bh_report(
     availability_threshold: float,
     prb_usage_threshold: float,
     prb_diff_between_cells_threshold: float,
 ) -> dict:
     """
     Process LTE Busy Hour report and perform capacity analysis
@@ -414,6 +485,7 @@ def process_lte_bh_report(
         prb_usage_threshold=prb_usage_threshold,
         prb_diff_between_cells_threshold=prb_diff_between_cells_threshold,
         number_of_threshold_days=num_threshold_days,
     )
     # save_dataframe(pivoted_kpi_dfs, "LTE_BH_Report.csv")

     "code_sector",
     "Region",
     "site_config_band",
+    "Longitude",
+    "Latitude",
     "LNCEL_name_l800",
     "LNCEL_name_l1800",
     "LNCEL_name_l2300",
     "avg_prb_usage_bh_l2300",
     "avg_prb_usage_bh_l2600",
     "avg_prb_usage_bh_l1800s",
+    "avg_prb_usage_bh_l800_2nd",
+    "avg_prb_usage_bh_l1800_2nd",
+    "avg_prb_usage_bh_l2300_2nd",
+    "avg_prb_usage_bh_l2600_2nd",
+    "avg_prb_usage_bh_l1800s_2nd",
+    "avg_act_ues_l800",
+    "avg_act_ues_l1800",
+    "avg_act_ues_l2300",
+    "avg_act_ues_l2600",
+    "avg_act_ues_l1800s",
+    "avg_dl_thp_l800",
+    "avg_dl_thp_l1800",
+    "avg_dl_thp_l2300",
+    "avg_dl_thp_l2600",
+    "avg_dl_thp_l1800s",
+    "avg_ul_thp_l800",
+    "avg_ul_thp_l1800",
+    "avg_ul_thp_l2300",
+    "avg_ul_thp_l2600",
+    "avg_ul_thp_l1800s",
     "num_congested_cells",
     "num_cells",
     "num_cell_with_kpi",
     "Region",
     "site_config_band",
     "final_name",
+    "Longitude",
+    "Latitude",
 ]
 KPI_COLUMNS = [
     "Cell_Avail_excl_BLU",
     "E_UTRAN_Avg_PRB_usage_per_TTI_DL",
     "DL_PRB_Util_p_TTI_Lev_10",
+    "Avg_PDCP_cell_thp_UL",
+    "Avg_PDCP_cell_thp_DL",
+    "Avg_act_UEs_DL",
 ]
 PRB_COLUMNS = [
     "LNCEL_name",
     "avg_prb_usage_bh",
+    "avg_prb_usage_bh_2nd",
+    "avg_act_ues",
+    "avg_dl_thp",
+    "avg_ul_thp",
 ]
 def dfs_per_band_cell(df: pd.DataFrame) -> pd.DataFrame:
     # Base DataFrame with unique codes, Region, and site_config_band
+    all_codes_df = df[
+        ["code", "Region", "site_config_band", "Longitude", "Latitude"]
+    ].drop_duplicates()
     # Configuration for sector groups and their respective LNCEL patterns and column suffixes
     # Format: { "group_key": [(lncel_name_pattern_part, column_suffix), ...] }
             # Select relevant columns and rename them for the merge
             # This avoids pandas automatically adding _x, _y suffixes and then needing to rename them
             df_to_merge = filtered_band_df[
+                [
+                    "code",
+                    "LNCEL_name",
+                    "avg_prb_usage_bh",
+                    "avg_prb_usage_bh_2nd",
+                    "avg_act_ues",
+                    "avg_dl_thp",
+                    "avg_ul_thp",
+                ]
             ].rename(
                 columns={
                     "LNCEL_name": f"LNCEL_name_{column_suffix}",
                     "avg_prb_usage_bh": f"avg_prb_usage_bh_{column_suffix}",
+                    "avg_prb_usage_bh_2nd": f"avg_prb_usage_bh_{column_suffix}_2nd",
+                    "avg_act_ues": f"avg_act_ues_{column_suffix}",
+                    "avg_dl_thp": f"avg_dl_thp_{column_suffix}",
+                    "avg_ul_thp": f"avg_ul_thp_{column_suffix}",
                 }
             )
     # Concatenate all the processed sector DataFrames
     all_sectors_dfs = pd.concat(all_processed_sectors_dfs, axis=0, ignore_index=True)
+    # save_dataframe(all_sectors_dfs, "all_sectors_dfs.csv")
     return all_sectors_dfs
     prb_usage_threshold: int = 80,
     prb_diff_between_cells_threshold: int = 20,
     number_of_threshold_days: int = 3,
+    main_prb_to_use: str = "",
 ) -> pd.DataFrame:
     # print(df.columns)
         days=number_of_kpi_days,
         availability_threshold=availability_threshold,
     )
+    prb_usage_df = analyze_prb_usage(
+        df=pivoted_kpi_dfs["E_UTRAN_Avg_PRB_usage_per_TTI_DL"],
+        number_of_kpi_days=number_of_kpi_days,
+        prb_usage_threshold=prb_usage_threshold,
+        analysis_type="BH",
+        number_of_threshold_days=number_of_threshold_days,
+        suffix="" if main_prb_to_use == "E-UTRAN Avg PRB usage per TTI DL" else "_2nd",
+    )
     prb_lev10_usage_df = analyze_prb_usage(
         df=pivoted_kpi_dfs["DL_PRB_Util_p_TTI_Lev_10"],
         number_of_kpi_days=number_of_kpi_days,
         prb_usage_threshold=prb_usage_threshold,
         analysis_type="BH",
         number_of_threshold_days=number_of_threshold_days,
+        suffix="" if main_prb_to_use == "DL PRB Util p TTI Lev_10" else "_2nd",
+    )
+    act_ues_df = pivoted_kpi_dfs["Avg_act_UEs_DL"]
+    # Add Max and avg columns for act_ues_df
+    act_ues_df["max_act_ues"] = act_ues_df.max(axis=1)
+    act_ues_df["avg_act_ues"] = act_ues_df.mean(axis=1)
+    dl_thp_df = pivoted_kpi_dfs["Avg_PDCP_cell_thp_DL"]
+    # Add Max and avg columns for dl_thp_df
+    dl_thp_df["max_dl_thp"] = dl_thp_df.max(axis=1)
+    dl_thp_df["avg_dl_thp"] = dl_thp_df.mean(axis=1)
+    ul_thp_df = pivoted_kpi_dfs["Avg_PDCP_cell_thp_UL"]
+    # Add Max and avg columns for ul_thp_df
+    ul_thp_df["max_ul_thp"] = ul_thp_df.max(axis=1)
+    ul_thp_df["avg_ul_thp"] = ul_thp_df.mean(axis=1)
+    bh_kpi_df = pd.concat(
+        [
+            cell_availability_df,
+            prb_lev10_usage_df,
+            prb_usage_df,
+            act_ues_df,
+            dl_thp_df,
+            ul_thp_df,
+        ],
+        axis=1,
     )
     bh_kpi_df = bh_kpi_df.reset_index()
     prb_df = bh_kpi_df[PRB_COLUMNS]
     availability_threshold: float,
     prb_usage_threshold: float,
     prb_diff_between_cells_threshold: float,
+    main_prb_to_use: str,
 ) -> dict:
     """
     Process LTE Busy Hour report and perform capacity analysis
         prb_usage_threshold=prb_usage_threshold,
         prb_diff_between_cells_threshold=prb_diff_between_cells_threshold,
         number_of_threshold_days=num_threshold_days,
+        main_prb_to_use=main_prb_to_use,
     )
     # save_dataframe(pivoted_kpi_dfs, "LTE_BH_Report.csv")

utils/convert_to_excel.py CHANGED Viewed

@@ -149,16 +149,38 @@ def get_format_map_by_format_type(formats: dict, format_type: str) -> dict:
             "code_sector": formats["blue"],
             "Region": formats["blue"],
             "site_config_band": formats["blue"],
-            "LNCEL_name_l800": formats["beurre"],
-            "LNCEL_name_l1800": formats["purple5"],
-            "LNCEL_name_l2300": formats["purple6"],
-            "LNCEL_name_l2600": formats["blue_light"],
-            "LNCEL_name_l1800s": formats["gray"],
             "avg_prb_usage_bh_l800": formats["beurre"],
-            "avg_prb_usage_bh_l1800": formats["purple5"],
-            "avg_prb_usage_bh_l2300": formats["purple6"],
-            "avg_prb_usage_bh_l2600": formats["blue_light"],
-            "avg_prb_usage_bh_l1800s": formats["gray"],
             "num_congested_cells": formats["orange"],
             "num_cells": formats["orange"],
             "num_cell_with_kpi": formats["orange"],

             "code_sector": formats["blue"],
             "Region": formats["blue"],
             "site_config_band": formats["blue"],
+            "Longitude": formats["blue"],
+            "Latitude": formats["blue"],
+            # "LNCEL_name_l800": formats["beurre"],
+            # "LNCEL_name_l1800": formats["purple5"],
+            # "LNCEL_name_l2300": formats["purple6"],
+            # "LNCEL_name_l2600": formats["blue_light"],
+            # "LNCEL_name_l1800s": formats["gray"],
             "avg_prb_usage_bh_l800": formats["beurre"],
+            "avg_prb_usage_bh_l1800": formats["beurre"],
+            "avg_prb_usage_bh_l2300": formats["beurre"],
+            "avg_prb_usage_bh_l2600": formats["beurre"],
+            "avg_prb_usage_bh_l1800s": formats["beurre"],
+            "avg_prb_usage_bh_l800_2nd": formats["purple5"],
+            "avg_prb_usage_bh_l1800_2nd": formats["purple5"],
+            "avg_prb_usage_bh_l2300_2nd": formats["purple5"],
+            "avg_prb_usage_bh_l2600_2nd": formats["purple5"],
+            "avg_prb_usage_bh_l1800s_2nd": formats["purple5"],
+            "avg_act_ues_l800": formats["purple6"],
+            "avg_act_ues_l1800": formats["purple6"],
+            "avg_act_ues_l2300": formats["purple6"],
+            "avg_act_ues_l2600": formats["purple6"],
+            "avg_act_ues_l1800s": formats["purple6"],
+            "avg_dl_thp_l800": formats["blue_light"],
+            "avg_dl_thp_l1800": formats["blue_light"],
+            "avg_dl_thp_l2300": formats["blue_light"],
+            "avg_dl_thp_l2600": formats["blue_light"],
+            "avg_dl_thp_l1800s": formats["blue_light"],
+            "avg_ul_thp_l800": formats["gray"],
+            "avg_ul_thp_l1800": formats["gray"],
+            "avg_ul_thp_l2300": formats["gray"],
+            "avg_ul_thp_l2600": formats["gray"],
+            "avg_ul_thp_l1800s": formats["gray"],
             "num_congested_cells": formats["orange"],
             "num_cells": formats["orange"],
             "num_cell_with_kpi": formats["orange"],

utils/kpi_analysis_utils.py CHANGED Viewed

@@ -561,25 +561,30 @@ def analyze_prb_usage(
     prb_usage_threshold: int,
     analysis_type: str,
     number_of_threshold_days: int,
 ) -> pd.DataFrame:
     result_df = df.copy()
     last_days_df: pd.DataFrame = result_df.iloc[:, -number_of_kpi_days:]
     # last_days_df = last_days_df.fillna(0)
-    result_df[f"avg_prb_usage_{analysis_type.lower()}"] = last_days_df.mean(
         axis=1
     ).round(2)
-    result_df[f"max_prb_usage_{analysis_type.lower()}"] = last_days_df.max(axis=1)
     # Count the number of days above threshold
-    result_df[f"number_of_days_with_prb_usage_exceeded_{analysis_type.lower()}"] = (
-        last_days_df.apply(
-            lambda row: sum(1 for x in row if x >= prb_usage_threshold), axis=1
-        )
     )
     # Add the daily_prb_comment : if number_of_days_with_prb_usage_exceeded_daily is >= number_of_threshold_days : prb usage exceeded threshold , else : None
-    result_df[f"prb_usage_{analysis_type.lower()}_comment"] = np.where(
-        result_df[f"number_of_days_with_prb_usage_exceeded_{analysis_type.lower()}"]
         >= number_of_threshold_days,
         "PRB usage exceeded threshold",
         None,

     prb_usage_threshold: int,
     analysis_type: str,
     number_of_threshold_days: int,
+    suffix: str = "",
 ) -> pd.DataFrame:
     result_df = df.copy()
     last_days_df: pd.DataFrame = result_df.iloc[:, -number_of_kpi_days:]
     # last_days_df = last_days_df.fillna(0)
+    result_df[f"avg_prb_usage_{analysis_type.lower()}{suffix}"] = last_days_df.mean(
         axis=1
     ).round(2)
+    result_df[f"max_prb_usage_{analysis_type.lower()}{suffix}"] = last_days_df.max(
+        axis=1
+    )
     # Count the number of days above threshold
+    result_df[
+        f"number_of_days_with_prb_usage_exceeded_{analysis_type.lower()}{suffix}"
+    ] = last_days_df.apply(
+        lambda row: sum(1 for x in row if x >= prb_usage_threshold), axis=1
     )
     # Add the daily_prb_comment : if number_of_days_with_prb_usage_exceeded_daily is >= number_of_threshold_days : prb usage exceeded threshold , else : None
+    result_df[f"prb_usage_{analysis_type.lower()}{suffix}_comment"] = np.where(
+        result_df[
+            f"number_of_days_with_prb_usage_exceeded_{analysis_type.lower()}{suffix}"
+        ]
         >= number_of_threshold_days,
         "PRB usage exceeded threshold",
         None,