Spaces:

DavMelchi
/

db_query

Running

App Files Files Community

DavMelchi committed on May 3

Commit

bd3da99

1 Parent(s): 9bd7cb4

Adding comments to 2G kpi analysis part1

Browse files

Files changed (4) hide show

apps/kpi_analysis/gsm_capacity.py +9 -7
process_kpi/process_gsm_capacity.py +159 -34
process_kpi/process_wbts_capacity.py +2 -60
utils/kpi_analysis_utils.py +68 -2

apps/kpi_analysis/gsm_capacity.py CHANGED Viewed

@@ -15,9 +15,9 @@ with doc_col:
     st.write(
         """
         The report should be run with a minimum of 3 days of data.
-        - Daily Aggregated
-        - Site level
-        - Exported in CSV format.
         """
     )
@@ -73,7 +73,7 @@ if (
             "TCH ABIS Fails Threshold", min_value=0, value=10
         )
     with threshold_col3:
-        sddch_blocking_threshold = st.number_input(
             "SDDCH Blocking Threshold", min_value=0.1, value=0.5
         )
     with threshold_col4:
@@ -90,15 +90,17 @@ if (
             number_of_threshold_days=number_of_threshold_days,
             availability_threshold=availability_threshold,
             tch_abis_fails_threshold=tch_abis_fails_threshold,
-            sddch_blocking_threshold=sddch_blocking_threshold,
             tch_blocking_threshold=tch_blocking_threshold,
         )
         if dfs is not None:
             gsm_analysis_df = dfs[0]
             bh_kpi_df = dfs[1]
             GsmCapacity.final_results = convert_gsm_dfs(
-                [gsm_analysis_df, bh_kpi_df], ["GSM_Analysis", "BH_KPI_Analysis"]
             )
             # GsmCapacity.final_results = convert_gsm_dfs(
@@ -115,4 +117,4 @@ if (
                     mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                 )
-        st.write(gsm_analysis_df)

     st.write(
         """
         The report should be run with a minimum of 3 days of data.
+        - Dump file required
+        - Daily Cell level KPI report in CSV format
+        - BH Cell level KPI report in CSV format
         """
     )
             "TCH ABIS Fails Threshold", min_value=0, value=10
         )
     with threshold_col3:
+        sdcch_blocking_threshold = st.number_input(
             "SDDCH Blocking Threshold", min_value=0.1, value=0.5
         )
     with threshold_col4:
             number_of_threshold_days=number_of_threshold_days,
             availability_threshold=availability_threshold,
             tch_abis_fails_threshold=tch_abis_fails_threshold,
+            sdcch_blocking_threshold=sdcch_blocking_threshold,
             tch_blocking_threshold=tch_blocking_threshold,
         )
         if dfs is not None:
             gsm_analysis_df = dfs[0]
             bh_kpi_df = dfs[1]
+            daily_kpi_df = dfs[2]
             GsmCapacity.final_results = convert_gsm_dfs(
+                [gsm_analysis_df, bh_kpi_df, daily_kpi_df],
+                ["GSM_Analysis", "BH_KPI_Analysis", "Daily_KPI_Analysis"],
             )
             # GsmCapacity.final_results = convert_gsm_dfs(
                     mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                 )
+        st.write(daily_kpi_df)

process_kpi/process_gsm_capacity.py CHANGED Viewed

@@ -6,6 +6,8 @@ from utils.check_sheet_exist import execute_checks_sheets_exist
 from utils.convert_to_excel import convert_dfs, save_dataframe
 from utils.kpi_analysis_utils import (
     GsmAnalysis,
     create_daily_date,
     create_dfs_per_kpi,
     create_hourly_date,
@@ -66,39 +68,89 @@ KPI_COLUMNS = [
 BH_COLUMNS_FOR_CAPACITY = [
     "Max_Traffic BH",
     "Avg_Traffic BH",
-    "Max_tch_call_blocking BH",
-    "Avg_tch_call_blocking BH",
-    "number_of_days_with_tch_blocking_exceeded",
-    "Max_sdcch_real_blocking BH",
-    "Avg_sdcch_real_blocking BH",
-    "number_of_days_with_sdcch_blocking_exceeded",
 ]
-def bh_tch_call_blocking_analysis(
     df: pd.DataFrame,
     number_of_kpi_days: int,
-    tch_blocking_threshold: int,
     number_of_threshold_days: int,
 ) -> pd.DataFrame:
     result_df = df.copy()
     last_days_df = result_df.iloc[:, -number_of_kpi_days:]
     # last_days_df = last_days_df.fillna(0)
-    result_df["Avg_tch_call_blocking BH"] = last_days_df.mean(axis=1).round(2)
-    result_df["Max_tch_call_blocking BH"] = last_days_df.max(axis=1)
     # Count the number of days above threshold
-    result_df["number_of_days_with_tch_blocking_exceeded"] = last_days_df.apply(
-        lambda row: sum(1 for x in row if x >= tch_blocking_threshold), axis=1
     )
     return result_df
-def bh_sdcch_call_blocking_analysis(
     df: pd.DataFrame,
     number_of_kpi_days: int,
     sdcch_blocking_threshold: int,
     number_of_threshold_days: int,
 ) -> pd.DataFrame:
@@ -106,12 +158,29 @@ def bh_sdcch_call_blocking_analysis(
     last_days_df = result_df.iloc[:, -number_of_kpi_days:]
     # last_days_df = last_days_df.fillna(0)
-    result_df["Avg_sdcch_real_blocking BH"] = last_days_df.mean(axis=1).round(2)
-    result_df["Max_sdcch_real_blocking BH"] = last_days_df.max(axis=1)
     # Count the number of days above threshold
-    result_df["number_of_days_with_sdcch_blocking_exceeded"] = last_days_df.apply(
         lambda row: sum(1 for x in row if x >= sdcch_blocking_threshold), axis=1
     )
     return result_df
@@ -164,18 +233,20 @@ def bh_dfs_per_kpi(
     # ANALYSIS
-    tch_call_blocking_df = bh_tch_call_blocking_analysis(
         df=tch_call_blocking_df,
         number_of_kpi_days=number_of_kpi_days,
-        tch_blocking_threshold=tch_blocking_threshold,
         number_of_threshold_days=number_of_threshold_days,
     )
-    sdcch_real_blocking_df = bh_sdcch_call_blocking_analysis(
         df=sdcch_real_blocking_df,
         number_of_kpi_days=number_of_kpi_days,
         sdcch_blocking_threshold=sdcch_blocking_threshold,
         number_of_threshold_days=number_of_threshold_days,
     )
     Carried_Traffic_df = bh_traffic_analysis(
@@ -183,9 +254,6 @@ def bh_dfs_per_kpi(
         number_of_kpi_days=number_of_kpi_days,
     )
-    # Carried_Traffic_df["Max_Traffic BH"] = Carried_Traffic_df.max(axis=1)
-    # Carried_Traffic_df["Avg_Traffic BH"] = Carried_Traffic_df.mean(axis=1)
     bh_kpi_df = pd.concat(
         [
             tch_availability_ratio_df,
@@ -195,8 +263,6 @@ def bh_dfs_per_kpi(
         ],
         axis=1,
     )
-    # print(Carried_Traffic_df)
     return bh_kpi_df
@@ -216,7 +282,6 @@ def analyse_bh_data(
         number_of_kpi_days=number_of_kpi_days,
         tch_blocking_threshold=tch_blocking_threshold,
         sdcch_blocking_threshold=sdcch_blocking_threshold,
-        number_of_threshold_days=number_of_threshold_days,
     )
     bh_df_for_capacity = df.copy()
@@ -243,6 +308,8 @@ def daily_dfs_per_kpi(
     availability_threshold: int = 95,
     number_of_threshold_days: int = 3,
     tch_abis_fails_threshold: int = 10,
 ) -> pd.DataFrame:
     """
     Create pivoted DataFrames for each KPI and perform analysis.
@@ -271,6 +338,61 @@ def daily_dfs_per_kpi(
     tch_availability_ratio_df: pd.DataFrame = pivoted_kpi_dfs["TCH_availability_ratio"]
     tch_abis_fails_df: pd.DataFrame = pivoted_kpi_dfs["TCH_ABIS_FAIL_CALL_c001084"]
 def analyse_daily_data(
     daily_report_path: str,
@@ -278,6 +400,8 @@ def analyse_daily_data(
     tch_abis_fails_threshold: int,
     availability_threshold: int,
     number_of_threshold_days: int,
 ) -> pd.DataFrame:
     df = pd.read_csv(daily_report_path, delimiter=";")
     df = kpi_naming_cleaning(df)
@@ -289,8 +413,10 @@ def analyse_daily_data(
         availability_threshold=availability_threshold,
         tch_abis_fails_threshold=tch_abis_fails_threshold,
         number_of_threshold_days=number_of_threshold_days,
     )
-    # print(df)
 def get_gsm_databases(dump_path: str) -> pd.DataFrame:
@@ -340,19 +466,18 @@ def analyze_gsm_data(
     number_of_threshold_days: int,
     availability_threshold: int,
     tch_abis_fails_threshold: int,
-    sddch_blocking_threshold: float,
     tch_blocking_threshold: float,
 ):
-    # print("Analyzing data...")
-    # print(f"Number of days: {number_of_kpi_days}")
-    # print(f"availability_threshold: {availability_threshold}")
-    analyse_daily_data(
         daily_report_path=daily_report_path,
         number_of_kpi_days=number_of_kpi_days,
         availability_threshold=availability_threshold,
         tch_abis_fails_threshold=tch_abis_fails_threshold,
         number_of_threshold_days=number_of_threshold_days,
     )
     gsm_database_df: pd.DataFrame = get_gsm_databases(dump_path)
@@ -361,7 +486,7 @@ def analyze_gsm_data(
         bh_report_path=bh_report_path,
         number_of_kpi_days=number_of_kpi_days,
         tch_blocking_threshold=tch_blocking_threshold,
-        sdcch_blocking_threshold=sddch_blocking_threshold,
         number_of_threshold_days=number_of_threshold_days,
     )
@@ -377,7 +502,7 @@ def analyze_gsm_data(
     # Add "ERLANGB value" =MAX TRAFFIC/(1-(MAX TCH call blocking/200))
     gsm_analysis_df["ErlabngB_value"] = gsm_analysis_df["Max_Traffic BH"] / (
-        1 - (gsm_analysis_df["Max_tch_call_blocking BH"] / 200)
     )
     # - Get "Target FR CHs" by mapping "ERLANG value" to 2G analysis_utility "erlangB" dict
@@ -405,4 +530,4 @@ def analyze_gsm_data(
         gsm_analysis_df["Target TRXs"] - gsm_analysis_df["number_trx_per_cell"]
     )
-    return [gsm_analysis_df, bh_kpi_full_df]

 from utils.convert_to_excel import convert_dfs, save_dataframe
 from utils.kpi_analysis_utils import (
     GsmAnalysis,
+    cell_availability_analysis,
+    combine_comments,
     create_daily_date,
     create_dfs_per_kpi,
     create_hourly_date,
 BH_COLUMNS_FOR_CAPACITY = [
     "Max_Traffic BH",
     "Avg_Traffic BH",
+    "max_tch_call_blocking_bh",
+    "avg_tch_call_blocking_bh",
+    "number_of_days_with_tch_blocking_exceeded_bh",
+    "max_sdcch_real_blocking_bh",
+    "avg_sdcch_real_blocking_bh",
+    "number_of_days_with_sdcch_blocking_exceeded_bh",
 ]
+def analyze_tch_abis_fails(
     df: pd.DataFrame,
     number_of_kpi_days: int,
+    analysis_type: str,
     number_of_threshold_days: int,
+    tch_abis_fails_threshold: int,
 ) -> pd.DataFrame:
     result_df = df.copy()
     last_days_df = result_df.iloc[:, -number_of_kpi_days:]
     # last_days_df = last_days_df.fillna(0)
+    result_df[f"avg_tch_abis_fail_{analysis_type.lower()}"] = last_days_df.mean(
+        axis=1
+    ).round(2)
+    result_df[f"max_tch_abis_fail_{analysis_type.lower()}"] = last_days_df.max(axis=1)
     # Count the number of days above threshold
+    result_df[f"number_of_days_with_tch_abis_fail_exceeded_{analysis_type.lower()}"] = (
+        last_days_df.apply(
+            lambda row: sum(1 for x in row if x >= tch_abis_fails_threshold), axis=1
+        )
     )
+    # Add the tch_abis_fail_<analysis_type>_comment column: flagged when number_of_days_with_tch_abis_fail_exceeded_<analysis_type> >= number_of_threshold_days, else None
+    result_df[f"tch_abis_fail_{analysis_type.lower()}_comment"] = np.where(
+        result_df[f"number_of_days_with_tch_abis_fail_exceeded_{analysis_type.lower()}"]
+        >= number_of_threshold_days,
+        "tch abis fail exceeded treshold",
+        None,
+    )
     return result_df
+def analyze_tch_call_blocking(
+    df: pd.DataFrame,
+    number_of_kpi_days: int,
+    analysis_type: str,
+    number_of_threshold_days: int,
+    tch_blocking_threshold: int,
+) -> pd.DataFrame:
+    result_df = df.copy()
+    last_days_df = result_df.iloc[:, -number_of_kpi_days:]
+    # last_days_df = last_days_df.fillna(0)
+    result_df[f"avg_tch_call_blocking_{analysis_type.lower()}"] = last_days_df.mean(
+        axis=1
+    ).round(2)
+    result_df[f"max_tch_call_blocking_{analysis_type.lower()}"] = last_days_df.max(
+        axis=1
+    )
+    # Count the number of days above threshold
+    result_df[f"number_of_days_with_tch_blocking_exceeded_{analysis_type.lower()}"] = (
+        last_days_df.apply(
+            lambda row: sum(1 for x in row if x >= tch_blocking_threshold), axis=1
+        )
+    )
+    # Add the tch_call_blocking_<analysis_type>_comment column: "TCH blocking exceeded threshold" when number_of_days_with_tch_blocking_exceeded_<analysis_type> >= number_of_threshold_days, else None
+    result_df[f"tch_call_blocking_{analysis_type.lower()}_comment"] = np.where(
+        result_df[f"number_of_days_with_tch_blocking_exceeded_{analysis_type.lower()}"]
+        >= number_of_threshold_days,
+        "TCH blocking exceeded threshold",
+        None,
+    )
+    return result_df
+def analyze_sdcch_call_blocking(
     df: pd.DataFrame,
     number_of_kpi_days: int,
     sdcch_blocking_threshold: int,
+    analysis_type: str,
     number_of_threshold_days: int,
 ) -> pd.DataFrame:
     last_days_df = result_df.iloc[:, -number_of_kpi_days:]
     # last_days_df = last_days_df.fillna(0)
+    result_df[f"avg_sdcch_real_blocking_{analysis_type.lower()}"] = last_days_df.mean(
+        axis=1
+    ).round(2)
+    result_df[f"max_sdcch_real_blocking_{analysis_type.lower()}"] = last_days_df.max(
+        axis=1
+    )
     # Count the number of days above threshold
+    result_df[
+        f"number_of_days_with_sdcch_blocking_exceeded_{analysis_type.lower()}"
+    ] = last_days_df.apply(
         lambda row: sum(1 for x in row if x >= sdcch_blocking_threshold), axis=1
     )
+    # Add the sdcch_real_blocking_<analysis_type>_comment column: "SDCCH blocking exceeded threshold" when number_of_days_with_sdcch_blocking_exceeded_<analysis_type> >= number_of_threshold_days, else None
+    result_df[f"sdcch_real_blocking_{analysis_type.lower()}_comment"] = np.where(
+        result_df[
+            f"number_of_days_with_sdcch_blocking_exceeded_{analysis_type.lower()}"
+        ]
+        >= number_of_threshold_days,
+        "SDCCH blocking exceeded threshold",
+        None,
+    )
     return result_df
     # ANALYSIS
+    tch_call_blocking_df = analyze_tch_call_blocking(
         df=tch_call_blocking_df,
         number_of_kpi_days=number_of_kpi_days,
         number_of_threshold_days=number_of_threshold_days,
+        tch_blocking_threshold=tch_blocking_threshold,
+        analysis_type="BH",
     )
+    sdcch_real_blocking_df = analyze_sdcch_call_blocking(
         df=sdcch_real_blocking_df,
         number_of_kpi_days=number_of_kpi_days,
         sdcch_blocking_threshold=sdcch_blocking_threshold,
         number_of_threshold_days=number_of_threshold_days,
+        analysis_type="BH",
     )
     Carried_Traffic_df = bh_traffic_analysis(
         number_of_kpi_days=number_of_kpi_days,
     )
     bh_kpi_df = pd.concat(
         [
             tch_availability_ratio_df,
         ],
         axis=1,
     )
     return bh_kpi_df
         number_of_kpi_days=number_of_kpi_days,
         tch_blocking_threshold=tch_blocking_threshold,
         sdcch_blocking_threshold=sdcch_blocking_threshold,
     )
     bh_df_for_capacity = df.copy()
     availability_threshold: int = 95,
     number_of_threshold_days: int = 3,
     tch_abis_fails_threshold: int = 10,
+    sdcch_blocking_threshold: int = 0.5,
+    tch_blocking_threshold: int = 0.5,
 ) -> pd.DataFrame:
     """
     Create pivoted DataFrames for each KPI and perform analysis.
     tch_availability_ratio_df: pd.DataFrame = pivoted_kpi_dfs["TCH_availability_ratio"]
     tch_abis_fails_df: pd.DataFrame = pivoted_kpi_dfs["TCH_ABIS_FAIL_CALL_c001084"]
+    tch_availability_ratio_df = cell_availability_analysis(
+        df=tch_availability_ratio_df,
+        days=number_of_kpi_days,
+        availability_threshold=availability_threshold,
+    )
+    sdcch_real_blocking_df = analyze_sdcch_call_blocking(
+        df=sdcch_real_blocking_df,
+        number_of_kpi_days=number_of_kpi_days,
+        sdcch_blocking_threshold=sdcch_blocking_threshold,
+        number_of_threshold_days=number_of_threshold_days,
+        analysis_type="Daily",
+    )
+    tch_call_blocking_df = analyze_tch_call_blocking(
+        df=tch_call_blocking_df,
+        number_of_kpi_days=number_of_kpi_days,
+        number_of_threshold_days=number_of_threshold_days,
+        tch_blocking_threshold=tch_blocking_threshold,
+        analysis_type="Daily",
+    )
+    tch_abis_fails_df = analyze_tch_abis_fails(
+        df=tch_abis_fails_df,
+        number_of_kpi_days=number_of_kpi_days,
+        tch_abis_fails_threshold=tch_abis_fails_threshold,
+        number_of_threshold_days=number_of_threshold_days,
+        analysis_type="Daily",
+    )
+    daily_kpi_df = pd.concat(
+        [
+            tch_availability_ratio_df,
+            Carried_Traffic_df,
+            tch_call_blocking_df,
+            sdcch_real_blocking_df,
+            tch_abis_fails_df,
+        ],
+        axis=1,
+    )
+    daily_kpi_df = combine_comments(
+        daily_kpi_df,
+        "availability_comment",
+        "tch_abis_fail_daily_comment",
+        "sdcch_real_blocking_daily_comment",
+        new_column="sdcch_comments",
+    )
+    daily_kpi_df = combine_comments(
+        daily_kpi_df,
+        "availability_comment",
+        "tch_abis_fail_daily_comment",
+        "tch_call_blocking_daily_comment",
+        new_column="tch_comments",
+    )
+    return daily_kpi_df
 def analyse_daily_data(
     daily_report_path: str,
     tch_abis_fails_threshold: int,
     availability_threshold: int,
     number_of_threshold_days: int,
+    sdcch_blocking_threshold: int,
+    tch_blocking_threshold: int,
 ) -> pd.DataFrame:
     df = pd.read_csv(daily_report_path, delimiter=";")
     df = kpi_naming_cleaning(df)
         availability_threshold=availability_threshold,
         tch_abis_fails_threshold=tch_abis_fails_threshold,
         number_of_threshold_days=number_of_threshold_days,
+        sdcch_blocking_threshold=sdcch_blocking_threshold,
+        tch_blocking_threshold=tch_blocking_threshold,
     )
+    return df
 def get_gsm_databases(dump_path: str) -> pd.DataFrame:
     number_of_threshold_days: int,
     availability_threshold: int,
     tch_abis_fails_threshold: int,
+    sdcch_blocking_threshold: float,
     tch_blocking_threshold: float,
 ):
+    daily_kpi_df: pd.DataFrame = analyse_daily_data(
         daily_report_path=daily_report_path,
         number_of_kpi_days=number_of_kpi_days,
         availability_threshold=availability_threshold,
         tch_abis_fails_threshold=tch_abis_fails_threshold,
         number_of_threshold_days=number_of_threshold_days,
+        sdcch_blocking_threshold=sdcch_blocking_threshold,
+        tch_blocking_threshold=tch_blocking_threshold,
     )
     gsm_database_df: pd.DataFrame = get_gsm_databases(dump_path)
         bh_report_path=bh_report_path,
         number_of_kpi_days=number_of_kpi_days,
         tch_blocking_threshold=tch_blocking_threshold,
+        sdcch_blocking_threshold=sdcch_blocking_threshold,
         number_of_threshold_days=number_of_threshold_days,
     )
     # Add "ERLANGB value" =MAX TRAFFIC/(1-(MAX TCH call blocking/200))
     gsm_analysis_df["ErlabngB_value"] = gsm_analysis_df["Max_Traffic BH"] / (
+        1 - (gsm_analysis_df["max_tch_call_blocking_bh"] / 200)
     )
     # - Get "Target FR CHs" by mapping "ERLANG value" to 2G analysis_utility "erlangB" dict
         gsm_analysis_df["Target TRXs"] - gsm_analysis_df["number_trx_per_cell"]
     )
+    return [gsm_analysis_df, bh_kpi_full_df, daily_kpi_df]

process_kpi/process_wbts_capacity.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import pandas as pd
 from utils.kpi_analysis_utils import (
     create_daily_date,
     create_dfs_per_kpi,
     kpi_naming_cleaning,
@@ -78,39 +80,6 @@ def max_used_bb_subunits_analysis(
     return result_df
-def cell_availability_analysis(df: pd.DataFrame, days: int = 7) -> pd.DataFrame:
-    """
-    Analyze cell availability and categorize sites based on availability metrics.
-    Args:
-        df: DataFrame containing cell availability data
-        days: Number of days to analyze
-    Returns:
-        DataFrame with availability analysis and site status comments
-    """
-    result_df = df.copy().fillna(0)
-    last_days_df = result_df.iloc[:, -days:]
-    result_df["Average_cell_availability"] = last_days_df.mean(axis=1).round(2)
-    # Categorize sites based on availability
-    def categorize_availability(x: float) -> str:
-        if x == 0 or pd.isnull(x):
-            return "Down Site"
-        elif 0 < x <= 70:
-            return "critical instability"
-        elif 70 < x <= 95:
-            return "instability"
-        else:
-            return "Site Ok"
-    result_df["availability_comment"] = result_df["Average_cell_availability"].apply(
-        categorize_availability
-    )
-    return result_df
 def max_used_ce_analysis(
     df: pd.DataFrame,
     days: int = 7,
@@ -190,33 +159,6 @@ def avail_ce_analysis(df: pd.DataFrame, days: int = 7) -> pd.DataFrame:
     return result_df
-def combine_comments(df: pd.DataFrame, *columns: str, new_column: str) -> pd.DataFrame:
-    """
-    Combine comments from multiple columns into one column.
-    Args:
-        df: DataFrame containing comment columns
-        *columns: Variable number of column names containing comments
-        new_column: Name for the new combined comments column
-    Returns:
-        DataFrame with a new column containing combined comments
-    """
-    result_df = df.copy()
-    result_df[new_column] = result_df[list(columns)].apply(
-        lambda row: ", ".join([x for x in row if x]), axis=1
-    )
-    # Trim all trailing commas
-    result_df[new_column] = result_df[new_column].str.replace(
-        r"^[,\s]+|[,\s]+$", "", regex=True
-    )
-    # Replace multiple commas with a single comma
-    result_df[new_column] = result_df[new_column].str.replace(
-        r",\s*,", ", ", regex=True
-    )
-    return result_df
 def bb_comments_analysis(df: pd.DataFrame) -> pd.DataFrame:
     """
     Combine baseband related comments into a single column.

 import pandas as pd
 from utils.kpi_analysis_utils import (
+    cell_availability_analysis,
+    combine_comments,
     create_daily_date,
     create_dfs_per_kpi,
     kpi_naming_cleaning,
     return result_df
 def max_used_ce_analysis(
     df: pd.DataFrame,
     days: int = 7,
     return result_df
 def bb_comments_analysis(df: pd.DataFrame) -> pd.DataFrame:
     """
     Combine baseband related comments into a single column.

utils/kpi_analysis_utils.py CHANGED Viewed

@@ -216,6 +216,33 @@ class GsmAnalysis:
     }
 def kpi_naming_cleaning(df: pd.DataFrame) -> pd.DataFrame:
     """
     Clean KPI column names by replacing special characters and standardizing format.
@@ -295,7 +322,7 @@ def create_dfs_per_kpi(
         DataFrame with combined analysis results
     """
     kpi_columns = df.columns[kpi_columns_from:]
-    # print(kpi_columns)
     pivoted_kpi_dfs = {}
     # Loop through each KPI and create pivoted DataFrames
@@ -310,7 +337,6 @@ def create_dfs_per_kpi(
         pivot_df = temp_df.pivot(
             index=pivot_name_column, columns=pivot_date_column, values=kpi
         )
-        # print(pivot_df)
         pivot_df.columns = pd.MultiIndex.from_product([[kpi], pivot_df.columns])
         pivot_df.columns.names = ["KPI", "Date"]
@@ -318,3 +344,43 @@ def create_dfs_per_kpi(
         pivoted_kpi_dfs[kpi] = pivot_df
     return pivoted_kpi_dfs

     }
+def combine_comments(df: pd.DataFrame, *columns: str, new_column: str) -> pd.DataFrame:
+    """
+    Combine comments from multiple columns into one column.
+    Args:
+        df: DataFrame containing comment columns
+        *columns: Variable number of column names containing comments
+        new_column: Name for the new combined comments column
+    Returns:
+        DataFrame with a new column containing combined comments
+    """
+    result_df = df.copy()
+    result_df[new_column] = result_df[list(columns)].apply(
+        lambda row: ", ".join([x for x in row if x]), axis=1
+    )
+    # Trim all trailing commas
+    result_df[new_column] = result_df[new_column].str.replace(
+        r"^[,\s]+|[,\s]+$", "", regex=True
+    )
+    # Replace multiple commas with a single comma
+    result_df[new_column] = result_df[new_column].str.replace(
+        r",\s*,", ", ", regex=True
+    )
+    return result_df
 def kpi_naming_cleaning(df: pd.DataFrame) -> pd.DataFrame:
     """
     Clean KPI column names by replacing special characters and standardizing format.
         DataFrame with combined analysis results
     """
     kpi_columns = df.columns[kpi_columns_from:]
     pivoted_kpi_dfs = {}
     # Loop through each KPI and create pivoted DataFrames
         pivot_df = temp_df.pivot(
             index=pivot_name_column, columns=pivot_date_column, values=kpi
         )
         pivot_df.columns = pd.MultiIndex.from_product([[kpi], pivot_df.columns])
         pivot_df.columns.names = ["KPI", "Date"]
         pivoted_kpi_dfs[kpi] = pivot_df
     return pivoted_kpi_dfs
+def cell_availability_analysis(
+    df: pd.DataFrame, days: int = 7, availability_threshold: int = 95
+) -> pd.DataFrame:
+    """
+    Analyze cell availability and categorize sites based on availability metrics.
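+    Args:
+        df: DataFrame containing cell availability data
+        days: Number of days to analyze
+        availability_threshold: Availability value at or below which a day is
+            counted in number_of_days_exceeding_threshold; also the upper bound
+            of the "instability" category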
+    Returns:
+        DataFrame with availability analysis and site status comments
+    """
+    result_df = df.copy().fillna(0)
+    last_days_df = result_df.iloc[:, -days:]
+    result_df["Average_cell_availability"] = last_days_df.mean(axis=1).round(2)
+    # Count the number of days at or below the availability threshold
+    result_df["number_of_days_exceeding_threshold"] = last_days_df.apply(
+        lambda row: sum(1 for x in row if x <= availability_threshold), axis=1
+    )
+    # Categorize sites based on availability
+    def categorize_availability(x: float) -> str:
+        if x == 0 or pd.isnull(x):
+            return "Down Site"
+        elif 0 < x <= 70:
+            return "critical instability"
+        elif 70 < x <= availability_threshold:
+            return "instability"
+        else:
+            return "Availability OK"
+    result_df["availability_comment"] = result_df["Average_cell_availability"].apply(
+        categorize_availability
+    )
+    return result_df