Adding WBTS KPI analysis

Files changed:
- app.py (+6, -0)
- apps/kpi_analysis/wbts_capacty.py (+107, -0)
- assets/wbts_capacity.png (binary, added)
- process_kpi/process_wbts_capacity.py (+422, -0)
- utils/convert_to_excel.py (+1, -1)
app.py
CHANGED
@@ -27,6 +27,12 @@ pages = {
         ),
         st.Page("apps/import_physical_db.py", title="🌏Physical Database Verification"),
     ],
+    "KPI Analysis": [
+        st.Page(
+            "apps/kpi_analysis/wbts_capacty.py",
+            title=" 📊 WBTS Capacity BB and CE Analysis",
+        ),
+    ],
     "Documentations": [
         st.Page("documentations/database_doc.py", title="📚Databases Documentation"),
         st.Page("documentations/core_dump_doc.py", title="📗Dump core Documentation"),
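Note: the `pages` dict extended above is presumably consumed by Streamlit's multipage navigation API elsewhere in app.py; that code is outside this hunk, so the following is a minimal sketch of the assumed wiring, not the commit's actual code:

import streamlit as st

# Hypothetical wiring -- only the `pages` dict entries appear in this hunk.
pages = {
    "KPI Analysis": [
        st.Page(
            "apps/kpi_analysis/wbts_capacty.py",
            title=" 📊 WBTS Capacity BB and CE Analysis",
        ),
    ],
}

pg = st.navigation(pages)  # builds the grouped sidebar, returns the selected page
pg.run()                   # executes the selected page's script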
apps/kpi_analysis/wbts_capacty.py
ADDED
@@ -0,0 +1,107 @@
import pandas as pd
import plotly.express as px
import streamlit as st

from process_kpi.process_wbts_capacity import WbtsCapacity, load_data
from utils.convert_to_excel import convert_dfs

# Streamlit UI

st.title(" 📊 WBTS Capacity Analysis")
doc_col, image_col = st.columns(2)

with doc_col:
    st.write(
        """This app allows you to analyze the capacity of WBTSs in a network.
        It provides insights into the utilization of BB and CE resources,
        helping you identify potential capacity issues and plan for upgrades.

        The report should be run with a minimum of 3 days of data.
        - Daily Aggregated
        - WBTS level
        - Exported in CSV format.
        """
    )

with image_col:
    st.image("./assets/wbts_capacity.png")

uploaded_file = st.file_uploader(
    "Upload WBTS capacity report in CSV format", type="csv"
)

col1, col2, col3 = st.columns(3)

if uploaded_file is not None:
    WbtsCapacity.final_results = None
    with col1:
        num_days = st.number_input(
            "Number of days for analysis",
            min_value=3,
            max_value=30,
            value=7,
        )
    with col2:
        number_of_threshold_days = st.number_input(
            "Number of days for threshold",
            min_value=1,
            max_value=30,
            value=3,
        )
    with col3:
        threshold = st.number_input("Threshold", min_value=1, max_value=100, value=80)

    if st.button("Analyze Data", type="primary"):
        try:
            df = load_data(uploaded_file, num_days, threshold, number_of_threshold_days)
            WbtsCapacity.final_results = convert_dfs([df], ["WBTS_Analysis"])

            if WbtsCapacity.final_results is not None:
                st.download_button(
                    on_click="ignore",
                    type="primary",
                    label="Download the Analysis Report",
                    data=WbtsCapacity.final_results,
                    file_name="WBTS_Analysis_Report.xlsx",
                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                )
                st.write(df)

                # BB comments analysis and visualization
                bb_comments_df = df["bb_comments"].value_counts().reset_index()
                bb_comments_df.columns = ["bb_comments", "count"]

                bb_col1, bb_col2 = st.columns(2)
                with bb_col1:
                    st.write(bb_comments_df)

                # BB comments chart
                fig = px.bar(
                    bb_comments_df,
                    x="bb_comments",
                    y="count",
                    title="BB Comments Distribution",
                )
                fig.update_traces(texttemplate="%{value}", textposition="outside")
                with bb_col2:
                    st.plotly_chart(fig)

                # CE comments analysis and visualization
                ce_comments_df = df["ce_comments"].value_counts().reset_index()
                ce_comments_df.columns = ["ce_comments", "count"]

                ce_col1, ce_col2 = st.columns(2)
                with ce_col1:
                    st.write(ce_comments_df)

                # CE comments chart
                fig = px.bar(
                    ce_comments_df,
                    x="ce_comments",
                    y="count",
                    title="CE Comments Distribution",
                )
                fig.update_traces(texttemplate="%{value}", textposition="outside")
                with ce_col2:
                    st.plotly_chart(fig)
        except Exception as e:
            st.error(f"An error occurred. Error: {e}")
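Note: the expected input, inferred from process_kpi/process_wbts_capacity.py below, is a semicolon-delimited CSV with a PERIOD_START_TIME column (MM.DD.YYYY), a DN column, and the per-WBTS KPI columns listed there. A small pre-flight sketch to sanity-check an export before uploading it; the `missing_kpis` helper is hypothetical, not part of the commit:

import pandas as pd

from process_kpi.process_wbts_capacity import kpi_naming_cleaning

# KPI names the pipeline looks up after its column-name cleaning step.
REQUIRED_KPIS = [
    "WBTS_name",
    "LICENSED_R99CE_WBTS_M5008C48",
    "MAX_USED_CE_R99_DL_M5008C12",
    "MAX_USED_CE_R99_UL_M5008C15",
    "MAX_AVAIL_R99_CE_M5006C0",
    "MAX_USED_BB_SUBUNITS_M5008C38",
    "NUM_BB_SUBUNITS_M5008C39",
    "Max_BB_SUs_Util_ratio",
    "Cell_Availability_excluding_blocked_by_user_state_BLU",
    "Total_CS_traffic_Erl",
    "Total_Data_Traffic",
    "Max_Used_CE_s_ratio_Flexi_R2",
]


def missing_kpis(csv_path: str) -> list[str]:
    """Return required KPI columns absent from the export (hypothetical helper)."""
    header = pd.read_csv(csv_path, delimiter=";", nrows=0)  # read column names only
    cleaned = kpi_naming_cleaning(header)
    return [k for k in REQUIRED_KPIS if k not in cleaned.columns]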
assets/wbts_capacity.png
ADDED
process_kpi/process_wbts_capacity.py
ADDED
@@ -0,0 +1,422 @@
import pandas as pd


class WbtsCapacity:
    final_results: pd.DataFrame = None


def check_deviation(row: pd.Series, max_diff: float = 3.0, type: str = "") -> str:
    """
    Check if any value in the row deviates more than max_diff from the most common value.

    Args:
        row: Series of values to check for deviation
        max_diff: Maximum allowed difference from the most common value
        type: Type identifier for the deviation message

    Returns:
        A message indicating deviation if found, otherwise an empty string
    """
    numeric_row = row.astype(float)  # Ensure numeric
    mode_series = numeric_row.mode()

    # Safe fallback in case mode is empty
    most_common = mode_series.iloc[0] if not mode_series.empty else numeric_row.iloc[0]

    diffs = abs(numeric_row - most_common)

    if (diffs > max_diff).any():
        return f"{type} Deviation > {max_diff} detected"
    else:
        return ""
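
# Illustrative behaviour (example values are mine, not part of the commit):
# check_deviation(pd.Series([2, 2, 2]), type="bb") returns "" because every
# value equals the mode, while check_deviation(pd.Series([2, 2, 6]), type="bb")
# returns "bb Deviation > 3.0 detected", since 6 differs from the mode (2) by 4.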

def create_daily_date(df: pd.DataFrame) -> pd.DataFrame:
    """
    Create a daily date column from PERIOD_START_TIME and drop unnecessary columns.

    Args:
        df: DataFrame containing PERIOD_START_TIME column

    Returns:
        DataFrame with new date column and unnecessary columns removed
    """
    date_df = df.copy()
    date_df[["mois", "jour", "annee"]] = date_df["PERIOD_START_TIME"].str.split(
        ".", expand=True
    )
    date_df["date"] = date_df["annee"] + "-" + date_df["mois"] + "-" + date_df["jour"]
    # Remove unnecessary columns
    date_df = date_df.drop(["annee", "mois", "jour", "PERIOD_START_TIME"], axis=1)
    return date_df


def kpi_naming_cleaning(df: pd.DataFrame) -> pd.DataFrame:
    """
    Clean KPI column names by replacing special characters and standardizing format.

    Args:
        df: DataFrame with KPI column names to clean

    Returns:
        DataFrame with cleaned column names
    """
    name_df = df.copy()
    name_df.columns = name_df.columns.str.replace("[ /(),-.']", "_", regex=True)
    name_df.columns = name_df.columns.str.replace("___", "_")
    name_df.columns = name_df.columns.str.replace("__", "_")
    name_df.columns = name_df.columns.str.replace("%", "perc")
    name_df.columns = name_df.columns.str.rstrip("_")
    return name_df


def create_wbts_index(df: pd.DataFrame) -> pd.DataFrame:
    """
    Create a custom index by combining date and DN columns.

    Args:
        df: DataFrame containing date and DN columns

    Returns:
        DataFrame with new custom_index column
    """
    wbts_index_df = df.copy()
    wbts_index_df["custom_index"] = wbts_index_df["date"] + "_" + wbts_index_df["DN"]
    return wbts_index_df


def max_used_bb_subunits_analysis(
    df: pd.DataFrame,
    days: int = 7,
    threshold: int = 80,
    number_of_threshold_days: int = 3,
) -> pd.DataFrame:
    """
    Analyze maximum used baseband subunits and identify sites needing upgrades.

    Args:
        df: DataFrame containing baseband utilization data
        days: Number of days to analyze
        threshold: Utilization threshold percentage for flagging
        number_of_threshold_days: Minimum days above threshold to flag for upgrade

    Returns:
        DataFrame with analysis results and upgrade recommendations
    """
    result_df = df.copy()
    last_days_df = result_df.iloc[:, -days:]
    last_days_df = last_days_df.fillna(0)

    result_df["Average_used_bb_ratio"] = last_days_df.mean(axis=1).round(2)
    # Count the number of days above threshold
    result_df["bb_number_of_days_exceeding_threshold"] = last_days_df.apply(
        lambda row: sum(1 for x in row if x >= threshold), axis=1
    )

    # Initialize comment column
    result_df["Average_used_bb_ratio_comment"] = ""

    # Apply condition for upgrade recommendation
    result_df.loc[
        (result_df["bb_number_of_days_exceeding_threshold"] >= number_of_threshold_days)
        & (result_df["Average_used_bb_ratio"] >= threshold),
        "Average_used_bb_ratio_comment",
    ] = "need BB upgrade"

    return result_df


def cell_availability_analysis(df: pd.DataFrame, days: int = 7) -> pd.DataFrame:
    """
    Analyze cell availability and categorize sites based on availability metrics.

    Args:
        df: DataFrame containing cell availability data
        days: Number of days to analyze

    Returns:
        DataFrame with availability analysis and site status comments
    """
    result_df = df.copy().fillna(0)
    last_days_df = result_df.iloc[:, -days:]
    result_df["Average_cell_availability"] = last_days_df.mean(axis=1).round(2)

    # Categorize sites based on availability
    def categorize_availability(x: float) -> str:
        if x == 0 or pd.isnull(x):
            return "Down Site"
        elif 0 < x <= 70:
            return "critical instability"
        elif 70 < x <= 95:
            return "instability"
        else:
            return "Site Ok"

    result_df["availability_comment"] = result_df["Average_cell_availability"].apply(
        categorize_availability
    )

    return result_df


def max_used_ce_analysis(
    df: pd.DataFrame,
    days: int = 7,
    threshold: int = 80,
    number_of_threshold_days: int = 3,
) -> pd.DataFrame:
    """
    Analyze maximum used channel elements and identify sites needing upgrades.

    Args:
        df: DataFrame containing channel element utilization data
        days: Number of days to analyze
        threshold: Utilization threshold percentage for flagging
        number_of_threshold_days: Minimum days above threshold to flag for upgrade

    Returns:
        DataFrame with analysis results and upgrade recommendations
    """
    result_df = df.copy().fillna(0)
    last_days_df = result_df.iloc[:, -days:]

    result_df["Average_used_ce_ratio"] = last_days_df.mean(axis=1).round(2)

    # Count the number of days above threshold
    result_df["ce_number_of_days_exceeding_threshold"] = last_days_df.apply(
        lambda row: sum(1 for x in row if x >= threshold), axis=1
    )

    # Initialize comment column
    result_df["Average_used_ce_ratio_comment"] = ""

    # Apply condition for upgrade recommendation
    result_df.loc[
        (result_df["ce_number_of_days_exceeding_threshold"] >= number_of_threshold_days)
        & (result_df["Average_used_ce_ratio"] >= threshold),
        "Average_used_ce_ratio_comment",
    ] = "need CE upgrade"

    return result_df


def num_bb_subunits_analysis(df: pd.DataFrame, days: int = 3) -> pd.DataFrame:
    """
    Analyze baseband subunit count for deviations.

    Args:
        df: DataFrame containing baseband subunit count data
        days: Number of days to analyze

    Returns:
        DataFrame with deviation analysis comments
    """
    result_df = df.copy()
    last_days_df = result_df.iloc[:, -days:]
    result_df["num_bb_subunits_comment"] = last_days_df.apply(
        lambda row: check_deviation(row, type="bb"), axis=1
    )
    return result_df


def avail_ce_analysis(df: pd.DataFrame, days: int = 7) -> pd.DataFrame:
    """
    Analyze available channel elements for deviations.

    Args:
        df: DataFrame containing available channel element data
        days: Number of days to analyze

    Returns:
        DataFrame with deviation analysis comments
    """
    result_df = df.copy()
    last_days_df = result_df.iloc[:, -days:]
    result_df["avail_ce_comment"] = last_days_df.apply(
        lambda row: check_deviation(row, max_diff=96, type="ce"), axis=1
    )
    return result_df


def combine_comments(df: pd.DataFrame, *columns: str, new_column: str) -> pd.DataFrame:
    """
    Combine comments from multiple columns into one column.

    Args:
        df: DataFrame containing comment columns
        *columns: Variable number of column names containing comments
        new_column: Name for the new combined comments column

    Returns:
        DataFrame with a new column containing combined comments
    """
    result_df = df.copy()
    result_df[new_column] = result_df[list(columns)].apply(
        lambda row: ", ".join([x for x in row if x]), axis=1
    )
    # Trim leading and trailing commas and whitespace
    result_df[new_column] = result_df[new_column].str.replace(
        r"^[,\s]+|[,\s]+$", "", regex=True
    )
    # Replace multiple commas with a single comma
    result_df[new_column] = result_df[new_column].str.replace(
        r",\s*,", ", ", regex=True
    )
    return result_df
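
# Worked example (values are mine, not part of the commit): for a row where
# num_bb_subunits_comment="bb Deviation > 3.0 detected",
# Average_used_bb_ratio_comment="need BB upgrade" and availability_comment="",
# combine_comments joins only the non-empty entries, yielding
# "bb Deviation > 3.0 detected, need BB upgrade".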

def bb_comments_analysis(df: pd.DataFrame) -> pd.DataFrame:
    """
    Combine baseband related comments into a single column.

    Args:
        df: DataFrame containing baseband comment columns

    Returns:
        DataFrame with combined baseband comments
    """
    return combine_comments(
        df,
        "num_bb_subunits_comment",
        "Average_used_bb_ratio_comment",
        "availability_comment",
        new_column="bb_comments",
    )


def ce_comments_analysis(df: pd.DataFrame) -> pd.DataFrame:
    """
    Combine channel element related comments into a single column.

    Args:
        df: DataFrame containing channel element comment columns

    Returns:
        DataFrame with combined channel element comments
    """
    return combine_comments(
        df,
        "avail_ce_comment",
        "Average_used_ce_ratio_comment",
        "availability_comment",
        new_column="ce_comments",
    )


def create_dfs_per_kpi(
    df: pd.DataFrame,
    num_days: int = 7,
    threshold: int = 80,
    number_of_threshold_days: int = 3,
) -> pd.DataFrame:
    """
    Create pivoted DataFrames for each KPI and perform analysis.

    Args:
        df: DataFrame containing KPI data
        num_days: Number of days to analyze
        threshold: Utilization threshold percentage for flagging
        number_of_threshold_days: Minimum days above threshold to flag for upgrade

    Returns:
        DataFrame with combined analysis results
    """
    kpi_columns = df.columns[5:]
    pivoted_kpi_dfs = {}

    # Loop through each KPI and create pivoted DataFrames
    for kpi in kpi_columns:
        temp_df = df[["date", "DN", kpi]].copy()

        # Pivot the dataframe
        pivot_df = temp_df.pivot(index="DN", columns="date", values=kpi)
        pivot_df.columns = pd.MultiIndex.from_product([[kpi], pivot_df.columns])
        pivot_df.columns.names = ["KPI", "Date"]

        # Store in dictionary with KPI name as key
        pivoted_kpi_dfs[kpi] = pivot_df

    # Extract individual KPI DataFrames
    wbts_name_df = pivoted_kpi_dfs["WBTS_name"].iloc[:, 0]
    licensed_ce_df = pivoted_kpi_dfs["LICENSED_R99CE_WBTS_M5008C48"]
    max_used_ce_dl_df = pivoted_kpi_dfs["MAX_USED_CE_R99_DL_M5008C12"]
    max_used_ce_ul_df = pivoted_kpi_dfs["MAX_USED_CE_R99_UL_M5008C15"]
    max_avail_ce_df = pivoted_kpi_dfs["MAX_AVAIL_R99_CE_M5006C0"]
    max_used_bb_subunits_df = pivoted_kpi_dfs["MAX_USED_BB_SUBUNITS_M5008C38"]
    num_bb_subunits_df = pivoted_kpi_dfs["NUM_BB_SUBUNITS_M5008C39"]
    max_bb_sus_util_ratio_df = pivoted_kpi_dfs["Max_BB_SUs_Util_ratio"]
    cell_availability_df = pivoted_kpi_dfs[
        "Cell_Availability_excluding_blocked_by_user_state_BLU"
    ]
    total_cs_traffic_df = pivoted_kpi_dfs["Total_CS_traffic_Erl"]
    total_data_traffic_df = pivoted_kpi_dfs["Total_Data_Traffic"]
    max_used_ce_ratio_flexi_df = pivoted_kpi_dfs["Max_Used_CE_s_ratio_Flexi_R2"]

    # Perform analysis on each KPI DataFrame
    max_bb_sus_util_ratio_df = max_used_bb_subunits_analysis(
        max_bb_sus_util_ratio_df, num_days, threshold, number_of_threshold_days
    )
    cell_availability_df = cell_availability_analysis(cell_availability_df, num_days)
    max_used_ce_ratio_flexi_df = max_used_ce_analysis(
        max_used_ce_ratio_flexi_df, num_days, threshold, number_of_threshold_days
    )
    num_bb_subunits_df = num_bb_subunits_analysis(num_bb_subunits_df, num_days)
    licensed_ce_df = avail_ce_analysis(licensed_ce_df, num_days)

    # Concatenate all DataFrames
    result_df = pd.concat(
        [
            wbts_name_df,
            licensed_ce_df,
            max_used_ce_dl_df,
            max_used_ce_ul_df,
            max_avail_ce_df,
            max_used_bb_subunits_df,
            num_bb_subunits_df,
            max_bb_sus_util_ratio_df,
            cell_availability_df,
            total_cs_traffic_df,
            total_data_traffic_df,
            max_used_ce_ratio_flexi_df,
        ],
        axis=1,
    )

    # Add combined comments analysis
    result_df = bb_comments_analysis(result_df)
    result_df = ce_comments_analysis(result_df)

    return result_df


def load_data(
    filepath: str,
    num_days: int,
    threshold: int,
    number_of_threshold_days: int,
) -> pd.DataFrame:
    """
    Load data from CSV file and perform preprocessing and analysis.

    Args:
        filepath: Path to CSV file or uploaded file object
        num_days: Number of days to analyze
        threshold: Utilization threshold percentage for flagging
        number_of_threshold_days: Minimum days above threshold to flag for upgrade

    Returns:
        DataFrame with processed and analyzed data
    """
    df = pd.read_csv(filepath, delimiter=";")

    # Preprocess data
    df = create_daily_date(df)
    df = kpi_naming_cleaning(df)

    # Reorder columns for better organization
    df = df[["date"] + [col for col in df.columns if col not in ["date"]]]
    df = df[[col for col in df.columns if col != "WBTS_name"] + ["WBTS_name"]]

    # Perform KPI analysis
    df = create_dfs_per_kpi(df, num_days, threshold, number_of_threshold_days)
    return df
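Note: `load_data` also works outside Streamlit. A minimal batch sketch mirroring what the app's "Analyze Data" button does; the CSV filename is a placeholder:

from process_kpi.process_wbts_capacity import load_data
from utils.convert_to_excel import convert_dfs

# "wbts_capacity_report.csv" is a placeholder path to a semicolon-delimited export.
df = load_data(
    "wbts_capacity_report.csv", num_days=7, threshold=80, number_of_threshold_days=3
)

# convert_dfs returns the xlsx workbook as bytes, ready to write to disk.
with open("WBTS_Analysis_Report.xlsx", "wb") as f:
    f.write(convert_dfs([df], ["WBTS_Analysis"]))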
utils/convert_to_excel.py
CHANGED
@@ -15,7 +15,7 @@ def convert_dfs(dfs: list[pd.DataFrame], sheet_names: list[str]) -> bytes:
     # Write the dataframes to the BytesIO object
     with pd.ExcelWriter(bytes_io, engine="xlsxwriter") as writer:
         for df, sheet_name in zip(dfs, sheet_names):
-            df.to_excel(writer, sheet_name=sheet_name, index=False)
+            df.to_excel(writer, sheet_name=sheet_name, index=True)

     # Get the bytes data
     bytes_data = bytes_io.getvalue()
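Note on this one-line change: `create_dfs_per_kpi` pivots with index="DN", so the WBTS identifier ends up in the DataFrame index rather than a column. With index=False the exported workbook would have no DN column at all, which is presumably why the commit switches to index=True.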