Spaces:

DavMelchi
/

db_query

Running

App Files Files Community

DavMelchi committed on Jun 4

Commit

4d0848d

1 Parent(s): 5113cfc

Add LCG analysis Part1

Browse files

Files changed (4) hide show

app.py +4 -0
apps/kpi_analysis/lcg_analysis.py +202 -0
process_kpi/process_lcg_capacity.py +286 -0
utils/kpi_analysis_utils.py +28 -0

app.py CHANGED Viewed

@@ -146,6 +146,10 @@ if check_password():
                 "apps/kpi_analysis/wcel_capacity.py",
                 title=" 📊 WCEL Capacity Analysis",
             ),
             st.Page(
                 "apps/kpi_analysis/lte_capacity.py",
                 title=" 📊 LTE Capacity Analysis",

                 "apps/kpi_analysis/wcel_capacity.py",
                 title=" 📊 WCEL Capacity Analysis",
             ),
+            st.Page(
+                "apps/kpi_analysis/lcg_analysis.py",
+                title=" 📊 LCG Capacity Analysis",
+            ),
             st.Page(
                 "apps/kpi_analysis/lte_capacity.py",
                 title=" 📊 LTE Capacity Analysis",

apps/kpi_analysis/lcg_analysis.py ADDED Viewed

	@@ -0,0 +1,202 @@

+import pandas as pd
+import plotly.express as px
+import streamlit as st
+from process_kpi.process_lcg_capacity import load_and_process_lcg_data
+from utils.convert_to_excel import convert_dfs
class LcgCapacity:
    """Namespace holding the most recently generated report for download."""

    # Output of convert_dfs() for the latest analysis; None until one has run.
    final_results = None


# Streamlit UI
st.title(" 📊 LCG Analysis")
doc_col, image_col = st.columns(2)
with doc_col:
    st.write(
        """This app allows you to analyze the LCG of a network.
        It provides insights into the utilization of LCG resources,
        helping you identify potential capacity issues and plan for upgrades.
        The report should be run with a minimum of 3 days of data.
        - Daily Aggregated
        - LCG level
        - Exported in CSV format.
        """
    )
with image_col:
    st.image("./assets/wcel_capacity.png", width=400)
uploaded_file = st.file_uploader("Upload LCG report in CSV format", type="csv")

# Two rows of three columns; only the first four slots hold analysis parameters.
param_col1, param_col2, param_col3 = st.columns(3)
param_col4, param_col5, param_col6 = st.columns(3)

if uploaded_file is not None:
    LcgCapacity.final_results = None
    with param_col1:
        num_last_days = st.number_input(
            "Number of days for analysis",
            min_value=3,
            max_value=30,
            value=7,
        )
    with param_col2:
        num_threshold_days = st.number_input(
            "Number of days for threshold",
            min_value=1,
            max_value=30,
            value=2,
        )
    with param_col3:
        lcg_utilization_threshold = st.number_input(
            "LCG Utilization Threshold (%)",
            min_value=0,
            max_value=100,
            value=80,
        )
    with param_col4:
        difference_between_lcgs = st.number_input(
            "Difference between LCgs (%)",
            min_value=0,
            max_value=100,
            value=20,
        )

    if st.button("Analyze Data", type="primary"):
        # Input validation is kept outside the try block below so that a
        # validation stop is never confused with a processing failure.
        if num_threshold_days > num_last_days:
            st.warning(
                "Number of threshold days cannot be greater than number of analysis days"
            )
            st.stop()
        if num_last_days < 3:
            st.warning(
                "Analysis period should be at least 3 days for meaningful results"
            )
            st.stop()
        if lcg_utilization_threshold <= 0 or lcg_utilization_threshold > 100:
            st.warning("LCG utilization threshold must be between 1 and 100")
            st.stop()

        try:
            with st.spinner("Processing data..."):
                results = load_and_process_lcg_data(
                    uploaded_file,
                    num_last_days,
                    num_threshold_days,
                    lcg_utilization_threshold,
                    difference_between_lcgs,
                )
        except Exception as e:
            st.error(f"An error occurred while processing the data: {e}")
            st.stop()

        # Everything below needs the analysis frames, so bail out early
        # instead of failing later on an undefined variable.
        if results is None:
            st.error("The analysis returned no results.")
            st.stop()

        lcg_analysis_df = results[0]
        kpi_df = results[1]
        LcgCapacity.final_results = convert_dfs(
            [lcg_analysis_df, kpi_df], ["lcg_analysis", "kpi"]
        )
        st.download_button(
            on_click="ignore",
            type="primary",
            label="Download the Analysis Report",
            data=LcgCapacity.final_results,
            file_name="LCG_Capacity_Report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )
        st.write(lcg_analysis_df)

        # Table and pie chart of the "final_comments" distribution
        st.markdown("***")
        st.markdown(":blue[**Final comment distribution**]")
        final_comments_df = (
            lcg_analysis_df.groupby("final_comments")
            .size()
            .reset_index(name="count")
            .sort_values(by="count", ascending=False)
        )
        final_comments_df["percent"] = (
            final_comments_df["count"] / final_comments_df["count"].sum()
        ) * 100
        final_comments_col1, final_comments_col2 = st.columns((1, 3))
        with final_comments_col1:
            st.write(final_comments_df)
        with final_comments_col2:
            fig = px.pie(
                final_comments_df,
                names="final_comments",
                values="count",
                hover_name="final_comments",
                hover_data=["count", "percent"],
                title="Final Comments Distribution",
            )
            fig.update_layout(height=600)
            # customdata follows hover_data order, so index 1 is "percent".
            fig.update_traces(
                texttemplate="<b>%{label}</b><br> %{value}  <b>(%{customdata[1]:.1f}%)</b>",
                textfont_size=15,
                textposition="outside",
            )
            st.plotly_chart(fig)

        # Table and bar chart of the "final_comments" distribution per Region
        st.markdown("***")
        st.markdown(":blue[**Final comment distribution per Region**]")
        region_comments_df = (
            lcg_analysis_df.groupby(["Region", "final_comments"])
            .size()
            .reset_index(name="count")
            .sort_values(by="count", ascending=False)
        )
        region_comments_col1, region_comments_col2 = st.columns((1, 3))
        with region_comments_col1:
            st.write(region_comments_df)
        with region_comments_col2:
            fig = px.bar(
                region_comments_df,
                x="Region",
                y="count",
                color="final_comments",
                title="Final Comments Distribution per Region",
                text="count",
            )
            fig.update_traces(textposition="outside")
            fig.update_layout(height=600)
            st.plotly_chart(fig)

        # Map of sites coloured by final comment
        st.markdown("***")
        st.markdown(":blue[**Final comments distribution**]")
        map_columns = ["code", "Longitude", "Latitude", "final_comments"]
        # Sites without coordinates cannot be plotted. assign() builds a new
        # frame, so the constant marker size never touches lcg_analysis_df.
        final_comments_map_df = (
            lcg_analysis_df[map_columns].dropna(subset=map_columns).assign(size=20)
        )
        fig = px.scatter_map(
            final_comments_map_df,
            lat="Latitude",
            lon="Longitude",
            color="final_comments",
            size="size",
            zoom=10,
            height=600,
            title="Final Comments Distribution",
            hover_data={
                "code": True,
                "final_comments": True,
            },
            hover_name="code",
            # scatter_map draws on the "map" subplot, so the tile style must be
            # given as map_style; mapbox_style only affects scatter_mapbox.
            map_style="open-street-map",
        )
        st.plotly_chart(fig)

process_kpi/process_lcg_capacity.py ADDED Viewed

	@@ -0,0 +1,286 @@

+import numpy as np
+import pandas as pd
+from utils.kpi_analysis_utils import (
+    analyze_lcg_utilization,
+    combine_comments,
+    create_daily_date,
+    create_dfs_per_kpi,
+    kpi_naming_cleaning,
+)
+from utils.utils_vars import get_physical_db
# Maps the combined "<lcg comments>, <number of LCGs>" string built in
# lcg_kpi_analysis to the recommendation shown to the user. Strings without an
# entry are left unchanged by the lookup.
lcg_comments_mapping = {
    "2": "No Congestion",
    "1": "No Congestion",
    "lcg1 exceeded threshold, lcg2 exceeded threshold, 2": "Need BB SU upgrade",
    "lcg1 exceeded threshold, 2": "Need LCG balancing",
    "lcg2 exceeded threshold, 2": "Need LCG balancing",
    # Single-LCG sites that exceed the threshold.
    # NOTE(review): every other key uses a single space after the comma, so the
    # single-space spellings are added; the original double-space key is kept
    # for compatibility. Confirm against the separator used by combine_comments.
    "lcg1 exceeded threshold, 1": "Need BB SU upgrade",
    "lcg1 exceeded threshold,  1": "Need BB SU upgrade",
    "lcg2 exceeded threshold, 1": "Need BB SU upgrade",
}

# Columns required from the uploaded report after name cleaning.
KPI_COLUMNS = [
    "date",
    "WBTS_name",
    "lcg_id",
    "BB_SU_LCG_MAX_R",
]

# Top-level columns kept in the detailed KPI sheet, in output order.
LCG_ANALYSIS_COLUMNS = [
    "WBTS_name",
    "lcg1_utilisation",
    "avg_lcg1",
    "max_lcg1",
    "number_of_days_with_lcg1_exceeded",
    "lcg1_comment",
    "lcg2_utilisation",
    "avg_lcg2",
    "max_lcg2",
    "number_of_days_with_lcg2_exceeded",
    "lcg2_comment",
    "difference_between_lcgs",
    "difference_between_lcgs_comment",
    "lcg_comment",
    "number_of_lcg",
    "final_comments",
]
def lcg_kpi_analysis(
    df,
    num_last_days,
    num_threshold_days,
    lcg_utilization_threshold,
    difference_between_lcgs,
) -> list:
    """
    Analyze LCG capacity data.

    Args:
        df: DataFrame containing LCG capacity data (the KPI_COLUMNS columns)
        num_last_days: Number of days for analysis
        num_threshold_days: Minimum days above threshold to flag an LCG
        lcg_utilization_threshold: Utilization threshold percentage for flagging
        difference_between_lcgs: Difference between LCGs for flagging

    Returns:
        A two-element list ``[lcg_analysis_df, kpi_df]``: the per-site summary
        enriched with code, Region, Longitude and Latitude, followed by the
        detailed KPI table restricted to LCG_ANALYSIS_COLUMNS.
    """
    pivot_options = {
        "pivot_date_column": "date",
        "pivot_name_column": "WBTS_name",
        "kpi_columns_from": 2,
    }
    lcg_slices = {
        "lcg1": df[df["lcg_id"] == 1],
        "lcg2": df[df["lcg_id"] == 2],
    }

    # BB_SU_LCG_MAX_R to have all site with LCG 1 and/ or LCG 2
    BB_SU_LCG_MAX_R_df = create_dfs_per_kpi(df=df, **pivot_options)["BB_SU_LCG_MAX_R"]

    # Pivot each LCG separately, then add its avg/max/exceeded-days/comment columns
    analysed = {}
    for label, lcg_df in lcg_slices.items():
        pivoted = create_dfs_per_kpi(df=lcg_df, **pivot_options)["BB_SU_LCG_MAX_R"]
        pivoted = pivoted.rename(columns={"BB_SU_LCG_MAX_R": f"{label}_utilisation"})
        analysed[label] = analyze_lcg_utilization(
            df=pivoted,
            number_of_kpi_days=num_last_days,
            number_of_threshold_days=num_threshold_days,
            kpi_threshold=lcg_utilization_threshold,
            kpi_column_name=label,
        )

    kpi_df = pd.concat(
        [BB_SU_LCG_MAX_R_df, analysed["lcg1"], analysed["lcg2"]],
        axis=1,
    ).reset_index()

    # Absolute gap between the two averages. Vectorised subtraction yields NaN
    # whenever either LCG is missing; the builtin max()/min() pair gave 0 or
    # NaN depending on which side held the NaN.
    kpi_df["difference_between_lcgs"] = (
        kpi_df["avg_lcg1"] - kpi_df["avg_lcg2"]
    ).abs()
    # flag if difference between lcg1 and lcg2 is above threshold
    kpi_df["difference_between_lcgs_comment"] = np.where(
        kpi_df["difference_between_lcgs"] > difference_between_lcgs,
        "difference between lcgs exceeded threshold",
        None,
    )

    # Combine the per-LCG comments (the difference comment is intentionally
    # left out of the combined text)
    kpi_df = combine_comments(
        kpi_df,
        "lcg1_comment",
        "lcg2_comment",
        new_column="lcg_comment",
    )
    # Strip stringified missing values ("nan") out of the combined comment
    kpi_df["lcg_comment"] = kpi_df["lcg_comment"].replace("nan", None)
    kpi_df["lcg_comment"] = (
        kpi_df["lcg_comment"].str.replace(r"\bnan\b,?\s?", "", regex=True).str.strip()
    )

    # An LCG counts as present when an average could be computed for it
    has_lcg1 = kpi_df["avg_lcg1"].notna()
    has_lcg2 = kpi_df["avg_lcg2"].notna()
    kpi_df["number_of_lcg"] = np.where(
        has_lcg1 & has_lcg2,
        2,
        np.where(has_lcg1 | has_lcg2, 1, 0),
    )

    # Combined comment + LCG count is the lookup key for the recommendation
    kpi_df = combine_comments(
        kpi_df,
        "lcg_comment",
        "number_of_lcg",
        new_column="final_comments",
    )
    kpi_df["final_comments"] = kpi_df["final_comments"].apply(
        lambda x: lcg_comments_mapping.get(x, x)
    )
    kpi_df = kpi_df[LCG_ANALYSIS_COLUMNS]

    summary_columns = [
        "WBTS_name",
        "avg_lcg1",
        "max_lcg1",
        "number_of_days_with_lcg1_exceeded",
        "lcg1_comment",
        "avg_lcg2",
        "max_lcg2",
        "number_of_days_with_lcg2_exceeded",
        "lcg2_comment",
        "difference_between_lcgs",
        "final_comments",
    ]
    lcg_analysis_df = kpi_df[summary_columns].copy()
    lcg_analysis_df = lcg_analysis_df.droplevel(level=1, axis=1)

    # Remove rows whose WBTS_name is shorter than 5 characters; copy so the
    # column assignments below act on an independent frame.
    lcg_analysis_df = lcg_analysis_df[
        lcg_analysis_df["WBTS_name"].str.len() >= 5
    ].copy()

    # Site code is the first "_"-separated token, Region the second
    name_parts = lcg_analysis_df["WBTS_name"].str.split("_")
    lcg_analysis_df["code"] = (
        pd.to_numeric(name_parts.str[0], errors="coerce").fillna(0).astype(int)
    )
    # .str.get(1) is NaN when the name has no second token, so those rows get
    # "UNKNOWN" instead of an empty Region.
    lcg_analysis_df["Region"] = name_parts.str.get(1).fillna("UNKNOWN")

    # move code and Region to the first columns
    lcg_analysis_df = lcg_analysis_df[
        ["code", "Region"]
        + [col for col in lcg_analysis_df if col != "code" and col != "Region"]
    ]

    # Load physical database; work on a copy so the object returned by
    # get_physical_db() is never modified.
    physical_db: pd.DataFrame = get_physical_db().copy()
    # Convert code_sector to a numeric site code
    physical_db["code"] = pd.to_numeric(
        physical_db["Code_Sector"].str.split("_").str[0], errors="coerce"
    )
    # Drop entries whose code cannot be parsed and de-duplicate AFTER the
    # numeric conversion, so the merge key is unique and unrelated sites are
    # never joined through a shared fallback code.
    physical_db = physical_db.dropna(subset=["code"])
    physical_db["code"] = physical_db["code"].astype(int)
    physical_db = physical_db.drop_duplicates(subset="code")
    # keep only code and longitude and latitude
    physical_db = physical_db[["code", "Longitude", "Latitude"]]

    lcg_analysis_df = pd.merge(
        lcg_analysis_df,
        physical_db,
        on="code",
        how="left",
    )
    return [lcg_analysis_df, kpi_df]
def load_and_process_lcg_data(
    uploaded_file,
    num_last_days,
    num_threshold_days,
    lcg_utilization_threshold,
    difference_between_lcgs,
) -> list:
    """Load and process data for LCG capacity analysis.

    Args:
        uploaded_file: Path or file-like object holding the ";"-delimited report
        num_last_days: Number of days for analysis
        num_threshold_days: Minimum days above threshold to flag an LCG
        lcg_utilization_threshold: Utilization threshold percentage for flagging
        difference_between_lcgs: Difference between LCGs for flagging

    Returns:
        The value returned by lcg_kpi_analysis for the cleaned report.

    Raises:
        ValueError: If the file holds no data rows or lacks a required column.
        Exception: Any other error raised while reading or processing the file
            is logged and re-raised unchanged.
    """
    # Local import: this module does not import streamlit, so the previous
    # st.error() call in the handler raised NameError and hid the real failure.
    import logging

    try:
        # Load data
        df = pd.read_csv(uploaded_file, delimiter=";")
        if df.empty:
            raise ValueError("Uploaded file is empty")
        df = kpi_naming_cleaning(df)
        df = create_daily_date(df)
        # Validate required columns
        missing_cols = [col for col in KPI_COLUMNS if col not in df.columns]
        if missing_cols:
            raise ValueError(f"Missing required columns: {', '.join(missing_cols)}")
        df = df[KPI_COLUMNS]
        # Process the data
        return lcg_kpi_analysis(
            df,
            num_last_days,
            num_threshold_days,
            lcg_utilization_threshold,
            difference_between_lcgs,
        )
    except Exception:
        # Log with traceback and let the caller decide how to present the error
        logging.getLogger(__name__).exception("Error processing LCG data")
        raise

utils/kpi_analysis_utils.py CHANGED Viewed

@@ -636,3 +636,31 @@ def analyze_fails_kpi(
         None,
     )
     return result_df

         None,
     )
     return result_df
def analyze_lcg_utilization(
    df: pd.DataFrame,
    number_of_kpi_days: int,
    number_of_threshold_days: int,
    kpi_threshold: int,
    kpi_column_name: str,
) -> pd.DataFrame:
    """Summarise utilisation over the trailing columns of a wide KPI frame.

    The last ``number_of_kpi_days`` columns of ``df`` are treated as the
    analysis window. Four columns are appended to a copy of ``df``:
    ``avg_<name>`` (row mean rounded to 2 decimals), ``max_<name>`` (row max),
    ``number_of_days_with_<name>_exceeded`` (count of window values that are
    >= ``kpi_threshold``; missing values never count) and ``<name>_comment``
    ("<name> exceeded threshold" when that count reaches
    ``number_of_threshold_days``, otherwise None).

    Args:
        df: Wide frame with one row per entity and one column per day.
        number_of_kpi_days: Size of the trailing window, at least 1.
        number_of_threshold_days: Minimum number of exceeded days to flag a row.
        kpi_threshold: Value a day must reach to count as exceeded.
        kpi_column_name: Name fragment used to build the new column names.

    Returns:
        A copy of ``df`` with the four summary columns appended.

    Raises:
        ValueError: If ``number_of_kpi_days`` is smaller than 1.
    """
    # iloc[:, -0:] would silently select every column, so reject it explicitly.
    if number_of_kpi_days < 1:
        raise ValueError("number_of_kpi_days must be at least 1")

    result_df: pd.DataFrame = df.copy()
    last_days_df: pd.DataFrame = result_df.iloc[:, -number_of_kpi_days:]

    # Vectorised count of days at or above the threshold; comparisons with NaN
    # are False, so days without data are not counted.
    exceeded_days: pd.Series = (last_days_df >= kpi_threshold).sum(axis=1)

    result_df[f"avg_{kpi_column_name}"] = last_days_df.mean(axis=1).round(2)
    result_df[f"max_{kpi_column_name}"] = last_days_df.max(axis=1)
    result_df[f"number_of_days_with_{kpi_column_name}_exceeded"] = exceeded_days
    result_df[f"{kpi_column_name}_comment"] = np.where(
        exceeded_days >= number_of_threshold_days,
        f"{kpi_column_name} exceeded threshold",
        None,
    )
    return result_df