Spaces:

DavMelchi
/

db_query

Sleeping

App Files Files Community

DavMelchi commited on May 20

Commit

027f03b

1 Parent(s): 81c766f

LTE capacity 1st commit

Browse files

Files changed (9) hide show

app.py +8 -0
apps/kpi_analysis/gsm_capacity.py +25 -2
apps/kpi_analysis/lte_capacity.py +207 -0
assets/lte_capacity.png +0 -0
documentations/lte_capacity_docs.py +221 -0
process_kpi/lte_kpi_requirements.md +46 -0
process_kpi/process_lte_capacity.py +420 -0
utils/convert_to_excel.py +30 -8
utils/kpi_analysis_utils.py +47 -0

app.py CHANGED Viewed

@@ -142,6 +142,10 @@ if check_password():
                 "apps/kpi_analysis/gsm_capacity.py",
                 title=" 📊 GSM Capacity Analysis",
             ),
         ],
         "Documentations": [
             st.Page(
@@ -154,6 +158,10 @@ if check_password():
                 "documentations/gsm_capacity_docs.py",
                 title="📘GSM Capacity Documentation",
             ),
         ],
     }

                 "apps/kpi_analysis/gsm_capacity.py",
                 title=" 📊 GSM Capacity Analysis",
             ),
+            st.Page(
+                "apps/kpi_analysis/lte_capacity.py",
+                title=" 📊 LTE Capacity Analysis",
+            ),
         ],
         "Documentations": [
             st.Page(
                 "documentations/gsm_capacity_docs.py",
                 title="📘GSM Capacity Documentation",
             ),
+            st.Page(
+                "documentations/lte_capacity_docs.py",
+                title="📘LTE Capacity Documentation",
+            ),
         ],
     }

apps/kpi_analysis/gsm_capacity.py CHANGED Viewed

@@ -2,11 +2,11 @@ import pandas as pd
 import plotly.express as px
 import streamlit as st
-from process_kpi.process_gsm_capacity import GsmCapacity, analyze_gsm_data
 from utils.convert_to_excel import (  # Import convert_dfs from the appropriate module
-    convert_dfs,
     convert_gsm_dfs,
 )
 st.title(" 📊 GSM Capacity Analysis")
 doc_col, image_col = st.columns(2)
@@ -241,3 +241,26 @@ if (
                 textposition="outside",
             )
             st.plotly_chart(fig, use_container_width=True)

 import plotly.express as px
 import streamlit as st
+from process_kpi.process_gsm_capacity import analyze_gsm_data
 from utils.convert_to_excel import (  # Import convert_dfs from the appropriate module
     convert_gsm_dfs,
 )
+from utils.kpi_analysis_utils import GsmCapacity
 st.title(" 📊 GSM Capacity Analysis")
 doc_col, image_col = st.columns(2)
                 textposition="outside",
             )
             st.plotly_chart(fig, use_container_width=True)
+        # Map of the busy-hour TCH call blocking: one marker per row that has
+        # both a blocking value and coordinates.
+        st.markdown("***")
+        st.markdown(":blue[**Max TCH Call Blocking BH distribution**]")
+        # Filter once and derive the colours from the SAME filtered frame. The
+        # colour list needs exactly one entry per plotted row; building it from
+        # a frame filtered on the KPI only breaks as soon as a row has a KPI
+        # value but no Latitude/Longitude.
+        blocking_map_df = gsm_analysis_df.dropna(
+            subset=["max_tch_call_blocking_bh", "Latitude", "Longitude"]
+        )
+        blocking_status = [
+            "red" if val > tch_blocking_threshold else "green"
+            for val in blocking_map_df["max_tch_call_blocking_bh"]
+        ]
+        fig = px.scatter_mapbox(
+            blocking_map_df,
+            lat="Latitude",
+            lon="Longitude",
+            color=blocking_status,
+            # Without an explicit map the labels are plain categories and get
+            # the default palette instead of being drawn in red/green.
+            color_discrete_map={"red": "red", "green": "green"},
+            size="max_tch_call_blocking_bh",
+            zoom=10,
+            height=600,
+            title="Max TCH Call Blocking BH distribution",
+        )
+        fig.update_layout(mapbox_style="open-street-map")
+        st.plotly_chart(fig, use_container_width=True)

apps/kpi_analysis/lte_capacity.py ADDED Viewed

	@@ -0,0 +1,207 @@

+import pandas as pd
+import plotly.express as px
+import streamlit as st
+from process_kpi.process_lte_capacity import process_lte_bh_report
+from utils.convert_to_excel import convert_lte_analysis_dfs
+from utils.kpi_analysis_utils import LteCapacity
+# --- Page header: short description next to the illustration ---
+st.title("📊 LTE Capacity Analysis")
+intro_col, logo_col = st.columns(2)
+with intro_col:
+    st.write(
+        """
+        The report analyzes LTE capacity based on:
+        - Dump file required
+        - BH Cell level KPI report in CSV format
+        - Availability and PRB usage thresholds
+        """
+    )
+with logo_col:
+    st.image("./assets/lte_capacity.png", width=250)
+# --- Input files: network dump and busy-hour KPI report, side by side ---
+dump_col, report_col = st.columns(2)
+with dump_col:
+    uploaded_dump = st.file_uploader(
+        label="Upload Dump file in xlsb format", type="xlsb"
+    )
+with report_col:
+    uploaded_bh_report = st.file_uploader(
+        label="Upload LTE Busy Hour Report in CSV format", type="csv"
+    )
+# --- Analysis parameters: two rows of two inputs, then one full-width input ---
+days_col, threshold_days_col = st.columns(2)
+availability_col, prb_usage_col = st.columns(2)
+with days_col:
+    num_last_days = st.number_input(
+        label="Number of last days for analysis", min_value=1, value=7
+    )
+with threshold_days_col:
+    num_threshold_days = st.number_input(
+        label="Number of days for threshold", min_value=1, value=3
+    )
+with availability_col:
+    availability_threshold = st.number_input(
+        label="Availability threshold (%)",
+        min_value=0.0,
+        max_value=100.0,
+        value=95.0,
+    )
+with prb_usage_col:
+    prb_usage_threshold = st.number_input(
+        label="PRB usage threshold (%)",
+        min_value=0.0,
+        max_value=100.0,
+        value=80.0,
+    )
+prb_diff_between_cells = st.number_input(
+    label="Maximum PRB usage difference between cells (%)",
+    min_value=0.0,
+    max_value=100.0,
+    value=20.0,
+)
+def _count_rows_by(frame: pd.DataFrame, keys) -> pd.DataFrame:
+    """Return the number of rows of ``frame`` per ``keys`` group, largest first."""
+    return (
+        frame.groupby(keys)
+        .size()
+        .reset_index(name="count")
+        .sort_values(by="count", ascending=False)
+    )
+def _pie_by_final_comments(counts_df: pd.DataFrame, title: str):
+    """Build the pie chart of ``count`` per ``final_comments`` value."""
+    return px.pie(
+        counts_df,
+        names="final_comments",
+        values="count",
+        hover_name="final_comments",
+        hover_data=["count"],
+        title=title,
+    )
+def _bar_by_region(counts_df: pd.DataFrame, title: str, color=None):
+    """Build the bar chart of ``count`` per ``Region``, optionally coloured."""
+    return px.bar(counts_df, x="Region", y="count", color=color, title=title)
+def _render_distribution(title, counts_df, build_figure, text_template) -> None:
+    """Render one report section: separator, heading, counts table and chart."""
+    st.markdown("***")
+    st.markdown(f":blue[**{title}**]")
+    table_col, chart_col = st.columns((1, 3))
+    with table_col:
+        st.write(counts_df)
+    with chart_col:
+        fig = build_figure(counts_df, title)
+        fig.update_layout(height=600)
+        fig.update_traces(
+            texttemplate=text_template, textfont_size=15, textposition="outside"
+        )
+        st.plotly_chart(fig, use_container_width=True)
+# Run the analysis only once both files are uploaded and the user asks for it.
+if uploaded_dump is not None and uploaded_bh_report is not None:
+    if st.button("Analyze Data", type="primary"):
+        with st.spinner("Processing data..."):
+            results = process_lte_bh_report(
+                dump_path=uploaded_dump,
+                bh_report_path=uploaded_bh_report,
+                num_last_days=num_last_days,
+                num_threshold_days=num_threshold_days,
+                availability_threshold=availability_threshold,
+                prb_usage_threshold=prb_usage_threshold,
+                prb_diff_between_cells_threshold=prb_diff_between_cells,
+            )
+        if results is not None:
+            bh_report: pd.DataFrame = results[0]
+            lte_analysis_df: pd.DataFrame = results[1]
+            # Excel export: analysis sheet first, raw busy-hour report second.
+            LteCapacity.final_results = convert_lte_analysis_dfs(
+                [lte_analysis_df, bh_report], ["LTE_Analysis", "LTE_BH_Report"]
+            )
+            st.download_button(
+                on_click="ignore",
+                type="primary",
+                label="Download the Analysis Report",
+                data=LteCapacity.final_results,
+                file_name="LTE_Analysis_Report.xlsx",
+                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+            )
+            st.write(lte_analysis_df)
+            # Rows with at least one congested cell; computed once and shared
+            # by the three "congested" sections below.
+            congested_df = lte_analysis_df[lte_analysis_df["num_congested_cells"] > 0]
+            pie_text = "%{label}: %{value}"
+            bar_text = "%{value}"
+            # 1. Final comments over every analysed row.
+            _render_distribution(
+                "Final comment distribution",
+                _count_rows_by(lte_analysis_df, "final_comments"),
+                _pie_by_final_comments,
+                pie_text,
+            )
+            # 2. Final comments restricted to congested rows.
+            _render_distribution(
+                "Congested cells distribution",
+                _count_rows_by(congested_df, "final_comments"),
+                _pie_by_final_comments,
+                pie_text,
+            )
+            # 3. Congested rows per Region, split by final comment.
+            _render_distribution(
+                "Congested cells distribution per Region",
+                _count_rows_by(congested_df, ["Region", "final_comments"]),
+                lambda counts_df, title: _bar_by_region(
+                    counts_df, title, color="final_comments"
+                ),
+                bar_text,
+            )
+            # 4. Congested rows per Region, all comments together.
+            _render_distribution(
+                "Congested cells distribution per Region groupby Region",
+                _count_rows_by(congested_df, ["Region"]),
+                _bar_by_region,
+                bar_text,
+            )

assets/lte_capacity.png ADDED Viewed

documentations/lte_capacity_docs.py ADDED Viewed

	@@ -0,0 +1,221 @@

+import streamlit as st
+st.markdown(
+    """
+# LTE Capacity Analysis Documentation
+This documentation provides a technical and practical reference for the LTE Capacity Analysis application, detailing input/output columns, processing workflow, and key metrics as implemented in:
+- apps/kpi_analysis/lte_capacity.py
+- process_kpi/process_lte_capacity.py
+- utils/kpi_analysis_utils.py
+---
+## 1. Input Files and Expected Columns
+### a. Dump File (XLSB)
+- Contains network configuration and site data.
+- Expected columns (see `LTE_DATABASE_COLUMNS` in `process_lte_capacity.py`):
+    - code: Unique site identifier
+    - Region: Geographical region of the site
+    - site_config_band: Configured frequency bands at the site
+    - final_name: Formatted cell name (renamed to `LNCEL_name` during processing)
+### b. Busy Hour (BH) KPI Report (CSV)
+- Contains performance metrics for LTE cells during busy hours.
+- Key columns (see `KPI_COLUMNS` in `process_lte_capacity.py`):
+    - date: Timestamp of the measurement
+    - LNCEL_name: Cell identifier (format: SiteName_LBand_CellID)
+    - Cell_Avail_excl_BLU: Cell availability percentage excluding BLU
+    - E_UTRAN_Avg_PRB_usage_per_TTI_DL: Average Physical Resource Block usage in downlink
+---
+## 2. Output Columns and Their Meaning
+### a. LTE Analysis Output (`LTE_ANALYSIS_COLUMNS`):
+- **Site Information**:
+  - code: Site identifier
+  - Region: Geographical region
+  - site_config_band: Configured frequency bands
+- **Cell Configuration**:
+  - LNCEL_name_l800: Cell name for 800MHz band
+  - LNCEL_name_l1800: Cell name for 1800MHz band
+  - LNCEL_name_l2300: Cell name for 2300MHz band
+  - LNCEL_name_l2600: Cell name for 2600MHz band
+  - LNCEL_name_l1800s: Cell name for 1800MHz supplementary band
+- **PRB Usage Metrics**:
+  - avg_prb_usage_bh_l800: Average PRB usage for 800MHz band
+  - avg_prb_usage_bh_l1800: Average PRB usage for 1800MHz band
+  - avg_prb_usage_bh_l2300: Average PRB usage for 2300MHz band
+  - avg_prb_usage_bh_l2600: Average PRB usage for 2600MHz band
+  - avg_prb_usage_bh_l1800s: Average PRB usage for 1800s band
+- **Cell Status**:
+  - num_congested_cells: Number of cells exceeding PRB usage threshold
+  - num_cells: Total number of cells at the site
+  - num_cell_with_kpi: Number of cells with valid KPI data
+  - num_down_or_no_kpi_cells: Number of down or non-reporting cells
+  - prb_diff_between_cells: Maximum PRB usage difference between cells at the site
+  - load_balance_required: Flag indicating if load balancing is needed
+- **Analysis Results**:
+  - congestion_comment: Comments on cell congestion status
+  - final_comments: Summary of site status and recommendations
+---
+## 3. Processing Workflow
+1. **Data Loading and Validation**:
+   - Load and validate the dump file and BH report
+   - Check for required columns and data integrity
+2. **Data Processing**:
+   - Parse site and cell information from the dump file
+   - Process KPI data from the BH report
+   - Calculate average PRB usage per cell and band
+3. **Analysis**:
+   - Identify congested cells based on PRB usage threshold
+   - Calculate load balancing requirements
+   - Determine site-level congestion status
+   - Generate recommendations for capacity expansion
+4. **Reporting**:
+   - Combine all analysis results into a comprehensive DataFrame
+   - Generate final comments and recommendations
+   - Prepare data for visualization and export
+---
+## 4. Key Functions
+### a. `process_lte_bh_report` (in `process_lte_capacity.py`)
+- Main function that orchestrates the LTE capacity analysis
+- Parameters:
+  - dump_path: Path to the site dump file
+  - bh_report_path: Path to the Busy Hour KPI report
+  - num_last_days: Number of days to analyze
+  - num_threshold_days: Number of days for threshold calculations
+  - availability_threshold: Minimum required cell availability (%)
+  - prb_usage_threshold: Threshold for PRB usage (%)
+  - prb_diff_between_cells_threshold: Maximum allowed PRB difference between cells (%)
+### b. `lte_analysis_logic` (in `process_lte_capacity.py`)
+- Core logic for analyzing LTE capacity
+- Identifies congested cells and calculates load balancing requirements
+- Generates comments and recommendations
+### c. `analyze_prb_usage` (in `kpi_analysis_utils.py`)
+- Analyzes PRB usage patterns
+- Identifies cells with high PRB utilization
+- Generates comments on congestion status
+### d. `cell_availability_analysis` (in `kpi_analysis_utils.py`)
+- Analyzes cell availability metrics
+- Identifies cells with availability issues
+- Generates availability-related comments
+---
+## 5. Configuration Parameters
+### a. Band Mapping (from LteCapacity class):
+- Defines the recommended next band for capacity expansion
+- Example: L1800 → L800, L800 → L1800, etc.
+### b. Thresholds (configurable via UI/parameters):
+- Availability Threshold: Default 95%
+- PRB Usage Threshold: Default 80%
+- PRB Difference Threshold: Default 20%
+- Analysis Period: Default 7 days
+- Threshold Days: Default 3 days
+---
+## 6. Example Usage and Output Analysis
+### Basic Usage
+```python
+from process_kpi.process_lte_capacity import process_lte_bh_report
+import pandas as pd
+# Process LTE capacity analysis
+results = process_lte_bh_report(
+    dump_path="network_dump_202305.xlsb",
+    bh_report_path="lte_bh_report_20230501_20230507.csv",
+    num_last_days=7,                    # Analyze last 7 days
+    num_threshold_days=3,                # Consider threshold violations if seen on ≥3 days
+    availability_threshold=95.0,          # Minimum acceptable cell availability (%)
+    prb_usage_threshold=80.0,            # PRB usage threshold for congestion (%)
+    prb_diff_between_cells_threshold=20.0 # Max allowed PRB difference between cells (%)
+)
+# Unpack results
+bh_report_df, lte_analysis_df = results
+# Example: Display sites with congestion
+congested_sites = lte_analysis_df[lte_analysis_df['num_congested_cells'] > 0]
+print(f"Found {len(congested_sites)} sites with congestion")
+# Example: Export results to Excel
+with pd.ExcelWriter('lte_capacity_analysis.xlsx') as writer:
+    lte_analysis_df.to_excel(writer, sheet_name='LTE_Analysis', index=False)
+    bh_report_df.to_excel(writer, sheet_name='BH_Report', index=False)
+```
+### Understanding the Output
+- `lte_analysis_df`: Contains per-site analysis with capacity recommendations
+- `bh_report_df`: Raw busy hour metrics for detailed investigation
+## 7. Column Reference Table
+### Site Information
+| Column | Type | Description | Example |
+|--------|------|-------------|---------|
+| code | str | Unique site identifier | SITE123 |
+| Region | str | Geographical region in Mali | CENTRAL |
+| site_config_band | str | Configured frequency bands | L1800/L800 |
+### Cell Configuration
+| Column | Type | Description | Example |
+|--------|------|-------------|---------|
+| LNCEL_name_l800 | str | 800MHz cell name | SITE123_L800_1 |
+| LNCEL_name_l1800 | str | 1800MHz cell name | SITE123_L1800_1 |
+| LNCEL_name_l2300 | str | 2300MHz cell name | SITE123_L2300_1 |
+| LNCEL_name_l2600 | str | 2600MHz cell name | SITE123_L2600_1 |
+| LNCEL_name_l1800s | str | 1800s cell name | SITE123_L1800S_1 |
+### PRB Usage Metrics
+| Column | Type | Description | Range |
+|--------|------|-------------|-------|
+| avg_prb_usage_bh_l800 | float | Avg PRB usage 800MHz | 0-100% |
+| avg_prb_usage_bh_l1800 | float | Avg PRB usage 1800MHz | 0-100% |
+| avg_prb_usage_bh_l2300 | float | Avg PRB usage 2300MHz | 0-100% |
+| avg_prb_usage_bh_l2600 | float | Avg PRB usage 2600MHz | 0-100% |
+| avg_prb_usage_bh_l1800s | float | Avg PRB usage 1800s | 0-100% |
+### Cell Status
+| Column | Type | Description |
+|--------|------|-------------|
+| num_cells | int | Total cells at site |
+| num_cell_with_kpi | int | Cells with valid KPI data |
+| num_down_or_no_kpi_cells | int | Non-reporting cells |
+| num_congested_cells | int | Cells exceeding PRB threshold |
+| prb_diff_between_cells | float | Max PRB difference between cells |
+| load_balance_required | str | "Yes" if load balancing is needed, otherwise "No" |
+### Analysis Results
+| Column | Type | Description |
+|--------|------|-------------|
+| congestion_comment | str | Analysis of congestion status |
+| final_comments | str | Summary and recommendations |
+| actions | str | Suggested capacity action |
+| next_band | str | Recommended band for expansion |
+"""
+)

process_kpi/lte_kpi_requirements.md ADDED Viewed

	@@ -0,0 +1,46 @@

+# LTE CAPACITY REPORT
+Based on the GSM and WCDMA examples, let's build the LTE capacity report.
+## Required Input
+- File : LTE BH report with columns :
+  - PERIOD_START_TIME
+  - MRBTS/SBTS name
+  - LNBTS name
+  - LNCEL name
+  - DN
+  - Cell Avail excl BLU
+  - E-UTRAN Avg PRB usage per TTI DL
+- Number of last days for the analysis
+- Number of days for threshold
+- Availability threshold
+- PRB usage per TTI DL threshold
+- Max difference between PRB usage over cells of the same BTS
+### TASK
+- Pivot KPI in BH report per KPI (Cell Avail excl BLU, E-UTRAN Avg PRB usage per TTI DL)
+- Calculate Average and Max of PRB usage per TTI DL
+- Calculate Average and Max of Cell Avail excl BLU
+- Count number of Days with Cell Avail excl BLU below Availability threshold
+- Count number of days with PRB usage per TTI DL exceeding the PRB usage per TTI DL threshold
+- Create separate DF per sector and band based on LNCEL name
+  - _1_L800: column_name = Sector_1_L800
+  - _2_L800: column_name = Sector_2_L800
+  - _3_L800: column_name = Sector_3_L800
+  - _1_L1800: column_name = Sector_1_L1800
+  - _2_L1800: column_name = Sector_2_L1800
+  - _3_L1800: column_name = Sector_3_L1800
+  - _1_L2300: column_name = Sector_1_L2300
+  - _2_L2300: column_name = Sector_2_L2300
+  - _3_L2300: column_name = Sector_3_L2300
+  - _1_L2600: column_name = Sector_1_L2600
+  - _2_L2600: column_name = Sector_2_L2600
+  - _3_L2600: column_name = Sector_3_L2600
+  - _1S_L1800: column_name = Sector_1S_L1800
+  - _2S_L1800: column_name = Sector_2S_L1800
+  - _3S_L1800: column_name = Sector_3S_L1800
+- Merge DFs per sector LNBTS name
+- Concat dfs per Bands

process_kpi/process_lte_capacity.py ADDED Viewed

	@@ -0,0 +1,420 @@

+import numpy as np
+import pandas as pd
+from queries.process_lte import process_lte_data
+from utils.convert_to_excel import save_dataframe
+from utils.kpi_analysis_utils import (
+    LteCapacity,
+    analyze_prb_usage,
+    cell_availability_analysis,
+    create_dfs_per_kpi,
+    create_hourly_date,
+    kpi_naming_cleaning,
+)
+# Column names of the LTE analysis output. In the visible code this list is
+# only referenced by a commented-out column selection at the end of
+# `lte_analysis_logic`, so it is currently not applied there.
+LTE_ANALYSIS_COLUMNS = [
+    "code",
+    "code_sector",
+    "Region",
+    "site_config_band",
+    "LNCEL_name_l800",
+    "LNCEL_name_l1800",
+    "LNCEL_name_l2300",
+    "LNCEL_name_l2600",
+    "LNCEL_name_l1800s",
+    "avg_prb_usage_bh_l800",
+    "avg_prb_usage_bh_l1800",
+    "avg_prb_usage_bh_l2300",
+    "avg_prb_usage_bh_l2600",
+    "avg_prb_usage_bh_l1800s",
+    "num_congested_cells",
+    "num_cells",
+    "num_cell_with_kpi",
+    "num_down_or_no_kpi_cells",
+    "prb_diff_between_cells",
+    "load_balance_required",
+    "congestion_comment",
+    "final_comments",
+]
+# Columns kept from each dump frame (FDD and TDD) by
+# `lte_database_for_capacity`; "final_name" is renamed to "LNCEL_name" there.
+LTE_DATABASE_COLUMNS = [
+    "code",
+    "Region",
+    "site_config_band",
+    "final_name",
+]
+# NOTE(review): presumably the columns expected in the cleaned busy-hour KPI
+# report; not referenced in the visible part of this module, confirm usage.
+KPI_COLUMNS = [
+    "date",
+    "LNCEL_name",
+    "Cell_Avail_excl_BLU",
+    "E_UTRAN_Avg_PRB_usage_per_TTI_DL",
+    "DL_PRB_Util_p_TTI_Lev_10",
+]
+# NOTE(review): presumably the columns kept from the PRB usage analysis; not
+# referenced in the visible part of this module, confirm usage.
+PRB_COLUMNS = [
+    "LNCEL_name",
+    "avg_prb_usage_bh",
+    # "avg_prb_usage_bh_lev_10",
+]
+def lte_analysis_logic(
+    df: pd.DataFrame,
+    prb_usage_threshold: float,
+    prb_diff_between_cells_threshold: float,
+) -> pd.DataFrame:
+    """Derive congestion, load-balancing and action columns for each row.
+
+    Each input row carries, per band suffix (l800, l1800, l2300, l2600,
+    l1800s), a ``LNCEL_name_<suffix>`` and an ``avg_prb_usage_bh_<suffix>``
+    column, plus ``code`` and ``site_config_band``.
+
+    Args:
+        df: Input frame; it is copied, never modified in place.
+        prb_usage_threshold: A cell counts as congested when its average PRB
+            usage is greater than or equal to this value.
+        prb_diff_between_cells_threshold: Load balancing is flagged when the
+            spread (max - min) of the row's PRB usages is strictly greater
+            than this value.
+
+    Returns:
+        A copy of ``df`` restricted to rows that have at least one cell name,
+        with these columns added: ``num_congested_cells``, ``num_cells``,
+        ``num_cell_with_kpi``, ``num_down_or_no_kpi_cells``,
+        ``prb_diff_between_cells``, ``load_balance_required`` ("Yes"/"No"),
+        ``next_band``, ``congestion_comment``, ``actions``,
+        ``final_comments``, ``sector``, ``sector_id`` and ``code_sector``.
+    """
+    # Band columns, listed in the priority order used to pick the sector name.
+    name_cols = [
+        "LNCEL_name_l800",
+        "LNCEL_name_l1800",
+        "LNCEL_name_l2300",
+        "LNCEL_name_l2600",
+        "LNCEL_name_l1800s",
+    ]
+    prb_cols = [
+        "avg_prb_usage_bh_l800",
+        "avg_prb_usage_bh_l1800",
+        "avg_prb_usage_bh_l2300",
+        "avg_prb_usage_bh_l2600",
+        "avg_prb_usage_bh_l1800s",
+    ]
+    result = df.copy()
+    prb_usage = result[prb_cols]
+    # Congested cells: PRB usage at or above the threshold (NaN never counts).
+    result["num_congested_cells"] = (prb_usage >= prb_usage_threshold).sum(axis=1)
+    # Configured cells vs. cells that actually reported a KPI.
+    result["num_cells"] = result[name_cols].count(axis=1)
+    result["num_cell_with_kpi"] = prb_usage.count(axis=1)
+    result["num_down_or_no_kpi_cells"] = (
+        result["num_cells"] - result["num_cell_with_kpi"]
+    )
+    # Spread between the most and least loaded cell of the row. The pandas
+    # reductions skip NaN; the builtin max()/min() give results that depend on
+    # where the NaN sits in the row, so absent bands used to hide imbalances.
+    result["prb_diff_between_cells"] = prb_usage.max(axis=1) - prb_usage.min(axis=1)
+    # A NaN spread (no KPI at all) compares False and therefore yields "No".
+    result["load_balance_required"] = np.where(
+        result["prb_diff_between_cells"] > prb_diff_between_cells_threshold,
+        "Yes",
+        "No",
+    )
+    # Band to add next, looked up from the site's current band configuration.
+    result["next_band"] = result["site_config_band"].map(
+        LteCapacity.next_band_mapping
+    )
+    # Congestion comment:
+    #   no congested cell, no down cell -> "No Congestion"
+    #   no congested cell, down cell(s) -> "No congestion but Down cell"
+    #   congested cell(s), down cell(s) -> "Congestion but Colocated Down Cell"
+    #   congested cell(s), no down cell -> "Need Action"
+    no_congestion = result["num_congested_cells"] == 0
+    has_down_cell = result["num_down_or_no_kpi_cells"] > 0
+    result["congestion_comment"] = np.select(
+        [
+            no_congestion & (result["num_down_or_no_kpi_cells"] == 0),
+            no_congestion & has_down_cell,
+            (result["num_congested_cells"] > 0) & has_down_cell,
+        ],
+        [
+            "No Congestion",
+            "No congestion but Down cell",
+            "Congestion but Colocated Down Cell",
+        ],
+        default="Need Action",
+    )
+    # Actions, only refined for rows that "Need Action":
+    #   load_balance_required == "Yes" -> "Load Balancing parameter tuning required"
+    #   load_balance_required == "No"  -> "Add Layer"
+    #   anything else keeps its congestion_comment.
+    needs_action = result["congestion_comment"] == "Need Action"
+    result["actions"] = np.select(
+        [
+            (result["load_balance_required"] == "Yes") & needs_action,
+            (result["load_balance_required"] == "No") & needs_action,
+        ],
+        [
+            "Load Balancing parameter tuning required",
+            "Add Layer",
+        ],
+        default=result["congestion_comment"],
+    )
+    # Final comment: "Add <next_band>" for "Add Layer", otherwise the action.
+    # Vectorised so that an empty frame is handled as well.
+    # NOTE(review): a site_config_band missing from the mapping still yields
+    # "Add nan" here, as before - confirm the wording wanted for that case.
+    result["final_comments"] = np.where(
+        result["actions"] == "Add Layer",
+        "Add " + result["next_band"].astype(str),
+        result["actions"],
+    )
+    # Representative cell name of the row: first non-empty band in name_cols.
+    result["sector"] = (
+        result["LNCEL_name_l800"]
+        .combine_first(result["LNCEL_name_l1800"])
+        .combine_first(result["LNCEL_name_l2300"])
+        .combine_first(result["LNCEL_name_l2600"])
+        .combine_first(result["LNCEL_name_l1800s"])
+    )
+    # Drop rows without any cell; copy so the assignments below write to an
+    # independent frame rather than to a slice of ``result``.
+    result = result[result["sector"].notna()].copy()
+    # Sector number taken from the "_<n>_" marker in the cell name. The
+    # optional "S" also covers supplementary cells ("_1S_L1800"), which would
+    # otherwise end up with no sector id when they are the only cell of a row.
+    sector_name = result["sector"].str
+    result["sector_id"] = np.select(
+        [
+            sector_name.contains(r"_1S?_", na=False),
+            sector_name.contains(r"_2S?_", na=False),
+            sector_name.contains(r"_3S?_", na=False),
+        ],
+        [1, 2, 3],
+        default=np.nan,
+    )
+    # "<code>_<sector_id>" key. sector_id is a float column, hence the ".0"
+    # removal; regex=False makes it a literal replacement on every pandas
+    # version (as a regex, "." would match any character).
+    result["code_sector"] = (
+        result["code"].astype(str) + "_" + result["sector_id"].astype(str)
+    ).str.replace(".0", "", regex=False)
+    # result = result[LTE_ANALYSIS_COLUMNS]
+    return result
+def dfs_per_band_cell(df: pd.DataFrame) -> pd.DataFrame:
+    """Lay out the cells of each site side by side, one row per site and sector.
+
+    Args:
+        df: Frame with the columns ``code``, ``Region``, ``site_config_band``,
+            ``LNCEL_name`` and ``avg_prb_usage_bh``.
+
+    Returns:
+        For each of the sectors 1, 2 and 3, the unique
+        (code, Region, site_config_band) rows left-merged on ``code`` with the
+        cells of that sector, giving the columns ``LNCEL_name_<suffix>`` and
+        ``avg_prb_usage_bh_<suffix>`` for the suffixes l800, l1800, l2300,
+        l2600 and l1800s. The three sector frames are stacked with a fresh
+        index.
+    """
+    site_info_df = df[["code", "Region", "site_config_band"]].drop_duplicates()
+    band_suffixes = ("l800", "l1800", "l2300", "l2600")
+    sector_frames = []
+    for sector in ("1", "2", "3"):
+        # (text looked for in LNCEL_name, column suffix), e.g. ("_1_L800", "l800").
+        # The supplementary 1800 cells are tagged "_<sector>S_L1800".
+        band_patterns = [
+            (f"_{sector}_{suffix.upper()}", suffix) for suffix in band_suffixes
+        ]
+        band_patterns.append((f"_{sector}S_L1800", "l1800s"))
+        sector_df = site_info_df.copy()
+        for name_pattern, suffix in band_patterns:
+            # Literal match; na=False keeps the mask boolean so a missing cell
+            # name is simply excluded instead of making the filter raise.
+            in_band = df["LNCEL_name"].str.contains(
+                name_pattern, regex=False, na=False
+            )
+            # Rename before merging so each band gets its own columns and
+            # pandas never has to add _x/_y suffixes.
+            band_df = df.loc[in_band, ["code", "LNCEL_name", "avg_prb_usage_bh"]].rename(
+                columns={
+                    "LNCEL_name": f"LNCEL_name_{suffix}",
+                    "avg_prb_usage_bh": f"avg_prb_usage_bh_{suffix}",
+                }
+            )
+            sector_df = pd.merge(sector_df, band_df, on="code", how="left")
+        sector_frames.append(sector_df)
+    return pd.concat(sector_frames, axis=0, ignore_index=True)
def lte_database_for_capacity(dump_path: str):
    """Build the LTE cell table used by the capacity analysis.

    Runs ``process_lte_data`` on ``dump_path``, keeps ``LTE_DATABASE_COLUMNS``
    from its first two results (named FDD and TDD by the original code), stacks
    them vertically and renames ``final_name`` to ``LNCEL_name``.
    """
    parsed_dump = process_lte_data(dump_path)
    # Index 0 is treated as the FDD table and index 1 as the TDD table.
    band_tables = [parsed_dump[position][LTE_DATABASE_COLUMNS] for position in (0, 1)]
    stacked = pd.concat(band_tables, axis=0)
    return stacked.rename(columns={"final_name": "LNCEL_name"})
def lte_bh_dfs_per_kpi(
    dump_path: str,
    df: pd.DataFrame,
    number_of_kpi_days: int = 7,
    availability_threshold: int = 95,
    prb_usage_threshold: int = 80,
    prb_diff_between_cells_threshold: int = 20,
    number_of_threshold_days: int = 3,
) -> list:
    """Run the busy-hour KPI analysis and the per-site LTE capacity analysis.

    Args:
        dump_path: Path of the dump passed to ``lte_database_for_capacity``.
        df: Busy-hour KPI frame; it is pivoted by ``create_dfs_per_kpi`` on the
            ``date`` and ``LNCEL_name`` columns.
        number_of_kpi_days: Number of most recent days fed to the
            availability and PRB analyses.
        availability_threshold: Threshold passed to
            ``cell_availability_analysis``.
        prb_usage_threshold: PRB usage threshold, used both for the per-cell
            PRB analysis and for ``lte_analysis_logic``.
        prb_diff_between_cells_threshold: Threshold passed to
            ``lte_analysis_logic``.
        number_of_threshold_days: Number of days at or above the PRB threshold
            that triggers the PRB comment.

    Returns:
        A two-element list ``[bh_kpi_df, lte_analysis_df]``: the combined
        per-cell KPI frame and the per-site/sector analysis restricted to
        ``LTE_ANALYSIS_COLUMNS``.
    """
    pivoted_kpi_dfs = create_dfs_per_kpi(
        df=df,
        pivot_date_column="date",
        pivot_name_column="LNCEL_name",
        kpi_columns_from=2,
    )
    cell_availability_df = cell_availability_analysis(
        df=pivoted_kpi_dfs["Cell_Avail_excl_BLU"],
        days=number_of_kpi_days,
        availability_threshold=availability_threshold,
    )
    prb_lev10_usage_df = analyze_prb_usage(
        df=pivoted_kpi_dfs["DL_PRB_Util_p_TTI_Lev_10"],
        number_of_kpi_days=number_of_kpi_days,
        prb_usage_threshold=prb_usage_threshold,
        analysis_type="BH",
        number_of_threshold_days=number_of_threshold_days,
    )
    bh_kpi_df = pd.concat(
        [cell_availability_df, prb_lev10_usage_df], axis=1
    ).reset_index()

    prb_df = bh_kpi_df[PRB_COLUMNS]
    # Discard rows whose cell name is missing or too short (3 chars or fewer).
    prb_df = prb_df[prb_df["LNCEL_name"].str.len() > 3]
    # Drop column level 1 (the second level, presumably the pivot date) so the
    # frame has flat column names and can be merged on "LNCEL_name".
    prb_df = prb_df.droplevel(level=1, axis=1)

    lte_db = lte_database_for_capacity(dump_path)
    # Left merge: every cell of the dump is kept, with NaN PRB figures when the
    # cell is absent from the busy-hour report.
    db_and_prb = pd.merge(lte_db, prb_df, on="LNCEL_name", how="left")
    db_and_prb = db_and_prb[db_and_prb["LNCEL_name"].str.len() > 3]

    lte_analysis_df = dfs_per_band_cell(db_and_prb)
    lte_analysis_df = lte_analysis_logic(
        lte_analysis_df,
        prb_usage_threshold,
        prb_diff_between_cells_threshold,
    )
    lte_analysis_df = lte_analysis_df[LTE_ANALYSIS_COLUMNS]
    return [bh_kpi_df, lte_analysis_df]
def process_lte_bh_report(
    dump_path: str,
    bh_report_path: str,
    num_last_days: int,
    num_threshold_days: int,
    availability_threshold: float,
    prb_usage_threshold: float,
    prb_diff_between_cells_threshold: float,
) -> list:
    """Process an LTE busy-hour report and run the capacity analysis.

    Resets ``LteCapacity.final_results`` to ``None``, reads the report as a
    ``;``-delimited CSV, cleans it with ``kpi_naming_cleaning`` and
    ``create_hourly_date``, keeps ``KPI_COLUMNS`` and hands the result to
    ``lte_bh_dfs_per_kpi``.

    Args:
        dump_path: Path of the dump forwarded to ``lte_bh_dfs_per_kpi``.
        bh_report_path: Path to the BH report CSV file (``;`` delimited).
        num_last_days: Number of last days for analysis.
        num_threshold_days: Number of days for threshold calculation.
        availability_threshold: Minimum required availability.
        prb_usage_threshold: Maximum allowed PRB usage.
        prb_diff_between_cells_threshold: Maximum allowed PRB usage difference
            between cells.

    Returns:
        The two-element list ``[bh_kpi_df, lte_analysis_df]`` produced by
        ``lte_bh_dfs_per_kpi``.
    """
    # Clear any result left over from a previous run.
    LteCapacity.final_results = None

    report_df = pd.read_csv(bh_report_path, delimiter=";")
    report_df = kpi_naming_cleaning(report_df)
    report_df = create_hourly_date(report_df)
    report_df = report_df[KPI_COLUMNS]

    analysis_results = lte_bh_dfs_per_kpi(
        dump_path=dump_path,
        df=report_df,
        number_of_kpi_days=num_last_days,
        availability_threshold=availability_threshold,
        prb_usage_threshold=prb_usage_threshold,
        prb_diff_between_cells_threshold=prb_diff_between_cells_threshold,
        number_of_threshold_days=num_threshold_days,
    )
    return analysis_results

utils/convert_to_excel.py CHANGED Viewed

@@ -143,14 +143,31 @@ def get_format_map_by_format_type(formats: dict, format_type: str) -> dict:
             "number_trx_per_bcf": formats["blue_light"],
             "number_trx_per_site": formats["blue_light"],
         }
-    # elif format_type == "LTE":
-    #     return {
-    #         "DL PRB Utilization": formats["orange"],
-    #         "UL PRB Utilization": formats["orange"],
-    #         "RSRP": formats["blue_light"],
-    #         "RSRQ": formats["blue_light"],
-    #         "Throughput (Mbps)": formats["green"],
-    #     }
     else:
         return {}  # No formatting if format_type not matched
@@ -193,6 +210,11 @@ def convert_gsm_dfs(dfs, sheet_names) -> bytes:
     return _write_to_excel(dfs, sheet_names, index=True, format_type="GSM_Analysis")
 @st.cache_data
 def convert_database_dfs(dfs, sheet_names) -> bytes:
     return _write_to_excel(dfs, sheet_names, index=True, format_type="database")

             "number_trx_per_bcf": formats["blue_light"],
             "number_trx_per_site": formats["blue_light"],
         }
+    elif format_type == "LTE_Analysis":
+        return {
+            "code": formats["blue"],
+            "code_sector": formats["blue"],
+            "Region": formats["blue"],
+            "site_config_band": formats["blue"],
+            "LNCEL_name_l800": formats["beurre"],
+            "LNCEL_name_l1800": formats["purple5"],
+            "LNCEL_name_l2300": formats["purple6"],
+            "LNCEL_name_l2600": formats["blue_light"],
+            "LNCEL_name_l1800s": formats["gray"],
+            "avg_prb_usage_bh_l800": formats["beurre"],
+            "avg_prb_usage_bh_l1800": formats["purple5"],
+            "avg_prb_usage_bh_l2300": formats["purple6"],
+            "avg_prb_usage_bh_l2600": formats["blue_light"],
+            "avg_prb_usage_bh_l1800s": formats["gray"],
+            "num_congested_cells": formats["orange"],
+            "num_cells": formats["orange"],
+            "num_cell_with_kpi": formats["orange"],
+            "num_down_or_no_kpi_cells": formats["orange"],
+            "prb_diff_between_cells": formats["orange"],
+            "load_balance_required": formats["orange"],
+            "congestion_comment": formats["orange"],
+            "final_comments": formats["green"],
+        }
     else:
         return {}  # No formatting if format_type not matched
     return _write_to_excel(dfs, sheet_names, index=True, format_type="GSM_Analysis")
@st.cache_data
def convert_lte_analysis_dfs(dfs, sheet_names) -> bytes:
    """Return the bytes produced by ``_write_to_excel`` for the LTE analysis.

    The frames are written with their index and the ``"LTE_Analysis"`` format
    type; the call is wrapped in ``st.cache_data``.
    """
    workbook_bytes = _write_to_excel(
        dfs, sheet_names, index=True, format_type="LTE_Analysis"
    )
    return workbook_bytes
 @st.cache_data
 def convert_database_dfs(dfs, sheet_names) -> bytes:
     return _write_to_excel(dfs, sheet_names, index=True, format_type="database")

utils/kpi_analysis_utils.py CHANGED Viewed

@@ -538,3 +538,50 @@ def analyze_sdcch_call_blocking(
     )
     return result_df

     )
     return result_df
class LteCapacity:
    """Class-level holder for LTE capacity analysis state and lookup data."""

    # Starts empty; process_lte_bh_report resets it to None before each run.
    final_results = None

    # Keyed by a "/"-joined band configuration string; the value names the
    # next band or site-level action for that configuration.
    # NOTE(review): presumably the upgrade recommendation - confirm with the
    # code that consumes this mapping.
    next_band_mapping = {
        # Single-band FDD sites point at the other low/mid band.
        "L1800": "L800",
        "L800": "L1800",
        # Configurations without L2600 point at L2600.
        "L1800/L800": "L2600",
        "L1800/L2300/L800": "L2600",
        "L2300/L800": "L2600",
        # Configurations that already include L2600.
        "L1800/L2600/L800": "New site/Dual Beam",
        "L1800/L2300/L2600/L800": "New site/Dual Beam",
        # L2300-only sites.
        "L2300": "FDD H// colocated site",
    }
def analyze_prb_usage(
    df: pd.DataFrame,
    number_of_kpi_days: int,
    prb_usage_threshold: int,
    analysis_type: str,
    number_of_threshold_days: int,
) -> pd.DataFrame:
    """Summarise PRB usage over the last columns of a pivoted KPI frame.

    The input is copied, and four columns are appended, each suffixed with the
    lower-cased ``analysis_type`` (``<t>`` below):

    * ``avg_prb_usage_<t>``: row mean of the selected columns, rounded to 2
      decimals.
    * ``max_prb_usage_<t>``: row maximum of the selected columns.
    * ``number_of_days_with_prb_usage_exceeded_<t>``: how many of the selected
      values are ``>= prb_usage_threshold`` (missing values never count).
    * ``prb_usage_<t>_comment``: ``"PRB usage exceeded threshold"`` when that
      count is ``>= number_of_threshold_days``, otherwise ``None``.

    Args:
        df: Frame with one row per cell and one column per day.
        number_of_kpi_days: Number of trailing columns to analyse. Must be
            positive: because of slice semantics, ``0`` selects every column.
        prb_usage_threshold: Usage value at or above which a day counts as
            exceeded.
        analysis_type: Label used (lower-cased) in the new column names.
        number_of_threshold_days: Minimum number of exceeded days that
            triggers the comment.

    Returns:
        A copy of ``df`` with the four summary columns appended.
    """
    result_df = df.copy()
    suffix = analysis_type.lower()
    exceeded_col = f"number_of_days_with_prb_usage_exceeded_{suffix}"

    # Slice before adding any column so the statistics only see KPI values.
    last_days_df: pd.DataFrame = result_df.iloc[:, -number_of_kpi_days:]

    result_df[f"avg_prb_usage_{suffix}"] = last_days_df.mean(axis=1).round(2)
    result_df[f"max_prb_usage_{suffix}"] = last_days_df.max(axis=1)
    # Vectorised count of days at or above the threshold; NaN compares as
    # False, so days without data are not counted.
    result_df[exceeded_col] = (last_days_df >= prb_usage_threshold).sum(axis=1)
    result_df[f"prb_usage_{suffix}_comment"] = np.where(
        result_df[exceeded_col] >= number_of_threshold_days,
        "PRB usage exceeded threshold",
        None,
    )
    return result_df