Spaces:

DavMelchi
/

db_query

Running

App Files Files Community

DavMelchi committed on May 28

Commit

031d4db

1 Parent(s): 5d12505

Wcel capacity V1 completed

Browse files

Files changed (2) hide show

apps/kpi_analysis/wcel_capacity.py +219 -4
process_kpi/process_wcel_capacity.py +115 -7

apps/kpi_analysis/wcel_capacity.py CHANGED Viewed

@@ -89,10 +89,12 @@ if uploaded_file is not None:
             )
         if results is not None:
-            kpi_df = results[0]
-            WcelCapacity.final_results = convert_dfs([kpi_df], ["kpi_df"])
             st.download_button(
                 on_click="ignore",
                 type="primary",
@@ -101,4 +103,217 @@ if uploaded_file is not None:
                 file_name="WCEL_Capacity_Report.xlsx",
                 mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
             )
-        st.write(kpi_df)

             )
         if results is not None:
+            wcel_analysis_df = results[0]
+            kpi_df = results[1]
+            WcelCapacity.final_results = convert_dfs(
+                [wcel_analysis_df, kpi_df], ["wcel_analysis", "kpi"]
+            )
             st.download_button(
                 on_click="ignore",
                 type="primary",
                 file_name="WCEL_Capacity_Report.xlsx",
                 mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
             )
+        st.write(wcel_analysis_df)
+        # Add dataframe and Bar chart with "final_comments" distribution
+        st.markdown("***")
+        st.markdown(":blue[**Final comment distribution**]")
+        final_comments_df = (
+            wcel_analysis_df.groupby("final_comments")
+            .size()
+            .reset_index(name="count")
+            .sort_values(by="count", ascending=False)
+        )
+        final_comments_col1, final_comments_col2 = st.columns((1, 3))
+        with final_comments_col1:
+            st.write(final_comments_df)
+        with final_comments_col2:
+            fig = px.bar(
+                final_comments_df,
+                x="final_comments",
+                y="count",
+                title="Final Comments Distribution",
+                text="count",
+            )
+            fig.update_traces(textposition="outside")
+            fig.update_layout(height=600)
+            st.plotly_chart(fig)
+        # Add dataframe and Pie chart with "operational_comments" distribution
+        st.markdown("***")
+        st.markdown(":blue[**Operational comment distribution**]")
+        operational_comments_df = (
+            wcel_analysis_df.groupby("operational_comments")
+            .size()
+            .reset_index(name="count")
+            .sort_values(by="count", ascending=False)
+        )
+        operational_comments_df["percent"] = (
+            operational_comments_df["count"] / operational_comments_df["count"].sum()
+        ) * 100
+        operational_comments_col1, operational_comments_col2 = st.columns((1, 3))
+        with operational_comments_col1:
+            st.write(operational_comments_df)
+        with operational_comments_col2:
+            fig = px.pie(
+                operational_comments_df,
+                names="operational_comments",
+                values="count",
+                hover_name="operational_comments",
+                hover_data=["count", "percent"],
+                title="Operational Comments Distribution",
+            )
+            fig.update_layout(height=600)
+            fig.update_traces(
+                texttemplate="<b>%{label}</b><br> %{value}  <b>(%{customdata[1]:.1f}%)</b>",
+                textfont_size=15,
+                textposition="outside",
+            )
+            st.plotly_chart(fig)
+        # Add dataframe and Bar chart with "operational_comments" distribution per Region
+        st.markdown("***")
+        st.markdown(":blue[**Operational comment distribution per Region**]")
+        operational_comments_df = (
+            wcel_analysis_df.groupby(["Region", "operational_comments"])
+            .size()
+            .reset_index(name="count")
+            .sort_values(by="count", ascending=False)
+        )
+        operational_comments_col1, operational_comments_col2 = st.columns((1, 3))
+        with operational_comments_col1:
+            st.write(operational_comments_df)
+        with operational_comments_col2:
+            fig = px.bar(
+                operational_comments_df,
+                x="Region",
+                y="count",
+                color="operational_comments",
+                title="Operational Comments Distribution per Region",
+                text="count",
+            )
+            fig.update_traces(textposition="outside")
+            fig.update_layout(height=600)
+            st.plotly_chart(fig)
+        # Add dataframe and Pie chart with "fails_comments" distribution
+        st.markdown("***")
+        st.markdown(":blue[**Fails comment distribution**]")
+        fails_comments_df = (
+            wcel_analysis_df.groupby("fails_comments")
+            .size()
+            .reset_index(name="count")
+            .sort_values(by="count", ascending=False)
+        )
+        # replace empty strings with "Cell OK"
+        fails_comments_df["fails_comments"] = fails_comments_df[
+            "fails_comments"
+        ].replace("", "Cell OK")
+        fails_comments_df["percent"] = (
+            fails_comments_df["count"] / fails_comments_df["count"].sum()
+        ) * 100
+        fails_comments_col1, fails_comments_col2 = st.columns((1, 3))
+        with fails_comments_col1:
+            st.write(fails_comments_df)
+        with fails_comments_col2:
+            fig = px.pie(
+                fails_comments_df,
+                names="fails_comments",
+                values="count",
+                hover_name="fails_comments",
+                hover_data=["count", "percent"],
+                title="Fails Comments Distribution",
+            )
+            fig.update_layout(height=600)
+            fig.update_traces(
+                texttemplate="<b>%{label}</b><br> %{value}  <b>(%{customdata[1]:.1f}%)</b>",
+                textfont_size=15,
+                textposition="outside",
+            )
+            st.plotly_chart(fig)
+        # Add dataframe and Bar chart with "fails_comments" distribution per Region
+        st.markdown("***")
+        st.markdown(":blue[**Fails comment distribution per Region**]")
+        fails_comments_df = (
+            wcel_analysis_df.groupby(["Region", "fails_comments"])
+            .size()
+            .reset_index(name="count")
+            .sort_values(by="count", ascending=False)
+        )
+        # replace empty strings with "Cell OK"
+        fails_comments_df["fails_comments"] = fails_comments_df[
+            "fails_comments"
+        ].replace("", "Cell OK")
+        fails_comments_col1, fails_comments_col2 = st.columns((1, 3))
+        with fails_comments_col1:
+            st.write(fails_comments_df)
+        with fails_comments_col2:
+            fig = px.bar(
+                fails_comments_df,
+                x="Region",
+                y="count",
+                color="fails_comments",
+                title="Fails Comments Distribution per Region",
+                text="count",
+            )
+            fig.update_traces(textposition="outside", textfont_size=15)
+            fig.update_layout(height=600)
+            st.plotly_chart(fig)
+        # create a scatter_map plot of fails_comments using code, Longitude and Latitude
+        st.markdown("***")
+        st.markdown(":blue[**Fails comments distribution**]")
+        fails_comments_map_df = wcel_analysis_df[
+            ["code", "Longitude", "Latitude", "fails_comments"]
+        ].dropna(subset=["code", "Longitude", "Latitude", "fails_comments"])
+        # replace empty strings with "Cell OK"
+        fails_comments_map_df["fails_comments"] = fails_comments_map_df[
+            "fails_comments"
+        ].replace("", "Cell OK")
+        # add a constant size column equal to 20
+        fails_comments_map_df["size"] = 20
+        fig = px.scatter_map(
+            fails_comments_map_df,
+            lat="Latitude",
+            lon="Longitude",
+            color="fails_comments",
+            size="size",
+            zoom=10,
+            height=600,
+            title="Fails comments distribution",
+            hover_data={
+                "code": True,
+                "fails_comments": True,
+            },
+            hover_name="code",
+        )
+        fig.update_layout(mapbox_style="open-street-map")
+        st.plotly_chart(fig, use_container_width=True)
+        # create a scatter_map plot of operational_comments using code, Longitude and Latitude
+        operational_comments_map_df = wcel_analysis_df[
+            ["code", "Longitude", "Latitude", "operational_comments"]
+        ].dropna(subset=["code", "Longitude", "Latitude", "operational_comments"])
+        # replace empty strings with "Cell OK"
+        operational_comments_map_df["operational_comments"] = (
+            operational_comments_map_df["operational_comments"].replace("", "Cell OK")
+        )
+        # add a constant size column equal to 20
+        operational_comments_map_df["size"] = 20
+        fig = px.scatter_map(
+            operational_comments_map_df,
+            lat="Latitude",
+            lon="Longitude",
+            color="operational_comments",
+            size="size",
+            zoom=10,
+            height=600,
+            title="Operational comments distribution",
+            hover_data={
+                "code": True,
+                "operational_comments": True,
+            },
+            hover_name="code",
+        )
+        fig.update_layout(mapbox_style="open-street-map")
+        st.plotly_chart(fig, use_container_width=True)

process_kpi/process_wcel_capacity.py CHANGED Viewed

@@ -9,6 +9,7 @@ from utils.kpi_analysis_utils import (
     kpi_naming_cleaning,
     summarize_fails_comments,
 )
 tx_comments_mapping = {
     "iub_frameloss exceeded threshold": "iub frameloss",
@@ -31,6 +32,20 @@ operational_comments_mapping = {
     "hsdpa iub congestion, critical instability": "Availability and TX issues",
 }
 KPI_COLUMNS = [
     "WCEL_name",
     "date",
@@ -48,6 +63,36 @@ KPI_COLUMNS = [
     "rrc_conn_stp_fail_bts_M1001C4",
 ]
 class WcelCapacity:
     final_results: pd.DataFrame = None
@@ -80,13 +125,11 @@ def wcel_kpi_analysis(
     hsdpa_user_throughput_df = pivoted_kpi_dfs["HSDPA_USER_THROUGHPUT"]
     max_simult_hsdpa_users_df = pivoted_kpi_dfs["Max_simult_HSDPA_users"]
     # Add sum of traffic, and max of throughput and max users
-    trafic_cs_df["max_traffic_cs"] = trafic_cs_df.max(axis=1)
-    hsdpa_traffic_df["max_traffic_dl"] = hsdpa_traffic_df.max(axis=1)
     hsdpa_user_throughput_df["max_dl_throughput"] = hsdpa_user_throughput_df.max(axis=1)
     max_simult_hsdpa_users_df["max_users"] = max_simult_hsdpa_users_df.max(axis=1)
     # add average of Trafics, throughput and max users
-    trafic_cs_df["avg_traffic_cs"] = trafic_cs_df.mean(axis=1)
-    hsdpa_traffic_df["avg_traffic_dl"] = hsdpa_traffic_df.mean(axis=1)
     hsdpa_user_throughput_df["avg_dl_throughput"] = hsdpa_user_throughput_df.mean(
         axis=1
     )
@@ -196,7 +239,72 @@ def wcel_kpi_analysis(
         new_column="fails_comments",
     )
     kpi_df["fails_comments"] = kpi_df["fails_comments"].apply(summarize_fails_comments)
-    return [kpi_df]
 def load_and_process_wcel_capacity_data(
@@ -228,7 +336,7 @@ def load_and_process_wcel_capacity_data(
     df = kpi_naming_cleaning(df)
     df = create_daily_date(df)
     df = df[KPI_COLUMNS]
-    df = wcel_kpi_analysis(
         df,
         num_last_days,
         num_threshold_days,
@@ -237,4 +345,4 @@ def load_and_process_wcel_capacity_data(
         hsdpa_congestion_rate_iub_threshold,
         fails_treshold,
     )
-    return df

     kpi_naming_cleaning,
     summarize_fails_comments,
 )
+from utils.utils_vars import get_physical_db
 tx_comments_mapping = {
     "iub_frameloss exceeded threshold": "iub frameloss",
     "hsdpa iub congestion, critical instability": "Availability and TX issues",
 }
+fails_comments_mapping = {
+    "ac, ac_dl, bts, code fails": "Power, Bts and Code fails",
+    "bts fails": "Bts fails",
+    "ac, bts, code fails": "Power and Code fails",
+    "ac, code fails": "Power fails",
+    "ac fails": "Power fails",
+    "ac, ac_dl fails": "Power fails",
+    "ac, bts fails": "Power and Bts fails",
+    "ac, ac_dl, bts fails": "Power and Bts fails",
+    "ac, ac_dl, code fails": "Power and Code fails",
+    "ac, ac_ul, bts, code fails": "Power, Bts and Code fails",
+    "ac, ac_dl, ac_ul, bts, code fails": "Power, Bts and Code fails",
+}
 KPI_COLUMNS = [
     "WCEL_name",
     "date",
     "rrc_conn_stp_fail_bts_M1001C4",
 ]
+WCEL_ANALYSIS_COLUMNS = [
+    "WCEL_name",
+    "Average_cell_availability_daily",
+    "number_of_days_exceeding_availability_threshold_daily",
+    "availability_comment_daily",
+    "sum_traffic_cs",
+    "sum_traffic_dl",
+    "max_dl_throughput",
+    "avg_dl_throughput",
+    "max_users",
+    "max_iub_frameloss",
+    "number_of_days_with_iub_frameloss_exceeded",
+    "max_hsdpa_congestion_rate_iub",
+    "number_of_days_with_hsdpa_congestion_rate_iub_exceeded",
+    "max_rrc_fail_ac",
+    "number_of_days_with_rrc_fail_ac_exceeded",
+    "max_rrc_fail_ac_ul",
+    "number_of_days_with_rrc_fail_ac_ul_exceeded",
+    "max_rrc_fail_ac_dl",
+    "number_of_days_with_rrc_fail_ac_dl_exceeded",
+    "max_rrc_fail_code",
+    "number_of_days_with_rrc_fail_code_exceeded",
+    "max_rrc_fail_bts",
+    "number_of_days_with_rrc_fail_bts_exceeded",
+    "tx_congestion_comments",
+    "operational_comments",
+    "fails_comments",
+    "final_comments",
+]
 class WcelCapacity:
     final_results: pd.DataFrame = None
     hsdpa_user_throughput_df = pivoted_kpi_dfs["HSDPA_USER_THROUGHPUT"]
     max_simult_hsdpa_users_df = pivoted_kpi_dfs["Max_simult_HSDPA_users"]
     # Add sum of traffic, and max of throughput and max users
+    trafic_cs_df["sum_traffic_cs"] = trafic_cs_df.sum(axis=1)
+    hsdpa_traffic_df["sum_traffic_dl"] = hsdpa_traffic_df.sum(axis=1)
     hsdpa_user_throughput_df["max_dl_throughput"] = hsdpa_user_throughput_df.max(axis=1)
     max_simult_hsdpa_users_df["max_users"] = max_simult_hsdpa_users_df.max(axis=1)
     # add average of Trafics, throughput and max users
     hsdpa_user_throughput_df["avg_dl_throughput"] = hsdpa_user_throughput_df.mean(
         axis=1
     )
         new_column="fails_comments",
     )
     kpi_df["fails_comments"] = kpi_df["fails_comments"].apply(summarize_fails_comments)
+    kpi_df["fails_comments"] = kpi_df["fails_comments"].apply(
+        lambda x: fails_comments_mapping.get(x, x)
+    )
+    kpi_df = combine_comments(
+        kpi_df,
+        "operational_comments",
+        "fails_comments",
+        new_column="final_comments",
+    )
+    wcel_analysis_df = kpi_df[WCEL_ANALYSIS_COLUMNS]
+    wcel_analysis_df = wcel_analysis_df.droplevel(level=1, axis=1)
+    # Rename
+    wcel_analysis_df = wcel_analysis_df.rename(
+        columns={
+            "WCEL_name": "name",
+            "Average_cell_availability_daily": "Avg_availability",
+            "number_of_days_exceeding_availability_threshold_daily": "Avail_exceed_days",
+            "availability_comment_daily": "availability_comment",
+            "number_of_days_with_iub_frameloss_exceeded": "iub_frameloss_exceed_days",
+            "number_of_days_with_hsdpa_congestion_rate_iub_exceeded": "hsdpa_iub_exceed_days",
+            "number_of_days_with_rrc_fail_ac_exceeded": "ac_fail_exceed_days",
+            "number_of_days_with_rrc_fail_ac_ul_exceeded": "ac_ul_fail_exceed_days",
+            "number_of_days_with_rrc_fail_ac_dl_exceeded": "ac_dl_fail_exceed_days",
+            "number_of_days_with_rrc_fail_code_exceeded": "code_fail_exceed_days",
+            "number_of_days_with_rrc_fail_bts_exceeded": "bts_fail_exceed_days",
+        }
+    )
+    # remove row if name less than 5 characters
+    wcel_analysis_df = wcel_analysis_df[wcel_analysis_df["name"].str.len() >= 5]
+    wcel_analysis_df["code"] = wcel_analysis_df["name"].str.split("_").str[0]
+    wcel_analysis_df["code"] = (
+        pd.to_numeric(wcel_analysis_df["code"], errors="coerce").fillna(0).astype(int)
+    )
+    wcel_analysis_df["Region"] = wcel_analysis_df["name"].str.split("_").str[1]
+    # move code and Region to the first two columns
+    wcel_analysis_df = wcel_analysis_df[
+        ["code", "Region"]
+        + [col for col in wcel_analysis_df if col != "code" and col != "Region"]
+    ]
+    # Load physical database
+    physical_db: pd.DataFrame = get_physical_db()
+    # Convert code_sector to code
+    physical_db["code"] = physical_db["Code_Sector"].str.split("_").str[0]
+    # remove duplicates
+    physical_db = physical_db.drop_duplicates(subset="code")
+    # keep only code and longitude and latitude
+    physical_db = physical_db[["code", "Longitude", "Latitude"]]
+    physical_db["code"] = (
+        pd.to_numeric(physical_db["code"], errors="coerce").fillna(0).astype(int)
+    )
+    wcel_analysis_df = pd.merge(
+        wcel_analysis_df,
+        physical_db,
+        on="code",
+        how="left",
+    )
+    return [wcel_analysis_df, kpi_df]
 def load_and_process_wcel_capacity_data(
     df = kpi_naming_cleaning(df)
     df = create_daily_date(df)
     df = df[KPI_COLUMNS]
+    dfs = wcel_kpi_analysis(
         df,
         num_last_days,
         num_threshold_days,
         hsdpa_congestion_rate_iub_threshold,
         fails_treshold,
     )
+    return dfs