DavMelchi committed on
Commit
4d0848d
·
1 Parent(s): 5113cfc

Add LCG analysis Part1

Browse files
app.py CHANGED
@@ -146,6 +146,10 @@ if check_password():
146
  "apps/kpi_analysis/wcel_capacity.py",
147
  title=" πŸ“Š WCEL Capacity Analysis",
148
  ),
 
 
 
 
149
  st.Page(
150
  "apps/kpi_analysis/lte_capacity.py",
151
  title=" πŸ“Š LTE Capacity Analysis",
 
146
  "apps/kpi_analysis/wcel_capacity.py",
147
  title=" πŸ“Š WCEL Capacity Analysis",
148
  ),
149
+ st.Page(
150
+ "apps/kpi_analysis/lcg_analysis.py",
151
+ title=" πŸ“Š LCG Capacity Analysis",
152
+ ),
153
  st.Page(
154
  "apps/kpi_analysis/lte_capacity.py",
155
  title=" πŸ“Š LTE Capacity Analysis",
apps/kpi_analysis/lcg_analysis.py ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import plotly.express as px
3
+ import streamlit as st
4
+
5
+ from process_kpi.process_lcg_capacity import load_and_process_lcg_data
6
+ from utils.convert_to_excel import convert_dfs
7
+
8
+
9
class LcgCapacity:
    """Holder for the downloadable result of the latest LCG analysis run."""

    # Reset to None each time the script runs with an uploaded file, then set
    # to whatever convert_dfs returns once an analysis succeeds; that value is
    # passed as the payload of the report download button.
    final_results = None
11
+
12
+
13
# Streamlit UI: page title, usage notes with an illustration, the CSV
# uploader, and the two rows of columns that host the analysis parameters.
st.title(" 📊 LCG Analysis")
doc_col, image_col = st.columns(2)

with doc_col:
    # The text is kept flush-left inside the literal: st.write renders strings
    # as markdown, where indented lines after a blank line become a code block.
    st.write(
        """This app allows you to analyze the LCG of a network.
It provides insights into the utilization of LCG resources,
helping you identify potential capacity issues and plan for upgrades.

The report should be run with a minimum of 3 days of data.
- Daily Aggregated
- LCG level
- Exported in CSV format.
"""
    )

with image_col:
    st.image("./assets/wcel_capacity.png", width=400)

uploaded_file = st.file_uploader("Upload LCG report in CSV format", type="csv")

# Only param_col1..param_col4 are used further down in this script.
param_col1, param_col2, param_col3 = st.columns(3)
param_col4, param_col5, param_col6 = st.columns(3)
37
+
38
+
39
+ # num_last_days
40
+ # num_threshold_days
41
+ # lcg_utilization_threshold
42
+ # difference_between_lcgs
43
+
44
# Main flow: collect the analysis parameters, run the processing when the user
# clicks "Analyze Data", then render the download button, tables and charts.
if uploaded_file is not None:
    LcgCapacity.final_results = None
    with param_col1:
        num_last_days = st.number_input(
            "Number of days for analysis",
            min_value=3,
            max_value=30,
            value=7,
        )
    with param_col2:
        num_threshold_days = st.number_input(
            "Number of days for threshold",
            min_value=1,
            max_value=30,
            value=2,
        )
    with param_col3:
        lcg_utilization_threshold = st.number_input(
            "LCG Utilization Threshold (%)",
            min_value=0,
            max_value=100,
            value=80,
        )
    with param_col4:
        difference_between_lcgs = st.number_input(
            "Difference between LCgs (%)",
            min_value=0,
            max_value=100,
            value=20,
        )
    if st.button("Analyze Data", type="primary"):
        # Input validation, followed by the processing itself; any exception
        # raised by either step is reported to the user and ends the run.
        try:
            if num_threshold_days > num_last_days:
                st.warning(
                    "Number of threshold days cannot be greater than number of analysis days"
                )
                st.stop()

            if num_last_days < 3:
                st.warning(
                    "Analysis period should be at least 3 days for meaningful results"
                )
                st.stop()

            if lcg_utilization_threshold <= 0 or lcg_utilization_threshold > 100:
                st.warning("LCG utilization threshold must be between 1 and 100")
                st.stop()

            with st.spinner("Processing data..."):
                results = load_and_process_lcg_data(
                    uploaded_file,
                    num_last_days,
                    num_threshold_days,
                    lcg_utilization_threshold,
                    difference_between_lcgs,
                )
        except Exception as e:
            st.error(f"An error occurred during input validation: {str(e)}")
            st.stop()
        if results is not None:
            lcg_analysis_df = results[0]
            kpi_df = results[1]
            LcgCapacity.final_results = convert_dfs(
                [lcg_analysis_df, kpi_df], ["lcg_analysis", "kpi"]
            )
            st.download_button(
                on_click="ignore",
                type="primary",
                label="Download the Analysis Report",
                data=LcgCapacity.final_results,
                file_name="LCG_Capacity_Report.xlsx",
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            )
            st.write(lcg_analysis_df)

            # Table and pie chart of the "final_comments" distribution.
            st.markdown("***")
            st.markdown(":blue[**Final comment distribution**]")
            final_comments_df = (
                lcg_analysis_df.groupby("final_comments")
                .size()
                .reset_index(name="count")
                .sort_values(by="count", ascending=False)
            )
            final_comments_df["percent"] = (
                final_comments_df["count"] / final_comments_df["count"].sum()
            ) * 100
            final_comments_col1, final_comments_col2 = st.columns((1, 3))
            with final_comments_col1:
                st.write(final_comments_df)
            with final_comments_col2:
                fig = px.pie(
                    final_comments_df,
                    names="final_comments",
                    values="count",
                    hover_name="final_comments",
                    hover_data=["count", "percent"],
                    title="Final Comments Distribution",
                )
                fig.update_layout(height=600)
                # customdata follows hover_data order, so index 1 is "percent".
                fig.update_traces(
                    texttemplate="<b>%{label}</b><br> %{value} <b>(%{customdata[1]:.1f}%)</b>",
                    textfont_size=15,
                    textposition="outside",
                )
                st.plotly_chart(fig)

            # Table and bar chart of the "final_comments" distribution per Region.
            st.markdown("***")
            st.markdown(":blue[**Final comment distribution per Region**]")
            final_comments_df = (
                lcg_analysis_df.groupby(["Region", "final_comments"])
                .size()
                .reset_index(name="count")
                .sort_values(by="count", ascending=False)
            )
            final_comments_col1, final_comments_col2 = st.columns((1, 3))
            with final_comments_col1:
                st.write(final_comments_df)
            with final_comments_col2:
                fig = px.bar(
                    final_comments_df,
                    x="Region",
                    y="count",
                    color="final_comments",
                    title="Final Comments Distribution per Region",
                    text="count",
                )
                fig.update_traces(textposition="outside")
                fig.update_layout(height=600)
                st.plotly_chart(fig)

            # Map of sites coloured by "final_comments"; rows missing the code,
            # a coordinate or the comment cannot be plotted and are dropped.
            st.markdown("***")
            st.markdown(":blue[**Final comments distribution**]")
            final_comments_map_df = (
                lcg_analysis_df[["code", "Longitude", "Latitude", "final_comments"]]
                .dropna(subset=["code", "Longitude", "Latitude", "final_comments"])
                # Constant marker size so every site is drawn identically.
                .assign(size=20)
            )

            # px.scatter_map draws MapLibre "scattermap" traces, which are
            # styled through layout.map; the former
            # update_layout(mapbox_style=...) call only touched layout.mapbox
            # and therefore never applied the OpenStreetMap tiles.
            fig = px.scatter_map(
                final_comments_map_df,
                lat="Latitude",
                lon="Longitude",
                color="final_comments",
                size="size",
                zoom=10,
                height=600,
                title="Final Comments Distribution",
                hover_data={
                    "code": True,
                    "final_comments": True,
                },
                hover_name="code",
                map_style="open-street-map",
            )
            st.plotly_chart(fig)
process_kpi/process_lcg_capacity.py ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
+ from utils.kpi_analysis_utils import (
5
+ analyze_lcg_utilization,
6
+ combine_comments,
7
+ create_daily_date,
8
+ create_dfs_per_kpi,
9
+ kpi_naming_cleaning,
10
+ )
11
+ from utils.utils_vars import get_physical_db
12
+
13
# Maps the combined "<lcg comment(s)>, <number_of_lcg>" string built in
# lcg_kpi_analysis to the final, human-readable recommendation. Values that
# have no entry here are kept unchanged by the .get(x, x) lookup.
# NOTE(review): the exact key format depends on how combine_comments joins its
# inputs — confirm against that helper.
# NOTE(review): "upgrage" looks like a typo for "upgrade"; left untouched here
# because these strings are emitted at runtime (tables, charts, export).
lcg_comments_mapping = {
    "2": "No Congestion",
    "1": "No Congestion",
    "lcg1 exceeded threshold, lcg2 exceeded threshold, 2": "Need BB SU upgrage",
    "lcg1 exceeded threshold, 2": "Need LCG balancing",
    "lcg1 exceeded threshold, 1": "Need BB SU upgrage",
    "lcg2 exceeded threshold, 2": "Need LCG balancing",
}


# Columns required in the uploaded report after name cleaning and date
# creation; the input frame is validated against, then reduced to, this list.
KPI_COLUMNS = [
    "date",
    "WBTS_name",
    "lcg_id",
    "BB_SU_LCG_MAX_R",
]

# Columns kept, in this order, in the detailed KPI frame returned by
# lcg_kpi_analysis.
LCG_ANALYSIS_COLUMNS = [
    "WBTS_name",
    "lcg1_utilisation",
    "avg_lcg1",
    "max_lcg1",
    "number_of_days_with_lcg1_exceeded",
    "lcg1_comment",
    "lcg2_utilisation",
    "avg_lcg2",
    "max_lcg2",
    "number_of_days_with_lcg2_exceeded",
    "lcg2_comment",
    "difference_between_lcgs",
    "difference_between_lcgs_comment",
    "lcg_comment",
    "number_of_lcg",
    "final_comments",
]
48
+
49
+
50
def lcg_kpi_analysis(
    df,
    num_last_days,
    num_threshold_days,
    lcg_utilization_threshold,
    difference_between_lcgs,
) -> list:
    """
    Analyze LCG capacity data per site (WBTS_name).

    The input is split by lcg_id (1 and 2), each subset is pivoted and analysed
    with analyze_lcg_utilization, the per-LCG results are combined into one
    frame, and a final comment is derived for every site. A condensed per-site
    frame is then enriched with a site code, a region and coordinates looked
    up in the physical database.

    Args:
        df: DataFrame containing LCG capacity data (the caller passes the
            KPI_COLUMNS subset of the uploaded report)
        num_last_days: Number of days for analysis
        num_threshold_days: Minimum days above threshold to flag for upgrade
        lcg_utilization_threshold: Utilization threshold percentage for flagging
        difference_between_lcgs: Difference between LCGs for flagging

    Returns:
        A two-item list ``[lcg_analysis_df, kpi_df]``: the condensed per-site
        analysis (with code, Region, Longitude, Latitude) and the detailed
        frame restricted to LCG_ANALYSIS_COLUMNS.
    """

    lcg1_df = df[df["lcg_id"] == 1]
    lcg2_df = df[df["lcg_id"] == 2]

    # NOTE(review): create_dfs_per_kpi is defined elsewhere; its result is
    # indexed by KPI name below — presumably one pivoted (site x date) frame
    # per KPI. Confirm against the helper.
    pivoted_kpi_dfs = create_dfs_per_kpi(
        df=df,
        pivot_date_column="date",
        pivot_name_column="WBTS_name",
        kpi_columns_from=2,
    )

    pivoted_lcg1_df = create_dfs_per_kpi(
        df=lcg1_df,
        pivot_date_column="date",
        pivot_name_column="WBTS_name",
        kpi_columns_from=2,
    )
    pivoted_lcg2_df = create_dfs_per_kpi(
        df=lcg2_df,
        pivot_date_column="date",
        pivot_name_column="WBTS_name",
        kpi_columns_from=2,
    )

    # BB_SU_LCG_MAX_R pivoted over the unfiltered data, so that every site
    # with LCG 1 and/or LCG 2 is present in the combined frame.
    BB_SU_LCG_MAX_R_df = pivoted_kpi_dfs["BB_SU_LCG_MAX_R"]

    pivoted_lcg1_df = pivoted_lcg1_df["BB_SU_LCG_MAX_R"]
    pivoted_lcg2_df = pivoted_lcg2_df["BB_SU_LCG_MAX_R"]

    # Rename the KPI column label so each LCG's values can be told apart.
    pivoted_lcg1_df = pivoted_lcg1_df.rename(
        columns={"BB_SU_LCG_MAX_R": "lcg1_utilisation"}
    )
    pivoted_lcg2_df = pivoted_lcg2_df.rename(
        columns={"BB_SU_LCG_MAX_R": "lcg2_utilisation"}
    )

    # Analyze LCG utilization for each site over num_last_days, flagging it
    # when the threshold is reached on at least num_threshold_days days.
    pivoted_lcg1_df = analyze_lcg_utilization(
        df=pivoted_lcg1_df,
        number_of_kpi_days=num_last_days,
        number_of_threshold_days=num_threshold_days,
        kpi_threshold=lcg_utilization_threshold,
        kpi_column_name="lcg1",
    )
    pivoted_lcg2_df = analyze_lcg_utilization(
        df=pivoted_lcg2_df,
        number_of_kpi_days=num_last_days,
        number_of_threshold_days=num_threshold_days,
        kpi_threshold=lcg_utilization_threshold,
        kpi_column_name="lcg2",
    )
    # Column-wise concatenation aligned on the shared index.
    kpi_df = pd.concat(
        [
            BB_SU_LCG_MAX_R_df,
            pivoted_lcg1_df,
            pivoted_lcg2_df,
        ],
        axis=1,
    )

    kpi_df = kpi_df.reset_index()

    # Number of available lcgs
    # kpi_df = pd.merge(kpi_df, available_lcgs_df, on="WBTS_name", how="left")

    # Calculate the difference between the lcg1 and lcg2 averages.
    # NOTE(review): the builtin max()/min() are order-sensitive when one value
    # is NaN, so sites with a single LCG may get 0 or NaN here depending on
    # which LCG is missing — confirm this is acceptable.
    kpi_df["difference_between_lcgs"] = kpi_df[["avg_lcg1", "avg_lcg2"]].apply(
        lambda row: max(row) - min(row), axis=1
    )

    # Flag if the difference between lcg1 and lcg2 is above the threshold.
    kpi_df["difference_between_lcgs_comment"] = np.where(
        kpi_df["difference_between_lcgs"] > difference_between_lcgs,
        "difference between lcgs exceeded threshold",
        None,
    )

    # Combine the per-LCG comments into "lcg_comment"; the difference comment
    # is deliberately left out of the combination.
    kpi_df = combine_comments(
        kpi_df,
        "lcg1_comment",
        "lcg2_comment",
        # "difference_between_lcgs_comment",
        new_column="lcg_comment",
    )

    # Replace a "lcg_comment" that is exactly "nan" with None.
    kpi_df["lcg_comment"] = kpi_df["lcg_comment"].replace("nan", None)

    # Remove "nan" tokens (with an optional trailing comma/space) from
    # comma-separated strings.
    kpi_df["lcg_comment"] = (
        kpi_df["lcg_comment"].str.replace(r"\bnan\b,?\s?", "", regex=True).str.strip()
    )

    # 2 when both LCG averages exist, 1 when only one does, 0 otherwise.
    kpi_df["number_of_lcg"] = np.where(
        kpi_df["avg_lcg1"].notna() & kpi_df["avg_lcg2"].notna(),
        2,
        np.where(kpi_df["avg_lcg1"].notna() | kpi_df["avg_lcg2"].notna(), 1, 0),
    )
    # Combine the LCG comment with the LCG count, then translate the combined
    # string through lcg_comments_mapping (unmapped values pass through).
    kpi_df = combine_comments(
        kpi_df,
        "lcg_comment",
        "number_of_lcg",
        new_column="final_comments",
    )
    kpi_df["final_comments"] = kpi_df["final_comments"].apply(
        lambda x: lcg_comments_mapping.get(x, x)
    )
    kpi_df = kpi_df[LCG_ANALYSIS_COLUMNS]

    lcg_analysis_df = kpi_df.copy()

    # Condensed view: drops the per-day utilisation columns and the
    # intermediate comment columns.
    lcg_analysis_df = lcg_analysis_df[
        [
            "WBTS_name",
            "avg_lcg1",
            "max_lcg1",
            "number_of_days_with_lcg1_exceeded",
            "lcg1_comment",
            "avg_lcg2",
            "max_lcg2",
            "number_of_days_with_lcg2_exceeded",
            "lcg2_comment",
            "difference_between_lcgs",
            "final_comments",
        ]
    ]

    # Drop the second level of the column index.
    # NOTE(review): implies the columns are a two-level MultiIndex at this
    # point, presumably coming from the pivot — confirm.
    lcg_analysis_df = lcg_analysis_df.droplevel(level=1, axis=1)
    # Remove rows whose WBTS_name is shorter than 5 characters.
    lcg_analysis_df = lcg_analysis_df[lcg_analysis_df["WBTS_name"].str.len() >= 5]

    # Add code: the part of WBTS_name before the first underscore.
    lcg_analysis_df["code"] = lcg_analysis_df["WBTS_name"].str.split("_").str[0]

    # Non-numeric codes are coerced to 0.
    lcg_analysis_df["code"] = (
        pd.to_numeric(lcg_analysis_df["code"], errors="coerce").fillna(0).astype(int)
    )

    # Region: the second underscore-separated token of WBTS_name.
    lcg_analysis_df["Region"] = (
        lcg_analysis_df["WBTS_name"].str.split("_").str[1:2].str.join("_")
    )
    lcg_analysis_df["Region"] = lcg_analysis_df["Region"].fillna("UNKNOWN")

    # Move code and Region to the first columns.
    lcg_analysis_df = lcg_analysis_df[
        ["code", "Region"]
        + [col for col in lcg_analysis_df if col != "code" and col != "Region"]
    ]

    # Load physical database
    physical_db: pd.DataFrame = get_physical_db()

    # Convert Code_Sector to code (the part before the first underscore).
    physical_db["code"] = physical_db["Code_Sector"].str.split("_").str[0]
    # Keep one row per code.
    physical_db = physical_db.drop_duplicates(subset="code")

    # Keep only code, longitude and latitude.
    physical_db = physical_db[["code", "Longitude", "Latitude"]]

    # Same integer coercion as above so both "code" columns are comparable.
    physical_db["code"] = (
        pd.to_numeric(physical_db["code"], errors="coerce").fillna(0).astype(int)
    )

    # Left merge: sites without a match keep NaN coordinates.
    lcg_analysis_df = pd.merge(
        lcg_analysis_df,
        physical_db,
        on="code",
        how="left",
    )

    return [lcg_analysis_df, kpi_df]
246
+
247
+
248
def load_and_process_lcg_data(
    uploaded_file,
    num_last_days,
    num_threshold_days,
    lcg_utilization_threshold,
    difference_between_lcgs,
) -> list:
    """Load an LCG report and run the LCG capacity analysis on it.

    Args:
        uploaded_file: Path or file-like object holding the ';'-delimited CSV
            report.
        num_last_days: Number of days for analysis.
        num_threshold_days: Minimum days above threshold to flag a site.
        lcg_utilization_threshold: Utilization threshold percentage.
        difference_between_lcgs: Allowed difference between the two LCGs.

    Returns:
        The two-item list returned by lcg_kpi_analysis:
        ``[lcg_analysis_df, kpi_df]``.

    Raises:
        RuntimeError: If loading, validation or processing fails. The message
            is user-facing and the original exception is chained as
            ``__cause__``.
    """
    try:
        # Load data
        df = pd.read_csv(uploaded_file, delimiter=";")
        if df.empty:
            raise ValueError("Uploaded file is empty")

        df = kpi_naming_cleaning(df)
        df = create_daily_date(df)

        # Validate required columns
        missing_cols = [col for col in KPI_COLUMNS if col not in df.columns]
        if missing_cols:
            raise ValueError(f"Missing required columns: {', '.join(missing_cols)}")

        # Process the data
        return lcg_kpi_analysis(
            df[KPI_COLUMNS],
            num_last_days,
            num_threshold_days,
            lcg_utilization_threshold,
            difference_between_lcgs,
        )
    except Exception as e:
        # This module does not import streamlit, so reporting through the UI
        # here would itself fail with a NameError and hide the real cause.
        # Re-raise with a user-friendly message and let the caller display it.
        raise RuntimeError(f"Error processing LCG data: {e}") from e
utils/kpi_analysis_utils.py CHANGED
@@ -636,3 +636,31 @@ def analyze_fails_kpi(
636
  None,
637
  )
638
  return result_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
636
  None,
637
  )
638
  return result_df
639
+
640
+
641
def analyze_lcg_utilization(
    df: pd.DataFrame,
    number_of_kpi_days: int,
    number_of_threshold_days: int,
    kpi_threshold: int,
    kpi_column_name: str,
) -> pd.DataFrame:
    """Summarise the last days of a per-row KPI table and flag threshold breaches.

    Args:
        df: Frame whose trailing columns hold the values to analyse; it is
            copied, not modified.
        number_of_kpi_days: How many trailing columns are taken into account.
        number_of_threshold_days: Minimum number of values at or above
            ``kpi_threshold`` required to flag a row.
        kpi_threshold: Value a cell must reach (``>=``) to count as exceeded.
        kpi_column_name: Name used to build the result column labels.

    Returns:
        A copy of ``df`` with four extra columns: ``avg_<name>`` (rounded to
        2 decimals), ``max_<name>``, ``number_of_days_with_<name>_exceeded``
        and ``<name>_comment`` ("<name> exceeded threshold" or None).
    """
    result_df: pd.DataFrame = df.copy()
    # Taken before any column is added, so it only contains input columns.
    last_days_df: pd.DataFrame = result_df.iloc[:, -number_of_kpi_days:]

    result_df[f"avg_{kpi_column_name}"] = last_days_df.mean(axis=1).round(2)
    result_df[f"max_{kpi_column_name}"] = last_days_df.max(axis=1)

    # Count the values at or above the threshold per row. The comparison is
    # vectorized; missing values compare as False and are therefore not
    # counted, exactly as with an element-by-element check.
    exceeded_column = f"number_of_days_with_{kpi_column_name}_exceeded"
    result_df[exceeded_column] = (last_days_df >= kpi_threshold).sum(axis=1)

    # Flag the row when enough values reached the threshold, else leave None.
    result_df[f"{kpi_column_name}_comment"] = np.where(
        result_df[exceeded_column] >= number_of_threshold_days,
        f"{kpi_column_name} exceeded threshold",
        None,
    )
    return result_df