Adding WBTS KPI analysis

Files changed:
- app.py (+6, -0)
- apps/kpi_analysis/wbts_capacty.py (+107, -0)
- assets/wbts_capacity.png (binary, added)
- process_kpi/process_wbts_capacity.py (+422, -0)
- utils/convert_to_excel.py (+1, -1)
app.py
CHANGED
@@ -27,6 +27,12 @@ pages = {
         ),
         st.Page("apps/import_physical_db.py", title="🌏Physical Database Verification"),
     ],
+    "KPI Analysis": [
+        st.Page(
+            "apps/kpi_analysis/wbts_capacty.py",
+            title=" 📊 WBTS Capacity BB and CE Analysis",
+        ),
+    ],
     "Documentations": [
         st.Page("documentations/database_doc.py", title="📚Databases Documentation"),
         st.Page("documentations/core_dump_doc.py", title="📗Dump core Documentation"),
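Note: the `pages` dict extended above is presumably consumed by Streamlit's multipage navigation API elsewhere in app.py; that code is outside this hunk, so the following is a minimal sketch of the assumed wiring, not the commit's actual code:

import streamlit as st

# Hypothetical wiring -- only the `pages` dict entries appear in this hunk.
pages = {
    "KPI Analysis": [
        st.Page(
            "apps/kpi_analysis/wbts_capacty.py",
            title=" 📊 WBTS Capacity BB and CE Analysis",
        ),
    ],
}

pg = st.navigation(pages)  # builds the grouped sidebar, returns the selected page
pg.run()                   # executes the selected page's script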
apps/kpi_analysis/wbts_capacty.py
ADDED
@@ -0,0 +1,107 @@
import pandas as pd
import plotly.express as px
import streamlit as st

from process_kpi.process_wbts_capacity import WbtsCapacity, load_data
from utils.convert_to_excel import convert_dfs

# Streamlit UI

st.title(" 📊 WBTS Capacity Analysis")
doc_col, image_col = st.columns(2)

with doc_col:
    st.write(
        """This app allows you to analyze the capacity of WBTSs in a network.
        It provides insights into the utilization of BB and CE resources,
        helping you identify potential capacity issues and plan for upgrades.

        The report should be run with a minimum of 3 days of data.
        - Daily Aggregated
        - WBTS level
        - Exported in CSV format.
        """
    )

with image_col:
    st.image("./assets/wbts_capacity.png")

uploaded_file = st.file_uploader(
    "Upload WBTS capacity report in CSV format", type="csv"
)

col1, col2, col3 = st.columns(3)

if uploaded_file is not None:
    WbtsCapacity.final_results = None
    with col1:
        num_days = st.number_input(
            "Number of days for analysis",
            min_value=3,
            max_value=30,
            value=7,
        )
    with col2:
        number_of_threshold_days = st.number_input(
            "Number of days for threshold",
            min_value=1,
            max_value=30,
            value=3,
        )
    with col3:
        threshold = st.number_input("Threshold", min_value=1, max_value=100, value=80)

    if st.button("Analyze Data", type="primary"):
        try:
            df = load_data(uploaded_file, num_days, threshold, number_of_threshold_days)
            WbtsCapacity.final_results = convert_dfs([df], ["WBTS_Analysis"])

            if WbtsCapacity.final_results is not None:
                st.download_button(
                    on_click="ignore",
                    type="primary",
                    label="Download the Analysis Report",
                    data=WbtsCapacity.final_results,
                    file_name="WBTS_Analysis_Report.xlsx",
                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                )
                st.write(df)

                # BB comments analysis and visualization
                bb_comments_df = df["bb_comments"].value_counts().reset_index()
                bb_comments_df.columns = ["bb_comments", "count"]

                bb_col1, bb_col2 = st.columns(2)
                with bb_col1:
                    st.write(bb_comments_df)

                # BB comments chart
                fig = px.bar(
                    bb_comments_df,
                    x="bb_comments",
                    y="count",
                    title="BB Comments Distribution",
                )
                fig.update_traces(texttemplate="%{value}", textposition="outside")
                with bb_col2:
                    st.plotly_chart(fig)

                # CE comments analysis and visualization
                ce_comments_df = df["ce_comments"].value_counts().reset_index()
                ce_comments_df.columns = ["ce_comments", "count"]

                ce_col1, ce_col2 = st.columns(2)
                with ce_col1:
                    st.write(ce_comments_df)

                # CE comments chart
                fig = px.bar(
                    ce_comments_df,
                    x="ce_comments",
                    y="count",
                    title="CE Comments Distribution",
                )
                fig.update_traces(texttemplate="%{value}", textposition="outside")
                with ce_col2:
                    st.plotly_chart(fig)
        except Exception as e:
            st.error(f"An error occurred. Error: {e}")
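Note: the expected input, inferred from process_kpi/process_wbts_capacity.py below, is a semicolon-delimited CSV with a PERIOD_START_TIME column (MM.DD.YYYY), a DN column, and the per-WBTS KPI columns listed there. A small pre-flight sketch to sanity-check an export before uploading it; the `missing_kpis` helper is hypothetical, not part of the commit:

import pandas as pd

from process_kpi.process_wbts_capacity import kpi_naming_cleaning

# KPI names the pipeline looks up after its column-name cleaning step.
REQUIRED_KPIS = [
    "WBTS_name",
    "LICENSED_R99CE_WBTS_M5008C48",
    "MAX_USED_CE_R99_DL_M5008C12",
    "MAX_USED_CE_R99_UL_M5008C15",
    "MAX_AVAIL_R99_CE_M5006C0",
    "MAX_USED_BB_SUBUNITS_M5008C38",
    "NUM_BB_SUBUNITS_M5008C39",
    "Max_BB_SUs_Util_ratio",
    "Cell_Availability_excluding_blocked_by_user_state_BLU",
    "Total_CS_traffic_Erl",
    "Total_Data_Traffic",
    "Max_Used_CE_s_ratio_Flexi_R2",
]


def missing_kpis(csv_path: str) -> list[str]:
    """Return required KPI columns absent from the export (hypothetical helper)."""
    header = pd.read_csv(csv_path, delimiter=";", nrows=0)  # read column names only
    cleaned = kpi_naming_cleaning(header)
    return [k for k in REQUIRED_KPIS if k not in cleaned.columns]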
assets/wbts_capacity.png
ADDED
process_kpi/process_wbts_capacity.py
ADDED
@@ -0,0 +1,422 @@
import pandas as pd


class WbtsCapacity:
    final_results: pd.DataFrame = None


def check_deviation(row: pd.Series, max_diff: float = 3.0, type: str = "") -> str:
    """
    Check if any value in the row deviates more than max_diff from the most common value.

    Args:
        row: Series of values to check for deviation
        max_diff: Maximum allowed difference from the most common value
        type: Type identifier for the deviation message

    Returns:
        A message indicating deviation if found, otherwise an empty string
    """
    numeric_row = row.astype(float)  # Ensure numeric
    mode_series = numeric_row.mode()

    # Safe fallback in case mode is empty
    most_common = mode_series.iloc[0] if not mode_series.empty else numeric_row.iloc[0]

    diffs = abs(numeric_row - most_common)

    if (diffs > max_diff).any():
        return f"{type} Deviation > {max_diff} detected"
    else:
        return ""
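
# Illustrative behaviour (example values are mine, not part of the commit):
# check_deviation(pd.Series([2, 2, 2]), type="bb") returns "" because every
# value equals the mode, while check_deviation(pd.Series([2, 2, 6]), type="bb")
# returns "bb Deviation > 3.0 detected", since 6 differs from the mode (2) by 4.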

def create_daily_date(df: pd.DataFrame) -> pd.DataFrame:
    """
    Create a daily date column from PERIOD_START_TIME and drop unnecessary columns.

    Args:
        df: DataFrame containing PERIOD_START_TIME column

    Returns:
        DataFrame with new date column and unnecessary columns removed
    """
    date_df = df.copy()
    date_df[["mois", "jour", "annee"]] = date_df["PERIOD_START_TIME"].str.split(
        ".", expand=True
    )
    date_df["date"] = date_df["annee"] + "-" + date_df["mois"] + "-" + date_df["jour"]
    # Remove unnecessary columns
    date_df = date_df.drop(["annee", "mois", "jour", "PERIOD_START_TIME"], axis=1)
    return date_df


def kpi_naming_cleaning(df: pd.DataFrame) -> pd.DataFrame:
    """
    Clean KPI column names by replacing special characters and standardizing format.

    Args:
        df: DataFrame with KPI column names to clean

    Returns:
        DataFrame with cleaned column names
    """
    name_df = df.copy()
    name_df.columns = name_df.columns.str.replace("[ /(),-.']", "_", regex=True)
    name_df.columns = name_df.columns.str.replace("___", "_")
    name_df.columns = name_df.columns.str.replace("__", "_")
    name_df.columns = name_df.columns.str.replace("%", "perc")
    name_df.columns = name_df.columns.str.rstrip("_")
    return name_df


def create_wbts_index(df: pd.DataFrame) -> pd.DataFrame:
    """
    Create a custom index by combining date and DN columns.

    Args:
        df: DataFrame containing date and DN columns

    Returns:
        DataFrame with new custom_index column
    """
    wbts_index_df = df.copy()
    wbts_index_df["custom_index"] = wbts_index_df["date"] + "_" + wbts_index_df["DN"]
    return wbts_index_df


def max_used_bb_subunits_analysis(
    df: pd.DataFrame,
    days: int = 7,
    threshold: int = 80,
    number_of_threshold_days: int = 3,
) -> pd.DataFrame:
    """
    Analyze maximum used baseband subunits and identify sites needing upgrades.

    Args:
        df: DataFrame containing baseband utilization data
        days: Number of days to analyze
        threshold: Utilization threshold percentage for flagging
        number_of_threshold_days: Minimum days above threshold to flag for upgrade

    Returns:
        DataFrame with analysis results and upgrade recommendations
    """
    result_df = df.copy()
    last_days_df = result_df.iloc[:, -days:]
    last_days_df = last_days_df.fillna(0)

    result_df["Average_used_bb_ratio"] = last_days_df.mean(axis=1).round(2)
    # Count the number of days above threshold
    result_df["bb_number_of_days_exceeding_threshold"] = last_days_df.apply(
        lambda row: sum(1 for x in row if x >= threshold), axis=1
    )

    # Initialize comment column
    result_df["Average_used_bb_ratio_comment"] = ""

    # Apply condition for upgrade recommendation
    result_df.loc[
        (result_df["bb_number_of_days_exceeding_threshold"] >= number_of_threshold_days)
        & (result_df["Average_used_bb_ratio"] >= threshold),
        "Average_used_bb_ratio_comment",
    ] = "need BB upgrade"

    return result_df


def cell_availability_analysis(df: pd.DataFrame, days: int = 7) -> pd.DataFrame:
    """
    Analyze cell availability and categorize sites based on availability metrics.

    Args:
        df: DataFrame containing cell availability data
        days: Number of days to analyze

    Returns:
        DataFrame with availability analysis and site status comments
    """
    result_df = df.copy().fillna(0)
    last_days_df = result_df.iloc[:, -days:]
    result_df["Average_cell_availability"] = last_days_df.mean(axis=1).round(2)

    # Categorize sites based on availability
    def categorize_availability(x: float) -> str:
        if x == 0 or pd.isnull(x):
            return "Down Site"
        elif 0 < x <= 70:
            return "critical instability"
        elif 70 < x <= 95:
            return "instability"
        else:
            return "Site Ok"

    result_df["availability_comment"] = result_df["Average_cell_availability"].apply(
        categorize_availability
    )

    return result_df


def max_used_ce_analysis(
    df: pd.DataFrame,
    days: int = 7,
    threshold: int = 80,
    number_of_threshold_days: int = 3,
) -> pd.DataFrame:
    """
    Analyze maximum used channel elements and identify sites needing upgrades.

    Args:
        df: DataFrame containing channel element utilization data
        days: Number of days to analyze
        threshold: Utilization threshold percentage for flagging
        number_of_threshold_days: Minimum days above threshold to flag for upgrade

    Returns:
        DataFrame with analysis results and upgrade recommendations
    """
    result_df = df.copy().fillna(0)
    last_days_df = result_df.iloc[:, -days:]

    result_df["Average_used_ce_ratio"] = last_days_df.mean(axis=1).round(2)

    # Count the number of days above threshold
    result_df["ce_number_of_days_exceeding_threshold"] = last_days_df.apply(
        lambda row: sum(1 for x in row if x >= threshold), axis=1
    )

    # Initialize comment column
    result_df["Average_used_ce_ratio_comment"] = ""

    # Apply condition for upgrade recommendation
    result_df.loc[
        (result_df["ce_number_of_days_exceeding_threshold"] >= number_of_threshold_days)
        & (result_df["Average_used_ce_ratio"] >= threshold),
        "Average_used_ce_ratio_comment",
    ] = "need CE upgrade"

    return result_df


def num_bb_subunits_analysis(df: pd.DataFrame, days: int = 3) -> pd.DataFrame:
    """
    Analyze baseband subunit count for deviations.

    Args:
        df: DataFrame containing baseband subunit count data
        days: Number of days to analyze

    Returns:
        DataFrame with deviation analysis comments
    """
    result_df = df.copy()
    last_days_df = result_df.iloc[:, -days:]
    result_df["num_bb_subunits_comment"] = last_days_df.apply(
        lambda row: check_deviation(row, type="bb"), axis=1
    )
    return result_df


def avail_ce_analysis(df: pd.DataFrame, days: int = 7) -> pd.DataFrame:
    """
    Analyze available channel elements for deviations.

    Args:
        df: DataFrame containing available channel element data
        days: Number of days to analyze

    Returns:
        DataFrame with deviation analysis comments
    """
    result_df = df.copy()
    last_days_df = result_df.iloc[:, -days:]
    result_df["avail_ce_comment"] = last_days_df.apply(
        lambda row: check_deviation(row, max_diff=96, type="ce"), axis=1
    )
    return result_df


def combine_comments(df: pd.DataFrame, *columns: str, new_column: str) -> pd.DataFrame:
    """
    Combine comments from multiple columns into one column.

    Args:
        df: DataFrame containing comment columns
        *columns: Variable number of column names containing comments
        new_column: Name for the new combined comments column

    Returns:
        DataFrame with a new column containing combined comments
    """
    result_df = df.copy()
    result_df[new_column] = result_df[list(columns)].apply(
        lambda row: ", ".join([x for x in row if x]), axis=1
    )
    # Trim leading and trailing commas and whitespace
    result_df[new_column] = result_df[new_column].str.replace(
        r"^[,\s]+|[,\s]+$", "", regex=True
    )
    # Replace multiple commas with a single comma
    result_df[new_column] = result_df[new_column].str.replace(
        r",\s*,", ", ", regex=True
    )
    return result_df
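
# Worked example (values are mine, not part of the commit): for a row where
# num_bb_subunits_comment="bb Deviation > 3.0 detected",
# Average_used_bb_ratio_comment="need BB upgrade" and availability_comment="",
# combine_comments joins only the non-empty entries, yielding
# "bb Deviation > 3.0 detected, need BB upgrade".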

def bb_comments_analysis(df: pd.DataFrame) -> pd.DataFrame:
    """
    Combine baseband related comments into a single column.

    Args:
        df: DataFrame containing baseband comment columns

    Returns:
        DataFrame with combined baseband comments
    """
    return combine_comments(
        df,
        "num_bb_subunits_comment",
        "Average_used_bb_ratio_comment",
        "availability_comment",
        new_column="bb_comments",
    )


def ce_comments_analysis(df: pd.DataFrame) -> pd.DataFrame:
    """
    Combine channel element related comments into a single column.

    Args:
        df: DataFrame containing channel element comment columns

    Returns:
        DataFrame with combined channel element comments
    """
    return combine_comments(
        df,
        "avail_ce_comment",
        "Average_used_ce_ratio_comment",
        "availability_comment",
        new_column="ce_comments",
    )


def create_dfs_per_kpi(
    df: pd.DataFrame,
    num_days: int = 7,
    threshold: int = 80,
    number_of_threshold_days: int = 3,
) -> pd.DataFrame:
    """
    Create pivoted DataFrames for each KPI and perform analysis.

    Args:
        df: DataFrame containing KPI data
        num_days: Number of days to analyze
        threshold: Utilization threshold percentage for flagging
        number_of_threshold_days: Minimum days above threshold to flag for upgrade

    Returns:
        DataFrame with combined analysis results
    """
    kpi_columns = df.columns[5:]
    pivoted_kpi_dfs = {}

    # Loop through each KPI and create pivoted DataFrames
    for kpi in kpi_columns:
        temp_df = df[["date", "DN", kpi]].copy()

        # Pivot the dataframe
        pivot_df = temp_df.pivot(index="DN", columns="date", values=kpi)
        pivot_df.columns = pd.MultiIndex.from_product([[kpi], pivot_df.columns])
        pivot_df.columns.names = ["KPI", "Date"]

        # Store in dictionary with KPI name as key
        pivoted_kpi_dfs[kpi] = pivot_df

    # Extract individual KPI DataFrames
    wbts_name_df = pivoted_kpi_dfs["WBTS_name"].iloc[:, 0]
    licensed_ce_df = pivoted_kpi_dfs["LICENSED_R99CE_WBTS_M5008C48"]
    max_used_ce_dl_df = pivoted_kpi_dfs["MAX_USED_CE_R99_DL_M5008C12"]
    max_used_ce_ul_df = pivoted_kpi_dfs["MAX_USED_CE_R99_UL_M5008C15"]
    max_avail_ce_df = pivoted_kpi_dfs["MAX_AVAIL_R99_CE_M5006C0"]
    max_used_bb_subunits_df = pivoted_kpi_dfs["MAX_USED_BB_SUBUNITS_M5008C38"]
    num_bb_subunits_df = pivoted_kpi_dfs["NUM_BB_SUBUNITS_M5008C39"]
    max_bb_sus_util_ratio_df = pivoted_kpi_dfs["Max_BB_SUs_Util_ratio"]
    cell_availability_df = pivoted_kpi_dfs[
        "Cell_Availability_excluding_blocked_by_user_state_BLU"
    ]
    total_cs_traffic_df = pivoted_kpi_dfs["Total_CS_traffic_Erl"]
    total_data_traffic_df = pivoted_kpi_dfs["Total_Data_Traffic"]
    max_used_ce_ratio_flexi_df = pivoted_kpi_dfs["Max_Used_CE_s_ratio_Flexi_R2"]

    # Perform analysis on each KPI DataFrame
    max_bb_sus_util_ratio_df = max_used_bb_subunits_analysis(
        max_bb_sus_util_ratio_df, num_days, threshold, number_of_threshold_days
    )
    cell_availability_df = cell_availability_analysis(cell_availability_df, num_days)
    max_used_ce_ratio_flexi_df = max_used_ce_analysis(
        max_used_ce_ratio_flexi_df, num_days, threshold, number_of_threshold_days
    )
    num_bb_subunits_df = num_bb_subunits_analysis(num_bb_subunits_df, num_days)
    licensed_ce_df = avail_ce_analysis(licensed_ce_df, num_days)

    # Concatenate all DataFrames
    result_df = pd.concat(
        [
            wbts_name_df,
            licensed_ce_df,
            max_used_ce_dl_df,
            max_used_ce_ul_df,
            max_avail_ce_df,
            max_used_bb_subunits_df,
            num_bb_subunits_df,
            max_bb_sus_util_ratio_df,
            cell_availability_df,
            total_cs_traffic_df,
            total_data_traffic_df,
            max_used_ce_ratio_flexi_df,
        ],
        axis=1,
    )

    # Add combined comments analysis
    result_df = bb_comments_analysis(result_df)
    result_df = ce_comments_analysis(result_df)

    return result_df


def load_data(
    filepath: str,
    num_days: int,
    threshold: int,
    number_of_threshold_days: int,
) -> pd.DataFrame:
    """
    Load data from CSV file and perform preprocessing and analysis.

    Args:
        filepath: Path to CSV file or uploaded file object
        num_days: Number of days to analyze
        threshold: Utilization threshold percentage for flagging
        number_of_threshold_days: Minimum days above threshold to flag for upgrade

    Returns:
        DataFrame with processed and analyzed data
    """
    df = pd.read_csv(filepath, delimiter=";")

    # Preprocess data
    df = create_daily_date(df)
    df = kpi_naming_cleaning(df)

    # Reorder columns for better organization
    df = df[["date"] + [col for col in df.columns if col not in ["date"]]]
    df = df[[col for col in df.columns if col != "WBTS_name"] + ["WBTS_name"]]

    # Perform KPI analysis
    df = create_dfs_per_kpi(df, num_days, threshold, number_of_threshold_days)
    return df
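Note: `load_data` also works outside Streamlit. A minimal batch sketch mirroring what the app's "Analyze Data" button does; the CSV filename is a placeholder:

from process_kpi.process_wbts_capacity import load_data
from utils.convert_to_excel import convert_dfs

# "wbts_capacity_report.csv" is a placeholder path to a semicolon-delimited export.
df = load_data(
    "wbts_capacity_report.csv", num_days=7, threshold=80, number_of_threshold_days=3
)

# convert_dfs returns the xlsx workbook as bytes, ready to write to disk.
with open("WBTS_Analysis_Report.xlsx", "wb") as f:
    f.write(convert_dfs([df], ["WBTS_Analysis"]))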
utils/convert_to_excel.py
CHANGED
@@ -15,7 +15,7 @@ def convert_dfs(dfs: list[pd.DataFrame], sheet_names: list[str]) -> bytes:
     # Write the dataframes to the BytesIO object
     with pd.ExcelWriter(bytes_io, engine="xlsxwriter") as writer:
         for df, sheet_name in zip(dfs, sheet_names):
-            df.to_excel(writer, sheet_name=sheet_name, index=False)
+            df.to_excel(writer, sheet_name=sheet_name, index=True)

     # Get the bytes data
     bytes_data = bytes_io.getvalue()
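Note on this one-line change: `create_dfs_per_kpi` pivots with index="DN", so the WBTS identifier ends up in the DataFrame index rather than a column. With index=False the exported workbook would have no DN column at all, which is presumably why the commit switches to index=True.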