DavMelchi committed on
Commit
a64569a
·
1 Parent(s): faf6fca

improve functional coding

Browse files
process_kpi/process_wbts_capacity.py CHANGED
@@ -1,5 +1,11 @@
1
  import pandas as pd
2
 
 
 
 
 
 
 
3
 
4
  class WbtsCapacity:
5
  final_results: pd.DataFrame = None
@@ -31,45 +37,6 @@ def check_deviation(row: pd.Series, max_diff: float = 3.0, type: str = "") -> st
31
  return ""
32
 
33
 
34
- def create_daily_date(df: pd.DataFrame) -> pd.DataFrame:
35
- """
36
- Create a daily date column from PERIOD_START_TIME and drop unnecessary columns.
37
-
38
- Args:
39
- df: DataFrame containing PERIOD_START_TIME column
40
-
41
- Returns:
42
- DataFrame with new date column and unnecessary columns removed
43
- """
44
- date_df = df.copy()
45
- date_df[["mois", "jour", "annee"]] = date_df["PERIOD_START_TIME"].str.split(
46
- ".", expand=True
47
- )
48
- date_df["date"] = date_df["annee"] + "-" + date_df["mois"] + "-" + date_df["jour"]
49
- # Remove unnecessary columns
50
- date_df = date_df.drop(["annee", "mois", "jour", "PERIOD_START_TIME"], axis=1)
51
- return date_df
52
-
53
-
54
- def kpi_naming_cleaning(df: pd.DataFrame) -> pd.DataFrame:
55
- """
56
- Clean KPI column names by replacing special characters and standardizing format.
57
-
58
- Args:
59
- df: DataFrame with KPI column names to clean
60
-
61
- Returns:
62
- DataFrame with cleaned column names
63
- """
64
- name_df = df.copy()
65
- name_df.columns = name_df.columns.str.replace("[ /(),-.']", "_", regex=True)
66
- name_df.columns = name_df.columns.str.replace("___", "_")
67
- name_df.columns = name_df.columns.str.replace("__", "_")
68
- name_df.columns = name_df.columns.str.replace("%", "perc")
69
- name_df.columns = name_df.columns.str.rstrip("_")
70
- return name_df
71
-
72
-
73
  def max_used_bb_subunits_analysis(
74
  df: pd.DataFrame,
75
  days: int = 7,
@@ -288,7 +255,7 @@ def ce_comments_analysis(df: pd.DataFrame) -> pd.DataFrame:
288
  )
289
 
290
 
291
- def create_dfs_per_kpi(
292
  df: pd.DataFrame,
293
  num_days: int = 7,
294
  threshold: int = 80,
@@ -306,20 +273,12 @@ def create_dfs_per_kpi(
306
  Returns:
307
  DataFrame with combined analysis results
308
  """
309
- kpi_columns = df.columns[5:]
310
  pivoted_kpi_dfs = {}
311
 
312
- # Loop through each KPI and create pivoted DataFrames
313
- for kpi in kpi_columns:
314
- temp_df = df[["date", "DN", kpi]].copy()
315
-
316
- # Pivot the dataframe
317
- pivot_df = temp_df.pivot(index="DN", columns="date", values=kpi)
318
- pivot_df.columns = pd.MultiIndex.from_product([[kpi], pivot_df.columns])
319
- pivot_df.columns.names = ["KPI", "Date"]
320
-
321
- # Store in dictionary with KPI name as key
322
- pivoted_kpi_dfs[kpi] = pivot_df
323
 
324
  # Extract individual KPI DataFrames
325
  wbts_name_df = pivoted_kpi_dfs["WBTS_name"].iloc[:, 0]
@@ -403,5 +362,9 @@ def load_data(
403
  df = df[[col for col in df.columns if col != "WBTS_name"] + ["WBTS_name"]]
404
 
405
  # Perform KPI analysis
406
- df = create_dfs_per_kpi(df, num_days, threshold, number_of_threshold_days)
 
 
 
 
407
  return df
 
1
  import pandas as pd
2
 
3
+ from utils.kpi_analysis_utils import (
4
+ create_daily_date,
5
+ create_dfs_per_kpi,
6
+ kpi_naming_cleaning,
7
+ )
8
+
9
 
10
  class WbtsCapacity:
11
  final_results: pd.DataFrame = None
 
37
  return ""
38
 
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  def max_used_bb_subunits_analysis(
41
  df: pd.DataFrame,
42
  days: int = 7,
 
255
  )
256
 
257
 
258
+ def wbts_kpi_analysis(
259
  df: pd.DataFrame,
260
  num_days: int = 7,
261
  threshold: int = 80,
 
273
  Returns:
274
  DataFrame with combined analysis results
275
  """
276
+ # kpi_columns = df.columns[5:]
277
  pivoted_kpi_dfs = {}
278
 
279
+ pivoted_kpi_dfs = create_dfs_per_kpi(
280
+ df=df, pivot_date_column="date", pivot_name_column="DN", kpi_columns_from=5
281
+ )
 
 
 
 
 
 
 
 
282
 
283
  # Extract individual KPI DataFrames
284
  wbts_name_df = pivoted_kpi_dfs["WBTS_name"].iloc[:, 0]
 
362
  df = df[[col for col in df.columns if col != "WBTS_name"] + ["WBTS_name"]]
363
 
364
  # Perform KPI analysis
365
+ df = wbts_kpi_analysis(df, num_days, threshold, number_of_threshold_days)
366
+
367
+ # for col, col_index in zip(df.columns, df.columns.get_indexer(df.columns)):
368
+ # print(f"Column: {col}, Index: {col_index}")
369
+
370
  return df
utils/kpi_analysis_utils.py ADDED
@@ -0,0 +1,320 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
+
4
class GsmAnalysis:
    """Namespace holding static lookup tables as class attributes.

    Neither table is referenced by any function visible in this part of the
    file; they are presumably consumed by GSM capacity code elsewhere.
    """

    # Maps an integer key to a float coefficient.
    # NOTE(review): presumably half-rate percentage -> capacity multiplier;
    # confirm against the consumer. Keys are sparse (no 30, 50, 90), so a
    # direct dict lookup only works for the exact keys listed.
    # NOTE(review): key 100 maps to 1.4, which is lower than 99 -> 2.0 and
    # equal to 40 -> 1.4 -- verify this is intentional and not a typo.
    hf_rate_coef = {
        10: 1.1,
        20: 1.2,
        40: 1.4,
        60: 1.6,
        70: 1.7,
        80: 1.8,
        99: 2.0,
        100: 1.4,
    }
    # Maps an integer key (1..200) to a numeric value.
    # NOTE(review): the name and the first entries look like the standard
    # Erlang B table (offered traffic in Erlangs per number of channels at
    # 2% blocking) -- TODO confirm the blocking probability with the author.
    # Entries 139..150 carry five decimals while the rest carry at most two,
    # which suggests they came from a different source.
    erlangB_table = {
        1: 0.0204,
        2: 0.2234,
        3: 0.6022,
        4: 1.092,
        5: 1.657,
        6: 2.276,
        7: 2.935,
        8: 3.627,
        9: 4.345,
        10: 5.084,
        11: 5.841,
        12: 6.614,
        13: 7.401,
        14: 8.2,
        15: 9.009,
        16: 9.828,
        17: 10.66,
        18: 11.49,
        19: 12.33,
        20: 13.18,
        21: 14.04,
        22: 14.9,
        23: 15.76,
        24: 16.63,
        25: 17.5,
        26: 18.38,
        27: 19.26,
        28: 20.15,
        29: 21.04,
        30: 21.93,
        31: 22.83,
        32: 23.72,
        33: 24.63,
        34: 25.53,
        35: 26.43,
        36: 27.34,
        37: 28.25,
        38: 29.17,
        39: 30.08,
        40: 31,
        41: 31.91,
        42: 32.84,
        43: 33.76,
        44: 34.68,
        45: 35.61,
        46: 36.53,
        47: 37.46,
        48: 38.39,
        49: 39.32,
        50: 40.25,
        51: 41.19,
        52: 42.12,
        53: 43.06,
        54: 44,
        55: 44.93,
        56: 45.88,
        57: 46.81,
        58: 47.75,
        59: 48.7,
        60: 49.64,
        61: 50.59,
        62: 51.53,
        63: 52.48,
        64: 53.43,
        65: 54.38,
        66: 55.32,
        67: 56.27,
        68: 57.22,
        69: 58.18,
        70: 59.13,
        71: 60.08,
        72: 61.04,
        73: 61.99,
        74: 62.94,
        75: 63.9,
        76: 64.86,
        77: 65.81,
        78: 66.77,
        79: 67.73,
        80: 68.69,
        81: 69.64,
        82: 70.61,
        83: 71.57,
        84: 72.53,
        85: 73.49,
        86: 74.45,
        87: 75.41,
        88: 76.38,
        89: 77.34,
        90: 78.3,
        91: 79.27,
        92: 80.23,
        93: 81.2,
        94: 82.16,
        95: 83.13,
        96: 84.09,
        97: 85.06,
        98: 86.03,
        99: 87,
        100: 87.97,
        101: 88.94,
        102: 89.91,
        103: 90.88,
        104: 91.85,
        105: 92.82,
        106: 93.79,
        107: 94.76,
        108: 95.73,
        109: 96.71,
        110: 97.68,
        111: 98.65,
        112: 99.63,
        113: 100.6,
        114: 101.57,
        115: 102.54,
        116: 103.52,
        117: 104.49,
        118: 105.47,
        119: 106.44,
        120: 107.42,
        121: 108.4,
        122: 109.37,
        123: 110.35,
        124: 111.32,
        125: 112.3,
        126: 113.28,
        127: 114.25,
        128: 115.23,
        129: 116.21,
        130: 117.19,
        131: 118.17,
        132: 119.15,
        133: 120.12,
        134: 121.1,
        135: 122.08,
        136: 123.07,
        137: 124.04,
        138: 125.02,
        139: 126.01341,
        140: 127.00918,
        141: 127.96752,
        142: 128.98152,
        143: 129.92152,
        144: 130.88534,
        145: 131.96461,
        146: 132.89897,
        147: 133.86373,
        148: 134.82569,
        149: 135.76295,
        150: 136.82988,
        151: 137.79,
        152: 138.77,
        153: 139.75,
        154: 140.74,
        155: 141.72,
        156: 142.7,
        157: 143.69,
        158: 144.67,
        159: 145.66,
        160: 146.64,
        161: 147.63,
        162: 148.61,
        163: 149.6,
        164: 150.58,
        165: 151.57,
        166: 152.55,
        167: 153.54,
        168: 154.53,
        169: 155.51,
        170: 156.5,
        171: 157.48,
        172: 158.47,
        173: 159.46,
        174: 160.44,
        175: 161.43,
        176: 162.42,
        177: 163.41,
        178: 164.39,
        179: 165.38,
        180: 166.37,
        181: 167.36,
        182: 168.35,
        183: 169.33,
        184: 170.32,
        185: 171.31,
        186: 172.3,
        187: 173.29,
        188: 174.28,
        189: 175.27,
        190: 176.26,
        191: 177.25,
        192: 178.24,
        193: 179.23,
        194: 180.22,
        195: 181.21,
        196: 182.2,
        197: 183.19,
        198: 184.18,
        199: 185.17,
        200: 186.16,
    }
217
+
218
+
219
def kpi_naming_cleaning(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of ``df`` with KPI column names normalized.

    The following steps are applied to every column name, in order:

    1. Each space, ``/``, ``(``, ``)``, ``,``, ``-``, ``.`` or ``'`` becomes ``_``.
    2. Every run of consecutive underscores is collapsed to a single ``_``.
    3. ``%`` is replaced by the literal text ``perc``.
    4. Trailing underscores are stripped.

    Args:
        df: DataFrame with KPI column names to clean. It is not modified.

    Returns:
        A copy of ``df`` with cleaned column names.
    """
    name_df = df.copy()
    columns = name_df.columns
    # The hyphen is escaped so the class lists literal characters instead of
    # relying on ",-." happening to be a range that covers ',', '-' and '.'.
    columns = columns.str.replace(r"[ /(),\-.']", "_", regex=True)
    # A single "_+" pass collapses runs of any length; replacing "___" and
    # then "__" left a double underscore behind for runs of 5 or 7+.
    columns = columns.str.replace(r"_+", "_", regex=True)
    # regex is spelled out because its default differs between pandas versions.
    columns = columns.str.replace("%", "perc", regex=False)
    name_df.columns = columns.str.rstrip("_")
    return name_df
236
+
237
+
238
def create_daily_date(df: pd.DataFrame) -> pd.DataFrame:
    """
    Build a ``date`` column from ``PERIOD_START_TIME`` on a copy of ``df``.

    ``PERIOD_START_TIME`` is split on ``"."`` into three text parts that are
    read as month, day and year, then re-joined as ``year-month-day``.

    Args:
        df: DataFrame containing a string PERIOD_START_TIME column

    Returns:
        Copy of ``df`` with the ``date`` column added and PERIOD_START_TIME
        (plus the temporary part columns) removed
    """
    part_columns = ["mois", "jour", "annee"]  # month, day, year
    result = df.copy()

    split_parts = result["PERIOD_START_TIME"].str.split(".", expand=True)
    result[part_columns] = split_parts

    year = result["annee"]
    month = result["mois"]
    day = result["jour"]
    result["date"] = year + "-" + month + "-" + day

    # Only the assembled date is kept; the source column and parts are dropped.
    return result.drop(columns=part_columns + ["PERIOD_START_TIME"])
256
+
257
+
258
def create_hourly_date(df: pd.DataFrame) -> pd.DataFrame:
    """
    Build ``hour``, ``datetime`` and ``date`` columns from PERIOD_START_TIME.

    ``PERIOD_START_TIME`` is first split on a single space into a date part
    and an hour part. The date part is then split on ``"."`` into three text
    parts read as month, day and year.

    Args:
        df: DataFrame containing a string PERIOD_START_TIME column. It is not
            modified; the work is done on a copy, as in ``create_daily_date``.

    Returns:
        Copy of ``df`` without PERIOD_START_TIME and with three added string
        columns: ``hour`` (the text after the space), ``datetime``
        (``year-month-day hour``) and ``date`` (``year-month-day``).
    """
    # Copy first: assigning columns on the caller's frame would leak the
    # temporary helper columns into the input DataFrame.
    date_df = df.copy()
    date_df[["date_t", "hour"]] = date_df["PERIOD_START_TIME"].str.split(
        " ", expand=True
    )
    date_df[["mois", "jour", "annee"]] = date_df["date_t"].str.split(".", expand=True)

    date_df["date"] = date_df["annee"] + "-" + date_df["mois"] + "-" + date_df["jour"]
    date_df["datetime"] = date_df["date"] + " " + date_df["hour"]

    # Drop the source column and the temporary parts, then put the new
    # columns in the order hour, datetime, date at the end of the frame.
    date_df = date_df.drop(
        ["annee", "mois", "jour", "date_t", "PERIOD_START_TIME"], axis=1
    )
    trailing = ["hour", "datetime", "date"]
    leading = [col for col in date_df.columns if col not in trailing]
    return date_df[leading + trailing]
281
+
282
+
283
def create_dfs_per_kpi(
    df: pd.DataFrame = None,
    pivot_date_column: str = "date",
    pivot_name_column: str = "BTS_name",
    kpi_columns_from: int = None,
) -> dict:
    """
    Build one pivoted DataFrame per KPI column.

    Rows that repeat the same (name, date) pair are reduced to their first
    occurrence. Each KPI column is then pivoted so that rows are the values
    of ``pivot_name_column`` and columns are the values of
    ``pivot_date_column``.

    Args:
        df: DataFrame containing the name column, the date column and the
            KPI columns. Required, despite the ``None`` default.
        pivot_date_column: Column whose values become the pivot columns.
        pivot_name_column: Column whose values become the pivot index.
        kpi_columns_from: Positional index of the first KPI column; every
            column from that position onward is treated as a KPI. ``None``
            means all columns.

    Returns:
        Dict mapping each KPI column name to its pivoted DataFrame. The
        pivoted columns are a two-level MultiIndex named ``("KPI", "Date")``.

    Raises:
        ValueError: If ``df`` is None.
    """
    if df is None:
        raise ValueError("create_dfs_per_kpi requires a DataFrame, got None")

    key_columns = (pivot_date_column, pivot_name_column)
    kpi_columns = df.columns[kpi_columns_from:]

    # The duplicate filter depends only on the key columns, so it is applied
    # once instead of once per KPI.
    deduplicated = df.drop_duplicates(
        subset=[pivot_name_column, pivot_date_column], keep="first"
    )

    pivoted_kpi_dfs = {}
    for kpi in kpi_columns:
        # A key column cannot be pivoted against itself; this only happens
        # when the KPI slice also covers the key columns.
        if kpi in key_columns:
            continue

        pivot_df = deduplicated.pivot(
            index=pivot_name_column, columns=pivot_date_column, values=kpi
        )
        pivot_df.columns = pd.MultiIndex.from_product([[kpi], pivot_df.columns])
        pivot_df.columns.names = ["KPI", "Date"]

        # Store in dictionary with KPI name as key
        pivoted_kpi_dfs[kpi] = pivot_df

    return pivoted_kpi_dfs