DavMelchi committed on
Commit
9d2b604
·
1 Parent(s): 583ea65

Adding global trafic analysis

Browse files
Files changed (2) hide show
  1. app.py +4 -0
  2. apps/kpi_analysis/trafic_analysis.py +399 -0
app.py CHANGED
@@ -176,6 +176,10 @@ if check_password():
176
  "apps/kpi_analysis/anomalie.py",
177
  title=" 📊 KPIs Anomaly Detection",
178
  ),
 
 
 
 
179
  ],
180
  "Documentations": [
181
  st.Page(
 
176
  "apps/kpi_analysis/anomalie.py",
177
  title=" 📊 KPIs Anomaly Detection",
178
  ),
179
+ st.Page(
180
+ "apps/kpi_analysis/trafic_analysis.py",
181
+ title=" 📊 Trafic Analysis",
182
+ ),
183
  ],
184
  "Documentations": [
185
  st.Page(
apps/kpi_analysis/trafic_analysis.py ADDED
@@ -0,0 +1,399 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+
3
+ import pandas as pd
4
+ import plotly.express as px
5
+ import streamlit as st
6
+
7
+ from utils.convert_to_excel import convert_dfs, save_dataframe
8
+ from utils.utils_vars import get_physical_db
9
+
10
+
11
class TraficAnalysis:
    """Namespace holding the result shared between the analysis run and the charts."""

    # Rows of the merged traffic table that fall inside the "last period" range.
    # None until an analysis has been run.
    last_period_df: "pd.DataFrame | None" = None
13
+
14
+
15
+ ############### PROCESSING ###############
16
def extract_code(name):
    """Return the integer site code that prefixes a site name, or None.

    Spaces are treated like underscores, and the code is the text before the
    first separator (e.g. ``"1234_Site_Name"`` -> ``1234``).

    Args:
        name: Site name as read from a report; may be any type.

    Returns:
        The leading code as an ``int``, or ``None`` when ``name`` is not a
        string, is shorter than 10 characters, or does not start with an
        integer.
    """
    if not isinstance(name, str) or len(name) < 10:
        return None
    prefix = name.replace(" ", "_").split("_")[0]
    try:
        return int(prefix)
    except ValueError:
        # Non-numeric (or empty) prefix: report "no code" instead of crashing
        # the whole column-wise apply().
        return None
19
+
20
+
21
def preprocess_2g(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate a raw 2G report into per-site, per-day data and voice traffic.

    Reads the columns ``BCF name``, ``PERIOD_START_TIME`` (``%m.%d.%Y``),
    ``TRAFFIC_PS DL``, ``PS_UL_Load`` and ``2G_Carried Traffic``.

    Args:
        df: Raw 2G report; it is not modified.

    Returns:
        One row per (date, ID, code) with the summed ``2g_data_trafic`` and
        ``2g_voice_trafic``. Rows whose ``BCF name`` is shorter than 10
        characters, or for which ``extract_code`` yields no code, are left out.
    """
    df = df[df["BCF name"].str.len() >= 10].copy()
    df["2g_data_trafic"] = df["TRAFFIC_PS DL"] + df["PS_UL_Load"]
    df = df.rename(columns={"2G_Carried Traffic": "2g_voice_trafic"})

    df["code"] = df["BCF name"].apply(extract_code)
    # Keep the key column integer: a missing code would turn it into floats
    # and produce IDs such as "..._123.0" that no longer match other reports.
    df = df[df["code"].notna()].copy()
    df["code"] = df["code"].astype("int64")

    df["date"] = pd.to_datetime(df["PERIOD_START_TIME"], format="%m.%d.%Y")
    df["ID"] = df["date"].astype(str) + "_" + df["code"].astype(str)

    return df.groupby(["date", "ID", "code"], as_index=False)[
        ["2g_data_trafic", "2g_voice_trafic"]
    ].sum()
32
+
33
+
34
def preprocess_3g(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate a raw 3G report into per-site, per-day voice and data traffic.

    Reads the columns ``WBTS name``, ``PERIOD_START_TIME`` (``%m.%d.%Y``),
    ``Total CS traffic - Erl`` and ``Total_Data_Traffic``.

    Args:
        df: Raw 3G report; it is not modified.

    Returns:
        One row per (date, ID, code) with the summed ``3g_voice_trafic`` and
        ``3g_data_trafic``. Rows whose ``WBTS name`` is shorter than 10
        characters, or for which ``extract_code`` yields no code, are left out.
    """
    df = df[df["WBTS name"].str.len() >= 10].copy()

    df["code"] = df["WBTS name"].apply(extract_code)
    # Keep the key column integer: a missing code would turn it into floats
    # and produce IDs such as "..._123.0" that no longer match other reports.
    df = df[df["code"].notna()].copy()
    df["code"] = df["code"].astype("int64")

    df["date"] = pd.to_datetime(df["PERIOD_START_TIME"], format="%m.%d.%Y")
    df["ID"] = df["date"].astype(str) + "_" + df["code"].astype(str)
    df = df.rename(
        columns={
            "Total CS traffic - Erl": "3g_voice_trafic",
            "Total_Data_Traffic": "3g_data_trafic",
        }
    )

    return df.groupby(["date", "ID", "code"], as_index=False)[
        ["3g_voice_trafic", "3g_data_trafic"]
    ].sum()
50
+
51
+
52
def preprocess_lte(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate a raw LTE report into per-site, per-day data traffic.

    Reads the columns ``LNBTS name``, ``PERIOD_START_TIME`` (``%m.%d.%Y``),
    ``4G/LTE DL Traffic Volume (GBytes)`` and
    ``4G/LTE UL Traffic Volume (GBytes)``.

    Args:
        df: Raw LTE report; it is not modified.

    Returns:
        One row per (date, ID, code) with the summed ``lte_data_trafic``
        (downlink + uplink). Rows whose ``LNBTS name`` is shorter than 10
        characters, or for which ``extract_code`` yields no code, are left out.
    """
    df = df[df["LNBTS name"].str.len() >= 10].copy()
    df["lte_data_trafic"] = (
        df["4G/LTE DL Traffic Volume (GBytes)"]
        + df["4G/LTE UL Traffic Volume (GBytes)"]
    )

    df["code"] = df["LNBTS name"].apply(extract_code)
    # Keep the key column integer: a missing code would turn it into floats
    # and produce IDs such as "..._123.0" that no longer match other reports.
    df = df[df["code"].notna()].copy()
    df["code"] = df["code"].astype("int64")

    df["date"] = pd.to_datetime(df["PERIOD_START_TIME"], format="%m.%d.%Y")
    df["ID"] = df["date"].astype(str) + "_" + df["code"].astype(str)

    return df.groupby(["date", "ID", "code"], as_index=False)[
        ["lte_data_trafic"]
    ].sum()
63
+
64
+
65
+ ############################## ANALYSIS ################
66
def _period_bounds(date_range, label):
    """Return the (start, end) timestamps of a date range, or raise ValueError."""
    if date_range is None or len(date_range) < 2:
        raise ValueError(f"{label} must contain 2 dates (from, to).")
    return pd.to_datetime(date_range[0]), pd.to_datetime(date_range[1])


def merge_and_compare(df_2g, df_3g, df_lte, pre_range, post_range, last_period_range):
    """Merge the per-technology tables and compare traffic between two periods.

    Args:
        df_2g: Output of ``preprocess_2g``.
        df_3g: Output of ``preprocess_3g``.
        df_lte: Output of ``preprocess_lte``.
        pre_range: (from, to) dates of the reference period, bounds included.
        post_range: (from, to) dates of the period compared to the reference.
        last_period_range: (from, to) dates of the period returned separately.

    Returns:
        A tuple ``(df, last_period, pivot)``:

        * ``df``: one row per (date, ID, code) with every traffic column, the
          voice/data totals, the site location columns and a ``period`` label
          ("pre", "post" or "other"; "pre" wins when the ranges overlap).
        * ``last_period``: the rows of ``df`` dated inside
          ``last_period_range`` (taken before ``period`` is added).
        * ``pivot``: one row per site code with pre/post totals, their
          difference and the difference in percent of the pre value, rounded
          to 2 decimals. The percentage is inf/NaN where the pre total is 0.

    Raises:
        ValueError: If one of the ranges holds fewer than 2 dates.
    """
    metrics = ["total_voice_trafic", "total_data_trafic"]

    pre_start, pre_end = _period_bounds(pre_range, "pre_range")
    post_start, post_end = _period_bounds(post_range, "post_range")
    last_period_start, last_period_end = _period_bounds(
        last_period_range, "last_period_range"
    )

    # Site locations keyed by the numeric prefix of "Code_Sector"; prefixes
    # that are not numeric become 0. Built with assign() so the frame returned
    # by get_physical_db() is left untouched (it may be shared - TODO confirm).
    physical_db = get_physical_db()
    site_codes = (
        pd.to_numeric(
            physical_db["Code_Sector"].str.split("_").str[0], errors="coerce"
        )
        .fillna(0)
        .astype(int)
    )
    physical_db = physical_db.assign(code=site_codes)[
        ["code", "Longitude", "Latitude", "City"]
    ].drop_duplicates(subset="code")

    keys = ["date", "ID", "code"]
    df = pd.merge(df_2g, df_3g, on=keys, how="outer")
    df = pd.merge(df, df_lte, on=keys, how="outer")

    for col in [
        "2g_data_trafic",
        "2g_voice_trafic",
        "3g_voice_trafic",
        "3g_data_trafic",
        "lte_data_trafic",
    ]:
        if col not in df:
            df[col] = 0

    # A site missing from one technology contributes 0 traffic for it.
    df.fillna(0, inplace=True)

    df["total_voice_trafic"] = df["2g_voice_trafic"] + df["3g_voice_trafic"]
    df["total_data_trafic"] = (
        df["2g_data_trafic"] + df["3g_data_trafic"] + df["lte_data_trafic"]
    )
    df = pd.merge(df, physical_db, on=["code"], how="left")

    last_period = df[
        (df["date"] >= last_period_start) & (df["date"] <= last_period_end)
    ]

    # Label each row with its period; "pre" is written last so that it takes
    # precedence over "post" when the two ranges overlap.
    in_pre = (df["date"] >= pre_start) & (df["date"] <= pre_end)
    in_post = (df["date"] >= post_start) & (df["date"] <= post_end)
    df["period"] = "other"
    df.loc[in_post, "period"] = "post"
    df.loc[in_pre, "period"] = "pre"

    comparison = df[df["period"].isin(["pre", "post"])]

    pivot = comparison.groupby(["code", "period"])[metrics].sum().unstack()
    pivot.columns = [f"{metric}_{period}" for metric, period in pivot.columns]
    pivot = pivot.reset_index()

    # When no row falls inside one of the periods, unstack() produces no
    # column for it; add it as zero traffic so the comparison can be computed.
    for metric in metrics:
        for period in ("pre", "post"):
            column = f"{metric}_{period}"
            if column not in pivot:
                pivot[column] = 0.0

    # Differences
    for metric in metrics:
        pivot[f"{metric}_diff"] = pivot[f"{metric}_post"] - pivot[f"{metric}_pre"]

    for metric in metrics:
        pivot[f"{metric}_diff_pct"] = (
            (pivot[f"{metric}_post"] - pivot[f"{metric}_pre"])
            / pivot[f"{metric}_pre"]
        ) * 100

    return df, last_period, pivot.round(2)
146
+
147
+
148
+ ############################## UI #########################
149
# Streamlit page: collects the three reports and the date ranges, runs the
# analysis on demand, then shows the pre/post summary, the top sites, two
# traffic maps and an Excel download.

# Colour scale shared by both maps: blues for low traffic, reds for high.
_BLUE_RED_SCALE = [
    [0.0, "#4292c6"],  # light blue
    [0.2, "#2171b5"],
    [0.4, "#084594"],  # dark blue
    [0.6, "#cb181d"],  # strong red
    [0.8, "#a50f15"],  # darker red
    [1.0, "#67000d"],  # very dark red
]


def _render_top_sites(df, metric, label, top_n):
    """Show the ``top_n`` sites ranked by ``metric`` as a table and a bar chart."""
    top_sites = (
        df.groupby(["code", "City"])[metric]
        .sum()
        .sort_values(ascending=False)
        .head(top_n)
    )

    st.subheader(f"Top {top_n} sites by {label} traffic")
    if top_sites.empty:
        st.info("No site to display for the selected last period.")
        return

    chart_col, data_col = st.columns(2)
    with data_col:
        st.dataframe(top_sites.sort_values(ascending=True))

    top_sites_df = top_sites.reset_index()
    fig = px.bar(
        top_sites_df,
        # Bars are labelled "<City>_<code>".
        y=top_sites_df[["City", "code"]].agg(
            lambda x: "_".join(map(str, x)), axis=1
        ),
        x=metric,
        title=f"Top {top_n} sites by {label} traffic",
        orientation="h",
        text=metric,
        text_auto=True,
    )
    with chart_col:
        st.plotly_chart(fig)


def _render_traffic_map(
    df, metric, subheader, title, colorbar_title, min_size=5, max_size=40
):
    """Show a bubble map of ``metric`` summed per site."""
    sites = (
        df.groupby(["code", "City", "Latitude", "Longitude"])[metric]
        .sum()
        .reset_index()
    )

    st.subheader(subheader)
    if sites.empty:
        st.info("No located site to display for the selected last period.")
        return

    # Linear size scaling between min_size and max_size.
    traffic_min = sites[metric].min()
    traffic_max = sites[metric].max()
    span = traffic_max - traffic_min
    if span > 0:
        sites["bubble_size"] = (
            min_size + (max_size - min_size) * (sites[metric] - traffic_min) / span
        )
    else:
        # Single site or identical traffic everywhere: avoid dividing by zero.
        sites["bubble_size"] = float(max_size)

    fig = px.scatter_map(
        sites,
        lat="Latitude",
        lon="Longitude",
        color=metric,
        size="bubble_size",
        color_continuous_scale=_BLUE_RED_SCALE,
        size_max=max_size,
        zoom=10,
        height=600,
        title=title,
        hover_data={"code": True, metric: True},
        hover_name="code",
        text=[str(x) for x in sites["code"]],
        # scatter_map draws on the "map" subplot, whose style is set with
        # map_style; mapbox_style only applies to the "mapbox" subplot.
        map_style="open-street-map",
    )
    fig.update_layout(
        coloraxis_colorbar=dict(title=colorbar_title),
        coloraxis=dict(cmin=traffic_min, cmax=traffic_max),
        font=dict(size=10, color="black"),
    )
    st.plotly_chart(fig)


st.title("📊 Global Trafic Analysis - 2G / 3G / LTE")
doc_col, image_col = st.columns(2)

with doc_col:
    st.write(
        """
        The report analyzes 2G / 3G / LTE traffic :
        - 2G Traffic Report in CSV format (required columns : BCF name, PERIOD_START_TIME, TRAFFIC_PS DL, PS_UL_Load, 2G_Carried Traffic)
        - 3G Traffic Report in CSV format (required columns : WBTS name, PERIOD_START_TIME, Total CS traffic - Erl, Total_Data_Traffic)
        - LTE Traffic Report in CSV format (required columns : LNBTS name, PERIOD_START_TIME, 4G/LTE DL Traffic Volume (GBytes), 4G/LTE UL Traffic Volume (GBytes))
        """
    )

# with image_col:
#     st.image("./assets/trafic_analysis.png", width=250)

# Only CSV is offered because the files are parsed with pd.read_csv below.
upload_2g_col, upload_3g_col, upload_lte_col = st.columns(3)
with upload_2g_col:
    two_g_file = st.file_uploader("Upload 2G Traffic Report", type=["csv"])
with upload_3g_col:
    three_g_file = st.file_uploader("Upload 3G Traffic Report", type=["csv"])
with upload_lte_col:
    lte_file = st.file_uploader("Upload LTE Traffic Report", type=["csv"])

pre_range_col, post_range_col = st.columns(2)
with pre_range_col:
    pre_range = st.date_input("Pre-period (from - to)", [])
with post_range_col:
    post_range = st.date_input("Post-period (from - to)", [])

last_period_range_col, number_of_top_trafic_sites_col = st.columns(2)
with last_period_range_col:
    last_period_range = st.date_input("Last period (from - to)", [])
with number_of_top_trafic_sites_col:
    number_of_top_trafic_sites = int(
        st.number_input("Number of top traffic sites", min_value=1, value=25, step=1)
    )

# All three ranges are indexed by merge_and_compare, so all must be complete.
if len(pre_range) != 2 or len(post_range) != 2 or len(last_period_range) != 2:
    st.warning("⚠️ Please select 2 dates for each period (pre, post and last).")
    st.stop()
if not all([two_g_file, three_g_file, lte_file]):
    st.info("Please upload all 3 reports and select the comparison periods.")
    st.stop()

# Set by the analysis below; the download section only renders once they exist.
full_df = None
summary_df = None

if st.button("🔍 Run Analysis"):

    df_2g = pd.read_csv(two_g_file, delimiter=";")
    df_3g = pd.read_csv(three_g_file, delimiter=";")
    df_lte = pd.read_csv(lte_file, delimiter=";")

    df_2g_clean = preprocess_2g(df_2g)
    df_3g_clean = preprocess_3g(df_3g)
    df_lte_clean = preprocess_lte(df_lte)

    full_df, last_period, summary_df = merge_and_compare(
        df_2g_clean, df_3g_clean, df_lte_clean, pre_range, post_range, last_period_range
    )

    # 🔍 Display Summary
    st.success("✅ Analysis completed")
    st.subheader("📈 Summary Analysis Pre / Post")
    st.dataframe(summary_df)
    TraficAnalysis.last_period_df = last_period

# NOTE(review): the result is kept on a class attribute. If Streamlit
# re-executes this script on every interaction, the class is redefined and the
# attribute is None again, so the charts only show on the run where the button
# was clicked - consider st.session_state. TODO confirm.
if TraficAnalysis.last_period_df is not None:

    df = TraficAnalysis.last_period_df

    if df.empty:
        st.warning("⚠️ No traffic data found in the selected last period.")
    else:
        # Top sites by data, then by voice traffic during the last period.
        _render_top_sites(df, "total_data_trafic", "data", number_of_top_trafic_sites)
        _render_top_sites(df, "total_voice_trafic", "voice", number_of_top_trafic_sites)

        # Maps of data, then voice traffic summed over the last period.
        # NOTE(review): the "(MB)" unit of the data total is not established
        # by this file (the LTE columns are in GBytes) - confirm.
        _render_traffic_map(
            df,
            "total_data_trafic",
            "Map of data trafic during last period",
            "Data traffic distribution",
            "Total Data Traffic (MB)",
        )
        # The 3G voice source column is "Total CS traffic - Erl": label the
        # voice total in Erlang rather than MB.
        _render_traffic_map(
            df,
            "total_voice_trafic",
            "Map of voice trafic during last period",
            "Voice traffic distribution",
            "Total Voice Traffic (Erl)",
        )

    if full_df is not None and summary_df is not None:
        final_dfs = convert_dfs(
            [full_df, summary_df], ["Global_Trafic_Analysis", "Pre_Post_analysis"]
        )
        # 📥 Download button
        st.download_button(
            on_click="ignore",
            type="primary",
            label="Download the Analysis Report",
            data=final_dfs,
            # Timestamp without spaces or colons so the name is valid everywhere.
            file_name=f"Global_Trafic_Analysis_Report_{datetime.now():%Y%m%d_%H%M%S}.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )