DavMelchi committed on
Commit
f9c0284
·
1 Parent(s): 0898fcc

Adding clustering App

Browse files
Files changed (3) hide show
  1. Changelog.md +1 -0
  2. app.py +4 -0
  3. apps/clustering.py +103 -0
Changelog.md CHANGED
@@ -5,6 +5,7 @@
5
 
6
  - Add paging analysis App
7
  - Add capacity analysis App
 
8
 
9
  ## [0.2.8] - 2025-04-22
10
 
 
5
 
6
  - Add paging analysis App
7
  - Add capacity analysis App
8
+ - Add automatic site clustering App
9
 
10
  ## [0.2.8] - 2025-04-22
11
 
app.py CHANGED
@@ -129,6 +129,10 @@ if check_password():
129
  "apps/sector_kml_generator.py",
130
  title="📡 Sector KML Generator",
131
  ),
 
 
 
 
132
  st.Page(
133
  "apps/import_physical_db.py", title="🌏Physical Database Verification"
134
  ),
 
129
  "apps/sector_kml_generator.py",
130
  title="📡 Sector KML Generator",
131
  ),
132
+ st.Page(
133
+ "apps/clustering.py",
134
+ title="📡 Automatic Site Clustering",
135
+ ),
136
  st.Page(
137
  "apps/import_physical_db.py", title="🌏Physical Database Verification"
138
  ),
apps/clustering.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from io import BytesIO
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+ import plotly.express as px
6
+ import streamlit as st
7
+ from sklearn.cluster import KMeans
8
+
9
+
10
def cluster_sites(
    df: pd.DataFrame,
    lat_col: str,
    lon_col: str,
    region_col: str,
    max_sites: int = 25,
    mix_regions: bool = False,
) -> pd.DataFrame:
    """Assign every site in *df* to a cluster label.

    Sites are partitioned with K-Means on the raw values of ``lat_col`` and
    ``lon_col``. Each group is split into ``ceil(len(group) / max_sites)``
    clusters. K-Means does not balance cluster sizes, so ``max_sites`` acts as
    a target average per cluster rather than a guaranteed upper bound.

    Args:
        df: Input sites, one row per site. It is not modified.
        lat_col: Name of the latitude column.
        lon_col: Name of the longitude column.
        region_col: Name of the region column used to split the sites when
            ``mix_regions`` is false.
        max_sites: Target number of sites per cluster (must be >= 1).
        mix_regions: When true, cluster all sites together and ignore
            ``region_col``; otherwise cluster each region independently.

    Returns:
        A new DataFrame with the input rows (grouped region by region, original
        index preserved) and an extra ``"Cluster"`` column holding labels such
        as ``"C0"``, ``"C1"``. Labels are unique across regions.

    Raises:
        ValueError: If ``max_sites`` is smaller than 1.
    """
    if max_sites < 1:
        raise ValueError("max_sites must be at least 1")

    if mix_regions:
        grouped = [("All", df)]
    else:
        # dropna=False keeps sites whose region is missing; the groupby default
        # would silently discard those rows from the result.
        grouped = df.groupby(region_col, dropna=False)

    clusters = []
    next_id = 0

    for _, group in grouped:
        if group.empty:
            continue

        n_clusters = int(np.ceil(len(group) / max_sites))

        if n_clusters == 1:
            # The whole group fits in one cluster: no need to run K-Means.
            labels = np.zeros(len(group), dtype=int)
        else:
            coords = group[[lat_col, lon_col]].to_numpy()
            # Fixed seed so repeated runs on the same data give the same labels.
            kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
            labels = kmeans.fit_predict(coords)

        group = group.copy()
        group["Cluster"] = [f"C{next_id + int(label)}" for label in labels]
        clusters.append(group)
        # Advance by the number of requested clusters rather than the number of
        # distinct labels: if K-Means leaves a label unused, counting distinct
        # labels would let the next group reuse an ID that is already taken.
        next_id += n_clusters

    if not clusters:
        # Nothing to cluster: return the (empty) input shape plus the new column
        # instead of letting pd.concat([]) raise.
        result = df.copy()
        result["Cluster"] = pd.Series(dtype="object")
        return result

    return pd.concat(clusters)
42
+
43
+
44
def to_excel(df: pd.DataFrame) -> bytes:
    """Serialize *df* to an in-memory ``.xlsx`` workbook and return its bytes.

    The data is written without the index to a single sheet named
    ``"Clusters"`` using the ``xlsxwriter`` engine.
    """
    buffer = BytesIO()
    # Leaving the ``with`` block closes the writer, which is what actually
    # flushes the workbook into the buffer.
    with pd.ExcelWriter(buffer, engine="xlsxwriter") as xlsx_writer:
        df.to_excel(xlsx_writer, sheet_name="Clusters", index=False)
    workbook_bytes = buffer.getvalue()
    return workbook_bytes
49
+
50
+
51
# Streamlit page: upload an Excel file of sites, pick the relevant columns,
# run the clustering, then show the result on a map and offer it for download.
st.title("Automatic Site Clustering App")

uploaded_file = st.file_uploader("Upload your Excel file", type=["xlsx"])

if uploaded_file:
    df = pd.read_excel(uploaded_file)
    st.write("Sample of uploaded data:", df.head())

    columns = df.columns.tolist()

    # A form batches the widget values so the clustering only runs when the
    # user presses the submit button.
    with st.form("clustering_form"):
        lat_col = st.selectbox("Select Latitude column", columns)
        lon_col = st.selectbox("Select Longitude column", columns)
        region_col = st.selectbox("Select Region column", columns)
        code_col = st.selectbox("Select Site Code column", columns)
        max_sites = st.number_input(
            "Max sites per cluster", min_value=5, max_value=100, value=25
        )
        mix_regions = st.checkbox(
            "Allow mixing different regions in clusters", value=False
        )
        submitted = st.form_submit_button("Run Clustering")

    if submitted:
        try:
            clustered_df = cluster_sites(
                df, lat_col, lon_col, region_col, max_sites, mix_regions
            )
        except ValueError as exc:
            # Typically raised for missing or non-numeric coordinates, or an
            # empty sheet; show a readable message instead of a traceback.
            st.error(f"Clustering failed: {exc}")
        else:
            st.success("Clustering completed!")
            st.write(clustered_df.head())

            # Plot. scatter_map draws on the MapLibre ``map`` subplot, so the
            # base map is selected with ``map_style``; setting
            # ``mapbox_style`` on the layout would not affect this figure.
            fig = px.scatter_map(
                clustered_df,
                lat=lat_col,
                lon=lon_col,
                color="Cluster",
                hover_name=code_col,
                hover_data=[region_col],
                zoom=5,
                height=600,
                map_style="open-street-map",
            )
            st.plotly_chart(fig)

            # Download button. on_click="ignore" is intended to keep the
            # results on screen after the click (no rerun).
            st.download_button(
                label="Download clustered Excel file",
                data=to_excel(clustered_df),
                file_name="clustered_sites.xlsx",
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                on_click="ignore",
                type="primary",
            )