improve clustering
Browse files- apps/clustering.py +23 -21
apps/clustering.py
CHANGED
|
@@ -170,7 +170,10 @@ if uploaded_file:
|
|
| 170 |
)
|
| 171 |
cluster_method = st.selectbox(
|
| 172 |
"Select clustering method",
|
| 173 |
-
[
|
|
|
|
|
|
|
|
|
|
| 174 |
)
|
| 175 |
mix_regions = st.checkbox(
|
| 176 |
"Allow mixing different regions in clusters", value=False
|
|
@@ -178,33 +181,15 @@ if uploaded_file:
|
|
| 178 |
submitted = st.form_submit_button("Run Clustering")
|
| 179 |
|
| 180 |
if submitted:
|
| 181 |
-
if cluster_method == "Hilbert Curve Same
|
| 182 |
clustered_df = cluster_sites_hilbert_curve_same_size(
|
| 183 |
df, lat_col, lon_col, region_col, max_sites, mix_regions
|
| 184 |
)
|
| 185 |
-
elif cluster_method == "KMeans Lower
|
| 186 |
clustered_df = cluster_sites_kmeans_lower_to_fixed_size(
|
| 187 |
df, lat_col, lon_col, region_col, max_sites, mix_regions
|
| 188 |
)
|
| 189 |
st.success("Clustering completed!")
|
| 190 |
-
st.write(clustered_df.head())
|
| 191 |
-
|
| 192 |
-
# Plot
|
| 193 |
-
clustered_df["size"] = 10
|
| 194 |
-
fig = px.scatter_map(
|
| 195 |
-
clustered_df,
|
| 196 |
-
lat=lat_col,
|
| 197 |
-
lon=lon_col,
|
| 198 |
-
color="Cluster",
|
| 199 |
-
size="size",
|
| 200 |
-
hover_name=code_col,
|
| 201 |
-
hover_data=[region_col],
|
| 202 |
-
zoom=5,
|
| 203 |
-
height=600,
|
| 204 |
-
)
|
| 205 |
-
fig.update_layout(mapbox_style="open-street-map")
|
| 206 |
-
fig.update_traces(marker=dict(size=15))
|
| 207 |
-
st.plotly_chart(fig)
|
| 208 |
|
| 209 |
# Show cluster size per cluster plot
|
| 210 |
cluster_size = clustered_df["Cluster"].value_counts().sort_index()
|
|
@@ -222,6 +207,23 @@ if uploaded_file:
|
|
| 222 |
fig.update_layout(title="Cluster Size per Region")
|
| 223 |
st.plotly_chart(fig)
|
| 224 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
# Download button
|
| 226 |
st.download_button(
|
| 227 |
label="Download clustered Excel file",
|
|
|
|
| 170 |
)
|
| 171 |
cluster_method = st.selectbox(
|
| 172 |
"Select clustering method",
|
| 173 |
+
[
|
| 174 |
+
"Hilbert Curve- Same number of sites for each cluster",
|
| 175 |
+
"KMeans - Number of sites Lower than max",
|
| 176 |
+
],
|
| 177 |
)
|
| 178 |
mix_regions = st.checkbox(
|
| 179 |
"Allow mixing different regions in clusters", value=False
|
|
|
|
| 181 |
submitted = st.form_submit_button("Run Clustering")
|
| 182 |
|
| 183 |
if submitted:
|
| 184 |
+
if cluster_method == "Hilbert Curve- Same number of sites for each cluster":
|
| 185 |
clustered_df = cluster_sites_hilbert_curve_same_size(
|
| 186 |
df, lat_col, lon_col, region_col, max_sites, mix_regions
|
| 187 |
)
|
| 188 |
+
elif cluster_method == "KMeans - Number of sites Lower than max":
|
| 189 |
clustered_df = cluster_sites_kmeans_lower_to_fixed_size(
|
| 190 |
df, lat_col, lon_col, region_col, max_sites, mix_regions
|
| 191 |
)
|
| 192 |
st.success("Clustering completed!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
|
| 194 |
# Show cluster size per cluster plot
|
| 195 |
cluster_size = clustered_df["Cluster"].value_counts().sort_index()
|
|
|
|
| 207 |
fig.update_layout(title="Cluster Size per Region")
|
| 208 |
st.plotly_chart(fig)
|
| 209 |
|
| 210 |
+
# Map Plot
|
| 211 |
+
clustered_df["size"] = 10
|
| 212 |
+
fig = px.scatter_map(
|
| 213 |
+
clustered_df,
|
| 214 |
+
lat=lat_col,
|
| 215 |
+
lon=lon_col,
|
| 216 |
+
color="Cluster",
|
| 217 |
+
size="size",
|
| 218 |
+
hover_name=code_col,
|
| 219 |
+
hover_data=[region_col],
|
| 220 |
+
zoom=5,
|
| 221 |
+
height=600,
|
| 222 |
+
)
|
| 223 |
+
fig.update_layout(mapbox_style="open-street-map")
|
| 224 |
+
fig.update_traces(marker=dict(size=15))
|
| 225 |
+
st.plotly_chart(fig)
|
| 226 |
+
|
| 227 |
# Download button
|
| 228 |
st.download_button(
|
| 229 |
label="Download clustered Excel file",
|