improve clustering
Browse files- apps/clustering.py +23 -21
apps/clustering.py
CHANGED
@@ -170,7 +170,10 @@ if uploaded_file:
|
|
170 |
)
|
171 |
cluster_method = st.selectbox(
|
172 |
"Select clustering method",
|
173 |
-
[
|
|
|
|
|
|
|
174 |
)
|
175 |
mix_regions = st.checkbox(
|
176 |
"Allow mixing different regions in clusters", value=False
|
@@ -178,33 +181,15 @@ if uploaded_file:
|
|
178 |
submitted = st.form_submit_button("Run Clustering")
|
179 |
|
180 |
if submitted:
|
181 |
-
if cluster_method == "Hilbert Curve Same
|
182 |
clustered_df = cluster_sites_hilbert_curve_same_size(
|
183 |
df, lat_col, lon_col, region_col, max_sites, mix_regions
|
184 |
)
|
185 |
-
elif cluster_method == "KMeans Lower
|
186 |
clustered_df = cluster_sites_kmeans_lower_to_fixed_size(
|
187 |
df, lat_col, lon_col, region_col, max_sites, mix_regions
|
188 |
)
|
189 |
st.success("Clustering completed!")
|
190 |
-
st.write(clustered_df.head())
|
191 |
-
|
192 |
-
# Plot
|
193 |
-
clustered_df["size"] = 10
|
194 |
-
fig = px.scatter_map(
|
195 |
-
clustered_df,
|
196 |
-
lat=lat_col,
|
197 |
-
lon=lon_col,
|
198 |
-
color="Cluster",
|
199 |
-
size="size",
|
200 |
-
hover_name=code_col,
|
201 |
-
hover_data=[region_col],
|
202 |
-
zoom=5,
|
203 |
-
height=600,
|
204 |
-
)
|
205 |
-
fig.update_layout(mapbox_style="open-street-map")
|
206 |
-
fig.update_traces(marker=dict(size=15))
|
207 |
-
st.plotly_chart(fig)
|
208 |
|
209 |
# Show cluster size per cluster plot
|
210 |
cluster_size = clustered_df["Cluster"].value_counts().sort_index()
|
@@ -222,6 +207,23 @@ if uploaded_file:
|
|
222 |
fig.update_layout(title="Cluster Size per Region")
|
223 |
st.plotly_chart(fig)
|
224 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
225 |
# Download button
|
226 |
st.download_button(
|
227 |
label="Download clustered Excel file",
|
|
|
170 |
)
|
171 |
cluster_method = st.selectbox(
|
172 |
"Select clustering method",
|
173 |
+
[
|
174 |
+
"Hilbert Curve- Same number of sites for each cluster",
|
175 |
+
"KMeans - Number of sites Lower than max",
|
176 |
+
],
|
177 |
)
|
178 |
mix_regions = st.checkbox(
|
179 |
"Allow mixing different regions in clusters", value=False
|
|
|
181 |
submitted = st.form_submit_button("Run Clustering")
|
182 |
|
183 |
if submitted:
|
184 |
+
if cluster_method == "Hilbert Curve- Same number of sites for each cluster":
|
185 |
clustered_df = cluster_sites_hilbert_curve_same_size(
|
186 |
df, lat_col, lon_col, region_col, max_sites, mix_regions
|
187 |
)
|
188 |
+
elif cluster_method == "KMeans - Number of sites Lower than max":
|
189 |
clustered_df = cluster_sites_kmeans_lower_to_fixed_size(
|
190 |
df, lat_col, lon_col, region_col, max_sites, mix_regions
|
191 |
)
|
192 |
st.success("Clustering completed!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
193 |
|
194 |
# Show cluster size per cluster plot
|
195 |
cluster_size = clustered_df["Cluster"].value_counts().sort_index()
|
|
|
207 |
fig.update_layout(title="Cluster Size per Region")
|
208 |
st.plotly_chart(fig)
|
209 |
|
210 |
+
# Map Plot
|
211 |
+
clustered_df["size"] = 10
|
212 |
+
fig = px.scatter_map(
|
213 |
+
clustered_df,
|
214 |
+
lat=lat_col,
|
215 |
+
lon=lon_col,
|
216 |
+
color="Cluster",
|
217 |
+
size="size",
|
218 |
+
hover_name=code_col,
|
219 |
+
hover_data=[region_col],
|
220 |
+
zoom=5,
|
221 |
+
height=600,
|
222 |
+
)
|
223 |
+
fig.update_layout(mapbox_style="open-street-map")
|
224 |
+
fig.update_traces(marker=dict(size=15))
|
225 |
+
st.plotly_chart(fig)
|
226 |
+
|
227 |
# Download button
|
228 |
st.download_button(
|
229 |
label="Download clustered Excel file",
|