DavMelchi committed on
Commit
b213530
·
1 Parent(s): a4525dd

improve clustering

Browse files
Files changed (1) hide show
  1. apps/clustering.py +23 -21
apps/clustering.py CHANGED
@@ -170,7 +170,10 @@ if uploaded_file:
170
  )
171
  cluster_method = st.selectbox(
172
  "Select clustering method",
173
- ["Hilbert Curve Same Size", "KMeans Lower To Fixed Size"],
 
 
 
174
  )
175
  mix_regions = st.checkbox(
176
  "Allow mixing different regions in clusters", value=False
@@ -178,33 +181,15 @@ if uploaded_file:
178
  submitted = st.form_submit_button("Run Clustering")
179
 
180
  if submitted:
181
- if cluster_method == "Hilbert Curve Same Size":
182
  clustered_df = cluster_sites_hilbert_curve_same_size(
183
  df, lat_col, lon_col, region_col, max_sites, mix_regions
184
  )
185
- elif cluster_method == "KMeans Lower To Fixed Size":
186
  clustered_df = cluster_sites_kmeans_lower_to_fixed_size(
187
  df, lat_col, lon_col, region_col, max_sites, mix_regions
188
  )
189
  st.success("Clustering completed!")
190
- st.write(clustered_df.head())
191
-
192
- # Plot
193
- clustered_df["size"] = 10
194
- fig = px.scatter_map(
195
- clustered_df,
196
- lat=lat_col,
197
- lon=lon_col,
198
- color="Cluster",
199
- size="size",
200
- hover_name=code_col,
201
- hover_data=[region_col],
202
- zoom=5,
203
- height=600,
204
- )
205
- fig.update_layout(mapbox_style="open-street-map")
206
- fig.update_traces(marker=dict(size=15))
207
- st.plotly_chart(fig)
208
 
209
  # Show cluster size per cluster plot
210
  cluster_size = clustered_df["Cluster"].value_counts().sort_index()
@@ -222,6 +207,23 @@ if uploaded_file:
222
  fig.update_layout(title="Cluster Size per Region")
223
  st.plotly_chart(fig)
224
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  # Download button
226
  st.download_button(
227
  label="Download clustered Excel file",
 
170
  )
171
  cluster_method = st.selectbox(
172
  "Select clustering method",
173
+ [
174
+ "Hilbert Curve- Same number of sites for each cluster",
175
+ "KMeans - Number of sites Lower than max",
176
+ ],
177
  )
178
  mix_regions = st.checkbox(
179
  "Allow mixing different regions in clusters", value=False
 
181
  submitted = st.form_submit_button("Run Clustering")
182
 
183
  if submitted:
184
+ if cluster_method == "Hilbert Curve- Same number of sites for each cluster":
185
  clustered_df = cluster_sites_hilbert_curve_same_size(
186
  df, lat_col, lon_col, region_col, max_sites, mix_regions
187
  )
188
+ elif cluster_method == "KMeans - Number of sites Lower than max":
189
  clustered_df = cluster_sites_kmeans_lower_to_fixed_size(
190
  df, lat_col, lon_col, region_col, max_sites, mix_regions
191
  )
192
  st.success("Clustering completed!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
  # Show cluster size per cluster plot
195
  cluster_size = clustered_df["Cluster"].value_counts().sort_index()
 
207
  fig.update_layout(title="Cluster Size per Region")
208
  st.plotly_chart(fig)
209
 
210
+ # Map Plot
211
+ clustered_df["size"] = 10
212
+ fig = px.scatter_map(
213
+ clustered_df,
214
+ lat=lat_col,
215
+ lon=lon_col,
216
+ color="Cluster",
217
+ size="size",
218
+ hover_name=code_col,
219
+ hover_data=[region_col],
220
+ zoom=5,
221
+ height=600,
222
+ )
223
+ fig.update_layout(mapbox_style="open-street-map")
224
+ fig.update_traces(marker=dict(size=15))
225
+ st.plotly_chart(fig)
226
+
227
  # Download button
228
  st.download_button(
229
  label="Download clustered Excel file",