Spaces:

DavMelchi
/

db_query

Running

App Files Files Community

DavMelchi committed on Jul 16

Commit

b213530

1 Parent(s): a4525dd

improve clustering

Browse files

Files changed (1) hide show

apps/clustering.py +23 -21

apps/clustering.py CHANGED Viewed

@@ -170,7 +170,10 @@ if uploaded_file:
         )
         cluster_method = st.selectbox(
             "Select clustering method",
-            ["Hilbert Curve Same Size", "KMeans Lower To Fixed Size"],
         )
         mix_regions = st.checkbox(
             "Allow mixing different regions in clusters", value=False
@@ -178,33 +181,15 @@ if uploaded_file:
         submitted = st.form_submit_button("Run Clustering")
     if submitted:
-        if cluster_method == "Hilbert Curve Same Size":
             clustered_df = cluster_sites_hilbert_curve_same_size(
                 df, lat_col, lon_col, region_col, max_sites, mix_regions
             )
-        elif cluster_method == "KMeans Lower To Fixed Size":
             clustered_df = cluster_sites_kmeans_lower_to_fixed_size(
                 df, lat_col, lon_col, region_col, max_sites, mix_regions
             )
         st.success("Clustering completed!")
-        st.write(clustered_df.head())
-        # Plot
-        clustered_df["size"] = 10
-        fig = px.scatter_map(
-            clustered_df,
-            lat=lat_col,
-            lon=lon_col,
-            color="Cluster",
-            size="size",
-            hover_name=code_col,
-            hover_data=[region_col],
-            zoom=5,
-            height=600,
-        )
-        fig.update_layout(mapbox_style="open-street-map")
-        fig.update_traces(marker=dict(size=15))
-        st.plotly_chart(fig)
         # Show cluster size per cluster plot
         cluster_size = clustered_df["Cluster"].value_counts().sort_index()
@@ -222,6 +207,23 @@ if uploaded_file:
         fig.update_layout(title="Cluster Size per Region")
         st.plotly_chart(fig)
         # Download button
         st.download_button(
             label="Download clustered Excel file",

         )
         cluster_method = st.selectbox(
             "Select clustering method",
+            [
+                "Hilbert Curve- Same number of sites for each cluster",
+                "KMeans - Number of sites Lower than max",
+            ],
         )
         mix_regions = st.checkbox(
             "Allow mixing different regions in clusters", value=False
         submitted = st.form_submit_button("Run Clustering")
     if submitted:
+        if cluster_method == "Hilbert Curve- Same number of sites for each cluster":
             clustered_df = cluster_sites_hilbert_curve_same_size(
                 df, lat_col, lon_col, region_col, max_sites, mix_regions
             )
+        elif cluster_method == "KMeans - Number of sites Lower than max":
             clustered_df = cluster_sites_kmeans_lower_to_fixed_size(
                 df, lat_col, lon_col, region_col, max_sites, mix_regions
             )
         st.success("Clustering completed!")
         # Show cluster size per cluster plot
         cluster_size = clustered_df["Cluster"].value_counts().sort_index()
         fig.update_layout(title="Cluster Size per Region")
         st.plotly_chart(fig)
+        # Map Plot
+        clustered_df["size"] = 10
+        fig = px.scatter_map(
+            clustered_df,
+            lat=lat_col,
+            lon=lon_col,
+            color="Cluster",
+            size="size",
+            hover_name=code_col,
+            hover_data=[region_col],
+            zoom=5,
+            height=600,
+        )
+        fig.update_layout(mapbox_style="open-street-map")
+        fig.update_traces(marker=dict(size=15))
+        st.plotly_chart(fig)
         # Download button
         st.download_button(
             label="Download clustered Excel file",