File size: 13,910 Bytes
10d0bac
 
 
be85e86
2619083
15b2d37
be85e86
 
15b2d37
2619083
15b2d37
be85e86
15b2d37
 
be85e86
513894a
 
 
 
15b2d37
be85e86
15b2d37
 
 
af5ac46
15b2d37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
af5ac46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15b2d37
 
 
 
 
 
 
 
 
 
 
 
 
 
af5ac46
be85e86
 
6c809c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be85e86
6c809c9
 
 
be85e86
a2f75f4
 
6c809c9
a2f75f4
 
 
6c809c9
a2f75f4
 
 
 
 
 
6c809c9
a2f75f4
 
 
 
 
6c809c9
a2f75f4
 
 
 
 
 
 
 
 
 
be85e86
af5ac46
be85e86
 
 
 
 
 
 
 
 
 
 
15b2d37
 
 
 
 
 
 
be85e86
 
 
 
 
 
 
 
 
af5ac46
be85e86
 
 
 
 
af5ac46
be85e86
 
 
 
 
 
 
 
15b2d37
be85e86
 
 
 
af5ac46
be85e86
 
 
 
 
 
 
15b2d37
be85e86
 
 
 
513894a
6c809c9
 
 
 
 
af5ac46
6c809c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
513894a
6c809c9
 
513894a
 
 
 
 
 
 
 
6c809c9
 
513894a
6c809c9
 
 
 
 
 
 
 
 
 
 
be85e86
 
6c809c9
be85e86
6c809c9
 
be85e86
513894a
 
 
6c809c9
 
 
 
 
 
 
 
 
 
 
be85e86
6c809c9
 
be85e86
6c809c9
be85e86
6c809c9
 
 
513894a
 
 
 
 
 
6c809c9
 
 
 
15b2d37
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
import streamlit as st
from streamlit_folium import st_folium
import folium
from folium.plugins import Draw
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon, Point
import numpy as np
import re # For parsing STATEDAREA

# --- Display limits ---
# Upper bound on the number of rows handed to st.dataframe.
MAX_ROWS_DATAFRAME_DISPLAY = 1000
# Upper bound on the number of features drawn on the Folium map
# (keeps the payload small enough to avoid MessageSizeError).
MAX_MAP_FEATURES_DISPLAY = 5000

# --- Page header ---
# Page configuration is issued before any other Streamlit call.
st.set_page_config(page_title="Multiplex Coop Housing Filter", layout="wide")
st.title("🗺️ Multiplex Coop Housing Filter (Hugging Face Data)")
st.write(
    "This app uses the `ProjectMultiplexCoop/PropertyBoundaries` dataset from Hugging Face. Draw a polygon on the map to spatially filter properties. Use the form below to apply additional filters based on property attributes. **Note: FSI, Building Coverage, Height, and Stories are synthetic for demonstration as they are not directly available in the dataset.**"
)

# --- 1. Load Data from Hugging Face and Process ---
@st.cache_data
def load_and_process_data():
    """
    Load the parcel boundaries from Hugging Face and derive the columns the app filters on.

    Processing steps:
      * read ``Property_Boundaries_4326.parquet`` into a GeoDataFrame;
      * parse ``STATEDAREA`` into a numeric ``zn_area`` (NaN when the value is
        missing or contains no "<number> sq.m" fragment);
      * copy ``FEATURE_TYPE`` into ``zn_type``;
      * generate synthetic ``fsi_total``, ``prcnt_cver``, ``height_metres`` and
        ``stories`` values with unseeded ``np.random`` calls;
      * compute each parcel's centroid in EPSG:3857 and store it as
        ``latitude`` / ``longitude`` in EPSG:4326.

    Returns:
        A copy of the data restricted to the columns used for display,
        filtering and tooltips.

    If the dataset cannot be read, an error is shown with ``st.error`` and the
    script run is halted with ``st.stop()``.
    """
    try:
        gdf = gpd.read_parquet("hf://datasets/ProjectMultiplexCoop/PropertyBoundaries/Property_Boundaries_4326.parquet")
    except Exception as e:
        st.error(f"Failed to load data from Hugging Face. Please ensure `huggingface_hub`, `geopandas`, `fiona`, and `pyproj` are installed. Error: {e}")
        st.stop()

    # Compiled once instead of on every row of the apply() below.
    area_pattern = re.compile(r'(\d+\.?\d*)\s*sq\.m')

    def parse_stated_area(area_str):
        """Return the number preceding 'sq.m' as a float, or NaN if absent."""
        if pd.isna(area_str):
            return np.nan
        match = area_pattern.search(str(area_str))
        return float(match.group(1)) if match else np.nan

    # Lot area in square metres.
    gdf['zn_area'] = gdf['STATEDAREA'].apply(parse_stated_area)

    # FEATURE_TYPE stands in for the zoning type.
    gdf['zn_type'] = gdf['FEATURE_TYPE']

    # Synthetic attributes that the dataset does not provide.
    # randint's upper bound is exclusive: coverage is 20..69, stories 2..9.
    num_rows = len(gdf)
    gdf['fsi_total'] = np.round(np.random.uniform(0.5, 3.0, num_rows), 2)
    gdf['prcnt_cver'] = np.random.randint(20, 70, num_rows)
    gdf['height_metres'] = np.round(np.random.uniform(5, 30, num_rows), 1)
    gdf['stories'] = np.random.randint(2, 10, num_rows)

    # Sequential ID and a human-readable display name.
    gdf['id'] = range(1, num_rows + 1)
    gdf['name'] = gdf['PARCELID'].apply(lambda x: f"Parcel {x}")

    # buffer(0) repairs invalid geometries before centroid calculation and plotting.
    gdf['geometry'] = gdf['geometry'].buffer(0)

    # Centroids are computed once in a projected CRS (Web Mercator), then the
    # resulting GeoSeries is reprojected straight back to EPSG:4326.
    # Keeping it as a GeoSeries preserves gdf's index, so the assignments
    # below line up row-for-row even when that index is not 0..n-1.
    centroids = gdf.to_crs(epsg=3857).geometry.centroid.to_crs(epsg=4326)
    gdf['latitude'] = centroids.y
    gdf['longitude'] = centroids.x

    # Columns kept for display and filtering.
    df_processed = gdf[[
        'id', 'name', 'latitude', 'longitude', 'geometry',
        'zn_type', 'zn_area', 'fsi_total', 'prcnt_cver', 'height_metres', 'stories',
        'PARCELID',  # Original Parcel ID for reference
        'ADDRESS_NUMBER', 'LINEAR_NAME_FULL',  # For detailed address in tooltips
    ]].copy()

    return df_processed

# Processed parcel table; load_and_process_data is wrapped in st.cache_data.
df = load_and_process_data()

# Working copy that starts as the full table.
# The spatial filter and the attribute form further down each reassign
# `filtered_df` to a narrower subset.
filtered_df = df.copy()

# --- 2. Map for Drawing (now in an expander) ---
# Renders a map with a polygon drawing tool. If a polygon has been drawn,
# `filtered_df` is narrowed to the parcels whose centroid lies inside it.
# Sets `polygon_drawn`, `shapely_polygon` and `polygon_coords` for later use.
with st.expander("Draw a Polygon on the Map", expanded=False):
    # Center the map on the mean of the parcel centroids.
    m = folium.Map(location=[df['latitude'].mean(), df['longitude'].mean()], zoom_start=12)

    # Only the polygon tool is enabled; drawn shapes can be removed but not edited.
    draw = Draw(
        export=True,
        filename="drawn_polygon.geojson",
        position="topleft",
        draw_options={
            "polyline": False, "rectangle": False, "circlemarker": False,
            "circle": False, "marker": False,
            "polygon": {
                "allowIntersection": False,
                "drawError": {"color": "#e0115f", "message": "Oups!"},
                "shapeOptions": {"color": "#ef233c", "fillOpacity": 0.5},
            },
        },
        edit_options={"edit": False, "remove": True},
    )
    m.add_child(draw)

    st.info("Draw a polygon on the map to spatially filter properties. The filtered results will appear below.")
    output = st_folium(m, width=1000, height=600, returned_objects=["all_draw_features"])

    polygon_drawn = False
    shapely_polygon = None
    polygon_coords = None

    # `output` or its "all_draw_features" entry may be empty/None before anything is drawn.
    drawn_features = (output or {}).get("all_draw_features") or []
    polygons = [
        feature["geometry"]["coordinates"]
        for feature in drawn_features
        if feature["geometry"]["type"] == "Polygon"
    ]

    if polygons:
        # Outer ring of the most recently drawn polygon.
        # Drawn features are GeoJSON, so every position is [lon, lat]; that is
        # already the (x, y) order Shapely expects, so no swap is applied.
        # `polygon_coords` is kept in this raw GeoJSON order.
        polygon_coords = polygons[-1][0]
        shapely_polygon = Polygon([(position[0], position[1]) for position in polygon_coords])
        polygon_drawn = True

        # Vectorised centroid-in-polygon test (replaces a per-row apply).
        centroid_points = gpd.GeoSeries(
            gpd.points_from_xy(df['longitude'], df['latitude']),
            index=df.index,
        )
        filtered_df = df[centroid_points.within(shapely_polygon)].copy()
        st.success(f"Initially filtered {len(filtered_df)} properties within the drawn polygon.")
    else:
        st.info("No polygon drawn yet. Draw a polygon on the map to spatially filter properties.")

# --- 3. Attribute Filtering Form ---
# Collects attribute thresholds in a form and narrows `filtered_df` with them.
# Form widgets keep their last submitted values, so the filters are applied on
# every rerun. Applying them only when `submitted` is true would drop them as
# soon as another widget (e.g. the drawing map) triggers a rerun.
st.subheader("Filter Property Attributes")

with st.form("attribute_filters"):
    # Selectbox entry that means "do not filter by zoning type".
    all_zoning_label = 'All Residential Zoning (0, 101, 6)'

    # Smallest parsed lot area, or 0 when no area could be parsed.
    smallest_area = df['zn_area'].min()
    default_min_area = float(smallest_area) if pd.notna(smallest_area) else 0.0

    col1, col2 = st.columns(2)

    with col1:
        # dropna(): sorted() cannot order NaN/None together with strings.
        all_zoning_types = [all_zoning_label] + sorted(df['zn_type'].dropna().unique().tolist())
        selected_zn_type = st.selectbox("Zoning Type", all_zoning_types, key="zn_type_select")

        min_zn_area = st.number_input(
            "Minimum Lot Area in Sq Metres",
            min_value=default_min_area,
            value=default_min_area,
            step=100.0,
            key="zn_area_input"
        )

        min_fsi_total = st.number_input("Minimum Floor Space Index (FSI)", min_value=0.0, value=0.0, step=0.1, format="%.2f", key="fsi_total_input")

    with col2:
        max_prcnt_cver = st.number_input("Maximum Building Percent Coverage (%)", min_value=0, value=100, step=1, key="prcnt_cver_input")

        height_stories_option = st.radio(
            "Filter by",
            ("Height", "Stories"),
            index=0,
            key="height_stories_radio"
        )

        # Both inputs are always rendered. Widgets inside a form do not rerun
        # the script until submission, so an input chosen by the radio would
        # only appear after a submit and would then start from 0 again.
        # Only the input selected by the radio is applied below.
        min_height_value = st.number_input("Minimum Height in Metres", min_value=0.0, value=0.0, step=0.1, format="%.1f", key="height_input")
        min_stories_value = st.number_input("Minimum Stories", min_value=0, value=0, step=1, key="stories_input")

    submitted = st.form_submit_button("Apply Attribute Filters")

    # Each filter is skipped while its control is at the default value.
    if selected_zn_type != all_zoning_label:
        filtered_df = filtered_df[filtered_df['zn_type'] == selected_zn_type]

    # Skipped at the default so parcels with unknown area are not dropped
    # unless a threshold was actually set; NaN areas compare False otherwise.
    if min_zn_area > default_min_area:
        filtered_df = filtered_df[filtered_df['zn_area'] >= min_zn_area]

    if min_fsi_total > 0:
        filtered_df = filtered_df[filtered_df['fsi_total'] >= min_fsi_total]

    if max_prcnt_cver < 100:
        filtered_df = filtered_df[filtered_df['prcnt_cver'] <= max_prcnt_cver]

    if height_stories_option == "Height" and min_height_value > 0:
        filtered_df = filtered_df[filtered_df['height_metres'] >= min_height_value]
    elif height_stories_option == "Stories" and min_stories_value > 0:
        filtered_df = filtered_df[filtered_df['stories'] >= min_stories_value]

    if submitted:
        st.success(f"Applied attribute filters. Total properties after all filters: {len(filtered_df)}")
    else:
        st.info("Adjust filters and click 'Apply Attribute Filters'.")


# --- 4. Display Filtered Data on a New Map and as a Table ---
# Shows the parcels left in `filtered_df` on a read-only map (with the drawn
# polygon overlaid, if any), then as a table, and offers a CSV export.
with st.expander("Filtered Properties Display", expanded=True):
    if not filtered_df.empty:
        # Extent of the filtered centroids, used to pick a zoom level.
        min_lat, max_lat = filtered_df['latitude'].min(), filtered_df['latitude'].max()
        min_lon, max_lon = filtered_df['longitude'].min(), filtered_df['longitude'].max()
        extent = max(max_lat - min_lat, max_lon - min_lon)

        filtered_map_center = [filtered_df['latitude'].mean(), filtered_df['longitude'].mean()]

        # Heuristic zoom: the smaller the extent (in degrees), the closer the view.
        # A single point has extent 0 and therefore gets the closest zoom.
        if extent < 0.001:
            filtered_map_zoom = 18
        elif extent < 0.01:
            filtered_map_zoom = 16
        elif extent < 0.1:
            filtered_map_zoom = 14
        else:
            filtered_map_zoom = 12

        filtered_m = folium.Map(location=filtered_map_center, zoom_start=filtered_map_zoom)

        # Overlay the drawn polygon, if there is one.
        # `polygon_coords` holds GeoJSON positions ([lon, lat]) while
        # folium.Polygon expects (lat, lon), so each pair is swapped here.
        if polygon_drawn and polygon_coords:
            folium.Polygon(
                locations=[(position[1], position[0]) for position in polygon_coords],
                color="#ef233c",
                fill=True,
                fill_color="#ef233c",
                fill_opacity=0.5
            ).add_to(filtered_m)

        # Convert filtered_df to GeoDataFrame for plotting
        filtered_gdf = gpd.GeoDataFrame(filtered_df, geometry='geometry')

        # --- Apply map display limit ---
        # Above the cap, a fixed-seed random sample is plotted instead of everything.
        features_to_plot_count = len(filtered_gdf)
        if features_to_plot_count > MAX_MAP_FEATURES_DISPLAY:
            st.warning(f"Displaying a random sample of {MAX_MAP_FEATURES_DISPLAY} properties on the map (out of {features_to_plot_count} total filtered) to prevent performance issues.")
            filtered_gdf_for_map = filtered_gdf.sample(MAX_MAP_FEATURES_DISPLAY, random_state=42)
        else:
            filtered_gdf_for_map = filtered_gdf

        # Parcel polygons as a GeoJSON layer with an attribute tooltip.
        folium.GeoJson(
            filtered_gdf_for_map.to_json(),
            style_function=lambda x: {
                'fillColor': 'green',
                'color': 'darkgreen',
                'weight': 1,
                'fillOpacity': 0.7
            },
            tooltip=folium.GeoJsonTooltip(
                fields=['PARCELID', 'zn_type', 'zn_area', 'fsi_total', 'prcnt_cver', 'height_metres', 'stories', 'ADDRESS_NUMBER', 'LINEAR_NAME_FULL'],
                aliases=['Parcel ID:', 'Zoning Type:', 'Lot Area (m²):', 'FSI:', 'Coverage (%):', 'Height (m):', 'Stories:', 'Address Num:', 'Street:'],
                localize=True
            )
        ).add_to(filtered_m)

        # returned_objects=[]: nothing is read back from this map, and an
        # empty list stops pan/zoom interactions from rerunning the script.
        st_folium(filtered_m, width=1000, height=500, returned_objects=[])

        st.subheader("Filtered Properties Table")
        display_cols = ['PARCELID', 'zn_type', 'zn_area', 'fsi_total', 'prcnt_cver', 'height_metres', 'stories', 'ADDRESS_NUMBER', 'LINEAR_NAME_FULL']

        # The table is truncated above the row cap; the CSV export is not.
        if len(filtered_df) > MAX_ROWS_DATAFRAME_DISPLAY:
            st.warning(f"Displaying only the first {MAX_ROWS_DATAFRAME_DISPLAY} rows of the filtered data ({len(filtered_df)} total properties). Download the full dataset below.")
            st.dataframe(filtered_df[display_cols].head(MAX_ROWS_DATAFRAME_DISPLAY))
        else:
            st.dataframe(filtered_df[display_cols])

        # --- 5. Export Data Button ---
        csv = filtered_df.to_csv(index=False).encode('utf-8')
        st.download_button(
            label="Export Full Filtered Data to CSV",
            data=csv,
            file_name="multiplex_coop_filtered_properties.csv",
            mime="text/csv",
        )

    else:
        st.warning("No properties match the current filters. Adjust your criteria or draw a polygon on the map.")

# Page footer: a divider, advice on MessageSizeError / server.maxMessageSize,
# and a closing description of the app. Rendered in that order.
troubleshooting_note = """
    **Troubleshooting Large Data:**
    If you still encounter a `MessageSizeError` despite the display limits,
    it means the data size still exceeds Streamlit's internal limit, or the sampled data is still too complex.
    You can try decreasing `MAX_MAP_FEATURES_DISPLAY` and `MAX_ROWS_DATAFRAME_DISPLAY` further.
    Alternatively, you can increase Streamlit's default message size limit by adding
    `server.maxMessageSize = <size_in_mb>` (e.g., `server.maxMessageSize = 500`)
    to your Streamlit `config.toml` file.
    However, be aware that increasing this limit can lead to longer loading times and higher
    memory consumption in your browser and on the Streamlit server.
    """
closing_note = "This app demonstrates spatial and attribute filtering on the ProjectMultiplexCoop/PropertyBoundaries dataset from Hugging Face. FSI, Building Coverage, Height, and Stories are synthetic for demonstration."

for footer_markdown in ("---", troubleshooting_note, closing_note):
    st.markdown(footer_markdown)