|
import streamlit as st |
|
from streamlit_folium import st_folium |
|
import folium |
|
from folium.plugins import Draw |
|
import pandas as pd |
|
import geopandas as gpd |
|
from shapely.geometry import Polygon, Point |
|
import numpy as np |
|
import re |
|
|
|
# Display caps used further down to keep the payload sent to the browser small:
# the results table shows at most MAX_ROWS_DATAFRAME_DISPLAY rows and the
# results map draws at most MAX_MAP_FEATURES_DISPLAY parcels.
MAX_ROWS_DATAFRAME_DISPLAY = 1000
MAX_MAP_FEATURES_DISPLAY = 5000

# Page chrome: wide layout, browser-tab title, headline and intro paragraph.
st.set_page_config(page_title="Multiplex Coop Housing Filter", layout="wide")

st.title("🗺️ Multiplex Coop Housing Filter (Hugging Face Data)")

st.write(
    "This app uses the `ProjectMultiplexCoop/PropertyBoundaries` dataset from Hugging Face. "
    "Draw a polygon on the map to spatially filter properties. "
    "Use the form below to apply additional filters based on property attributes. "
    "**Note: FSI, Building Coverage, Height, and Stories are synthetic for demonstration "
    "as they are not directly available in the dataset.**"
)
|
|
|
|
|
# Captures the numeric part of values such as "123.45 sq.m".
_STATED_AREA_RE = re.compile(r'(\d+\.?\d*)\s*sq\.m')

# Fixed seed so the synthetic demo attributes are the same every time the
# cached result has to be rebuilt (restart, cache eviction, second worker).
_SYNTHETIC_SEED = 42


def _parse_stated_area(area_str):
    """Return the area in square metres parsed from a STATEDAREA value, or NaN."""
    if pd.isna(area_str):
        return np.nan
    match = _STATED_AREA_RE.search(str(area_str))
    return float(match.group(1)) if match else np.nan


@st.cache_data
def load_and_process_data():
    """
    Load the property-boundary parquet from Hugging Face and derive the columns
    the app filters on.

    Steps:
      * ``zn_area`` is parsed from ``STATEDAREA``; ``zn_type`` mirrors ``FEATURE_TYPE``.
      * ``fsi_total``, ``prcnt_cver``, ``height_metres`` and ``stories`` are
        synthetic, drawn from a seeded generator so they are reproducible.
      * Geometries are cleaned with ``buffer(0)``.
      * ``latitude`` / ``longitude`` hold each parcel's centroid, computed in
        EPSG:3857 and converted back to EPSG:4326.

    Returns:
        GeoDataFrame with columns id, name, latitude, longitude, geometry,
        zn_type, zn_area, fsi_total, prcnt_cver, height_metres, stories,
        PARCELID, ADDRESS_NUMBER and LINEAR_NAME_FULL.

    On a load failure an error is shown in the app and the script run is
    stopped via ``st.stop()``.
    """
    try:
        gdf = gpd.read_parquet(
            "hf://datasets/ProjectMultiplexCoop/PropertyBoundaries/Property_Boundaries_4326.parquet"
        )
    except Exception as e:
        st.error(
            "Failed to load data from Hugging Face. Please ensure `huggingface_hub`, "
            "`geopandas`, `fiona`, and `pyproj` are installed. "
            f"Error: {e}"
        )
        st.stop()

    # Real attributes taken from the dataset.
    gdf['zn_area'] = gdf['STATEDAREA'].apply(_parse_stated_area)
    gdf['zn_type'] = gdf['FEATURE_TYPE']

    # Synthetic attributes (not present in the dataset); same ranges as before,
    # upper bounds of the integer draws are exclusive.
    num_rows = len(gdf)
    rng = np.random.default_rng(_SYNTHETIC_SEED)
    gdf['fsi_total'] = np.round(rng.uniform(0.5, 3.0, num_rows), 2)
    gdf['prcnt_cver'] = rng.integers(20, 70, num_rows)
    gdf['height_metres'] = np.round(rng.uniform(5, 30, num_rows), 1)
    gdf['stories'] = rng.integers(2, 10, num_rows)

    gdf['id'] = range(1, num_rows + 1)
    gdf['name'] = gdf['PARCELID'].map("Parcel {}".format)

    # buffer(0) is the usual trick to repair invalid polygons before further
    # geometric operations.
    gdf['geometry'] = gdf['geometry'].buffer(0)

    # Compute centroids once in a projected CRS, then convert the points back to
    # lon/lat. Working on the GeoSeries keeps gdf's own index, so the assignment
    # below aligns row-for-row even if the parquet carries a non-default index.
    centroids = gdf.geometry.to_crs(epsg=3857).centroid.to_crs(epsg=4326)
    gdf['latitude'] = centroids.y
    gdf['longitude'] = centroids.x

    df_processed = gdf[[
        'id', 'name', 'latitude', 'longitude', 'geometry',
        'zn_type', 'zn_area', 'fsi_total', 'prcnt_cver', 'height_metres', 'stories',
        'PARCELID',
        'ADDRESS_NUMBER', 'LINEAR_NAME_FULL',
    ]].copy()

    return df_processed
|
|
|
df = load_and_process_data()

# Working copy that the spatial filter (here) and the attribute filters
# (further down) progressively narrow.
filtered_df = df.copy()

# NOTE(review): indentation was lost in the source this was recovered from; the
# polygon handling is assumed to live inside the expander — confirm.
with st.expander("Draw a Polygon on the Map", expanded=False):
    # Base map centred on the mean parcel centroid, with a polygon-only draw tool.
    m = folium.Map(
        location=[df['latitude'].mean(), df['longitude'].mean()],
        zoom_start=12,
    )

    draw = Draw(
        export=True,
        filename="drawn_polygon.geojson",
        position="topleft",
        draw_options={
            "polyline": False, "rectangle": False, "circlemarker": False,
            "circle": False, "marker": False,
            "polygon": {
                "allowIntersection": False,
                "drawError": {"color": "#e0115f", "message": "Oups!"},
                "shapeOptions": {"color": "#ef233c", "fillOpacity": 0.5},
            },
        },
        edit_options={"edit": False, "remove": True},
    )
    m.add_child(draw)

    st.info("Draw a polygon on the map to spatially filter properties. The filtered results will appear below.")
    output = st_folium(m, width=1000, height=600, returned_objects=["all_draw_features"])

    # State read by the results section below.
    polygon_drawn = False
    shapely_polygon = None
    polygon_coords = None  # exterior ring in GeoJSON order: [lon, lat] pairs

    drawn_features = (output or {}).get("all_draw_features") or []
    polygons = [
        feature["geometry"]["coordinates"]
        for feature in drawn_features
        if feature["geometry"]["type"] == "Polygon"
    ]

    if polygons:
        # Most recently drawn polygon, exterior ring only.
        polygon_coords = polygons[-1][0]

        # GeoJSON positions are already (lon, lat) == (x, y), which is the axis
        # order the parcel centroid points use, so no swap is needed here.
        shapely_polygon = Polygon([(lon, lat) for lon, lat in polygon_coords])
        polygon_drawn = True

        # Vectorised point-in-polygon test on the parcel centroids.
        centroid_points = gpd.GeoSeries(
            gpd.points_from_xy(df['longitude'], df['latitude']),
            index=df.index,
        )
        filtered_df = df[centroid_points.within(shapely_polygon)].copy()
        st.success(f"Initially filtered {len(filtered_df)} properties within the drawn polygon.")
    else:
        st.info("No polygon drawn yet. Draw a polygon on the map to spatially filter properties.")
|
|
|
|
|
st.subheader("Filter Property Attributes")

# Selectbox entry meaning "do not filter on zoning type".
# NOTE(review): the label is misspelled ("Resdidential") and names specific
# zoning codes although no zoning filter is applied for it — kept verbatim,
# confirm the intended wording/behaviour.
ALL_ZONING_OPTION = 'All Resdidential Zoning (0, 101, 6)'

with st.form("attribute_filters"):
    col1, col2 = st.columns(2)

    with col1:
        # Drop missing values first: sorting NaN/None together with strings
        # raises TypeError, and a NaN option could never match a row anyway.
        zoning_types = sorted(df['zn_type'].dropna().unique().tolist())
        all_zoning_types = [ALL_ZONING_OPTION] + zoning_types
        selected_zn_type = st.selectbox("Zoning Type", all_zoning_types, key="zn_type_select")

        # Smallest known lot area is both the lower bound and the default;
        # falls back to 0 when no area could be parsed at all.
        smallest_area = df['zn_area'].min()
        area_floor = float(smallest_area) if pd.notna(smallest_area) else 0.0
        min_zn_area = st.number_input(
            "Minimum Lot Area in Sq Metres",
            min_value=area_floor,
            value=area_floor,
            step=100.0,
            key="zn_area_input",
        )

        min_fsi_total = st.number_input(
            "Minimum Floor Space Index (FSI)",
            min_value=0.0, value=0.0, step=0.1, format="%.2f",
            key="fsi_total_input",
        )

    with col2:
        max_prcnt_cver = st.number_input(
            "Maximum Building Percent Coverage (%)",
            min_value=0, value=100, step=1,
            key="prcnt_cver_input",
        )

        height_stories_option = st.radio(
            "Filter by",
            ("Height", "Stories"),
            index=0,
            key="height_stories_radio",
        )

        # Only one of the two inputs is rendered per run; give both thresholds
        # a "no filter" default so each name is always bound.
        min_height_value = 0.0
        min_stories_value = 0
        if height_stories_option == "Height":
            min_height_value = st.number_input(
                "Minimum Height in Metres",
                min_value=0.0, value=0.0, step=0.1, format="%.1f",
                key="height_input",
            )
        else:
            min_stories_value = st.number_input(
                "Minimum Stories",
                min_value=0, value=0, step=1,
                key="stories_input",
            )

    submitted = st.form_submit_button("Apply Attribute Filters")

if submitted:
    # Each filter narrows the (possibly polygon-filtered) working frame.
    if selected_zn_type != ALL_ZONING_OPTION:
        filtered_df = filtered_df[filtered_df['zn_type'] == selected_zn_type]

    # Parcels with unknown area are treated as area 0.
    filtered_df = filtered_df[filtered_df['zn_area'].fillna(0) >= min_zn_area]

    if min_fsi_total > 0:
        filtered_df = filtered_df[filtered_df['fsi_total'] >= min_fsi_total]

    if max_prcnt_cver < 100:
        filtered_df = filtered_df[filtered_df['prcnt_cver'] <= max_prcnt_cver]

    if height_stories_option == "Height" and min_height_value > 0:
        filtered_df = filtered_df[filtered_df['height_metres'] >= min_height_value]
    elif height_stories_option == "Stories" and min_stories_value > 0:
        filtered_df = filtered_df[filtered_df['stories'] >= min_stories_value]

    st.success(f"Applied attribute filters. Total properties after all filters: {len(filtered_df)}")
else:
    st.info("Adjust filters and click 'Apply Attribute Filters'.")
|
|
|
|
|
|
|
with st.expander("Filtered Properties Display", expanded=True):
    if not filtered_df.empty:
        # --- Map view: centre and zoom derived from the extent of the results.
        min_lat, max_lat = filtered_df['latitude'].min(), filtered_df['latitude'].max()
        min_lon, max_lon = filtered_df['longitude'].min(), filtered_df['longitude'].max()

        if min_lat == max_lat and min_lon == max_lon:
            # Single location: centre on it at street level.
            filtered_map_center = [min_lat, min_lon]
            filtered_map_zoom = 18
        else:
            filtered_map_center = [filtered_df['latitude'].mean(), filtered_df['longitude'].mean()]
            # Larger extent (in degrees) -> lower zoom level.
            span = max(max_lat - min_lat, max_lon - min_lon)
            if span < 0.001:
                filtered_map_zoom = 18
            elif span < 0.01:
                filtered_map_zoom = 16
            elif span < 0.1:
                filtered_map_zoom = 14
            else:
                filtered_map_zoom = 12

        filtered_m = folium.Map(location=filtered_map_center, zoom_start=filtered_map_zoom)

        if polygon_drawn and polygon_coords:
            # polygon_coords is the raw GeoJSON ring ([lon, lat] pairs) while
            # folium expects (lat, lon), so swap the axes for the overlay.
            folium.Polygon(
                locations=[(lat, lon) for lon, lat in polygon_coords],
                color="#ef233c",
                fill=True,
                fill_color="#ef233c",
                fill_opacity=0.5,
            ).add_to(filtered_m)

        filtered_gdf = gpd.GeoDataFrame(filtered_df, geometry='geometry')

        # Cap the number of parcels drawn; a fixed random_state keeps the
        # sample stable across reruns.
        features_to_plot_count = len(filtered_gdf)
        if features_to_plot_count > MAX_MAP_FEATURES_DISPLAY:
            st.warning(f"Displaying a random sample of {MAX_MAP_FEATURES_DISPLAY} properties on the map (out of {features_to_plot_count} total filtered) to prevent performance issues.")
            filtered_gdf_for_map = filtered_gdf.sample(MAX_MAP_FEATURES_DISPLAY, random_state=42)
        else:
            filtered_gdf_for_map = filtered_gdf

        folium.GeoJson(
            filtered_gdf_for_map.to_json(),
            style_function=lambda x: {
                'fillColor': 'green',
                'color': 'darkgreen',
                'weight': 1,
                'fillOpacity': 0.7,
            },
            tooltip=folium.GeoJsonTooltip(
                fields=['PARCELID', 'zn_type', 'zn_area', 'fsi_total', 'prcnt_cver', 'height_metres', 'stories', 'ADDRESS_NUMBER', 'LINEAR_NAME_FULL'],
                aliases=['Parcel ID:', 'Zoning Type:', 'Lot Area (m²):', 'FSI:', 'Coverage (%):', 'Height (m):', 'Stories:', 'Address Num:', 'Street:'],
                localize=True,
            ),
        ).add_to(filtered_m)

        st_folium(filtered_m, width=1000, height=500)

        # --- Table view: attribute columns only, truncated to the display cap.
        st.subheader("Filtered Properties Table")
        display_cols = ['PARCELID', 'zn_type', 'zn_area', 'fsi_total', 'prcnt_cver', 'height_metres', 'stories', 'ADDRESS_NUMBER', 'LINEAR_NAME_FULL']

        if len(filtered_df) > MAX_ROWS_DATAFRAME_DISPLAY:
            st.warning(f"Displaying only the first {MAX_ROWS_DATAFRAME_DISPLAY} rows of the filtered data ({len(filtered_df)} total properties). Download the full dataset below.")
            st.dataframe(filtered_df[display_cols].head(MAX_ROWS_DATAFRAME_DISPLAY))
        else:
            st.dataframe(filtered_df[display_cols])

        # --- Export: every filtered row and every column, not just the preview.
        csv = filtered_df.to_csv(index=False).encode('utf-8')
        st.download_button(
            label="Export Full Filtered Data to CSV",
            data=csv,
            file_name="multiplex_coop_filtered_properties.csv",
            mime="text/csv",
        )
    else:
        st.warning("No properties match the current filters. Adjust your criteria or draw a polygon on the map.")
|
|
|
|
|
# Footer: horizontal rule, troubleshooting guidance for oversized payloads,
# and a closing description of the app.
st.markdown("---")
# NOTE(review): the literal's interior lines are kept flush-left as recovered;
# confirm against the original, since leading whitespace inside the string
# would affect how the Markdown is rendered.
st.markdown(
    """
**Troubleshooting Large Data:**
If you still encounter a `MessageSizeError` despite the display limits,
it means the data size still exceeds Streamlit's internal limit, or the sampled data is still too complex.
You can try decreasing `MAX_MAP_FEATURES_DISPLAY` and `MAX_ROWS_DATAFRAME_DISPLAY` further.
Alternatively, you can increase Streamlit's default message size limit by adding
`server.maxMessageSize = <size_in_mb>` (e.g., `server.maxMessageSize = 500`)
to your Streamlit `config.toml` file.
However, be aware that increasing this limit can lead to longer loading times and higher
memory consumption in your browser and on the Streamlit server.
"""
)
st.markdown("This app demonstrates spatial and attribute filtering on the ProjectMultiplexCoop/PropertyBoundaries dataset from Hugging Face. FSI, Building Coverage, Height, and Stories are synthetic for demonstration.")