import streamlit as st
from streamlit_folium import st_folium
import folium
from folium.plugins import Draw
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon, Point
import numpy as np
import re # For parsing STATEDAREA

# --- Display limits ---
# Largest number of rows handed to st.dataframe in the results table.
MAX_ROWS_DATAFRAME_DISPLAY = 1000
# Largest number of features drawn on the Folium results map (guards against MessageSizeError).
MAX_MAP_FEATURES_DISPLAY = 5000

# --- Page header ---
# The page configuration is issued before any other Streamlit output.
st.set_page_config(page_title="Multiplex Coop Housing Filter", layout="wide")

st.title("🗺️ Multiplex Coop Housing Filter (Hugging Face Data)")
st.write(
    "This app uses the `ProjectMultiplexCoop/PropertyBoundaries` dataset from Hugging Face. "
    "Draw a polygon on the map to spatially filter properties. "
    "Use the form below to apply additional filters based on property attributes. "
    "**Note: FSI, Building Coverage, Height, and Stories are synthetic for demonstration "
    "as they are not directly available in the dataset.**"
)

# --- 1. Load Data from Hugging Face and Process ---
@st.cache_data
def load_and_process_data():
    """
    Load the parcel boundaries and derive the columns used for filtering and display.

    Reads `Property_Boundaries_4326.parquet` from the
    `ProjectMultiplexCoop/PropertyBoundaries` Hugging Face dataset, then:

    * parses the numeric lot area out of `STATEDAREA` into `zn_area`;
    * copies `FEATURE_TYPE` into `zn_type`;
    * attaches synthetic `fsi_total`, `prcnt_cver`, `height_metres` and `stories`
      values drawn from a fixed-seed generator;
    * adds a 1-based `id` and a `name` label built from `PARCELID`;
    * passes every geometry through a zero-width buffer;
    * stores each parcel's centroid as `latitude` / `longitude`, computed in
      EPSG:3857 and converted back to EPSG:4326.

    Returns:
        A copy of the frame restricted to the identifier, centroid, geometry,
        attribute and address columns.

    If the parquet file cannot be read, the error is reported with `st.error`
    and the script run is halted with `st.stop`.
    """
    try:
        gdf = gpd.read_parquet("hf://datasets/ProjectMultiplexCoop/PropertyBoundaries/Property_Boundaries_4326.parquet")
    except Exception as e:
        st.error(f"Failed to load data from Hugging Face. Please ensure `huggingface_hub`, `geopandas`, `fiona`, and `pyproj` are installed. Error: {e}")
        st.stop()

    # Compiled once here rather than on every row inside the helper.
    area_pattern = re.compile(r'(\d+\.?\d*)\s*sq\.m')

    def parse_stated_area(area_str):
        """Return the number that precedes 'sq.m' in *area_str*, or NaN if absent."""
        if pd.isna(area_str):
            return np.nan
        match = area_pattern.search(str(area_str))
        if match:
            return float(match.group(1))
        return np.nan

    # Lot area in square metres, parsed from the STATEDAREA text.
    gdf['zn_area'] = gdf['STATEDAREA'].apply(parse_stated_area)

    # FEATURE_TYPE stands in for the zoning type.
    gdf['zn_type'] = gdf['FEATURE_TYPE']

    # Synthetic attributes for values the dataset does not provide. A seeded
    # generator keeps them identical across cache refreshes and restarts, so
    # filter results and exports are reproducible.
    num_rows = len(gdf)
    synthetic_seed = 42
    rng = np.random.default_rng(synthetic_seed)
    gdf['fsi_total'] = np.round(rng.uniform(0.5, 3.0, num_rows), 2)
    gdf['prcnt_cver'] = rng.integers(20, 70, num_rows)
    gdf['height_metres'] = np.round(rng.uniform(5, 30, num_rows), 1)
    gdf['stories'] = rng.integers(2, 10, num_rows)

    # Unique ID and a display name.
    gdf['id'] = range(1, num_rows + 1)
    gdf['name'] = gdf['PARCELID'].apply(lambda x: f"Parcel {x}")

    # Zero-width buffer to repair invalid geometries before centroid
    # calculation and plotting.
    gdf['geometry'] = gdf['geometry'].buffer(0)

    # Centroids are computed once in a projected CRS (Web Mercator, EPSG:3857)
    # and then converted back to EPSG:4326 for Folium. Reprojecting the
    # centroid GeoSeries itself keeps the original index, so the assignments
    # below line up row-for-row even when the index is not 0..n-1.
    centroids_projected = gdf.to_crs(epsg=3857).geometry.centroid
    centroids_geographic = centroids_projected.to_crs(epsg=4326)

    gdf['latitude'] = centroids_geographic.y
    gdf['longitude'] = centroids_geographic.x

    # Select and reorder the columns used for display and filtering.
    df_processed = gdf[[
        'id', 'name', 'latitude', 'longitude', 'geometry',
        'zn_type', 'zn_area', 'fsi_total', 'prcnt_cver', 'height_metres', 'stories',
        'PARCELID',  # Original Parcel ID for reference
        'ADDRESS_NUMBER', 'LINEAR_NAME_FULL'  # For detailed address in tooltips
    ]].copy()

    return df_processed

df = load_and_process_data()

# Start from the full dataset; the spatial filter below and the attribute
# filters further down narrow this frame.
filtered_df = df.copy()

# --- 2. Map for Drawing (now in an expander) ---
with st.expander("Draw a Polygon on the Map", expanded=False):
    # Center the map on the mean of the parcel centroids.
    m = folium.Map(location=[df['latitude'].mean(), df['longitude'].mean()], zoom_start=12)

    # Drawing toolbar restricted to polygons; shapes can be removed but not edited.
    draw = Draw(
        export=True,
        filename="drawn_polygon.geojson",
        position="topleft",
        draw_options={
            "polyline": False, "rectangle": False, "circlemarker": False,
            "circle": False, "marker": False,
            "polygon": {
                "allowIntersection": False,
                "drawError": {"color": "#e0115f", "message": "Oups!"},
                "shapeOptions": {"color": "#ef233c", "fillOpacity": 0.5},
            },
        },
        edit_options={"edit": False, "remove": True},
    )
    m.add_child(draw)

    st.info("Draw a polygon on the map to spatially filter properties. The filtered results will appear below.")
    # streamlit-folium reports drawn shapes under the "all_drawings" key, so
    # that is the key to request and to read.
    # NOTE(review): confirm the key name against the installed streamlit-folium version.
    output = st_folium(m, width=1000, height=600, returned_objects=["all_drawings"])

    polygon_drawn = False
    shapely_polygon = None
    polygon_coords = None

    # Tolerates a missing/None return value and a missing/None key.
    drawn_features = (output or {}).get("all_drawings") or []
    polygons = [
        feature["geometry"]["coordinates"]
        for feature in drawn_features
        if feature["geometry"]["type"] == "Polygon"
    ]

    if polygons:
        # Exterior ring of the most recently drawn polygon. Drawn shapes are
        # GeoJSON, so every vertex is [lon, lat]; the ring is kept in that order.
        polygon_coords = polygons[-1][0]
        # Shapely takes (x, y) = (lon, lat), which is already the GeoJSON order.
        shapely_polygon = Polygon([(vertex[0], vertex[1]) for vertex in polygon_coords])
        polygon_drawn = True

        # Keep the parcels whose centroid lies inside the polygon. The test is
        # vectorized over all centroids instead of a Python call per row.
        centroid_points = gpd.GeoSeries(
            gpd.points_from_xy(df['longitude'], df['latitude']),
            index=df.index,
        )
        filtered_df = df[centroid_points.within(shapely_polygon)].copy()
        st.success(f"Initially filtered {len(filtered_df)} properties within the drawn polygon.")
    else:
        st.info("No polygon drawn yet. Draw a polygon on the map to spatially filter properties.")

# --- 3. Attribute Filtering Form ---
st.subheader("Filter Property Attributes")

# First entry of the zoning selectbox; choosing it applies no zoning filter.
# NOTE(review): the label mentions residential zones (0, 101, 6) but the code
# treats it as "all types" - confirm the intended behavior.
ALL_ZONING_OPTION = 'All Residential Zoning (0, 101, 6)'

with st.form("attribute_filters"):
    col1, col2 = st.columns(2)

    with col1:
        # Missing values are dropped so sorted() never compares NaN/None with strings.
        zoning_values = sorted(df['zn_type'].dropna().unique().tolist())
        all_zoning_types = [ALL_ZONING_OPTION] + zoning_values
        selected_zn_type = st.selectbox("Zoning Type", all_zoning_types, key="zn_type_select")

        # Smallest known lot area, used as both the floor and the default;
        # falls back to 0 when no area could be parsed.
        smallest_area = df['zn_area'].min()
        area_floor = float(smallest_area if pd.notna(smallest_area) else 0)
        min_zn_area = st.number_input(
            "Minimum Lot Area in Sq Metres",
            min_value=area_floor,
            value=area_floor,
            step=100.0,
            key="zn_area_input"
        )

        min_fsi_total = st.number_input("Minimum Floor Space Index (FSI)", min_value=0.0, value=0.0, step=0.1, format="%.2f", key="fsi_total_input")

    with col2:
        max_prcnt_cver = st.number_input("Maximum Building Percent Coverage (%)", min_value=0, value=100, step=1, key="prcnt_cver_input")

        height_stories_option = st.radio(
            "Filter by",
            ("Height", "Stories"),
            index=0,
            key="height_stories_radio"
        )

        # Both inputs are always rendered. Widgets inside a form do not rerun
        # the script when changed, so an input shown only for the selected
        # radio option would not appear until after an extra submit. The radio
        # decides which of the two values is applied.
        min_height_value = st.number_input("Minimum Height in Metres", min_value=0.0, value=0.0, step=0.1, format="%.1f", key="height_input")
        min_stories_value = st.number_input("Minimum Stories", min_value=0, value=0, step=1, key="stories_input")

    submitted = st.form_submit_button("Apply Attribute Filters")

    # `submitted` is True only on the rerun triggered by the button. Recording
    # it in session state keeps the filters active on later reruns (for
    # example after drawing on the map) instead of silently dropping them.
    if submitted:
        st.session_state["attribute_filters_applied"] = True

    if st.session_state.get("attribute_filters_applied", False):
        if selected_zn_type != ALL_ZONING_OPTION:
            filtered_df = filtered_df[filtered_df['zn_type'] == selected_zn_type]

        # Parcels with no parsed area are treated as having area 0.
        filtered_df = filtered_df[filtered_df['zn_area'].fillna(0) >= min_zn_area]

        if min_fsi_total > 0:
            filtered_df = filtered_df[filtered_df['fsi_total'] >= min_fsi_total]

        if max_prcnt_cver < 100:
            filtered_df = filtered_df[filtered_df['prcnt_cver'] <= max_prcnt_cver]

        if height_stories_option == "Height" and min_height_value > 0:
            filtered_df = filtered_df[filtered_df['height_metres'] >= min_height_value]
        elif height_stories_option == "Stories" and min_stories_value > 0:
            filtered_df = filtered_df[filtered_df['stories'] >= min_stories_value]

        st.success(f"Applied attribute filters. Total properties after all filters: {len(filtered_df)}")
    else:
        st.info("Adjust filters and click 'Apply Attribute Filters'.")


# --- 4. Display Filtered Data on a New Map and as a Table ---
with st.expander("Filtered Properties Display", expanded=True):
    if not filtered_df.empty:
        # Bounding box of the filtered centroids, used to pick center and zoom.
        min_lat, max_lat = filtered_df['latitude'].min(), filtered_df['latitude'].max()
        min_lon, max_lon = filtered_df['longitude'].min(), filtered_df['longitude'].max()

        if min_lat == max_lat and min_lon == max_lon:  # Single point case
            filtered_map_center = [min_lat, min_lon]
            filtered_map_zoom = 18
        else:
            filtered_map_center = [filtered_df['latitude'].mean(), filtered_df['longitude'].mean()]
            # Heuristic: the larger side of the bounding box (in degrees) picks the zoom.
            extent = max(max_lat - min_lat, max_lon - min_lon)
            if extent < 0.001:
                filtered_map_zoom = 18
            elif extent < 0.01:
                filtered_map_zoom = 16
            elif extent < 0.1:
                filtered_map_zoom = 14
            else:
                filtered_map_zoom = 12

        filtered_m = folium.Map(location=filtered_map_center, zoom_start=filtered_map_zoom)

        # Re-draw the user's polygon on the results map if one exists.
        if polygon_drawn and polygon_coords:
            # The drawn ring is GeoJSON ([lon, lat] per vertex) while
            # folium.Polygon expects (lat, lon), so each pair is swapped.
            folium.Polygon(
                locations=[(vertex[1], vertex[0]) for vertex in polygon_coords],
                color="#ef233c",
                fill=True,
                fill_color="#ef233c",
                fill_opacity=0.5
            ).add_to(filtered_m)

        # GeoDataFrame view of the filtered rows for GeoJSON export.
        filtered_gdf = gpd.GeoDataFrame(filtered_df, geometry='geometry')

        # --- Apply map display limit ---
        features_to_plot_count = len(filtered_gdf)
        if features_to_plot_count > MAX_MAP_FEATURES_DISPLAY:
            st.warning(f"Displaying a random sample of {MAX_MAP_FEATURES_DISPLAY} properties on the map (out of {features_to_plot_count} total filtered) to prevent performance issues.")
            # Fixed random_state so the same sample is shown on every rerun.
            filtered_gdf_for_map = filtered_gdf.sample(MAX_MAP_FEATURES_DISPLAY, random_state=42)
        else:
            filtered_gdf_for_map = filtered_gdf

        # Parcel outlines as a GeoJSON layer with an attribute tooltip.
        folium.GeoJson(
            filtered_gdf_for_map.to_json(),
            style_function=lambda x: {
                'fillColor': 'green',
                'color': 'darkgreen',
                'weight': 1,
                'fillOpacity': 0.7
            },
            tooltip=folium.GeoJsonTooltip(
                fields=['PARCELID', 'zn_type', 'zn_area', 'fsi_total', 'prcnt_cver', 'height_metres', 'stories', 'ADDRESS_NUMBER', 'LINEAR_NAME_FULL'],
                aliases=['Parcel ID:', 'Zoning Type:', 'Lot Area (m²):', 'FSI:', 'Coverage (%):', 'Height (m):', 'Stories:', 'Address Num:', 'Street:'],
                localize=True
            )
        ).add_to(filtered_m)

        # The results map is display-only: returning no interaction data keeps
        # panning and zooming from triggering a script rerun.
        st_folium(filtered_m, width=1000, height=500, returned_objects=[])

        st.subheader("Filtered Properties Table")
        display_cols = ['PARCELID', 'zn_type', 'zn_area', 'fsi_total', 'prcnt_cver', 'height_metres', 'stories', 'ADDRESS_NUMBER', 'LINEAR_NAME_FULL']

        if len(filtered_df) > MAX_ROWS_DATAFRAME_DISPLAY:
            st.warning(f"Displaying only the first {MAX_ROWS_DATAFRAME_DISPLAY} rows of the filtered data ({len(filtered_df)} total properties). Download the full dataset below.")
            st.dataframe(filtered_df[display_cols].head(MAX_ROWS_DATAFRAME_DISPLAY))
        else:
            st.dataframe(filtered_df[display_cols])

        # --- 5. Export Data Button ---
        # The export holds every filtered row and column, not just the displayed subset.
        csv = filtered_df.to_csv(index=False).encode('utf-8')
        st.download_button(
            label="Export Full Filtered Data to CSV",
            data=csv,
            file_name="multiplex_coop_filtered_properties.csv",
            mime="text/csv",
        )

    else:
        st.warning("No properties match the current filters. Adjust your criteria or draw a polygon on the map.")

# --- Footer: MessageSizeError guidance and the synthetic-data disclaimer ---
troubleshooting_note = """
    **Troubleshooting Large Data:**
    If you still encounter a `MessageSizeError` despite the display limits,
    it means the data size still exceeds Streamlit's internal limit, or the sampled data is still too complex.
    You can try decreasing `MAX_MAP_FEATURES_DISPLAY` and `MAX_ROWS_DATAFRAME_DISPLAY` further.
    Alternatively, you can increase Streamlit's default message size limit by adding
    `server.maxMessageSize = <size_in_mb>` (e.g., `server.maxMessageSize = 500`)
    to your Streamlit `config.toml` file.
    However, be aware that increasing this limit can lead to longer loading times and higher
    memory consumption in your browser and on the Streamlit server.
    """
synthetic_data_note = "This app demonstrates spatial and attribute filtering on the ProjectMultiplexCoop/PropertyBoundaries dataset from Hugging Face. FSI, Building Coverage, Height, and Stories are synthetic for demonstration."

# A horizontal rule, then the two notes, each as its own markdown element.
for footer_text in ("---", troubleshooting_note, synthetic_data_note):
    st.markdown(footer_text)