Commit
·
ae1519e
1
Parent(s):
430f057
fixed justice40 stats
Browse files- get_zonal_stats.ipynb +67 -65
get_zonal_stats.ipynb
CHANGED
|
@@ -516,9 +516,7 @@
|
|
| 516 |
" .mutate(geometry = _.geometry.convert(\"ESRI:102039\",\"EPSG:4326\"))\n",
|
| 517 |
" .select(\"justice40\",\"geometry\")\n",
|
| 518 |
" )\n",
|
| 519 |
-
"
|
| 520 |
-
"get_geotiff(gdf,\"ca_justice40.tif\",\"justice40\")\n",
|
| 521 |
-
"\n"
|
| 522 |
]
|
| 523 |
},
|
| 524 |
{
|
|
@@ -528,63 +526,67 @@
|
|
| 528 |
"metadata": {},
|
| 529 |
"outputs": [],
|
| 530 |
"source": [
|
| 531 |
-
"#justice40 is
|
| 532 |
"\n",
|
| 533 |
-
"def big_zonal_stats_binary(vec_file,
|
|
|
|
| 534 |
" gdf = gpd.read_parquet(vec_file)\n",
|
|
|
|
|
|
|
|
|
|
| 535 |
" if gdf.crs is None:\n",
|
| 536 |
" gdf = gdf.set_crs(\"EPSG:4326\")\n",
|
| 537 |
-
"
|
| 538 |
-
"
|
| 539 |
-
"
|
| 540 |
-
"\n",
|
| 541 |
-
"
|
| 542 |
-
"
|
| 543 |
-
"
|
| 544 |
-
"
|
| 545 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 546 |
" \n",
|
| 547 |
-
"
|
| 548 |
-
"
|
| 549 |
-
"
|
| 550 |
-
"
|
| 551 |
-
"\n",
|
| 552 |
-
" def get_stats(geom_slice):\n",
|
| 553 |
-
" geom = [geom_slice.geometry]\n",
|
| 554 |
-
" masked_image, _ = mask(src, geom, crop=True, all_touched=True, nodata=src.nodata)\n",
|
| 555 |
-
" \n",
|
| 556 |
-
" # If the masked area is empty, return None\n",
|
| 557 |
-
" if masked_image.size == 0:\n",
|
| 558 |
-
" return {'percentage_1': None}\n",
|
| 559 |
-
" \n",
|
| 560 |
-
" # Count 1s and calculate percentage\n",
|
| 561 |
-
" count_1 = (masked_image == 1).sum()\n",
|
| 562 |
-
" total_count = (masked_image != src.nodata).sum()\n",
|
| 563 |
-
" \n",
|
| 564 |
-
" # Calculate percentage of justice40 = 1 within the polygon\n",
|
| 565 |
-
" percentage_1 = (count_1 / total_count) * 100 if total_count > 0 else None\n",
|
| 566 |
" \n",
|
| 567 |
-
"
|
| 568 |
-
"\n",
|
| 569 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 570 |
" \n",
|
| 571 |
-
"
|
|
|
|
| 572 |
" return gdf\n",
|
| 573 |
"\n",
|
| 574 |
-
"
|
| 575 |
]
|
| 576 |
},
|
| 577 |
{
|
| 578 |
"cell_type": "code",
|
| 579 |
"execution_count": null,
|
| 580 |
-
"id": "
|
| 581 |
"metadata": {},
|
| 582 |
"outputs": [],
|
| 583 |
"source": [
|
| 584 |
"%%time\n",
|
| 585 |
-
"tif_file = 'ca_justice40.tif'\n",
|
| 586 |
"vec_file = './cpad-stats-temp.parquet'\n",
|
| 587 |
-
"
|
|
|
|
| 588 |
"df.to_parquet(\"cpad-stats-temp.parquet\")\n"
|
| 589 |
]
|
| 590 |
},
|
|
@@ -610,34 +612,34 @@
|
|
| 610 |
"ca = (con\n",
|
| 611 |
" .read_parquet(\"cpad-stats-temp.parquet\")\n",
|
| 612 |
" .cast({\n",
|
| 613 |
-
"
|
| 614 |
-
"
|
| 615 |
-
"
|
| 616 |
-
"
|
| 617 |
-
"
|
| 618 |
" .rename(svi = \"SVI\")\n",
|
| 619 |
" .mutate(\n",
|
| 620 |
" richness=_.richness.round(3),\n",
|
| 621 |
-
"
|
| 622 |
-
"
|
| 623 |
-
"
|
| 624 |
-
"
|
| 625 |
-
"
|
| 626 |
-
"
|
| 627 |
-
"
|
| 628 |
-
"
|
| 629 |
-
"
|
| 630 |
-
"
|
| 631 |
-
"
|
| 632 |
-
"
|
| 633 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 634 |
" )\n",
|
| 635 |
-
" .drop(\"geometry\", \"__index_level_0__\",\"socioeconomic_status\", \"household_char\", \"racial_ethnic_minority\", \"housing_transit\", \n",
|
| 636 |
-
" \"biodiversity_intactness_loss\",\"forest_integrity_loss\",\"crop_reduction\",\"crop_expansion\"\n",
|
| 637 |
-
" ) \n",
|
| 638 |
-
" .join(ca_geom, \"id\", how=\"inner\")\n",
|
| 639 |
-
" )\n",
|
| 640 |
-
"\n",
|
| 641 |
"\n",
|
| 642 |
"ca.head(5).execute()\n"
|
| 643 |
]
|
|
|
|
| 516 |
" .mutate(geometry = _.geometry.convert(\"ESRI:102039\",\"EPSG:4326\"))\n",
|
| 517 |
" .select(\"justice40\",\"geometry\")\n",
|
| 518 |
" )\n",
|
| 519 |
+
"justice40.execute().to_parquet(\"ca_justice40.parquet\")"
|
|
|
|
|
|
|
| 520 |
]
|
| 521 |
},
|
| 522 |
{
|
|
|
|
| 526 |
"metadata": {},
|
| 527 |
"outputs": [],
|
| 528 |
"source": [
|
| 529 |
+
"# #justice40 is either 0 or 1, so we want to get the percentage of polygon where justice40 = 1. \n",
|
| 530 |
"\n",
|
| 531 |
+
"def big_zonal_stats_binary(vec_file, justice40_file, col_name,projected_crs=\"EPSG:3310\"):\n",
|
| 532 |
+
" # Read both vector files as GeoDataFrames\n",
|
| 533 |
" gdf = gpd.read_parquet(vec_file)\n",
|
| 534 |
+
" justice40_gdf = gpd.read_parquet(justice40_file)\n",
|
| 535 |
+
" \n",
|
| 536 |
+
" # Set CRS if not already set (assuming both should be in EPSG:4326, modify if needed)\n",
|
| 537 |
" if gdf.crs is None:\n",
|
| 538 |
" gdf = gdf.set_crs(\"EPSG:4326\")\n",
|
| 539 |
+
" if justice40_gdf.crs is None:\n",
|
| 540 |
+
" justice40_gdf = justice40_gdf.set_crs(\"EPSG:4326\")\n",
|
| 541 |
+
" # Ensure both GeoDataFrames are in the same CRS and reproject to a projected CRS for area calculations\n",
|
| 542 |
+
" gdf = gdf.to_crs(projected_crs)\n",
|
| 543 |
+
" justice40_gdf = justice40_gdf.to_crs(projected_crs)\n",
|
| 544 |
+
" \n",
|
| 545 |
+
" # Ensure both GeoDataFrames are in the same CRS\n",
|
| 546 |
+
" gdf = gdf.to_crs(justice40_gdf.crs)\n",
|
| 547 |
+
" \n",
|
| 548 |
+
" # Filter justice40 polygons where justice40 == 1\n",
|
| 549 |
+
" justice40_gdf = justice40_gdf[justice40_gdf['justice40'] == 1].copy()\n",
|
| 550 |
+
" \n",
|
| 551 |
+
" # Prepare a list to hold percentage of justice40 == 1 for each polygon\n",
|
| 552 |
+
" percentages = []\n",
|
| 553 |
+
" \n",
|
| 554 |
+
" # Iterate over each polygon in the main GeoDataFrame\n",
|
| 555 |
+
" for geom in gdf.geometry:\n",
|
| 556 |
+
" # Find intersecting justice40 polygons\n",
|
| 557 |
+
" justice40_intersections = justice40_gdf[justice40_gdf.intersects(geom)].copy()\n",
|
| 558 |
" \n",
|
| 559 |
+
" # Calculate the intersection area\n",
|
| 560 |
+
" if not justice40_intersections.empty:\n",
|
| 561 |
+
" justice40_intersections['intersection'] = justice40_intersections.intersection(geom)\n",
|
| 562 |
+
" total_intersection_area = justice40_intersections['intersection'].area.sum()\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 563 |
" \n",
|
| 564 |
+
" # Calculate percentage based on original polygon's area\n",
|
| 565 |
+
" percentage_1 = (total_intersection_area / geom.area) \n",
|
| 566 |
+
" else:\n",
|
| 567 |
+
" percentage_1 = 0.0 # No intersection with justice40 == 1 polygons\n",
|
| 568 |
+
" \n",
|
| 569 |
+
" # Append result\n",
|
| 570 |
+
" percentages.append(percentage_1)\n",
|
| 571 |
" \n",
|
| 572 |
+
" # Add results to the original GeoDataFrame\n",
|
| 573 |
+
" gdf[col_name] = percentages\n",
|
| 574 |
" return gdf\n",
|
| 575 |
"\n",
|
| 576 |
+
"\n"
|
| 577 |
]
|
| 578 |
},
|
| 579 |
{
|
| 580 |
"cell_type": "code",
|
| 581 |
"execution_count": null,
|
| 582 |
+
"id": "fe80fc28-73ce-4a26-9925-851c2798e467",
|
| 583 |
"metadata": {},
|
| 584 |
"outputs": [],
|
| 585 |
"source": [
|
| 586 |
"%%time\n",
|
|
|
|
| 587 |
"vec_file = './cpad-stats-temp.parquet'\n",
|
| 588 |
+
"\n",
|
| 589 |
+
"df = big_zonal_stats_binary(vec_file, \"ca_justice40.parquet\", col_name=\"percent_disadvantaged\")\n",
|
| 590 |
"df.to_parquet(\"cpad-stats-temp.parquet\")\n"
|
| 591 |
]
|
| 592 |
},
|
|
|
|
| 612 |
"ca = (con\n",
|
| 613 |
" .read_parquet(\"cpad-stats-temp.parquet\")\n",
|
| 614 |
" .cast({\n",
|
| 615 |
+
" \"crop_expansion\": \"int64\",\n",
|
| 616 |
+
" \"crop_reduction\": \"int64\",\n",
|
| 617 |
+
" \"manageable_carbon\": \"int64\",\n",
|
| 618 |
+
" \"irrecoverable_carbon\": \"int64\"\n",
|
| 619 |
+
" })\n",
|
| 620 |
" .rename(svi = \"SVI\")\n",
|
| 621 |
" .mutate(\n",
|
| 622 |
" richness=_.richness.round(3),\n",
|
| 623 |
+
" rsr=_.rsr.round(3),\n",
|
| 624 |
+
" all_species_rwr=_.all_species_rwr.round(3),\n",
|
| 625 |
+
" all_species_richness=_.all_species_richness.round(3),\n",
|
| 626 |
+
" percent_disadvantaged=(_.percent_disadvantaged).round(3),\n",
|
| 627 |
+
" svi=_.svi.round(3),\n",
|
| 628 |
+
" svi_socioeconomic_status=_.socioeconomic_status.round(3),\n",
|
| 629 |
+
" svi_household_char=_.household_char.round(3),\n",
|
| 630 |
+
" svi_racial_ethnic_minority=_.racial_ethnic_minority.round(3),\n",
|
| 631 |
+
" svi_housing_transit=_.housing_transit.round(3),\n",
|
| 632 |
+
" human_impact=_.human_impact.round(3),\n",
|
| 633 |
+
" deforest_carbon=_.deforest_carbon.round(3),\n",
|
| 634 |
+
" biodiversity_intactness_loss=_.biodiversity_intactness_loss.round(3),\n",
|
| 635 |
+
" forest_integrity_loss=_.forest_integrity_loss.round(3),\n",
|
| 636 |
+
" )\n",
|
| 637 |
+
" .select('established', 'reGAP', 'name', 'access_type', 'manager', 'manager_type', 'Easement', 'Acres', 'id', 'type','richness', \n",
|
| 638 |
+
" 'rsr', 'all_species_rwr', 'all_species_richness','deforest_carbon', 'irrecoverable_carbon', 'manageable_carbon', 'human_impact',\n",
|
| 639 |
+
" 'percent_disadvantaged','svi', 'svi_socioeconomic_status', 'svi_household_char', \n",
|
| 640 |
+
" 'svi_racial_ethnic_minority','svi_housing_transit')\n",
|
| 641 |
+
" .join(ca_geom, \"id\", how=\"inner\")\n",
|
| 642 |
" )\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 643 |
"\n",
|
| 644 |
"ca.head(5).execute()\n"
|
| 645 |
]
|