#
# Install Python libraries if necessary
# ----------------------------------------
#%pip install geopandas pandas matplotlib seaborn pygris
#%pip install "folium>=0.12" matplotlib mapclassify

#
# Import libraries
# ----------------------------------------
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

#
# Pull shapefiles
# ----------------------------------------

# Census geometries for California, downloaded through pygris
from pygris import places, tracts

# Options shared by both downloads:
#   cb=True    -> cartographic boundary files, which are simplified and load/process faster
#   cache=True -> makes the data easier to load if we call again

# tracts
ca_tracts = tracts(state="CA", cb=True, year=2022, cache=True)

# places
ca_places = places(state="CA", cb=True, year=2022, cache=True)

# Output:
# Using FIPS code '06' for input 'CA'
# Using FIPS code '06' for input 'CA'

#
# View tracts
# ----------------------------------------

# Coordinates for San Francisco (approximate center), as [latitude, longitude]
sf_coords = [37.7749, -122.4194]

# Plot interactive map of all California tracts, zoomed into San Francisco.
# NOTE: explore() draws with folium/Leaflet, so the matplotlib-style `alpha`
# keyword has no effect on transparency; fill opacity goes through `style_kwds`.
ca_tracts.explore(
    color="blue",                     # set color to blue
    style_kwds={"fillOpacity": 0.4},  # make face color somewhat transparent
    location=sf_coords,               # location the map is centered on
    zoom_start=11,                    # adjust zoom level as needed
)

#
# View places
# ----------------------------------------

# Plot interactive map of all California places, zoomed into San Francisco.
# NOTE: explore() draws with folium/Leaflet, so the matplotlib-style `alpha`
# keyword has no effect on transparency; fill opacity goes through `style_kwds`.
ca_places.explore(
    color="red",                      # set color to red
    style_kwds={"fillOpacity": 0.4},  # make face color somewhat transparent
    location=sf_coords,               # location of focus
    zoom_start=11,                    # adjust zoom level as needed
)

#
# Subset tract data to city boundary of San Francisco
# ----------------------------------------

# 1. prep
# ---------
# get only San Francisco place boundary for spatial join
san_fran_place = ca_places[ca_places["NAME"] == "San Francisco"]

# choose a projected CRS for California (meters)
proj_crs = "EPSG:3310"  # California Albers

# reproject both shapefiles into same projected CRS
tracts_proj = ca_tracts.to_crs(proj_crs)
san_fran_proj = san_fran_place.to_crs(proj_crs)

# create centroids in projected CRS; they live in their own column so the
# tract polygons stay available in "geometry"
tracts_proj["centroid"] = tracts_proj.centroid

# 2. spatial join
# ---------
# Join on tract centroids: a tract is kept only when its centroid lies within
# the San Francisco place boundary.
san_fran_tracts = gpd.sjoin(
    tracts_proj.set_geometry("centroid"),  # use centroids for join
    san_fran_proj,
    predicate="within"
)

# Make the tract polygons the active geometry again and discard the helper
# centroid column (otherwise centroids would be what gets mapped). Switching
# by column name avoids dropping the polygons and re-attaching them by index.
san_fran_tracts = san_fran_tracts.set_geometry("geometry").drop(columns="centroid")

# 3. cleaning
# ---------
# remove Farallon Islands which are 30 miles off shore and not necessary for most demo calculations
# ("GEOID_left" is the tract GEOID; sjoin suffixes it because both inputs carry a GEOID column)
san_fran_tracts = san_fran_tracts[san_fran_tracts["GEOID_left"] != "06075980401"]

# 4. plot
# ---------
# explore() draws with folium/Leaflet, so outline color and transparency are
# passed through `style_kwds` rather than matplotlib's `edgecolor` / `alpha`.
sf_map = san_fran_tracts.explore(
    color="purple",                                     # fill color
    style_kwds={"color": "black", "fillOpacity": 0.4},  # black outline, semi-transparent fill
)
sf_map

Libraries and environments¶

1. Import Python libraries¶

2. Get census tract and census place data¶

3. View tracts shapefile¶

4. View place shapefile¶

5. Spatially join tract and place data¶