Data source: American Community Survey 2012 and 2022 and Passenger Rail Stations 2019.

Code
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, cache = FALSE, fig.show = "asis",
    fig.align = "center") 
library(tidyverse)
library(tidycensus)
library(sf)
library(kableExtra)
library(gridExtra)
library(cowplot)
library(geosphere)
library(dplyr)
options(scipen=999) # a penalty for printing scientific notation
options(tigris_class = "sf") # tells the tigris package to return spatial data as an sf object

source("https://raw.githubusercontent.com/urbanSpatial/Public-Policy-Analytics-Landing/master/functions.r")
palette5 <- c("#f0f9e8","#bae4bc","#7bccc4","#43a2ca","#0868ac")
# Load API
census_api_key("e62580f74ef222beadd9dd2fbaf48ff130b31c4a", overwrite = TRUE)

# 2012 Census Data
tracts12 <- 
  get_acs(geography = "tract", 
          variables = c("B25026_001E","B19013_001E",
                        "B25058_001E","B25002_001E"), 
          year=2012, 
          state=06, 
          county=075, 
          geometry=TRUE, 
          output="wide") %>%
  st_transform('EPSG:7132') %>%
  filter(!str_detect(GEOID, "06075980401|06075990100|06075017902")) %>%
  rename(TotalPop = B25026_001E, 
         MedHHInc = B19013_001E, 
         MedRent = B25058_001E,
         TotalHU = B25002_001E) %>%
  dplyr::select(-NAME, -starts_with("B")) %>%
  mutate(year = "2012")
# 2022 Census Data
tracts22 <- 
  get_acs(geography = "tract", 
          variables = c("B25026_001E","B19013_001E",
                        "B25058_001E","B25002_001E"), 
          year=2022, 
          state=06, 
          county=075, 
          geometry=TRUE, 
          output="wide") %>%
  st_transform('EPSG:7132') %>%
  filter(!str_detect(GEOID, "06075980401|06075990100|06075017902|06075017903")) %>%
  rename(TotalPop = B25026_001E, 
         MedHHInc = B19013_001E, 
         MedRent = B25058_001E,
         TotalHU = B25002_001E) %>%
  dplyr::select(-NAME, -starts_with("B")) %>%
  mutate(year = "2022")

# combine the data
allTracts <- rbind(tracts12,tracts22)

# Passenger Rail Stations (2019)
Muni_Metro <- st_read("https://opendata.arcgis.com/datasets/efd75b7bb3c04dbda06c6e7cd73e9336_0.geojson") %>%
  st_transform(st_crs(tracts12))

# Select what I need
Muni_Metro <- st_filter(Muni_Metro, st_union(tracts12)) %>%
  mutate(Line = "Muni_Metro") %>%
  dplyr::select(station_na, mode, Line)
# 0.5 mile buffer
stopBuffer <- st_buffer(Muni_Metro, 2640)

stopUnion <- st_union(st_buffer(Muni_Metro, 2640))

muniBuffers <- 
  rbind(
     stopBuffer %>%
      mutate(Legend = "Buffer") %>%
      dplyr::select(Legend),
     stopUnion %>%
      st_sf() %>%
      mutate(Legend = "Unioned Buffer"))
# Using the `sf` Package for Spatial operations
buffer <- filter(muniBuffers, Legend=="Unioned Buffer")

# Spatial intersection with `st_centroid()` on polygon centroids
# 2012
selectCentroids.12 <-
  st_centroid(tracts12)[buffer,] %>%
  st_drop_geometry() %>%
  left_join(., dplyr::select(tracts22, GEOID), by = "GEOID") %>%
  st_sf() %>%
  dplyr::select(TotalPop) %>%
  mutate(Selection_Type = "Select by Centroids_2012")
# 2022
selectCentroids.22 <-
  st_centroid(tracts22)[buffer,] %>%
  st_drop_geometry() %>%
  left_join(., dplyr::select(tracts22, GEOID), by = "GEOID") %>%
  st_sf() %>%
  dplyr::select(TotalPop) %>%
  mutate(Selection_Type = "Select by Centroids_2022")

allTracts.group <- 
  rbind(
    st_centroid(allTracts)[buffer,] %>%
      st_drop_geometry() %>%
      left_join(allTracts) %>%
      st_sf() %>%
      mutate(TOD = "TOD"),
    st_centroid(allTracts)[buffer, op = st_disjoint] %>%
      st_drop_geometry() %>%
      left_join(allTracts) %>%
      st_sf() %>%
      mutate(TOD = "Non-TOD")) %>%
  mutate(MedRent.inf = ifelse(year == "2012", MedRent * 1.24, MedRent)) 

# Compute MRB
muni_MRB <- multipleRingBuffer(st_union(Muni_Metro),
                                maxDistance = 18480,
                                interval =  2640)

# Summarizing data
# 2012 data
tracts12.rings <- tracts12 %>% 
  select(GEOID, year) %>% 
  st_centroid() %>% 
  st_join(muni_MRB, join = st_intersects) %>%
  st_drop_geometry() %>%
  left_join(dplyr::select(tracts12, GEOID, MedRent, year), 
            by=c("GEOID"="GEOID", "year"="year")) %>%
  st_sf() %>%
  mutate(distance = distance / 5280) #convert to miles

tracts12.rings.summary <- st_drop_geometry(tracts12.rings) %>%
    group_by(distance, year) %>%
    summarize(Mean_Rent = mean(MedRent, na.rm=T))
# 2022 data
tracts22.rings <- tracts22 %>% 
  select(GEOID, year) %>% 
  st_centroid() %>% 
  st_join(muni_MRB, join = st_intersects) %>%
  st_drop_geometry() %>%
  left_join(dplyr::select(tracts22, GEOID, MedRent, year), 
            by=c("GEOID"="GEOID", "year"="year")) %>%
  st_sf() %>%
  mutate(distance = distance / 5280) #convert to miles

tracts22.rings.summary <- st_drop_geometry(tracts22.rings) %>%
    group_by(distance, year) %>%
    summarize(Mean_Rent = mean(MedRent, na.rm=T))

# combine data
tracts.rings.summary <- rbind(tracts12.rings.summary, tracts22.rings.summary)

allTracts.Summary <- 
  st_drop_geometry(allTracts.group) %>%
  group_by(year, TOD) %>%
  summarize(Rent = round(mean(MedRent, na.rm = T), 2),
            Population = round(mean(TotalPop, na.rm = T), 0),
            Income = round(mean(MedHHInc, na.rm = T), 2),
            Housing = round(mean(TotalHU, na.rm = T), 0)) %>%
  ungroup()

allTracts.Diff <- allTracts.Summary %>%
  pivot_wider(names_from = TOD, values_from = c(Rent, Population, Income, Housing))
allTracts.Diff <- allTracts.Diff %>%
  mutate(Diff_Rent = 100*(Rent_TOD - `Rent_Non-TOD`)/`Rent_Non-TOD`,
         Diff_Population = 100*(Population_TOD - `Population_Non-TOD`)/`Population_Non-TOD`,
         Diff_Income = 100*(Income_TOD - `Income_Non-TOD`)/`Income_Non-TOD`,
         Diff_Housing = 100*(Housing_TOD - `Housing_Non-TOD`)/`Housing_Non-TOD`)%>%
  select(year, starts_with("Diff_")) %>%
  rename(Rent = Diff_Rent,
         Population = Diff_Population,
         Income = Diff_Income,
         Housing = Diff_Housing)

allTracts.Summary.Diff <- bind_rows(
  allTracts.Summary,
  allTracts.Diff %>%
    mutate(TOD = "Difference(%)"))

# relate centroids of tracts to the buffer
Centroids.12 <- st_centroid(tracts12)[buffer,] %>%
                            dplyr::select(-MedHHInc, -TotalHU)
buffer_centroids.12 <- st_join(stopBuffer %>%
                            mutate(ID = seq(1, nrow(stopBuffer))) %>%
                            dplyr::select(-mode),
                          Centroids.12,
                          join = st_intersects)
Centroids.22 <- st_centroid(tracts22)[buffer,] %>%
                            dplyr::select(-MedHHInc, -TotalHU)
buffer_centroids.22 <- st_join(stopBuffer %>%
                            mutate(ID = seq(1, nrow(stopBuffer))) %>%
                            dplyr::select(-mode),
                          Centroids.22,
                          join = st_intersects)
# calculate pop and rent of each station
Muni_centroids.12 <- buffer_centroids.12 %>%
  group_by(ID) %>%
  summarise(Pop = mean(TotalPop, na.rm = TRUE),
            Rent = mean(MedRent, na.rm = TRUE)) %>%
  mutate(year = "2012")
Muni_centroids.22 <- buffer_centroids.22 %>%
  group_by(ID) %>%
  summarise(Pop = mean(TotalPop, na.rm = TRUE),
            Rent = mean(MedRent, na.rm = TRUE)) %>%
  mutate(year = "2022")

# relate the results to rail stations
Muni_centroids.12 <- cbind(Muni_Metro %>%
                          dplyr::select(-Line),
                        Muni_centroids.12 %>%
                          st_drop_geometry() %>%
                          dplyr::select(Pop, Rent, year))
Muni_centroids.22 <- cbind(Muni_Metro %>%
                          dplyr::select(-Line),
                        Muni_centroids.22 %>%
                          st_drop_geometry() %>%
                          dplyr::select(Pop, Rent, year))
Muni_centroids <- rbind(Muni_centroids.12,Muni_centroids.22)

Clustered residential neighborhoods connected by highway roads, with long daily car commutes—an all-too-familiar scene in California, especially around San Francisco. When the weekend comes, people flock to beaches, Fisherman’s Wharf, or state parks, avoiding downtown areas altogether. With wealthier residents moving out of city centers, it’s easy to assume downtowns are in decline. Adding to this, public transit usage is falling as more cars flood the roads. For many housing developers, investing far from railway stations and downtown areas seems like the safer bet.

But what if that conventional wisdom is leading you away from a hidden opportunity?

Code
ggplot() +
    geom_sf(data=muni_MRB, aes(fill = distance), color = "#708090", alpha = 0.3, show.legend = FALSE) +
    scale_fill_continuous(low = "#0868ac", high = "#FAF9F6")+
    geom_sf(data=st_union(tracts22), fill= "transparent", color = "black", size = 2) +
    geom_sf(data=Muni_Metro, aes(colour = mode), show.legend = TRUE,
           size= 1.5) + 
  scale_colour_manual(values = c("#40A2E3","#C9E6F0","#0D9276")) +
  geom_sf(data = buffer, fill = "transparent", color = "#0868ac") +
    labs(caption = "*TOD Areas in San Francisco*\nDistance in-between each buffer is 0.5 mile.") +
  guides(fill = "none",
         color = guide_legend(override.aes = list(size = 2))) +
    theme(legend.position = c(.2,.85),
          legend.background = element_blank(),
          legend.title = element_blank(),
          plot.title = element_text(size = 18),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        panel.background = element_blank(),
        plot.subtitle = element_text(size = 10, face = "bold"))

Before diving in, let’s clarify an important concept: Transportation-Oriented Development (TOD). TOD is a strategic urban planning approach that promotes vibrant, mixed-use neighborhoods centered around public transit hubs. These areas include housing, offices, retail, and restaurants, creating dynamic communities where residents can live, work, and shop close to transit. A TOD area is typically defined as the region within 0.5 miles of a transit station. My research examines these areas in San Francisco over a 10-year period (2012–2022), uncovering socio-economic trends that reveal a surprising investment opportunity.

Reading Between the Lines

Code
diff <- allTracts.Summary %>%
  # dplyr::select(-Rent, -Income) %>%
  gather(Variable, Value, -year, -TOD) %>%
  pivot_wider(names_from = TOD, values_from = Value) %>%
  mutate(
    diff = `Non-TOD` - TOD,      
    ymin = pmin(`Non-TOD`, TOD), 
    ymax = pmax(`Non-TOD`, TOD)
  )
diff2 <- allTracts.Summary.Diff %>%
  unite(year.TOD, year, TOD, sep = ": ", remove = T) %>%
  gather(Variable, Value, -year.TOD) %>%
  mutate(Value = round(Value, 2)) %>%
  filter(year.TOD %in% c("2012: Difference(%)","2022: Difference(%)")) %>%
  rename(year = year.TOD, difference = Value) %>%
  mutate(year = case_when(
    year == "2012: Difference(%)" ~ 2012,
    year == "2022: Difference(%)" ~ 2022
  ))
diff3 <- diff %>% filter(Variable %in% c("Housing","Population")) %>%
  mutate(color = case_when(
    diff > 0 ~ "Less",
    diff < 0 ~ "More"
  ))
plot <- allTracts.Summary %>%
  # dplyr::select(-Rent, -Income) %>%
  gather(Variable, Value, -year, -TOD)

ggplot(data = plot, aes(year, Value, , fill = TOD)) +
  geom_bar(stat = "identity", position = "dodge", width = .5, alpha = 0.5) +
  facet_wrap(~Variable, scales = "free", nrow = 2) +
    geom_rect(
    data = diff3,
    aes(xmin = as.numeric(as.factor(year))-0.1,
        xmax = as.numeric(as.factor(year)) + 0.1,
        ymin = ymin, ymax = ymax, fill = color),
    inherit.aes = FALSE, alpha = 0.4
  ) +
  geom_line(
    data = diff2 %>% filter(Variable == "Rent"),
    aes(x = as.numeric(as.factor(year)),
        y = difference*100 + 1500,
        group = Variable), inherit.aes = FALSE,
    color = "#0868ac", size = 1
  ) +
  geom_line(
    data = diff2 %>% filter(Variable == "Income"),
    aes(x = as.numeric(as.factor(year)),
        y = difference*100 + 55000,
        group = Variable), inherit.aes = FALSE,
    color = "#0868ac", size = 1
  ) +
  geom_text(
    data = diff2 %>% filter(Variable == "Rent"),
    aes(x = as.numeric(as.factor(year)),
      y = difference*100 + 1500,
      label = paste0(difference, "%")), inherit.aes = FALSE,
    color = "#0868ac", vjust = -0.5
  ) +
  geom_text(
    data = diff2 %>% filter(Variable == "Income"),
    aes(x = as.numeric(as.factor(year)),
      y = difference*100 + 55000,
      label = paste0(difference, "%")), inherit.aes = FALSE,
    color = "#0868ac", vjust = -0.5
  ) +
  scale_fill_manual(values = c("Less" = "#0D9276", "More" = "#0868ac", 
               "Non-TOD" = "#bae4bc", "TOD" = "#43a2ca")) +
  scale_y_continuous(expand = expansion(0))+
  labs(caption = "*Housing & Population | Income & Rent Differences*\nPopulation delinces a lot but housing units are higher in TOD areas;\nthe differences of income and rent between TOD and non-TOD areas are getting smaller.") +
  theme(legend.position = "top",
        legend.title = element_blank(),
        plot.title = element_text(size = 18),
        plot.subtitle = element_text(size = 10, face = "bold"),
        axis.ticks.y=element_blank(),
        strip.text.x = element_text(size = 10),
        axis.title = element_blank(),
        axis.text.y = element_blank(),
        axis.line.x = element_line(),
        panel.background = element_blank(),
        panel.border = element_blank(),
        strip.background = element_rect(fill = "transparent", color = "transparent")) 

Digging deeper into the data, two trends in TOD areas stand out:

1. Population and Housing Shifts:

Between 2012 and 2022, TOD areas experienced a decrease in both housing units and population, mirroring the decline seen in some urban areas. However, the population decline outpaced the reduction in housing units, indicating a shift toward smaller households. This means more single or two-person households are moving into TOD areas—a prime demographic for developers to target.

2. Income and Rent Trends:

Non-TOD areas have historically attracted higher-income households, but the income gap between TOD and non-TOD areas is narrowing due to the percentage of difference. Some high-income households are beginning to migrate to TOD areas, signaling their growing appeal. As income levels rise in these zones, rent growth is likely to follow, creating an opportunity for long-term value appreciation.

Avoid the Tourist Traps

Code
# Create bounding box using st_bbox
bbox_coords <- st_bbox(tracts12)

# Adjust the bounding box based on your requirements (here 50% and 80% of xmax, ymax)
xmin <- 0.88 * bbox_coords["xmax"]
xmax <- 0.97 * bbox_coords["xmax"]
ymin <- 0.8 * bbox_coords["ymax"]
ymax <- 0.93 * bbox_coords["ymax"]

# Create a polygon representing the rectangle (bounding box)
bbox_polygon <- st_sfc(st_polygon(list(matrix(c(
  xmin, ymin,   # bottom-left corner
  xmax, ymin,   # bottom-right corner
  xmax, ymax,   # top-right corner
  xmin, ymax,   # top-left corner
  xmin, ymin    # close the polygon
), ncol = 2, byrow = TRUE))), crs = st_crs(tracts12))
ggplot() +
  geom_sf(data = tracts12, fill = "grey95", color = "white")+
  geom_sf(data = tracts22, fill = "grey95", color = "white")+
  geom_sf(data = bbox_polygon, 
    color = NA, fill = "#0868ac", alpha=.3, linetype = "dashed") +
  geom_sf(data = Muni_centroids, aes(size = q5(Pop),color = q5(Pop))) +
  scale_color_manual(values = scales::alpha(palette5, 0.3)) +
  scale_size_manual(values = c(0.5,1.5,2.5,3.5,4.5)) +
  geom_text(data = data.frame(year = "2022", x = xmin, y = ymax, label = "Fisherman’s Wharf, Embarcadero,\nWashington Square ..."),
    aes(x = x, y = y, label = label), 
    color = "#0868ac", size = 4, fontface = "bold", vjust = -0.5, hjust = .65
  ) +
  geom_segment(data = data.frame(year = "2022", x = xmin, y = ymax),
               aes(x = xmin+2000, xend = xmin-8000, y = ymax-5000, yend = ymax), color = "#0868ac", size = 0.8) +
  geom_segment(data = data.frame(year = "2012", x = xmax, y = ymax),
               aes(x = xmax-8000, xend = xmax-3000, y = ymax-2000, yend = ymax+5000), color = "#0868ac", size = 0.8) +
  geom_segment(data = data.frame(year = "2012", x = xmax, y = ymax),
               aes(x = xmax+12000, xend = xmax-3000, y = ymax+5000, yend = ymax+5000), color = "#0868ac", size = 0.8) +
  facet_wrap(~year) +
  labs(caption= "*Population within TOD Areas*\nThe number of the residents in tourism spots decreased significantly from 2012 to 2022.")+
  theme(legend.position = "none",
        legend.background = element_blank(),
        legend.title = element_blank(),
        plot.title = element_text(size = 18),
        strip.text.x = element_text(size = 10),
        axis.title = element_blank(),
        axis.text=element_blank(),
        axis.ticks=element_blank(),
        panel.background = element_blank(),
        plot.subtitle = element_text(size = 10, face = "bold"),
        strip.background = element_rect(fill = "transparent", color = "transparent"))

Investing in TOD areas really sounds like a great deal, isn’t it? But remember to avoid regions dominated by tourism when you choose available land. The population in the northeastern part of San Francisco declined significantly from 2012 to 2022, where many popular tourist attractions are located, such as Fisherman’s Wharf, Embarcadero, and Washington Square. The thriving tourism industry drives away local residents, which is not suitable for developing houses but hotels or short-term rentals.

Why TOD Makes Sense

For housing developers seeking untapped potential, TOD areas offer an attractive combination of affordability, demographic shifts, and future growth prospects. While non-TOD areas currently command higher rents and greater attention, the low cost of entry in TOD zones, coupled with rising interest from higher-income households, sets the stage for a smart, forward-looking investment.

So, is downtown really in decline? Or is this the perfect moment to seize the opportunity in TOD areas before they gain widespread attention? The answer lies in choosing wisely—and acting quickly.