Data source: American Community Survey 2012 and 2022 and Passenger Rail Stations 2019.
Code
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, cache = FALSE, fig.show = "asis",
fig.align = "center")
library(tidyverse)
library(tidycensus)
library(sf)
library(kableExtra)
library(gridExtra)
library(cowplot)
library(geosphere)
library(dplyr)
options(scipen=999) # a penalty for printing scientific notation
options(tigris_class = "sf") # tells the tigris package to return spatial data as an sf object
source("https://raw.githubusercontent.com/urbanSpatial/Public-Policy-Analytics-Landing/master/functions.r")
palette5 <- c("#f0f9e8","#bae4bc","#7bccc4","#43a2ca","#0868ac")
# Load API
census_api_key("e62580f74ef222beadd9dd2fbaf48ff130b31c4a", overwrite = TRUE)
# 2012 Census Data
tracts12 <-
get_acs(geography = "tract",
variables = c("B25026_001E","B19013_001E",
"B25058_001E","B25002_001E"),
year=2012,
state=06,
county=075,
geometry=TRUE,
output="wide") %>%
st_transform('EPSG:7132') %>%
filter(!str_detect(GEOID, "06075980401|06075990100|06075017902")) %>%
rename(TotalPop = B25026_001E,
MedHHInc = B19013_001E,
MedRent = B25058_001E,
TotalHU = B25002_001E) %>%
dplyr::select(-NAME, -starts_with("B")) %>%
mutate(year = "2012")
# 2022 Census Data
tracts22 <-
get_acs(geography = "tract",
variables = c("B25026_001E","B19013_001E",
"B25058_001E","B25002_001E"),
year=2022,
state=06,
county=075,
geometry=TRUE,
output="wide") %>%
st_transform('EPSG:7132') %>%
filter(!str_detect(GEOID, "06075980401|06075990100|06075017902|06075017903")) %>%
rename(TotalPop = B25026_001E,
MedHHInc = B19013_001E,
MedRent = B25058_001E,
TotalHU = B25002_001E) %>%
dplyr::select(-NAME, -starts_with("B")) %>%
mutate(year = "2022")
# combine the data
allTracts <- rbind(tracts12,tracts22)
# Passenger Rail Stations (2019)
Muni_Metro <- st_read("https://opendata.arcgis.com/datasets/efd75b7bb3c04dbda06c6e7cd73e9336_0.geojson") %>%
st_transform(st_crs(tracts12))
# Select what I need
Muni_Metro <- st_filter(Muni_Metro, st_union(tracts12)) %>%
mutate(Line = "Muni_Metro") %>%
dplyr::select(station_na, mode, Line)
# 0.5 mile buffer
stopBuffer <- st_buffer(Muni_Metro, 2640)
stopUnion <- st_union(st_buffer(Muni_Metro, 2640))
muniBuffers <-
rbind(
stopBuffer %>%
mutate(Legend = "Buffer") %>%
dplyr::select(Legend),
stopUnion %>%
st_sf() %>%
mutate(Legend = "Unioned Buffer"))
# Using the `sf` Package for Spatial operations
buffer <- filter(muniBuffers, Legend=="Unioned Buffer")
# Spatial intersection with `st_centroid()` on polygon centroids
# 2012
selectCentroids.12 <-
st_centroid(tracts12)[buffer,] %>%
st_drop_geometry() %>%
left_join(., dplyr::select(tracts22, GEOID), by = "GEOID") %>%
st_sf() %>%
dplyr::select(TotalPop) %>%
mutate(Selection_Type = "Select by Centroids_2012")
# 2022
selectCentroids.22 <-
st_centroid(tracts22)[buffer,] %>%
st_drop_geometry() %>%
left_join(., dplyr::select(tracts22, GEOID), by = "GEOID") %>%
st_sf() %>%
dplyr::select(TotalPop) %>%
mutate(Selection_Type = "Select by Centroids_2022")
allTracts.group <-
rbind(
st_centroid(allTracts)[buffer,] %>%
st_drop_geometry() %>%
left_join(allTracts) %>%
st_sf() %>%
mutate(TOD = "TOD"),
st_centroid(allTracts)[buffer, op = st_disjoint] %>%
st_drop_geometry() %>%
left_join(allTracts) %>%
st_sf() %>%
mutate(TOD = "Non-TOD")) %>%
mutate(MedRent.inf = ifelse(year == "2012", MedRent * 1.24, MedRent))
# Compute MRB
muni_MRB <- multipleRingBuffer(st_union(Muni_Metro),
maxDistance = 18480,
interval = 2640)
# Summarizing data
# 2012 data
tracts12.rings <- tracts12 %>%
select(GEOID, year) %>%
st_centroid() %>%
st_join(muni_MRB, join = st_intersects) %>%
st_drop_geometry() %>%
left_join(dplyr::select(tracts12, GEOID, MedRent, year),
by=c("GEOID"="GEOID", "year"="year")) %>%
st_sf() %>%
mutate(distance = distance / 5280) #convert to miles
tracts12.rings.summary <- st_drop_geometry(tracts12.rings) %>%
group_by(distance, year) %>%
summarize(Mean_Rent = mean(MedRent, na.rm=T))
# 2022 data
tracts22.rings <- tracts22 %>%
select(GEOID, year) %>%
st_centroid() %>%
st_join(muni_MRB, join = st_intersects) %>%
st_drop_geometry() %>%
left_join(dplyr::select(tracts22, GEOID, MedRent, year),
by=c("GEOID"="GEOID", "year"="year")) %>%
st_sf() %>%
mutate(distance = distance / 5280) #convert to miles
tracts22.rings.summary <- st_drop_geometry(tracts22.rings) %>%
group_by(distance, year) %>%
summarize(Mean_Rent = mean(MedRent, na.rm=T))
# combine data
tracts.rings.summary <- rbind(tracts12.rings.summary, tracts22.rings.summary)
allTracts.Summary <-
st_drop_geometry(allTracts.group) %>%
group_by(year, TOD) %>%
summarize(Rent = round(mean(MedRent, na.rm = T), 2),
Population = round(mean(TotalPop, na.rm = T), 0),
Income = round(mean(MedHHInc, na.rm = T), 2),
Housing = round(mean(TotalHU, na.rm = T), 0)) %>%
ungroup()
allTracts.Diff <- allTracts.Summary %>%
pivot_wider(names_from = TOD, values_from = c(Rent, Population, Income, Housing))
allTracts.Diff <- allTracts.Diff %>%
mutate(Diff_Rent = 100*(Rent_TOD - `Rent_Non-TOD`)/`Rent_Non-TOD`,
Diff_Population = 100*(Population_TOD - `Population_Non-TOD`)/`Population_Non-TOD`,
Diff_Income = 100*(Income_TOD - `Income_Non-TOD`)/`Income_Non-TOD`,
Diff_Housing = 100*(Housing_TOD - `Housing_Non-TOD`)/`Housing_Non-TOD`)%>%
select(year, starts_with("Diff_")) %>%
rename(Rent = Diff_Rent,
Population = Diff_Population,
Income = Diff_Income,
Housing = Diff_Housing)
allTracts.Summary.Diff <- bind_rows(
allTracts.Summary,
allTracts.Diff %>%
mutate(TOD = "Difference(%)"))
# relate centroids of tracts to the buffer
Centroids.12 <- st_centroid(tracts12)[buffer,] %>%
dplyr::select(-MedHHInc, -TotalHU)
buffer_centroids.12 <- st_join(stopBuffer %>%
mutate(ID = seq(1, nrow(stopBuffer))) %>%
dplyr::select(-mode),
Centroids.12,
join = st_intersects)
Centroids.22 <- st_centroid(tracts22)[buffer,] %>%
dplyr::select(-MedHHInc, -TotalHU)
buffer_centroids.22 <- st_join(stopBuffer %>%
mutate(ID = seq(1, nrow(stopBuffer))) %>%
dplyr::select(-mode),
Centroids.22,
join = st_intersects)
# calculate pop and rent of each station
Muni_centroids.12 <- buffer_centroids.12 %>%
group_by(ID) %>%
summarise(Pop = mean(TotalPop, na.rm = TRUE),
Rent = mean(MedRent, na.rm = TRUE)) %>%
mutate(year = "2012")
Muni_centroids.22 <- buffer_centroids.22 %>%
group_by(ID) %>%
summarise(Pop = mean(TotalPop, na.rm = TRUE),
Rent = mean(MedRent, na.rm = TRUE)) %>%
mutate(year = "2022")
# relate the results to rail stations
Muni_centroids.12 <- cbind(Muni_Metro %>%
dplyr::select(-Line),
Muni_centroids.12 %>%
st_drop_geometry() %>%
dplyr::select(Pop, Rent, year))
Muni_centroids.22 <- cbind(Muni_Metro %>%
dplyr::select(-Line),
Muni_centroids.22 %>%
st_drop_geometry() %>%
dplyr::select(Pop, Rent, year))
Muni_centroids <- rbind(Muni_centroids.12,Muni_centroids.22)
Clustered residential neighborhoods connected by highway roads, with long daily car commutes—an all-too-familiar scene in California, especially around San Francisco. When the weekend comes, people flock to beaches, Fisherman’s Wharf, or state parks, avoiding downtown areas altogether. With wealthier residents moving out of city centers, it’s easy to assume downtowns are in decline. Adding to this, public transit usage is falling as more cars flood the roads. For many housing developers, investing far from railway stations and downtown areas seems like the safer bet.
But what if that conventional wisdom is leading you away from a hidden opportunity?
Code
ggplot() +
geom_sf(data=muni_MRB, aes(fill = distance), color = "#708090", alpha = 0.3, show.legend = FALSE) +
scale_fill_continuous(low = "#0868ac", high = "#FAF9F6")+
geom_sf(data=st_union(tracts22), fill= "transparent", color = "black", size = 2) +
geom_sf(data=Muni_Metro, aes(colour = mode), show.legend = TRUE,
size= 1.5) +
scale_colour_manual(values = c("#40A2E3","#C9E6F0","#0D9276")) +
geom_sf(data = buffer, fill = "transparent", color = "#0868ac") +
labs(caption = "*TOD Areas in San Francisco*\nDistance in-between each buffer is 0.5 mile.") +
guides(fill = "none",
color = guide_legend(override.aes = list(size = 2))) +
theme(legend.position = c(.2,.85),
legend.background = element_blank(),
legend.title = element_blank(),
plot.title = element_text(size = 18),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
panel.background = element_blank(),
plot.subtitle = element_text(size = 10, face = "bold"))
Before diving in, let’s clarify an important concept: Transportation-Oriented Development (TOD). TOD is a strategic urban planning approach that promotes vibrant, mixed-use neighborhoods centered around public transit hubs. These areas include housing, offices, retail, and restaurants, creating dynamic communities where residents can live, work, and shop close to transit. A TOD area is typically defined as the region within 0.5 miles of a transit station. My research examines these areas in San Francisco over a 10-year period (2012–2022), uncovering socio-economic trends that reveal a surprising investment opportunity.
Rent Trends
Code
tracts.rings.summary.PLOT <- tracts.rings.summary %>%
filter(distance != 3.5) %>%
mutate(Mean_Rent = round(Mean_Rent, 2))
ggplot(data = tracts.rings.summary.PLOT, aes(distance, Mean_Rent, colour=year)) +
geom_point(data = tracts.rings.summary.PLOT, size=3) +
geom_line(data = tracts.rings.summary.PLOT, size = 1)+
geom_line(data = tracts.rings.summary.PLOT %>% filter(distance >= 1.5), size=2, show.legend = FALSE) +
geom_vline(xintercept = 1.5, linetype = "dashed", color = "#708090") +
geom_point(data = tracts.rings.summary %>% filter(distance == 0.5), size=5, show.legend = FALSE) +
geom_text(
data = tracts.rings.summary.PLOT %>% filter(distance == 2.5),
aes(distance, Mean_Rent,
label = paste0("$", Mean_Rent)), inherit.aes = FALSE,
color = "#708090", vjust = .3, hjust = 1.2) +
geom_text(
data = tracts.rings.summary.PLOT %>% filter(distance == 2),
aes(distance, Mean_Rent,
label = paste0("$", Mean_Rent)), inherit.aes = FALSE,
color = "#708090", vjust = -1, hjust = .6) +
geom_text(
data = tracts.rings.summary.PLOT %>% filter(distance == .5),
aes(distance, Mean_Rent,
label = paste0("$", Mean_Rent)), inherit.aes = FALSE,
color = "#708090", vjust = -1.2, hjust = 0.2, fontface = "bold") +
scale_color_manual(values = c("#0D9276", "#0868ac"), name = "Year")+
geom_text(data = tracts.rings.summary.PLOT %>% filter(distance == .5),
label = "TOD Mean Rent", fontface = "bold",
hjust = 0.2, vjust = -3, color = "#708090", show.legend = FALSE) +
scale_x_continuous(breaks = seq(0.5,2.5,0.5))+
labs(caption = "*Mean Rent within Certain Miles of Railway Stations*\nRent shows opposite trends beyond 1.5 miles far from railway stations,\nwhile that of TOD areas remains a low level.",
x = "Distance to Subway Stations", y = "Mean Rent") +
theme(legend.position = c(.1,.8),
plot.title = element_text(size = 18),
plot.subtitle = element_text(size = 10, face = "bold"),
axis.ticks.y=element_blank(),
strip.text.x = element_text(size = 10),
axis.title = element_blank(),
axis.text.y = element_blank(),
axis.line.x = element_line(),
panel.background = element_blank(),
panel.border = element_blank())
A straightforward way to evaluate investment potential is through rent trends. While rents in TOD areas have remained relatively low, rents in areas beyond 1.5 miles from transit stations show contrasting trends between 2012 and 2022. The rapid growth of rent indicates an increasing popularity far from public transit, which is consistent with common sense. High current average rent of $3501 in these non-TOD areas might seem appealing, but they come with significant risks: limited affordability, heightened competition, and the need for deep pockets to acquire and develop properties.
In contrast, TOD areas offer a more affordable entry point for developers. With government support for TOD projects and untapped growth potential, these areas present an appealing alternative. Would you rather compete in a saturated market for expensive land or secure a strategic position in TOD areas at a fraction of the cost before they gain widespread attention?
Reading Between the Lines
Code
diff <- allTracts.Summary %>%
# dplyr::select(-Rent, -Income) %>%
gather(Variable, Value, -year, -TOD) %>%
pivot_wider(names_from = TOD, values_from = Value) %>%
mutate(
diff = `Non-TOD` - TOD,
ymin = pmin(`Non-TOD`, TOD),
ymax = pmax(`Non-TOD`, TOD)
)
diff2 <- allTracts.Summary.Diff %>%
unite(year.TOD, year, TOD, sep = ": ", remove = T) %>%
gather(Variable, Value, -year.TOD) %>%
mutate(Value = round(Value, 2)) %>%
filter(year.TOD %in% c("2012: Difference(%)","2022: Difference(%)")) %>%
rename(year = year.TOD, difference = Value) %>%
mutate(year = case_when(
year == "2012: Difference(%)" ~ 2012,
year == "2022: Difference(%)" ~ 2022
))
diff3 <- diff %>% filter(Variable %in% c("Housing","Population")) %>%
mutate(color = case_when(
diff > 0 ~ "Less",
diff < 0 ~ "More"
))
plot <- allTracts.Summary %>%
# dplyr::select(-Rent, -Income) %>%
gather(Variable, Value, -year, -TOD)
ggplot(data = plot, aes(year, Value, , fill = TOD)) +
geom_bar(stat = "identity", position = "dodge", width = .5, alpha = 0.5) +
facet_wrap(~Variable, scales = "free", nrow = 2) +
geom_rect(
data = diff3,
aes(xmin = as.numeric(as.factor(year))-0.1,
xmax = as.numeric(as.factor(year)) + 0.1,
ymin = ymin, ymax = ymax, fill = color),
inherit.aes = FALSE, alpha = 0.4
) +
geom_line(
data = diff2 %>% filter(Variable == "Rent"),
aes(x = as.numeric(as.factor(year)),
y = difference*100 + 1500,
group = Variable), inherit.aes = FALSE,
color = "#0868ac", size = 1
) +
geom_line(
data = diff2 %>% filter(Variable == "Income"),
aes(x = as.numeric(as.factor(year)),
y = difference*100 + 55000,
group = Variable), inherit.aes = FALSE,
color = "#0868ac", size = 1
) +
geom_text(
data = diff2 %>% filter(Variable == "Rent"),
aes(x = as.numeric(as.factor(year)),
y = difference*100 + 1500,
label = paste0(difference, "%")), inherit.aes = FALSE,
color = "#0868ac", vjust = -0.5
) +
geom_text(
data = diff2 %>% filter(Variable == "Income"),
aes(x = as.numeric(as.factor(year)),
y = difference*100 + 55000,
label = paste0(difference, "%")), inherit.aes = FALSE,
color = "#0868ac", vjust = -0.5
) +
scale_fill_manual(values = c("Less" = "#0D9276", "More" = "#0868ac",
"Non-TOD" = "#bae4bc", "TOD" = "#43a2ca")) +
scale_y_continuous(expand = expansion(0))+
labs(caption = "*Housing & Population | Income & Rent Differences*\nPopulation delinces a lot but housing units are higher in TOD areas;\nthe differences of income and rent between TOD and non-TOD areas are getting smaller.") +
theme(legend.position = "top",
legend.title = element_blank(),
plot.title = element_text(size = 18),
plot.subtitle = element_text(size = 10, face = "bold"),
axis.ticks.y=element_blank(),
strip.text.x = element_text(size = 10),
axis.title = element_blank(),
axis.text.y = element_blank(),
axis.line.x = element_line(),
panel.background = element_blank(),
panel.border = element_blank(),
strip.background = element_rect(fill = "transparent", color = "transparent"))
Digging deeper into the data, two trends in TOD areas stand out:
1. Population and Housing Shifts:
Between 2012 and 2022, TOD areas experienced a decrease in both housing units and population, mirroring the decline seen in some urban areas. However, the population decline outpaced the reduction in housing units, indicating a shift toward smaller households. This means more single or two-person households are moving into TOD areas—a prime demographic for developers to target.
2. Income and Rent Trends:
Non-TOD areas have historically attracted higher-income households, but the income gap between TOD and non-TOD areas is narrowing due to the percentage of difference. Some high-income households are beginning to migrate to TOD areas, signaling their growing appeal. As income levels rise in these zones, rent growth is likely to follow, creating an opportunity for long-term value appreciation.
Avoid the Tourist Traps
Code
# Create bounding box using st_bbox
bbox_coords <- st_bbox(tracts12)
# Adjust the bounding box based on your requirements (here 50% and 80% of xmax, ymax)
xmin <- 0.88 * bbox_coords["xmax"]
xmax <- 0.97 * bbox_coords["xmax"]
ymin <- 0.8 * bbox_coords["ymax"]
ymax <- 0.93 * bbox_coords["ymax"]
# Create a polygon representing the rectangle (bounding box)
bbox_polygon <- st_sfc(st_polygon(list(matrix(c(
xmin, ymin, # bottom-left corner
xmax, ymin, # bottom-right corner
xmax, ymax, # top-right corner
xmin, ymax, # top-left corner
xmin, ymin # close the polygon
), ncol = 2, byrow = TRUE))), crs = st_crs(tracts12))
ggplot() +
geom_sf(data = tracts12, fill = "grey95", color = "white")+
geom_sf(data = tracts22, fill = "grey95", color = "white")+
geom_sf(data = bbox_polygon,
color = NA, fill = "#0868ac", alpha=.3, linetype = "dashed") +
geom_sf(data = Muni_centroids, aes(size = q5(Pop),color = q5(Pop))) +
scale_color_manual(values = scales::alpha(palette5, 0.3)) +
scale_size_manual(values = c(0.5,1.5,2.5,3.5,4.5)) +
geom_text(data = data.frame(year = "2022", x = xmin, y = ymax, label = "Fisherman’s Wharf, Embarcadero,\nWashington Square ..."),
aes(x = x, y = y, label = label),
color = "#0868ac", size = 4, fontface = "bold", vjust = -0.5, hjust = .65
) +
geom_segment(data = data.frame(year = "2022", x = xmin, y = ymax),
aes(x = xmin+2000, xend = xmin-8000, y = ymax-5000, yend = ymax), color = "#0868ac", size = 0.8) +
geom_segment(data = data.frame(year = "2012", x = xmax, y = ymax),
aes(x = xmax-8000, xend = xmax-3000, y = ymax-2000, yend = ymax+5000), color = "#0868ac", size = 0.8) +
geom_segment(data = data.frame(year = "2012", x = xmax, y = ymax),
aes(x = xmax+12000, xend = xmax-3000, y = ymax+5000, yend = ymax+5000), color = "#0868ac", size = 0.8) +
facet_wrap(~year) +
labs(caption= "*Population within TOD Areas*\nThe number of the residents in tourism spots decreased significantly from 2012 to 2022.")+
theme(legend.position = "none",
legend.background = element_blank(),
legend.title = element_blank(),
plot.title = element_text(size = 18),
strip.text.x = element_text(size = 10),
axis.title = element_blank(),
axis.text=element_blank(),
axis.ticks=element_blank(),
panel.background = element_blank(),
plot.subtitle = element_text(size = 10, face = "bold"),
strip.background = element_rect(fill = "transparent", color = "transparent"))
Investing in TOD areas really sounds like a great deal, isn’t it? But remember to avoid regions dominated by tourism when you choose available land. The population in the northeastern part of San Francisco declined significantly from 2012 to 2022, where many popular tourist attractions are located, such as Fisherman’s Wharf, Embarcadero, and Washington Square. The thriving tourism industry drives away local residents, which is not suitable for developing houses but hotels or short-term rentals.
Why TOD Makes Sense
For housing developers seeking untapped potential, TOD areas offer an attractive combination of affordability, demographic shifts, and future growth prospects. While non-TOD areas currently command higher rents and greater attention, the low cost of entry in TOD zones, coupled with rising interest from higher-income households, sets the stage for a smart, forward-looking investment.
So, is downtown really in decline? Or is this the perfect moment to seize the opportunity in TOD areas before they gain widespread attention? The answer lies in choosing wisely—and acting quickly.