· 5 years ago · Jun 16, 2020, 02:40 PM
1library(tidycensus)
2library(tidyverse)
3library(cowplot)
4library(viridis)
5library(scales)
6library(sf)
7library(tigris)
8library(rgeos)
9library(geosphere)
10library(gridExtra)
11
12### Request your Census API key here:
13# https://api.census.gov/data/key_signup.html
14#
15api_key_census <- "INSERT_YOUR_CENSUS_API_KEY_HERE"
16census_api_key(api_key_census, install = TRUE)
17
18### How to search Census for series (Painful!!!)
19# vars <- load_variables(2018, "acs5", cache = TRUE)
20# search <- vars %>% filter(grepl("TOTAL POPULATION", concept))
21
22#1 B02001_001 Estimate!!Total RACE
23#2 B02001_002 Estimate!!Total!!White alone RACE
24#3 B02001_003 Estimate!!Total!!Black or African American alone RACE
25#4 B02001_004 Estimate!!Total!!American Indian and Alaska Native alone RACE
26#5 B02001_005 Estimate!!Total!!Asian alone RACE
27#6 B02001_006 Estimate!!Total!!Native Hawaiian and Other Pacific Islander alone RACE
28#7 B02001_007 Estimate!!Total!!Some other race alone RACE
29#8 B02001_008 Estimate!!Total!!Two or more races RACE
30#9 B02001_009 Estimate!!Total!!Two or more races!!Two races including Some other race RACE
31#10 B02001_010 Estimate!!Total!!Two or more races!!Two races excluding Some other race, and three or more races RACE
32
33### African-American
34my_demographic <- c("B02001_003")
35my_demographic_txt <- "African Americans"
36
37### Hispanics
38#my_demographic <- c("B03002_012")
39#my_demographic_txt <- "Hispanics"
40
41### Non-Hispanic whites
42#my_demographic <- c("B03002_003")
43#my_demographic_txt <- "Non-Hispanics Whites"
44
45### Native American
46#my_demographic <- c("B02001_004")
47#my_demographic_txt <- "Native American"
48
49### Asian
50#my_demographic <- c("B02001_005")
51#my_demographic_txt <- "Asian"
52
53### FIPS code for Syracuse, NY
54my_states <- c("36")
55my_counties <- c("067")
56
57### Pull tract area so we can compute population density
58area_tract <-
59 tracts(year = 2018,
60 state = my_states,
61 cb = TRUE,
62 class = "sf",
63 progress_bar = FALSE) %>%
64 mutate(area = ALAND / 2589988) %>%
65 select(GEOID, STATEFP, COUNTYFP, area) %>%
66 st_drop_geometry() %>%
67 filter(COUNTYFP %in% my_counties) %>%
68 select(GEOID, area)
69
70### Pull the demographic population of your chosen subset
71subset_pop <-
72 get_acs(geography = "tract",
73 variables = my_demographic,
74 state = my_states,
75 county = my_counties,
76 year = 2018,
77 geometry = TRUE,
78 cache_table = TRUE) %>%
79 select(GEOID, estimate) %>%
80 rename(subset_pop = estimate)
81
82### Pull the total population
83total_pop <-
84 get_acs(geography = "tract",
85 variables = c("B02001_001"),
86 state = my_states,
87 county = my_counties,
88 year = 2018,
89 geometry = FALSE,
90 cache_table = TRUE) %>%
91 select(GEOID, estimate) %>%
92 rename(total_pop = estimate) %>%
93 left_join(area_tract, by = "GEOID") %>%
94 mutate(pop_density = total_pop / area)
95
96### Pull median housing costs per month
97housing_costs <-
98 get_acs(geography = "tract",
99 variables = c("B25105_001"),
100 state = my_states,
101 county = my_counties,
102 year = 2018,
103 geometry = TRUE,
104 cache_table = TRUE) %>%
105 select(GEOID, estimate) %>%
106 rename(housing_costs = estimate)
107
108### Pull median income
109median_income <-
110 get_acs(geography = "tract",
111 variables = c("B19013_001"),
112 state = my_states,
113 county = my_counties,
114 year = 2018,
115 geometry = FALSE,
116 cache_table = TRUE) %>%
117 select(GEOID, estimate) %>%
118 rename(income = estimate)
119
120### Determine distance of tract from center of city, to see if housing costs depends on distance to center.
121### Lat/lon are for Nashville TN.
122center_lat <- 36.16587
123center_lon <- -86.78434
124tract_centers <- housing_costs$geometry %>% as_Spatial() %>% gCentroid(byid = TRUE)
125dst <- tibble(tract_centers$x, tract_centers$y) %>% rename(lon = "tract_centers$x", lat = "tract_centers$y")
126ctr <- c(center_lon, center_lat)
127dist <- distVincentyEllipsoid(ctr, dst, a=6378137, b=6356752.3142, f=1/298.257223563) / 1609.34
128housing_costs$distance_to_center <- dist
129
130map_housing_to_income <-
131 housing_costs %>%
132 left_join(median_income, by = "GEOID") %>%
133 mutate(housing_per = (12 * housing_costs) / income) #%>%
134 #mutate(housing_per = ifelse(housing_per < 0.28, NA, housing_per))
135
136map_subset_per_pop <-
137 subset_pop %>%
138 left_join(total_pop, by = "GEOID") %>%
139 mutate(subset_per = subset_pop / total_pop)
140
141data_combined <-
142 (subset_pop %>% st_drop_geometry()) %>%
143 left_join(total_pop, by = "GEOID") %>%
144 left_join((housing_costs %>% st_drop_geometry()), by = "GEOID") %>%
145 left_join(median_income, by = "GEOID") %>%
146 mutate(housing_per = (12 * housing_costs) / income) %>%
147 mutate(subset_per = subset_pop / total_pop) %>%
148 as_tibble()
149
150areawater <-
151 area_water(state = my_states,
152 county = my_counties,
153 class = "sf")
154
155linearwater <-
156 linear_water(state = my_states,
157 county = my_counties,
158 class = "sf")
159
160roads <-
161 roads(state = my_states,
162 county = my_counties,
163 class = "sf")
164
165interstates <- roads %>% filter(grepl("^I-|State Rte 840|Winfield", FULLNAME))
166hwy <- roads %>% filter(RTTYP %in% c("C", "S", "U")) %>% filter(!grepl("^Old", FULLNAME))
167rivers <- linearwater %>% filter(grepl(" Riv$", FULLNAME))
168lakes <- areawater
169
170color_water <- "steelblue2"
171color_interstate <- "darkred"
172
173### Use this if you want to plot population density on an absolute scale
174graph_scale <-
175 scale_fill_viridis(direction = -1,
176 option = "inferno",
177 labels = percent_format(accuracy = 0.1))
178
179### Graph the counties
180p_subset_per_pop <-
181 ggplot() +
182 theme_void() +
183 theme(
184 legend.title = element_blank(),
185 legend.position = "right",
186 plot.title = element_text(hjust = 0.5),
187 ) +
188 labs(title = paste(my_demographic_txt, " as % of Population\n[ ",
189 my_demographic, " / B02001_001 ]", sep = "")) +
190 geom_sf(data = map_subset_per_pop, size = 0.05, alpha = 0.5,
191 aes(fill = subset_per)) +
192
193 geom_sf(data = areawater, size = 0.2, fill = color_water, color = color_water) +
194 geom_sf(data = rivers, size = 0.2, fill = color_water, color = color_water) +
195 geom_sf(data = interstates, size = 0.7, color = color_interstate, alpha = 1.0) +
196 #geom_sf(data = hwy, size = 0.2, color = color_interstate, alpha = 1.0) +
197
198 graph_scale
199print(p_subset_per_pop)
200
201### Graph the counties
202p_housing_to_income <-
203 ggplot() +
204 theme_void() +
205 theme(
206 legend.title = element_blank(),
207 legend.position = "right",
208 plot.title = element_text(hjust = 0.5),
209 ) +
210 labs(title = "Median Yearly Housing Costs as a percent of median Houshold Income exceeds 28%\n(12 * [B25105_001] / [B19013_001] > 0.28)") +
211 geom_sf(data = map_housing_to_income, size = 0.05, alpha = 0.5,
212 aes(fill = housing_per)) +
213
214 geom_sf(data = lakes, size = 0.2, fill = color_water, color = color_water) +
215 geom_sf(data = rivers, size = 0.2, fill = color_water, color = color_water) +
216 geom_sf(data = interstates, size = 0.7, color = color_interstate, alpha = 1.0) +
217 #geom_sf(data = hwy, size = 0.2, color = color_interstate, alpha = 1.0) +
218
219 graph_scale
220
221fit <- lm(housing_per ~ subset_per * pop_density * income * distance_to_center, data = data_combined)
222predict_fit <- 0.282728 -0.005900 * data_combined$distance_to_center
223
224p_fit <-
225 ggplot(data = data_combined, aes(y = housing_per, x = subset_per)) +
226 theme_classic() +
227 geom_point(shape = 19, size = 0.5, color = "black") +
228 geom_smooth(method = "lm", formula = y ~ x) +
229 geom_hline(yintercept = 0.28, alpha = 0.5, linetype = "dotted") +
230 labs(
231 x = paste(my_demographic_txt, " % of Population", sep = ""),
232 y = "Median Housing to Income Ratio") +
233 scale_x_continuous(labels = scales::percent_format(accuracy = 1)) +
234 scale_y_continuous(labels = scales::percent_format(accuracy = 1))
235
236plot_list <- list(p_subset_per_pop, p_housing_to_income, p_fit)
237
238layout_list <- rbind(
239 c(1, 2),
240 c(1, 2),
241 c(1, 2),
242 c(3, 3),
243 c(3, 3)
244)
245
246charts <-
247 grid.arrange(grobs = plot_list,
248 layout_matrix = layout_list,
249 ncols = 2)