Broadband Data Visualization with zipcodeR and tigris Used
Mon, Apr 4, 2022
4-minute read
This blog post will analyze the broadband availability and usage situation across the U.S. per county. This is my first time using zipcodeR
and tigris
in a blog post, and this is also my first time making a U.S. map on county-level. Thanks TidyTuesday for providing the datasets.
library(tidyverse)
library(geofacet)
library(scales)
library(zipcodeR)
library(tigris)
library(sf)
theme_set(theme_bw())
broadband <- read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-05-11/broadband.csv') %>%
janitor::clean_names() %>%
left_join(
tibble(st = state.abb,
state = state.name),
by = "st"
) %>%
rename(county = county_name,
bb_avail = broadband_availability_per_fcc,
bb_usage = broadband_usage) %>%
mutate(county = str_remove(county, "\\s.+$"),
across(bb_avail:bb_usage, as.numeric),
county_id = sprintf("%05d", county_id)) %>%
filter(!is.na(state))
bb_zip <- read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-05-11/broadband_zip.csv") %>%
janitor::clean_names() %>%
rename(county = county_name) %>%
left_join(
tibble(st = state.abb,
state = state.name),
by = "st"
) %>%
mutate(postal_code = sprintf("%05d", postal_code))
broadband
## # A tibble: 3,142 x 6
## st county_id county bb_avail bb_usage state
## <chr> <chr> <chr> <dbl> <dbl> <chr>
## 1 AL 01001 Autauga 0.81 0.28 Alabama
## 2 AL 01003 Baldwin 0.88 0.3 Alabama
## 3 AL 01005 Barbour 0.59 0.18 Alabama
## 4 AL 01007 Bibb 0.29 0.07 Alabama
## 5 AL 01009 Blount 0.69 0.09 Alabama
## 6 AL 01011 Bullock 0.06 0.05 Alabama
## 7 AL 01013 Butler 0.78 0.11 Alabama
## 8 AL 01015 Calhoun 0.93 0.32 Alabama
## 9 AL 01017 Chambers 0.82 0.34 Alabama
## 10 AL 01019 Cherokee 0.99 0.1 Alabama
## # ... with 3,132 more rows
county_map <- state.name %>%
tibble(state = .) %>%
mutate(state = str_to_lower(state)) %>%
mutate(map_data = list(map_data("county", state))) %>%
unnest(map_data) %>%
select(-region) %>%
rename(county = subregion) %>%
mutate(across(c(state, county), str_to_title))
county_map
## # A tibble: 4,396,950 x 6
## state long lat group order county
## <chr> <dbl> <dbl> <dbl> <int> <chr>
## 1 Alabama -86.5 32.3 1 1 Autauga
## 2 Alabama -86.5 32.4 1 2 Autauga
## 3 Alabama -86.5 32.4 1 3 Autauga
## 4 Alabama -86.6 32.4 1 4 Autauga
## 5 Alabama -86.6 32.4 1 5 Autauga
## 6 Alabama -86.6 32.4 1 6 Autauga
## 7 Alabama -86.6 32.4 1 7 Autauga
## 8 Alabama -86.6 32.4 1 8 Autauga
## 9 Alabama -86.6 32.4 1 9 Autauga
## 10 Alabama -86.6 32.4 1 10 Autauga
## # ... with 4,396,940 more rows
Broadband usage and availability usage per state:
broadband %>%
ggplot(aes(bb_avail, bb_usage, color = st)) +
geom_point(alpha = 0.3, size = 1, show.legend = F) +
facet_geo(~st) +
scale_x_continuous(labels = percent) +
scale_y_continuous(labels = percent) +
theme(axis.text.x = element_text(angle = 90),
panel.grid = element_blank()) +
labs(x = "broadband availability",
y = "broadband usage",
title = "Broadband Availability and Usage Per State Per County")
broadband %>%
rename(`Broadband Availability` = bb_avail,
`Broadband Usage` = bb_usage) %>%
pivot_longer(4:5, names_to = "metric") %>%
ggplot(aes(value, state, fill = state, color = state)) +
geom_boxplot(alpha = 0.4) +
facet_wrap(~metric) +
theme(legend.position = "none",
panel.grid = element_blank()) +
labs(x = NULL,
y = NULL,
title = "Broadband Availability and Usage Per State") +
scale_x_continuous(labels = percent)
county_map %>%
inner_join(broadband %>%
select(-st, -county_id),
by = c("state", "county")) %>%
ggplot(aes(long, lat, group = group)) +
geom_polygon(aes(fill = bb_usage)) +
coord_map() +
theme_void() +
scale_fill_gradient(high = "green",
low = "red",
labels = percent) +
labs(fill = "broadband usage",
title = "Broadband Usage per County")
county_map %>%
inner_join(broadband %>%
select(-st, -county_id),
by = c("state", "county")) %>%
ggplot(aes(long, lat, group = group)) +
geom_polygon(aes(fill = bb_avail)) +
coord_map() +
theme_void() +
scale_fill_gradient(high = "green",
low = "red",
labels = percent) +
labs(fill = "broadband availability",
title = "Broadband Availability per County")
The percentage of broadband availability is generally larger than its of broadband usage across the U.S.
Broandband availability per county:
counties_sf <- counties()
counties_sf %>%
inner_join(broadband %>%
filter(!st %in% c("HI", "AK")),
by = c("GEOID" = "county_id")) %>%
st_simplify(dTolerance = .1) %>%
ggplot() +
geom_sf(aes(fill = bb_avail)) +
ggthemes::theme_map() +
coord_sf() +
scale_fill_gradient2(
high = "green",
low = "red",
mid = "pink",
midpoint = 0.25,
labels = percent
) +
labs(fill = "broadband availability",
title = "Broandband Availability per County")
Broandband usage per county:
counties_sf %>%
inner_join(broadband %>%
filter(!st %in% c("HI", "AK")),
by = c("GEOID" = "county_id")) %>%
st_simplify(dTolerance = .1) %>%
ggplot() +
geom_sf(aes(fill = bb_usage)) +
ggthemes::theme_map() +
coord_sf() +
scale_fill_gradient2(
high = "green",
low = "red",
mid = "pink",
midpoint = 0.25,
labels = percent
) +
labs(fill = "broadband usage",
title = "Broandband Uage per County")