African-American History Data Visualization (Graph & Word Cloud included)

Thu, Jan 13, 2022 5-minute read

This blog post is a little similar to the previous post I had (you can check it out here). The previous one is about the African-American achievements, but this one is about slave transportation, census data, African names, as well as some important historic events recorded. The data sets are from TidyTuesday.

library(tidyverse)
library(scales)
library(glue)
library(tidytext)
library(patchwork)
library(ggraph)
library(ggwordcloud)
theme_set(theme_bw())
blackpast <- read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-16/blackpast.csv')

census <- read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-16/census.csv')

slave_routes <- read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-16/slave_routes.csv')

african_names <- read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-16/african_names.csv')

U.S. and its subregion population in total

census %>%
  group_by(region, year) %>%
  summarize(total = sum(total)) %>%
  ungroup() %>%
  mutate(region = fct_reorder(region, -total, sum)) %>%
  ggplot(aes(year, total, color = region)) +
  geom_line(size = 1) +
  scale_y_log10(labels = comma) +
  scale_x_continuous(breaks = seq(min(census$year), max(census$year), 25)) +
  labs(y = "total population",
       title = "Total U.S. and subregion population")  

U.S. Population among various races

census %>%
  filter(region == "USA Total") %>%
  select(-division) %>%
  pivot_longer(cols = -c(region, year), names_to = "variable", values_to = "population") %>%
  filter(variable != "total") %>%
  mutate(variable = str_replace(variable, "_", " "),
         variable = str_to_title(variable),
         variable = fct_reorder(variable, -population, sum)) %>%
  ggplot(aes(year, population, color = variable)) +
  geom_line(size = 1) +
  scale_y_log10(labels = comma) +
  scale_x_log10(breaks = seq(min(census$year), max(census$year), 25)) +
  labs(color = NULL,
       title = "The total population across various races of the U.S.") +
  theme(strip.text = element_text(size = 13),
        plot.title = element_text(size = 15))

The U.S. population across four different regions

census %>%
  select(-division) %>%
  pivot_longer(cols = -c(region, year), names_to = "variable", values_to = "population") %>%
  filter(region != "USA Total",
         variable != "total") %>% 
  group_by(region, year, variable) %>%
  summarize(population = sum(population)) %>%
  ungroup() %>%
  mutate(variable = str_replace(variable, "_", " "),
         variable = str_to_title(variable)) %>%
  mutate(variable = fct_reorder(variable, -population, sum)) %>%
  ggplot(aes(year, population, color = variable)) +
  geom_line(size = 1) +
  facet_wrap(~region) +
  scale_x_log10(breaks = seq(min(census$year), max(census$year), 25)) +
  scale_y_log10(labels = comma) +
  labs(color = NULL,
       title = "The population across four regions of the U.S.") +
  theme(strip.text = element_text(size = 13),
        plot.title = element_text(size = 15))

set.seed(2022)
slave_routes %>%
  count(port_origin, port_arrival, sort = T) %>%
  filter(port_arrival != port_origin) %>%
  head(50) %>%
  ggraph(layout = "fr") +
  geom_edge_link(aes(width = n),
                 arrow = arrow(length = unit(4, 'mm'),
                               type = "open")) +
  geom_node_point() +
  geom_node_text(aes(label = name, color = name), hjust = 0.5, vjust = 1, check_overlap = T, size = 5) +
  scale_edge_width_continuous(range = c(1,2)) +
  theme_void() +
  guides(color = "none") +
  labs(edge_width = "# of transports",
       title = "How were slaves being transported between ports?") +
  theme(plot.title = element_text(size = 18))

Captains transporting slaves

captains <- slave_routes %>%
  rename(captain_name = captains_name) %>%
  separate_rows(captain_name, sep = "<br/> ") %>% 
  filter(!is.na(captain_name)) %>%
  separate(captain_name, c("last_name", "first_name"), sep = ", ") %>% 
  mutate(captain_full_name = paste(first_name, last_name)) 

captains %>%
  count(captain_full_name, sort = T) %>%
  head(20) %>%
  mutate(captain_full_name = fct_reorder(captain_full_name, n)) %>%
  ggplot(aes(n, captain_full_name)) +
  geom_col() +
  labs(x = "# of voyages",
       y = "captain name",
       title = "Top 20 captains on # of voyages of transporting slaves")

Captains transporting # of slaves

captains %>%
  group_by(captain_full_name) %>%
  summarize(n_slaves = sum(n_slaves_arrived)) %>%
  arrange(desc(n_slaves)) %>%
  head(20) %>%
  mutate(captain_full_name = fct_reorder(captain_full_name, n_slaves)) %>%
  ggplot(aes(n_slaves, captain_full_name)) +
  geom_col() +
  labs(x = "# of slaves being transported",
       y = "captain",
       title = "Top 20 captains transporting most of the slaves")

Total # of slaves being transported over the years

captains %>%
  group_by(year_arrival) %>%
  summarize(n_slaves = sum(n_slaves_arrived, na.rm = T)) %>%
  ungroup() %>%
  ggplot(aes(year_arrival, n_slaves)) +
  geom_line(size = 1, color = "pink") +
  scale_x_continuous(breaks = seq(1500, 1900, 50)) +
  labs(x = "arrival year",
       y = "# of slaves in total",
       title = glue("# of slaves arrived being transported from {min(captains$year_arrival)} to {max(captains$year_arrival)}"))

Working on african_names

Overview of age and height

age <- african_names %>%
  mutate(port_disembark = str_remove(port_disembark, " unspecified|,"),
         gender = fct_reorder(gender, age, median, na.rm = T)) %>%
  ggplot(aes(age, gender, fill = port_disembark)) +
  geom_boxplot() +
  labs(y = NULL,
       title = "Slave age and gender")


height <- african_names %>%
  mutate(port_disembark = str_remove(port_disembark, " unspecified|,"),
         gender = fct_reorder(gender, age, median, na.rm = T)) %>%
  ggplot(aes(height, gender, fill = port_disembark)) +
  geom_boxplot() +
  labs(y = NULL,
       title = "Slave height and gender")


age / height

Average age and height

african_names %>% 
  group_by(year_arrival, gender) %>%
  summarize(age = mean(age, na.rm = T),
            height = mean(height, na.rm = T)) %>%
  ungroup() %>%
  pivot_longer(cols = c("age", "height")) %>%
  filter(!is.na(gender)) %>%
  mutate(gender = fct_reorder(gender, -value, sum, na.rm = T)) %>%
  ggplot(aes(year_arrival, value, color = gender)) +
  geom_line(size = 1) +
  facet_wrap(~name) +
  theme(strip.text = element_text(size = 13),
        plot.title = element_text(size = 15)) +
  labs(x = "arrival year",
       y = NULL,
       color = NULL,
       title = "Average age and height among all slaves over the years")