Women in the Workforce Data Visualization

Tue, Oct 12, 2021 3-minute read

The three datasets this post analyzes come from TidyTuesday.

library(tidyverse)
library(scales)
library(tidytext)
# Read the three TidyTuesday (2019-03-05) CSV files directly from GitHub.
jobs_gender <- read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-03-05/jobs_gender.csv"
)
earnings_female <- read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-03-05/earnings_female.csv"
)
employed_gender <- read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-03-05/employed_gender.csv"
)

Female salary percent of male salary

# Line chart of `percent` against `Year`, one coloured series per `group`
# in `earnings_female`.
earnings_female %>%
  mutate(
    # Shorten the group labels: drop the first " years" and anything that
    # follows a comma.
    group = str_remove(group, " years"),
    group = str_remove(group, ", .+"),
    # Order the factor levels (and so the legend) by descending summed percent.
    group = fct_reorder(group, -percent, .fun = sum)
  ) %>%
  # Optional: insert `filter(group != "Total") %>%` here to hide the
  # aggregate series.
  ggplot(aes(x = Year, y = percent, color = group)) +
  geom_point() +
  # NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` for
  # line thickness -- confirm the installed version before switching.
  geom_line(size = 1) +
  scale_x_continuous(breaks = seq(1980, 2010, by = 5)) +
  # `percent` is already on a 0-100 scale, hence scale = 1.
  scale_y_continuous(labels = percent_format(scale = 1)) +
  labs(
    title = "Female Salary Percent of Male Salary among Various Age Groups",
    subtitle = "Total group refers to 16 years and older",
    x = NULL,
    y = "Female salary percent of male salary"
  )

Working Status among Men and Women

# Line chart of the full-time / part-time percentages in `employed_gender`
# over time, one coloured series per original column.
employed_gender %>%
  # Every column except `year` becomes a (work_type, percent) pair.
  pivot_longer(-year, names_to = "work_type", values_to = "percent") %>%
  mutate(
    # Turn snake_case column names into readable legend labels.
    work_type = str_replace_all(work_type, "_", " "),
    # Order the factor levels (and so the legend) by descending summed percent.
    work_type = fct_reorder(work_type, -percent, sum)
  ) %>%
  ggplot(aes(year, percent, color = work_type)) +
  geom_point() +
  # NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` for
  # line thickness -- confirm the installed version before switching.
  geom_line(size = 1) +
  # `percent` is already on a 0-100 scale, hence scale = 1.
  scale_y_continuous(labels = percent_format(scale = 1)) +
  scale_x_continuous(breaks = seq(1965, 2020, 5)) +
  labs(
    x = NULL,
    # Fixed: the label was previously the truncated string "Percent of ".
    y = "Percent of employed people",
    # Fixed: removed the stray "of" in "People of Working".
    title = "Percentage of People Working Full Time or Part Time",
    subtitle = "total means total employed people working either full time or part time"
  )

Female share of workers and female wage as a percent of male wage, by minor category

# Dodged bar chart, faceted by year: for each minor category, the mean of
# every "percent" column in `jobs_gender`.
jobs_gender %>%
  # Stack every column whose name contains "percent" into (type, percent).
  pivot_longer(
    cols = contains("percent"),
    names_to = "type",
    values_to = "percent"
  ) %>%
  group_by(minor_category, type, year) %>%
  # `.groups = "drop"` returns an ungrouped result directly, so no separate
  # ungroup() step (and no regrouping message) is needed.
  summarize(avg_percent = mean(percent, na.rm = TRUE), .groups = "drop") %>%
  mutate(
    # Order the categories by descending summed average percent.
    minor_category = fct_reorder(minor_category, -avg_percent, sum),
    # Turn snake_case column names into readable legend labels.
    type = str_replace_all(type, "_", " ")
  ) %>%
  ggplot(aes(avg_percent, minor_category, fill = type)) +
  geom_col(position = "dodge") +
  # Values are already on a 0-100 scale, hence scale = 1.
  scale_x_continuous(labels = percent_format(scale = 1), expand = c(0, 1)) +
  theme(strip.text = element_text(size = 15, face = "bold")) +
  labs(
    x = "",
    y = "minor category",
    fill = NULL,
    title = "Minor Category Average Female Percentage and the Respective Wage Percent of Male"
  ) +
  # Make sure the axis always reaches 100%.
  expand_limits(x = 100) +
  facet_wrap(~year)

# Horizontal bar chart, one facet per year: the ten occupations with the
# largest `total_earnings`, filled by major category.
jobs_gender %>%
  group_by(year) %>%
  slice_max(order_by = total_earnings, n = 10) %>%
  ungroup() %>%
  # Reorder occupations within each year so every facet is sorted on its own;
  # paired with scale_y_reordered() below, which tidies the axis labels.
  mutate(
    occupation = reorder_within(
      x = occupation,
      by = total_earnings,
      within = year
    )
  ) %>%
  ggplot(aes(x = total_earnings, y = occupation, fill = major_category)) +
  geom_col() +
  facet_wrap(~year, scales = "free") +
  scale_x_continuous(labels = dollar) +
  scale_y_reordered() +
  labs(
    title = "Top 10 Total Median Earning Occupation from 2013 to 2016",
    x = "median total earnings",
    y = NULL,
    fill = "major category"
  ) +
  theme(
    plot.title = element_text(size = 18),
    strip.text = element_text(size = 15, face = "bold"),
    axis.title = element_text(size = 15),
    axis.text.y = element_text(size = 10)
  )

# Point-range chart, one facet per year: for each minor category the line
# spans 0 to the total number of workers, the dot sits at the number of
# female workers, and the dot size encodes the mean wage percent of male.
jobs_gender %>%
  group_by(year, major_category, minor_category) %>%
  # One pass computes both the worker totals and the mean wage ratio. The
  # previous version summarised twice and inner-joined the results on the
  # same three keys, which yields the same rows and columns.
  summarize(
    across(contains("workers"), sum),
    # Divide by 100 so that `labels = percent` below formats it correctly.
    avg_percent = mean(wage_percent_of_male, na.rm = TRUE) / 100,
    .groups = "drop"
  ) %>%
  # Reorder categories within each year so every facet is sorted on its own;
  # paired with scale_y_reordered() below, which tidies the axis labels.
  mutate(
    minor_category = reorder_within(minor_category, workers_female, year)
  ) %>%
  ggplot(aes(total_workers, minor_category, color = major_category)) +
  geom_pointrange(aes(
    x = workers_female, y = minor_category,
    xmin = 0, xmax = total_workers,
    size = avg_percent
  )) +
  facet_wrap(~year, scales = "free_y") +
  scale_y_reordered() +
  scale_size_continuous(range = c(0.1, 1), labels = percent) +
  scale_x_continuous(labels = comma) +
  labs(
    x = "# of workers",
    y = "minor category",
    color = "major category",
    size = "average percent of male wage",
    title = "Category-wise # of Workers",
    subtitle = "The length of each line represents the total # of workers in each category\nThe position of dot refers to # of female workers\nThe size of dot refers to the average percent of male wage"
  ) +
  theme(
    strip.text = element_text(size = 15, face = "bold"),
    axis.title = element_text(size = 15),
    plot.title = element_text(size = 18)
  )