U.N. Votes Data Visualization
Mon, Mar 21, 2022
4-minute read
In this blog post, I will present a series of data visualizations of the votes in the U.N. The datasets are from TidyTuesday. This is also the first time I am using my own package, worlddatajoin. You can install it from my GitHub by running devtools::install_github("PursuitOfDataScience/worlddatajoin") at the console.
library(tidyverse)
library(scales)
library(lubridate)
#devtools::install_github("PursuitOfDataScience/worlddatajoin")
library(worlddatajoin)
library(tidytext)
library(widyr)
# Use the black-and-white ggplot2 theme for every plot below.
theme_set(theme_bw())

# Country-level votes (columns used below: rcid, country, country_code, vote).
# read_csv() treats the literal string "NA" as a missing value by default,
# which erases Namibia's ISO 3166-1 alpha-2 code ("NA"). Restore it so that
# Namibia can still be matched by country code.
unvotes <- read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-03-23/unvotes.csv"
) %>%
  mutate(
    country_code = if_else(country %in% "Namibia", "NA", country_code)
  )

# Roll-call metadata; normalise the case of the two free-text columns.
roll_calls <- read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-03-23/roll_calls.csv"
) %>%
  mutate(
    short = str_to_title(short),
    descr = str_to_lower(descr)
  )

# Issue labels attached to roll calls; upper-case the short issue code.
issues <- read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-03-23/issues.csv"
) %>%
  mutate(short_name = str_to_upper(short_name))
U.N. Amendment Votes:
# Share of "yes" votes over time for amendment votes and paragraph votes,
# drawn as one panel per vote type.
unvotes %>%
  # Share of "yes" votes for each roll call (computed once for both types).
  group_by(rcid) %>%
  summarize(avg_vote = mean(vote == "yes")) %>%
  ungroup() %>%
  inner_join(
    roll_calls %>% select(rcid, date, amend, para),
    by = "rcid"
  ) %>%
  # One row per (roll call, flag); keep only the flags that are set, so a
  # roll call flagged as both contributes to both panels.
  pivot_longer(c(amend, para), names_to = "type", values_to = "flag") %>%
  filter(flag == 1) %>%
  mutate(type = if_else(type == "amend", "Amendment", "Paragraph")) %>%
  # Keep a single roll call (the first encountered) per date and type.
  distinct(date, type, .keep_all = TRUE) %>%
  ggplot(aes(date, avg_vote, color = type)) +
  geom_line() +
  geom_point() +
  scale_y_continuous(labels = percent) +
  scale_x_date(
    date_breaks = "5 years",
    date_labels = "%Y"
  ) +
  labs(
    x = NULL,
    y = "approval pct",
    color = NULL,
    title = "U.N. Amendment Approval Percentage"
  ) +
  facet_wrap(~type, ncol = 1) +
  theme(legend.position = "none")
Number of U.N. calls:
# Yearly number of roll calls carrying each flag in the amend:para columns.
roll_calls %>%
  pivot_longer(amend:para) %>%
  group_by(year = year(date), name) %>%
  # Missing flags are ignored; .groups = "drop" returns an ungrouped tibble
  # without the regrouping message.
  summarize(call_count = sum(value, na.rm = TRUE), .groups = "drop") %>%
  # Order the legend so the flag with the largest overall count comes first.
  mutate(name = fct_reorder(name, -call_count, sum)) %>%
  ggplot(aes(year, call_count, color = name)) +
  geom_line(size = 1) +
  labs(
    x = NULL,
    y = "call count",
    color = NULL,
    title = "U.N. Call Count"
  )
World map of countries’ voting results:
# Per-country share of "yes" votes (plus vote count), attached to the world
# map polygons. A right join keeps every map row, including those without
# voting data.
map_joined <- unvotes %>%
  group_by(country, country_code) %>%
  summarize(
    pct_vote_yes = mean(vote == "yes"),
    n = n(),
    .groups = "drop"
  ) %>%
  right_join(
    worlddatajoin::world_data(2020) %>%
      select(long, lat, group, region, iso2c, continent),
    by = c("country_code" = "iso2c"),
    # dplyr joins treat NA keys as equal by default. Without this, any
    # country lacking a code would be attached to every map row lacking an
    # iso2c value, duplicating polygons with unrelated vote shares.
    na_matches = "never"
  )
# Choropleth: each map polygon is filled by its country's share of "yes"
# votes, on a red -> pink -> green scale centred on 50%.
ggplot(
  data = map_joined,
  mapping = aes(x = long, y = lat, group = group, fill = pct_vote_yes)
) +
  geom_polygon() +
  scale_fill_gradient2(
    low = "red",
    mid = "pink",
    high = "green",
    midpoint = 0.5,
    labels = percent
  ) +
  labs(
    title = "Average Pct of Voting Yes per Country",
    fill = "pct voting yes"
  ) +
  theme_void()
# Distribution of countries' "yes" share within each continent.
map_joined %>%
  filter(!is.na(continent)) %>%
  # Collapse the polygon-level rows to one row per country.
  distinct(country, pct_vote_yes, continent) %>%
  # Order continents by pct_vote_yes (fct_reorder's default summary),
  # ignoring countries with no voting data.
  mutate(continent = fct_reorder(continent, pct_vote_yes, na.rm = TRUE)) %>%
  ggplot(aes(pct_vote_yes, continent, fill = continent)) +
  geom_boxplot(show.legend = FALSE) +
  scale_x_continuous(labels = percent) +
  labs(
    x = "pct voting yes",
    y = NULL,
    title = "Voting Yes per Continent"
  )
Average voting yes on issues per continent:
# Average share of "yes" votes on each issue, one panel per continent.
unvotes %>%
  inner_join(issues, by = "rcid") %>%
  inner_join(
    map_joined %>%
      # Drop rows without a code or a continent: NA keys would otherwise
      # match each other in the join, and an NA continent would get its own
      # facet.
      filter(!is.na(country_code), !is.na(continent)) %>%
      distinct(country_code, continent),
    by = "country_code"
  ) %>%
  group_by(issue, continent) %>%
  summarize(avg_yes = mean(vote == "yes"), .groups = "drop") %>%
  # Order the issues independently inside every continent panel.
  mutate(issue = reorder_within(issue, avg_yes, continent)) %>%
  ggplot(aes(avg_yes, issue, fill = issue)) +
  geom_col(show.legend = FALSE) +
  scale_y_reordered() +
  scale_x_continuous(labels = percent) +
  facet_wrap(~continent, scales = "free_y") +
  labs(
    x = "voting yes",
    y = NULL,
    title = "Average Voting Yes on Issues per Continent"
  )
Most voted issues:
# The 20 roll-call topics with the most yes/no votes, shown with the share
# of "yes" among those votes.
unvotes %>%
  group_by(rcid) %>%
  summarize(
    voted_yes = sum(vote == "yes"),
    voted_no = sum(vote == "no")
  ) %>%
  ungroup() %>%
  inner_join(roll_calls, by = "rcid") %>%
  select(short, voted_yes, voted_no) %>%
  # `total` counts yes and no votes only; other vote values are excluded
  # from both the total and the percentage.
  mutate(
    total = voted_yes + voted_no,
    pct_yes = voted_yes / total
  ) %>%
  filter(!is.na(short)) %>%
  # For topics with several roll calls, keep the one with the most votes.
  arrange(desc(total)) %>%
  distinct(short, .keep_all = TRUE) %>%
  # Shorten long topics and append the vote total to the axis label.
  mutate(
    short = str_trunc(short, 30),
    short = paste0(short, "(", total, ")")
  ) %>%
  head(20) %>%
  mutate(short = fct_reorder(short, pct_yes)) %>%
  ggplot(aes(pct_yes, short, fill = short)) +
  geom_col(show.legend = FALSE, position = "dodge") +
  scale_x_continuous(labels = percent) +
  labs(
    x = "pct voting yes",
    y = NULL,
    title = "Most Voted Issues",
    subtitle = "Numbers in parentheses are the total yes and no votes"
  )
The following ideas are inspired by David Robinson's code.
Correlations:
# Pairwise correlation between countries' voting records across roll calls.
# Votes are encoded as yes = 1, no = -1, and anything else = 0.
country_cor <- unvotes %>%
  mutate(
    vote_numeric = case_when(
      vote == "yes" ~ 1,
      vote == "no" ~ -1,
      TRUE ~ 0
    )
  ) %>%
  pairwise_cor(country, rcid, vote_numeric, sort = TRUE)

country_cor
## # A tibble: 39,800 x 3
## item1 item2 correlation
## <chr> <chr> <dbl>
## 1 Slovakia Czechia 0.975
## 2 Czechia Slovakia 0.975
## 3 Slovakia Slovenia 0.965
## 4 Slovenia Slovakia 0.965
## 5 Lithuania Estonia 0.962
## 6 Estonia Lithuania 0.962
## 7 Lithuania Latvia 0.961
## 8 Latvia Lithuania 0.961
## 9 Bulgaria Hungary 0.957
## 10 Hungary Bulgaria 0.957
## # ... with 39,790 more rows
# For six selected countries, show the five strongest positive and the five
# strongest negative voting correlations with other countries.
country_cor %>%
  filter(
    item1 %in% c("United States", "China", "Japan", "Mexico", "France", "Israel")
  ) %>%
  # Grouping by the sign of the correlation lets slice_max() pick the top
  # five by magnitude on each side separately.
  group_by(item1, correlation > 0) %>%
  slice_max(abs(correlation), n = 5) %>%
  ungroup() %>%
  # Order partner countries independently inside every panel.
  mutate(item2 = reorder_within(item2, correlation, item1)) %>%
  ggplot(aes(correlation, item2, fill = item1)) +
  geom_col(show.legend = FALSE) +
  scale_y_reordered() +
  facet_wrap(~item1, scales = "free_y") +
  labs(
    y = NULL,
    title = "How are countries correlated with others?"
  ) +
  theme(
    strip.text = element_text(size = 15),
    plot.title = element_text(size = 18)
  )
Which countries disagree the most with other countries on the same continent?
# Within each continent, the five countries whose votes correlate least, on
# average, with the other countries of that continent.
country_cor %>%
  # Attach the continent of each side of the country pair.
  inner_join(
    map_joined %>% distinct(country, continent1 = continent),
    by = c("item1" = "country")
  ) %>%
  inner_join(
    map_joined %>% distinct(country, continent2 = continent),
    by = c("item2" = "country")
  ) %>%
  # Keep same-continent pairs only; filter() also drops pairs where either
  # continent is missing, because the comparison is then NA.
  filter(continent1 == continent2) %>%
  group_by(item1, continent1) %>%
  summarize(
    mean_cor = mean(correlation, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  group_by(continent1) %>%
  slice_min(mean_cor, n = 5) %>%
  ungroup() %>%
  # Order countries independently inside every continent panel.
  mutate(item1 = reorder_within(item1, mean_cor, continent1)) %>%
  ggplot(aes(mean_cor, item1, fill = continent1)) +
  geom_col(show.legend = FALSE) +
  scale_y_reordered() +
  facet_wrap(~continent1, scales = "free") +
  theme(
    strip.text = element_text(size = 15),
    plot.title = element_text(size = 18)
  ) +
  labs(
    x = "mean correlation",
    y = "",
    title = "5 Countries that Disagree Most with Their Neighbors"
  )