Volcano Eruption Data Visualization
Wed, Jan 5, 2022
3-minute read
This blog post analyzes the volcano datasets provided by TidyTuesday for the week of 2020-05-12 (https://github.com/rfordatascience/tidytuesday/tree/master/data/2020/2020-05-12).
library(tidyverse)
library(lubridate)
# Use the black-and-white ggplot2 theme for every plot that follows.
theme_set(theme_bw())

# Load the five TidyTuesday (2020-05-12) volcano tables straight from GitHub.
volcano <- read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-05-12/volcano.csv")

eruptions <- read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-05-12/eruptions.csv")

# Derive proper start/end dates from the year/month/day parts, plus the
# eruption length in whole days.
eruptions <- mutate(
  eruptions,
  start_time = make_date(start_year, start_month, start_day),
  end_time = make_date(end_year, end_month, end_day),
  eruption_days = as.integer(difftime(end_time, start_time, units = "days"))
)

events <- read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-05-12/events.csv")

tree_rings <- read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-05-12/tree_rings.csv")

sulfur <- read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-05-12/sulfur.csv")
Making a map of the volcano locations
# Map every eruption at its coordinates: point size encodes the eruption
# length in days, colour encodes the eruption category.
eruptions %>%
  ggplot(aes(longitude, latitude, color = eruption_category)) +
  # eruption_days is already stored as an integer, so it is mapped directly.
  geom_point(aes(size = eruption_days)) +
  # check_overlap drops any label that would overlap one drawn earlier.
  geom_text(
    aes(label = volcano_name),
    hjust = 1,
    vjust = 1,
    check_overlap = TRUE
  ) +
  # World outlines drawn over the points.
  borders("world") +
  theme_void() +
  theme(plot.title = element_text(size = 18)) +
  labs(
    size = "days of eruption",
    color = "eruption category",
    title = "Volcanos and their eruption info"
  )
When did eruptions happen the most?
# Eruptions per start year, keeping only years after year 0, with a smoothed
# trend line (no confidence band).
eruptions %>%
  # Filtering on the counting key before counting gives the same counts and
  # skips tallying years that would be discarded anyway.
  filter(start_year > 0) %>%
  count(start_year) %>%
  ggplot(aes(start_year, n)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  labs(
    x = "eruption year",
    y = "# of eruptions"
  )
It seems that volcanic activity has been much more frequent in the last 500 years.
# Same eruptions-per-year view, zoomed in on start years after 1500.
eruptions %>%
  # Filtering on the counting key before counting gives the same counts and
  # skips tallying years that would be discarded anyway.
  filter(start_year > 1500) %>%
  count(start_year) %>%
  ggplot(aes(start_year, n)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  labs(
    x = "eruption year",
    y = "# of eruptions"
  )
I am not sure whether the increased number of volcanic eruptions has anything to do with the Industrial Revolution.
Join volcano and eruptions together
# Attach the volcano-level attributes to every eruption row. Eruptions
# without a matching volcano are kept, with NA in the volcano columns.
# NOTE(review): latitude/longitude are part of the key, so a row only matches
# when the coordinates are exactly equal in both tables - confirm this is
# intended.
join_keys <- c("volcano_number", "volcano_name", "latitude", "longitude")
eruptions_volcano_joined <- left_join(eruptions, volcano, by = join_keys)
# Keep the 20 most frequent subregions (and the 10 most frequent countries,
# lumping the rest into "Other"), then order subregions by median elevation.
subregion_elevation <- eruptions_volcano_joined %>%
  mutate(
    subregion = fct_lump(subregion, n = 20),
    country = fct_lump(country, n = 10)
  ) %>%
  filter(subregion != "Other") %>%
  mutate(subregion = fct_reorder(subregion, elevation, .fun = median))

# One horizontal boxplot of elevation per subregion, filled by country.
ggplot(
  subregion_elevation,
  aes(x = elevation, y = subregion, fill = country)
) +
  geom_boxplot() +
  labs(title = "Top 20 subregions' volcano elevation")
The most historically active volcanos
# For every volcano: number of recorded eruptions versus total eruption days,
# on log-log axes, coloured by country (top 10 countries, rest as "Other").
eruptions_volcano_joined %>%
  mutate(country = fct_lump(country, n = 10)) %>%
  group_by(volcano_name, country) %>%
  summarize(
    n = n(),
    sum_days = sum(eruption_days, na.rm = TRUE),
    # Return an ungrouped result instead of leaving a residual grouping.
    .groups = "drop"
  ) %>%
  # Row order matters here: with check_overlap = TRUE, labels drawn first
  # win, so the volcanoes with the most eruption days get labelled first.
  arrange(desc(sum_days)) %>%
  ggplot(aes(n, sum_days, color = country)) +
  geom_point() +
  geom_text(
    aes(label = volcano_name),
    hjust = 1,
    vjust = 1,
    check_overlap = TRUE
  ) +
  # NOTE(review): volcanoes whose eruption_days are all missing get
  # sum_days = 0, which is -Inf on a log scale - confirm whether they should
  # be filtered out beforehand.
  scale_y_log10(labels = scales::comma) +
  scale_x_log10() +
  labs(
    x = "# of eruptions",
    y = "# of eruption days in total"
  )
Working on tree_rings
# Reshape the two tree-ring measures into long form (one row per year and
# measure) so each can be drawn in its own facet.
tree_rings_long <- pivot_longer(
  tree_rings,
  cols = c(n_tree, europe_temp_index),
  names_to = "name",
  values_to = "value"
)

# Yearly series plus a smoothed trend, one stacked panel per measure. The
# legend is hidden because the facet strips already name the measures.
ggplot(tree_rings_long, aes(x = year, y = value, color = name)) +
  geom_line() +
  geom_smooth() +
  facet_wrap(~name, ncol = 1) +
  theme(legend.position = "none")
Working on sulfur
# Reshape the two sulfur columns (neem, wdc) into long form so each can be
# drawn in its own facet.
sulfur_long <- pivot_longer(
  sulfur,
  cols = c(neem, wdc),
  names_to = "name",
  values_to = "value"
)

# Yearly series on a log10 y axis, one stacked panel per column. The legend
# is hidden because the facet strips already name the columns.
ggplot(sulfur_long, aes(x = year, y = value, color = name)) +
  geom_line() +
  scale_y_log10() +
  facet_wrap(~name, ncol = 1) +
  labs(color = "variable") +
  theme(legend.position = "none")
Working on events
# Stacked bars of the 100 most frequent (volcano, event type) pairs.
events %>%
  # Strip everything from the first parenthesis on (and one space before it)
  # from the event type.
  mutate(event_type = str_remove(event_type, "\\s?\\(.+")) %>%
  count(volcano_name, event_type, sort = TRUE) %>%
  head(100) %>%
  # Order volcanoes by their total count across the retained event types.
  mutate(volcano_name = fct_reorder(volcano_name, n, .fun = sum)) %>%
  ggplot(aes(n, volcano_name, fill = event_type)) +
  geom_col() +
  labs(
    # n counts rows of the events table, i.e. events rather than eruptions.
    x = "# of events",
    y = "volcano",
    fill = "event type",
    title = "Volcanos and their event types"
  )