Malaria Data Visualization with World and Africa Maps included

Wed, Sep 8, 2021 5-minute read

The dataset for this blog post comes from TidyTuesday and describes the malaria situation around the world across different years. In developed countries the disease has largely been eliminated, yet in developing countries, especially in Africa, malaria is still prevalent in some regions. In this blog post, we dive into the data to evaluate how malaria has changed over the years.

library(tidyverse)
library(countrycode)
library(malariaAtlas)
theme_set(theme_bw())

Incidence Rate

# Read the TidyTuesday incidence table, shorten the two unwieldy column
# names, and let janitor normalise whatever names remain.
malaria_inc <- read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2018/2018-11-13/malaria_inc.csv") %>%
  rename(
    country = "Entity",
    incidence_per_thousand = "Incidence of malaria (per 1,000 population at risk) (per 1,000 population at risk)"
  ) %>%
  janitor::clean_names()

# Preview the first rows.
head(malaria_inc)
## # A tibble: 6 x 4
##   country     code   year incidence_per_thousand
##   <chr>       <chr> <dbl>                  <dbl>
## 1 Afghanistan AFG    2000              107.     
## 2 Afghanistan AFG    2005               46.5    
## 3 Afghanistan AFG    2010               23.9    
## 4 Afghanistan AFG    2015               23.6    
## 5 Algeria     DZA    2000                0.0377 
## 6 Algeria     DZA    2005                0.00202
# Names of the 30 entities with the highest mean incidence over all recorded
# years, ordered from highest to lowest mean.
top_30_countries <- malaria_inc %>%
  group_by(country) %>%
  summarize(mean_incidence = mean(incidence_per_thousand)) %>%
  arrange(desc(mean_incidence)) %>%
  head(n = 30) %>%
  pull(country)
# Horizontal bar chart of incidence per 1,000 for the entities listed in
# `top_30_countries`, one panel per year. Bars are ordered by each entity's
# mean incidence across years and annotated with the rounded value.
malaria_inc %>%
  filter(country %in% top_30_countries) %>%
  mutate(
    country = fct_reorder(country, incidence_per_thousand, mean)
  ) %>%
  ggplot(aes(incidence_per_thousand, country, fill = country)) +
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  geom_col(show.legend = FALSE) +
  geom_text(
    aes(label = round(incidence_per_thousand, 1), color = country),
    check_overlap = TRUE,
    hjust = 0.01
  ) +
  theme(
    strip.text = element_text(size = 12, face = "bold"),
    legend.position = "none"
  ) +
  facet_wrap(~year) +
  labs(
    x = "incidence per thousand",
    y = NULL,
    title = "Top 30 Countries with Highest Average Malaria Incidence Rate"
  )

# For each entity, take the spread between its highest and lowest recorded
# incidence, keep the 30 largest spreads, and draw them as labelled bars.
malaria_inc %>%
  group_by(country) %>%
  summarize(delta = diff(range(incidence_per_thousand))) %>%
  arrange(desc(delta)) %>%
  head(n = 30) %>%
  mutate(country = fct_reorder(country, delta)) %>%
  ggplot(aes(x = delta, y = country)) +
  geom_col() +
  geom_text(
    aes(label = round(delta, 1), color = country),
    hjust = 0.01,
    check_overlap = TRUE
  ) +
  theme(legend.position = "none") +
  labs(
    title = "Top 30 Countries in terms of the Largest Incidence Rate Change",
    x = "change in incidence per thousand",
    y = NULL
  )

The data points for Turkey look suspicious: the reported incidence in 2000 is 1,741 cases per 1,000 people at risk, i.e. more than one case per person. How should we interpret that?

# Print every row recorded for Turkey.
filter(malaria_inc, country == "Turkey")
## # A tibble: 4 x 4
##   country code   year incidence_per_thousand
##   <chr>   <chr> <dbl>                  <dbl>
## 1 Turkey  TUR    2000                  1741 
## 2 Turkey  TUR    2005                   296.
## 3 Turkey  TUR    2010                     0 
## 4 Turkey  TUR    2015                     0

The World Map

# One label anchor per map region: the mean longitude and latitude of the
# region's outline vertices.
mean_world_map <- map_data("world") %>%
  group_by(region) %>%
  summarise(
    avg_long = mean(long),
    avg_lat = mean(lat)
  ) %>%
  ungroup()


# World choropleth of malaria incidence, one panel per year, restricted to
# region/year rows with a positive incidence. Region names are printed in red
# at the per-region anchor points from `mean_world_map`.
# NOTE(review): the join matches on country name, so map regions spelled
# differently from the dataset get no data and are dropped - confirm coverage.
map_data("world") %>%
  left_join(malaria_inc, by = c("region" = "country")) %>%
  left_join(mean_world_map, by = "region") %>%
  # Also removes map regions without a match (their incidence is NA).
  filter(incidence_per_thousand > 0) %>%
  ggplot(aes(x = long, y = lat, group = group)) +
  geom_polygon(aes(fill = incidence_per_thousand)) +
  # Every row left after the filter has incidence > 0, so the region name is
  # used directly. The earlier `if_else(..., region, NULL)` was redundant, and
  # newer dplyr versions reject `false = NULL`.
  geom_text(
    aes(avg_long, avg_lat, label = region, group = group),
    color = "red",
    check_overlap = TRUE
  ) +
  labs(
    fill = "incidence per thousand",
    title = "The World Map with Incidence Rate among Countries Having Malaria"
  ) +
  theme_void() +
  theme(
    strip.text = element_text(size = 15)
  ) +
  facet_wrap(~year)

Death Rate

# Read the TidyTuesday malaria deaths table, shorten the rate and entity
# column names, and let janitor normalise the remaining names.
malaria_dea <- read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2018/2018-11-13/malaria_deaths.csv") %>%
  rename(
    country = Entity,
    death_per_100thousand = `Deaths - Malaria - Sex: Both - Age: Age-standardized (Rate) (per 100,000 people)`
  ) %>%
  janitor::clean_names()

# Preview the first rows.
head(malaria_dea)
## # A tibble: 6 x 4
##   country     code   year death_per_100thousand
##   <chr>       <chr> <dbl>                 <dbl>
## 1 Afghanistan AFG    1990                  6.80
## 2 Afghanistan AFG    1991                  6.97
## 3 Afghanistan AFG    1992                  6.99
## 4 Afghanistan AFG    1993                  7.09
## 5 Afghanistan AFG    1994                  7.39
## 6 Afghanistan AFG    1995                  7.41
# Print the names of the 30 entities with the highest mean death rate over
# all recorded years, highest first.
malaria_dea %>%
  group_by(country) %>%
  summarize(mean_death_rate = mean(death_per_100thousand)) %>%
  arrange(desc(mean_death_rate)) %>%
  head(n = 30) %>%
  pull(country)
##  [1] "Sierra Leone"                 "Burkina Faso"                
##  [3] "Uganda"                       "Equatorial Guinea"           
##  [5] "Cote d'Ivoire"                "Nigeria"                     
##  [7] "Niger"                        "Democratic Republic of Congo"
##  [9] "Burundi"                      "Mali"                        
## [11] "Western Sub-Saharan Africa"   "Cameroon"                    
## [13] "Mozambique"                   "Guinea"                      
## [15] "Central Sub-Saharan Africa"   "Liberia"                     
## [17] "Togo"                         "Malawi"                      
## [19] "Ghana"                        "Sub-Saharan Africa"          
## [21] "Benin"                        "Central African Republic"    
## [23] "Congo"                        "Tanzania"                    
## [25] "Low SDI"                      "Senegal"                     
## [27] "Gabon"                        "Rwanda"                      
## [29] "Eastern Sub-Saharan Africa"   "Zambia"

Worldwide malaria death in 1990

# World choropleth of the malaria death rate in 1990, restricted to regions
# with a positive rate. Region names are printed in red at the per-region
# anchor points from `mean_world_map`.
# NOTE(review): the join matches on country name, so map regions spelled
# differently from the dataset get no data and are dropped - confirm coverage.
map_data("world") %>%
  left_join(malaria_dea, by = c("region" = "country")) %>%
  left_join(mean_world_map, by = "region") %>%
  filter(year == 1990, death_per_100thousand > 0) %>%
  ggplot(aes(x = long, y = lat, group = group)) +
  geom_polygon(aes(fill = death_per_100thousand)) +
  # Every row left after the filter has a positive rate, so the region name is
  # used directly. The earlier `if_else(..., region, NULL)` was redundant, and
  # newer dplyr versions reject `false = NULL`.
  geom_text(
    aes(avg_long, avg_lat, label = region, group = group),
    color = "red",
    check_overlap = TRUE
  ) +
  labs(
    fill = "death per 100 thousand",
    title = "The World Map with Death Rate among Countries Having Malaria in 1990"
  ) +
  theme_void()

Worldwide malaria death in 2015

# World choropleth of the malaria death rate in 2015, restricted to regions
# with a positive rate. Region names are printed in red at the per-region
# anchor points from `mean_world_map`.
# NOTE(review): the join matches on country name, so map regions spelled
# differently from the dataset get no data and are dropped - confirm coverage.
map_data("world") %>%
  left_join(malaria_dea, by = c("region" = "country")) %>%
  left_join(mean_world_map, by = "region") %>%
  filter(year == 2015, death_per_100thousand > 0) %>%
  ggplot(aes(x = long, y = lat, group = group)) +
  geom_polygon(aes(fill = death_per_100thousand)) +
  # Every row left after the filter has a positive rate, so the region name is
  # used directly. The earlier `if_else(..., region, NULL)` was redundant, and
  # newer dplyr versions reject `false = NULL`.
  geom_text(
    aes(avg_long, avg_lat, label = region, group = group),
    color = "red",
    check_overlap = TRUE
  ) +
  labs(
    fill = "death per 100 thousand",
    title = "The World Map with Death Rate among Countries Having Malaria in 2015"
  ) +
  theme_void()

The African Continent

The idea and part of the code below are inspired by David Robinson’s code.

# Choropleth of the 1990 malaria death rate for African countries with a
# positive rate. The continent is derived from the ISO3 code via countrycode;
# the fill diverges from blue to red around 100 deaths per 100,000.
# NOTE(review): the join matches on country name, so map regions spelled
# differently from the dataset get no data and are dropped - confirm coverage.
map_data("world") %>%
  left_join(malaria_dea, by = c("region" = "country")) %>%
  left_join(mean_world_map, by = "region") %>%
  mutate(continent = countrycode(code, "iso3c", "continent")) %>%
  filter(continent == "Africa", year == 1990, death_per_100thousand > 0) %>%
  ggplot(aes(long, lat, group = group, fill = death_per_100thousand)) +
  geom_polygon() +
  scale_fill_gradient2(low = "blue", high = "red", midpoint = 100) +
  theme_void() +
  # Every row left after the filter has a positive rate, so the region name is
  # used directly. The earlier `if_else(..., region, NULL)` was redundant, and
  # newer dplyr versions reject `false = NULL`.
  geom_text(
    aes(avg_long, avg_lat, label = region, group = group),
    color = "red",
    check_overlap = TRUE
  ) +
  labs(fill = "death per 100 thousand", title = "African Malaria Deaths in 1990")

# Choropleth of the 2015 malaria death rate for African countries with a
# positive rate. The continent is derived from the ISO3 code via countrycode;
# the fill diverges from blue to red around 100 deaths per 100,000.
# NOTE(review): the join matches on country name, so map regions spelled
# differently from the dataset get no data and are dropped - confirm coverage.
map_data("world") %>%
  left_join(malaria_dea, by = c("region" = "country")) %>%
  left_join(mean_world_map, by = "region") %>%
  mutate(continent = countrycode(code, "iso3c", "continent")) %>%
  filter(continent == "Africa", year == 2015, death_per_100thousand > 0) %>%
  ggplot(aes(long, lat, group = group, fill = death_per_100thousand)) +
  geom_polygon() +
  scale_fill_gradient2(low = "blue", high = "red", midpoint = 100) +
  theme_void() +
  # Every row left after the filter has a positive rate, so the region name is
  # used directly. The earlier `if_else(..., region, NULL)` was redundant, and
  # newer dplyr versions reject `false = NULL`.
  geom_text(
    aes(avg_long, avg_lat, label = region, group = group),
    color = "red",
    check_overlap = TRUE
  ) +
  labs(fill = "death per 100 thousand", title = "African Malaria Deaths in 2015")

Malaria in China

# Download the parasite-rate survey points for China from the Malaria Atlas
# Project and keep only rows with a recorded rate (`pr`).
# `as_tibble()` replaces the deprecated `tbl_df()`.
chn_pr <- as_tibble(malariaAtlas::getPR(ISO = "CHN", species = "BOTH")) %>%
  filter(!is.na(pr))
# Plot the survey points over China's outline, coloured by parasite rate,
# with one panel per decade of the survey start year.
chn_pr %>%
  mutate(decade = floor(year_start / 10) * 10) %>%
  ggplot(aes(x = longitude, y = latitude, color = pr)) +
  borders("world", regions = "China") +
  geom_point() +
  facet_wrap(~decade) +
  theme_void() +
  coord_map() +
  scale_color_continuous(labels = scales::percent) +
  labs(
    title = "Malaria in China",
    color = "Prevalence"
  )