Media Franchise Data Visualization
Thu, Nov 11, 2021
3-minute read
This blog post analyzes media franchise revenue using data from the TidyTuesday data science online learning community. You can get the data from this link.
library(tidyverse)
library(tidytext)
library(scales)
# Download the media franchise data set from the TidyTuesday repository.
media <- read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-07-02/media_franchises.csv")
# Title-case the two categorical text columns so their labels read
# consistently in the plots below.
media <- mutate(
  media,
  original_media = str_to_title(original_media),
  revenue_category = str_to_title(revenue_category)
)
# Print the tibble to preview its contents.
media
## # A tibble: 321 x 7
## franchise revenue_category revenue year_created original_media creators
## <chr> <chr> <dbl> <dbl> <chr> <chr>
## 1 A Song of I~ Book Sales 0.9 1996 Novel George R.~
## 2 A Song of I~ Box Office 0.001 1996 Novel George R.~
## 3 A Song of I~ Home Video/Enter~ 0.28 1996 Novel George R.~
## 4 A Song of I~ Tv 4 1996 Novel George R.~
## 5 A Song of I~ Video Games/Games 0.132 1996 Novel George R.~
## 6 Aladdin Box Office 0.76 1992 Animated Film Walt Disn~
## 7 Aladdin Home Video/Enter~ 1 1992 Animated Film Walt Disn~
## 8 Aladdin Merchandise, Lic~ 0.5 1992 Animated Film Walt Disn~
## 9 Aladdin Music 0.447 1992 Animated Film Walt Disn~
## 10 Aladdin Video Games/Games 2.2 1992 Animated Film Walt Disn~
## # ... with 311 more rows, and 1 more variable: owners <chr>
Revenue for all franchises from various categories
# Stacked bars of revenue per franchise, filled by revenue category and
# faceted by the franchise's original media type.
media %>%
  mutate(
    # Keep the 9 most frequent original media types; lump the rest together.
    original_media = fct_lump(original_media, n = 9),
    # Order franchises by their summed revenue.
    franchise = fct_reorder(franchise, revenue, sum, na.rm = TRUE),
    # Order the facets by descending summed revenue (note the negated value).
    # na.rm is forwarded to sum() here too, matching the franchise ordering.
    original_media = fct_reorder(original_media, -revenue, sum, na.rm = TRUE)
  ) %>%
  ggplot(aes(revenue, franchise, fill = revenue_category)) +
  geom_col() +
  # Each facet gets its own y axis so it lists only its own franchises.
  facet_wrap(~original_media, scales = "free_y") +
  scale_x_continuous(labels = dollar) +
  labs(
    x = "revenue (in billions)",
    fill = "revenue category",
    title = "Revenue of All Franchises"
  ) +
  # Place the legend inside the plotting area at these relative coordinates.
  theme(legend.position = c(0.7, 0.15))
Total revenue for the top franchises
# Stacked bars of revenue for the highest-grossing franchises, with each
# franchise's rounded total printed at the end of its bar.
media %>%
  group_by(franchise) %>%
  # Attach the franchise-wide total to every row of that franchise.
  mutate(total_revenue = sum(revenue, na.rm = TRUE)) %>%
  ungroup() %>%
  arrange(desc(total_revenue)) %>%
  # NOTE(review): this keeps the first 150 *rows*, not 150 franchises, so the
  # last franchise kept may lose some of its category rows -- confirm intent.
  head(150) %>%
  mutate(
    # Append the creation year to the axis label, e.g. "Aladdin (1992)".
    franchise = paste0(franchise, " (", year_created, ")"),
    # Order by summed revenue, ignoring NAs like total_revenue above.
    franchise = fct_reorder(franchise, revenue, sum, na.rm = TRUE)
  ) %>%
  ggplot(aes(revenue, franchise)) +
  geom_col(aes(fill = revenue_category)) +
  # One label is emitted per row; check_overlap drops the duplicates that
  # would be drawn on top of each other.
  geom_text(
    aes(
      x = total_revenue,
      y = franchise,
      label = paste0(round(total_revenue), "B")
    ),
    hjust = 0,
    check_overlap = TRUE,
    color = "grey40"
  ) +
  scale_x_continuous(labels = dollar) +
  labs(
    x = "revenue (billions)",
    # The fill aesthetic is mapped to revenue_category, so title it as such.
    fill = "revenue category",
    title = "Total Revenue of the Top Franchises"
  )
Top owners’ revenue from multiple categories
# Heat map of total revenue per (owner, revenue category) for the 8 most
# frequent owners.
media %>%
  mutate(
    # Drop everything from the first " (" onwards in the owner name.
    owners = str_remove(owners, " \\(.+"),
    # NOTE(review): pattern and replacement look identical as written;
    # presumably the pattern was a double or non-breaking space -- confirm.
    owners = str_replace(owners, " ", " ")
  ) %>%
  # Keep the 8 most frequent owners, then discard the lumped "Other" level.
  mutate(owners = fct_lump(owners, n = 8)) %>%
  filter(owners != "Other") %>%
  group_by(revenue_category, owners) %>%
  summarize(total_revenue = sum(revenue)) %>%
  ungroup() %>%
  # Order owners on the x axis by descending summed revenue.
  mutate(owners = fct_reorder(owners, -total_revenue, sum)) %>%
  ggplot(aes(owners, revenue_category, fill = total_revenue)) +
  geom_tile() +
  scale_fill_gradient2(
    low = "red",
    high = "green",
    mid = "pink",
    midpoint = 20,
    # Spell out `labels`; `label` only worked via partial argument matching.
    labels = dollar
  ) +
  theme(
    panel.grid = element_blank(),
    # Rotate owner names so they do not overlap.
    axis.text.x = element_text(angle = 90)
  ) +
  labs(
    y = "revenue category",
    fill = "revenue",
    title = "Revenue of Top 8 Owners from Various Categories"
  )
The relation between original media and revenue category
# For each original media type with more than 20 (billion) in total revenue,
# show how that revenue splits across revenue categories.
media %>%
  group_by(original_media) %>%
  mutate(total_revenue = sum(revenue)) %>%
  # Keep only original media types whose summed revenue exceeds 20.
  filter(total_revenue > 20) %>%
  ungroup() %>%
  group_by(original_media, revenue_category) %>%
  summarize(revenue = sum(revenue)) %>%
  ungroup() %>%
  mutate(
    # Order categories by revenue independently within each facet; paired
    # with scale_y_reordered() below to clean up the axis labels.
    revenue_category = reorder_within(revenue_category, revenue, original_media),
    # Order the facets by descending summed revenue.
    original_media = fct_reorder(original_media, -revenue, sum)
  ) %>%
  ggplot(aes(revenue, revenue_category, fill = revenue_category)) +
  # The fill only distinguishes bars, so the legend is suppressed.
  geom_col(show.legend = FALSE) +
  facet_wrap(~original_media, scales = "free_y") +
  scale_y_reordered() +
  scale_x_continuous(labels = dollar) +
  labs(
    y = "revenue category",
    title = "Where does original media make revenue?"
  )
Year vs. revenue
# Scatter plot of each franchise's total revenue against its creation year,
# with franchise names as point labels.
media %>%
  group_by(year_created, franchise) %>%
  mutate(total_revenue = sum(revenue)) %>%
  # Collapse to one row per franchise, keeping the remaining columns.
  distinct(franchise, .keep_all = TRUE) %>%
  ungroup() %>%
  ggplot(aes(year_created, total_revenue, color = franchise)) +
  geom_point() +
  # Labels sit below-left of their point; overlapping labels are dropped.
  geom_text(
    aes(label = franchise),
    vjust = 1,
    hjust = 1,
    check_overlap = TRUE
  ) +
  # Make sure the x axis extends to 1920.
  expand_limits(x = 1920) +
  scale_y_continuous(labels = dollar) +
  theme(
    # One colour per franchise would make the legend unreadably long.
    legend.position = "none",
    panel.grid = element_blank()
  ) +
  labs(
    x = "year created",
    y = "revenue (in billions)",
    title = "Franchise Total Revenue and Year Created"
  )