# Load every package used by the analysis through pacman::p_load().
pacman::p_load(
  tidyverse,
  glue,
  scales,
  showtext,
  ggtext,
  shadowtext,
  maps,
  ggpattern,
  ggrepel,
  patchwork,
  tidylog
)

# Register the "DM Sans" Google font (regular 400 / bold 700), turn on
# showtext rendering, and set showtext's dpi option to 300.
font_add_google("DM Sans", "DM Sans", regular.wt = 400, bold.wt = 700)
showtext_auto()
showtext_opts(dpi = 300)
About the Data
This week we are exploring the Billboard Hot 100 Number Ones Database. This workbook contains substantial data about every song to ever top the Billboard Hot 100 between August 4, 1958 and January 11, 2025. It was compiled by Chris Dalla Riva as he wrote the book Uncharted Territory: What Numbers Tell Us about the Biggest Hit Songs and Ourselves. It also often powers his newsletter Can’t Get Much Higher.
7 years ago, I decided that I was going to listen to every number one hit. Along the way, I tracked an absurd amount of information about each song. Using that information, I wrote a data-driven history of popular music covering 1958 through today.
1 Initializing
1.1 Load libraries
1.2 Set theme
# Cool-gray palette, index 0 to 5.
cool_gray0 <- "#323955"
cool_gray1 <- "#5a6695"
cool_gray2 <- "#7e89bb"
cool_gray3 <- "#a4aee2"
cool_gray4 <- "#cbd5ff"
cool_gray5 <- "#e7efff"

# Cool-red palette, index 0 to 5.
cool_red0 <- "#A31C44"
cool_red1 <- "#F01B5B"
cool_red2 <- "#F43E75"
cool_red3 <- "#E891AB"
cool_red4 <- "#FAC3D3"
cool_red5 <- "#FCE0E8"
# Set the default ggplot2 theme for every plot below: theme_minimal() with
# the grid and plot background blanked, a negative tick length, the legend
# on top, and "DM Sans" text coloured from the cool-gray palette.
theme_set(
  theme_minimal() +
    theme(
      # axis.line.x.bottom = element_line(color = 'cool_gray0', linewidth = .3),
      # axis.ticks.x= element_line(color = 'cool_gray0', linewidth = .3),
      # axis.line.y.left = element_line(color = 'cool_gray0', linewidth = .3),
      # axis.ticks.y= element_line(color = 'cool_gray0', linewidth = .3),
      # # panel.grid = element_line(linewidth = .3, color = 'grey90'),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      axis.ticks.length = unit(-0.15, "cm"),
      plot.background = element_blank(),
      # plot.title.position = "plot",
      plot.title = element_text(family = "DM Sans", size = 14, face = 'bold'),
      plot.caption = element_text(
        size = 8,
        color = cool_gray3,
        margin = margin(20, 0, 0, 0),
        hjust = 0
      ),
      plot.subtitle = element_text(
        size = 9,
        lineheight = 1.15,
        margin = margin(5, 0, 15, 0)
      ),
      axis.title.x = element_markdown(
        family = "DM Sans",
        hjust = .5,
        size = 8,
        color = cool_gray1
      ),
      axis.title.y = element_markdown(
        family = "DM Sans",
        hjust = .5,
        size = 8,
        color = cool_gray1
      ),
      axis.text = element_text(
        family = "DM Sans",
        hjust = .5,
        size = 8,
        color = cool_gray1
      ),
      legend.position = "top",
      # Last live argument: no trailing comma, so theme() never receives an
      # empty argument.
      text = element_text(family = "DM Sans", color = cool_gray1)
      # plot.margin = margin(25, 25, 25, 25)
    )
)
1.3 Load this week’s data
# Fetch the TidyTuesday release dated 2025-08-26; later code reads its
# `billboard` element.
tuesdata <- tidytuesdayR::tt_load("2025-08-26")
2 Quick Exploratory Data Analysis
2.1 Top 10 artists
11 actually, because of ties
# Horizontal bar chart of the artists with the most rows in `billboard`.
# slice_max() keeps ties by default, so more than 10 bars can appear.
tuesdata$billboard |>
  count(artist) |>
  slice_max(n, n = 10) |>
  # Order the bars by count rather than alphabetically.
  mutate(artist = fct_reorder(artist, n)) |>
  ggplot(aes(y = artist, x = n)) +
  geom_col(fill = cool_gray1) +
  labs(
    x = NULL,
    y = NULL,
    title = "Top 10 Artists"
  )
2.2 Top 10 topics
11 actually, because of ties
# Horizontal bar chart of the most frequent lyrical topics.
# `lyrical_topic` holds ";"-separated values, so each topic is split onto
# its own row before counting. slice_max() keeps ties by default, so more
# than 10 bars can appear.
tuesdata$billboard |>
  select(lyrical_topic) |>
  separate_rows(lyrical_topic, sep = ";") |>
  count(lyrical_topic) |>
  slice_max(n, n = 10) |>
  # Order the bars by count rather than alphabetically.
  mutate(lyrical_topic = fct_reorder(lyrical_topic, n)) |>
  ggplot(aes(y = lyrical_topic, x = n)) +
  geom_col(fill = cool_gray1) +
  labs(
    x = NULL,
    y = NULL,
    title = "Top 10 Topics"
  )
3 Transform Data for Plotting
# Character vector of the 20 most frequent lyrical topics (ties kept).
# Topics are split on ";" first and missing topics are dropped.
top_lyrical_topic <-
  tuesdata$billboard |>
  separate_rows(lyrical_topic, sep = ";") |>
  count(lyrical_topic) |>
  filter(!is.na(lyrical_topic)) |>
  slice_max(n, n = 20) |>
  pull(lyrical_topic) |>
  unique()
# One row per (lyrical topic, year) occurrence, restricted to the topics in
# `top_lyrical_topic`. The year is extracted from the `date` column.
data2plot <-
  tuesdata$billboard |>
  mutate(year = year(date)) |>
  select(lyrical_topic, year) |>
  separate_rows(lyrical_topic, sep = ";") |>
  filter(lyrical_topic %in% top_lyrical_topic)
4 Time to plot!
4.1 Raw chart
# First look at the data: geom_count() of topic by year, drawn with
# theme_gray() instead of the custom theme set earlier.
data2plot |>
  ggplot(aes(y = lyrical_topic, x = year)) +
  geom_count() +
  theme_gray()
4.2 Final chart
library(ggridges)
# Final chart: one ridgeline per lyrical topic over the years, drawn
# white-on-black.
data2plot |>
  ggplot(aes(y = lyrical_topic, x = year)) +
  ggridges::geom_density_ridges(color = "white", fill = "black", scale = 3) +
  labs(
    x = NULL,
    y = NULL,
    title = "Lyrical Topics Popularity ",
    subtitle = "in Billboard Hot 100",
    # The SOURCE date matches the dataset loaded with tt_load("2025-08-26").
    caption = paste0(
      "Displaying relative popularity for each lyrical topic (not absolute numbers)\n\n",
      str_wrap(
        "NOTE This visualization offers a preliminary look at the data and may not capture the full complexity of the underlying reality. SOURCE #Tidytuesday 2025-08-26 GITHUB barreiro-r",
        width = 70
      )
    )
  ) +
  scale_x_continuous(
    # Five evenly spaced breaks from the first to the last year; the spacing
    # is fractional, so labels are rounded to whole years.
    breaks = seq(min(data2plot$year), max(data2plot$year), length.out = 5),
    labels = round
  ) +
  theme(
    plot.background = element_rect(fill = "black"),
    text = element_text(color = "white"),
    axis.text = element_text(color = "white"),
    axis.text.x = element_text(color = "white", margin = margin(20, 0, 0, 0)),
    plot.margin = margin(50, 50, 50, 50),
    plot.title = element_text(
      hjust = .5,
      margin = margin(0, 0, 5, 0)
    ),
    plot.subtitle = element_text(
      hjust = .5,
      margin = margin(0, 0, 30, 0)
    ),
    plot.caption = element_text(
      color = "grey40",
      hjust = .5,
      margin = margin(20, 0, 0, 0)
    )
  )