The xkcd Color Survey Results

circular
scatter
Author

Rodrigo Barreiro

Published

July 8, 2025

Tip

If this code was useful to you, please consider giving it a star.

About the Data

Note

Check the data in TidyTuesday GitHub repository.

In 2010, the xkcd Color Survey asked hundreds of thousands of people to name colors they saw, revealing the different ways in which people perceive and label colors.

Color is a really fascinating topic, especially since we’re taught so many different and often contradictory ideas about rainbows, different primary colors, and frequencies of light.

1 Initializing

1.1 Load libraries

library(tidyverse)
library(glue)
library(scales)
library(showtext)
library(ggtext)
library(shadowtext)
library(maps)
library(ggpattern)
library(ggrepel)
library(patchwork)

# Register the Google font "Ubuntu" under the family name "Ubuntu" (regular
# weight 400, bold weight 700), switch showtext on, and set its dpi to 300.
font_add_google(
  name = "Ubuntu",
  family = "Ubuntu",
  regular.wt = 400,
  bold.wt = 700
)
showtext_auto(enable = TRUE)
showtext_opts(dpi = 300)

1.2 Set theme

# Palette: cool grays, numbered 0 to 5 ----
cool_gray0 <- "#323955"
cool_gray1 <- "#5a6695"
cool_gray2 <- "#7e89bb"
cool_gray3 <- "#a4aee2"
cool_gray4 <- "#cbd5ff"
cool_gray5 <- "#e7efff"

# Palette: cool reds, numbered 0 to 5 ----
cool_red0 <- "#A31C44"
cool_red1 <- "#F01B5B"
cool_red2 <- "#F43E75"
cool_red3 <- "#E891AB"
cool_red4 <- "#FAC3D3"
cool_red5 <- "#FCE0E8"

# Default theme for every plot created afterwards: theme_minimal() with the
# grid removed, Ubuntu text in cool_gray1 and the legend placed on top.
theme_set(
  theme_minimal() +
    theme(
      # Global text and layout
      text = element_text(family = "Ubuntu", color = cool_gray1),
      legend.position = "top",
      plot.background = element_blank(),
      plot.margin = margin(t = 25, r = 25, b = 25, l = 25),

      # Panel: no grid lines at all
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),

      # Axes: a negative length makes the ticks point into the panel
      axis.ticks.length = unit(-0.15, "cm"),
      axis.text = element_text(
        family = "Ubuntu",
        size = 8,
        hjust = .5,
        color = cool_gray1
      ),
      axis.title.x = element_markdown(
        family = "Ubuntu",
        size = 8,
        hjust = .5,
        color = cool_gray1
      ),
      axis.title.y = element_markdown(
        family = "Ubuntu",
        size = 8,
        hjust = .5,
        color = cool_gray1
      ),

      # Title, subtitle and caption
      plot.title.position = "plot",
      plot.title = element_text(family = "Ubuntu", size = 18, face = "bold"),
      plot.subtitle = element_text(
        size = 9,
        lineheight = 1.15,
        margin = margin(t = 5, r = 0, b = 15, l = 0)
      ),
      plot.caption = element_text(
        size = 8,
        color = cool_gray4,
        hjust = 0,
        margin = margin(t = 20, r = 0, b = 0, l = 0)
      )
    )
)

1.3 Load this week’s data

# Download the three survey tables from the TidyTuesday repository.
answers <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-07-08/answers.csv"
)
color_ranks <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-07-08/color_ranks.csv"
)
users <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-07-08/users.csv"
)

2 Data analysis

How many answers per user?

# Count answers per user, then tabulate how many users share each count.
answers |>
  count(user_id, sort = TRUE, name = "answers") |>
  count(answers, sort = TRUE)
# A tibble: 183 × 2
   answers     n
     <int> <int>
 1       1 17141
 2       2 16036
 3       3 14144
 4       4 12141
 5       5 10126
 6       6  8234
 7       7  6853
 8       8  5709
 9       9  4724
10      10  3927
# ℹ 173 more rows

Very sparse.

How often was the same color classified?

# Count answers per hex code, then tabulate how many hex codes share each count.
answers |>
  count(hex, sort = TRUE, name = "answers") |>
  count(answers, sort = TRUE)
# A tibble: 5 × 2
  answers      n
    <int>  <int>
1       1 965244
2       2  44042
3       3   1566
4       4     45
5       5      1

3 Transform Data for Plotting

library(plotwidgets)

# Answers from users with spam_prob < 0.001, joined to the ranked colours and
# extended with the HSL components of both the shown colour (h, s, l) and the
# ranked colour (h_rank, s_rank, l_rank).
# `purrr::map` is spelled out because `maps` is attached after the tidyverse
# and masks `map`.
data2plot <-
  answers |>
  filter(user_id %in% pull(filter(users, spam_prob < 0.001), user_id)) |>
  left_join(
    color_ranks |>
      rename(rank_hex = hex) |>
      mutate(hsl_rank = purrr::map(rank_hex, col2hsl)),
    by = "rank"
  ) |>
  mutate(hsl = purrr::map(hex, col2hsl)) |>
  hoist(hsl, h = 1, s = 2, l = 3) |>
  hoist(hsl_rank, h_rank = 1, s_rank = 2, l_rank = 3)


# One row per ranked colour (restricted to ranks present in data2plot) with
# its own HSL components, used for the labelled markers.
data2plot2 <-
  color_ranks |>
  mutate(hsl_rank = purrr::map(hex, col2hsl)) |>
  hoist(hsl_rank, h = 1, s = 2, l = 3) |>
  filter(rank %in% data2plot$rank)

4 Time to plot!

4.1 Before

# Rows shared by both panels: answers joined to their user record, keeping
# only rows with `colorblind == 0` and `spam_prob < 0.001`.
plot_answers <-
  data2plot |>
  left_join(users, by = "user_id") |>
  filter(colorblind == 0, spam_prob < 0.001)

#' Build the polar layers shared by the main and the faceted panel
#'
#' Plots `h` on the angular axis and `-l` on the radial axis (limits -1 to 0),
#' one tiny point per answer coloured by its own hex code, plus a labelled
#' marker with a white halo for every row of `markers`.
#'
#' @param points Data frame of answers; must contain `h`, `l` and `hex`.
#' @param markers Data frame of ranked colours; must contain `h`, `l`, `hex`
#'   and `color` (the label text).
#' @param marker_size Point size of the coloured marker; the white halo behind
#'   it is drawn one size unit larger.
#' @param label_size Text size of the repelled labels.
#' @return A ggplot object that further layers, facets or labels can be added to.
build_hue_wheel <- function(points, markers, marker_size, label_size) {
  ggplot(points, aes(x = h, y = -l)) +
    # `yend` is given explicitly: ggplot2 releases before 3.5.0 require it.
    annotate(
      geom = "segment",
      x = 0,
      xend = Inf,
      y = 0,
      yend = 0,
      color = cool_gray5
    ) +
    geom_point(aes(color = hex), size = 0.001) +
    # White halo first, coloured marker on top of it.
    geom_point(data = markers, color = "white", size = marker_size + 1) +
    geom_point(data = markers, aes(color = hex), size = marker_size) +
    geom_text_repel(
      data = markers,
      aes(label = str_to_sentence(color)),
      family = "Ubuntu",
      color = cool_gray0,
      size = label_size,
      nudge_x = 5,
      nudge_y = .1,
      direction = "y",
      hjust = "center",
      bg.color = "white",
      segment.size = 0,
      bg.r = 0.2
    ) +
    scale_color_identity() +
    scale_y_continuous(limits = c(-1, 0)) +
    coord_polar() +
    theme(
      axis.text.x = element_blank(),
      axis.text.y = element_blank(),
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      plot.margin = margin(0, 0, 0, 0)
    )
}

# Main panel: every answer on one wheel, with title, subtitle and caption.
p1 <-
  build_hue_wheel(
    plot_answers,
    data2plot2,
    marker_size = 4.5,
    label_size = 3
  ) +
  labs(
    title = "Hue Knew? Your Purple Isn't Mine.",
    subtitle = str_wrap(
      "In 2010, the xkcd Color Survey asked hundreds of thousands of people to name colors they saw, revealing the different ways in which people perceive and label colors.",
      width = 90
    ),
    y = NULL,
    x = NULL,
    caption = str_wrap(
      "NOTE This visualization offers a preliminary look at the data and may not capture the full complexity of the underlying reality. SOURCE #Tidytuesday 2025-07-08 GITHUB barreiro-r",
      width = 110
    )
  )

# Faceted panel: the same wheel split into one small facet per colour name,
# with smaller markers and labels and the strip titles hidden.
p2 <-
  build_hue_wheel(
    plot_answers,
    data2plot2,
    marker_size = 2.5,
    label_size = 2
  ) +
  facet_wrap(~color, nrow = 2) +
  theme(strip.text = element_blank())

# Plot both side by side (patchwork)
p1 + p2