pacman::p_load(
tidyverse,
glue,
scales,
showtext,
ggtext,
shadowtext,
maps,
ggpattern,
ggrepel,
patchwork,
tidylog
)
# Register the "Ubuntu" family from Google Fonts with its regular (400) and
# bold (700) weights.
font_add_google(
  name = "Ubuntu",
  family = "Ubuntu",
  regular.wt = 400,
  bold.wt = 700
)

# Enable showtext rendering and set its resolution option to 300 dpi.
showtext_auto(enable = TRUE)
showtext_opts(dpi = 300)

# About the Data
This week we are exploring type 2 diabetes data from the Pima Indian community near Phoenix, Arizona. The study includes only women aged 21 and older, all of Pima heritage, with at least five years of follow-up. Each participant underwent regular oral glucose tolerance tests, and diabetes was diagnosed using WHO criteria.
People with type 2 diabetes mellitus (DM) become less sensitive to insulin. After a glucose load, both blood glucose and insulin levels rise, but glucose does not fall as quickly as it should—leading to sustained elevations. The incidence of type 2 DM is rising in many Western cultures, as increasingly unhealthy and calorie-rich diets become common.
1 Initializing
1.1 Load libraries
1.2 Set theme
# Colour palette shared by the theme and the charts below.

# Blue-gray ramp: index 0 is the darkest shade, index 5 the lightest.
cool_gray0 <- "#323955"
cool_gray1 <- "#5a6695"
cool_gray2 <- "#7e89bb"
cool_gray3 <- "#a4aee2"
cool_gray4 <- "#cbd5ff"
cool_gray5 <- "#e7efff"

# Red/pink ramp, indexed the same way as the gray ramp.
cool_red0 <- "#A31C44"
cool_red1 <- "#F01B5B"
cool_red2 <- "#F43E75"
cool_red3 <- "#E891AB"
cool_red4 <- "#FAC3D3"
cool_red5 <- "#FCE0E8"
# Global ggplot2 theme: theme_minimal() with grid lines and plot background
# removed, Ubuntu text coloured cool_gray1, markdown-enabled subtitle, caption
# and axis titles, and the legend placed on top.
theme_set(
  theme_minimal() +
    theme(
      # Disabled axis-line / tick / grid settings, kept for reference.
      # axis.line.x.bottom = element_line(color = 'cool_gray0', linewidth = .3),
      # axis.ticks.x= element_line(color = 'cool_gray0', linewidth = .3),
      # axis.line.y.left = element_line(color = 'cool_gray0', linewidth = .3),
      # axis.ticks.y= element_line(color = 'cool_gray0', linewidth = .3),
      # # panel.grid = element_line(linewidth = .3, color = 'grey90'),

      # Base text used by every element that does not override it.
      text = element_text(family = "Ubuntu", color = cool_gray1),

      # Panel, background and outer margin.
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      plot.background = element_blank(),
      plot.margin = margin(t = 25, r = 25, b = 25, l = 25),

      # Title block; the title is aligned to the whole plot.
      plot.title.position = "plot",
      plot.title = element_text(family = "Ubuntu", face = "bold", size = 14),
      plot.subtitle = element_markdown(
        size = 9,
        lineheight = 1.15,
        margin = margin(t = 5, r = 0, b = 15, l = 0)
      ),
      plot.caption = element_markdown(
        size = 8,
        color = cool_gray3,
        hjust = 0,
        margin = margin(t = 20, r = 0, b = 0, l = 0)
      ),

      # Axes: titles are rendered as markdown, tick labels as plain text.
      axis.ticks.length = unit(-0.15, "cm"),
      axis.title.x = element_markdown(
        family = "Ubuntu",
        color = cool_gray1,
        size = 8,
        hjust = 0.5
      ),
      axis.title.y = element_markdown(
        family = "Ubuntu",
        color = cool_gray1,
        size = 8,
        hjust = 0.5
      ),
      axis.text = element_text(
        family = "Ubuntu",
        color = cool_gray1,
        size = 8,
        hjust = 0.5
      ),

      legend.position = "top"
    )
)

# 1.3 Load this week’s data
# Fetch the TidyTuesday release dated 2025-11-11; the result is a list whose
# `diabetes` element is used throughout the rest of the script.
tuesdata <- tidytuesdayR::tt_load("2025-11-11")

# 2 Quick Exploratory Data Analysis
2.1 Completeness
# Completeness check: for every variable, count how many values are missing
# versus present and draw the counts as bars filled by missingness.
tuesdata$diabetes |>
  mutate(id = row_number()) |>
  # Values are cast to character so columns of different types can share one
  # `value` column; "name" / "value" are pivot_longer()'s default column names.
  pivot_longer(cols = -id, values_transform = as.character) |>
  group_by(name) |>
  count(is_na = is.na(value)) |>
  ggplot(mapping = aes(x = n, y = name, fill = is_na)) +
  geom_col()
# Exploratory view: per-variable z-scores as a beeswarm split by 5-year
# diabetes outcome, with each group's mean overlaid as a white-rimmed point.
tuesdata$diabetes |>
  mutate(id = row_number()) |>
  pivot_longer(cols = -c(id, diabetes_5y)) |>
  group_by(name) |>
  # Standardise within each variable so all variables share one y axis.
  mutate(
    zscore = (value - mean(value, na.rm = TRUE)) / sd(value, na.rm = TRUE)
  ) |>
  ggplot(mapping = aes(x = name, y = zscore)) +
  ggbeeswarm::geom_quasirandom(
    mapping = aes(color = diabetes_5y),
    alpha = 0.1,
    size = 0.5,
    dodge.width = 0.7
  ) +
  # The dodge width matches the beeswarm so means sit over their own group.
  stat_summary(
    mapping = aes(fill = diabetes_5y),
    fun = mean,
    geom = "point",
    position = position_dodge(width = 0.7),
    shape = 21,
    size = 3,
    color = "white"
  )
# Correlation matrix of every column except the outcome, computed on
# pairwise-complete observations and drawn with ggcorrplot.
tuesdata$diabetes |>
  select(!diabetes_5y) |>
  cor(use = "pairwise.complete.obs") |>
  ggcorrplot::ggcorrplot()
3 Transform Data for Plotting
# Long-format plotting data: one row per (participant id, variable), with the
# raw `value` and its z-score computed within each variable.
data2plot <-
  tuesdata$diabetes |>
  mutate(id = row_number()) |>
  pivot_longer(cols = -c(id, diabetes_5y)) |>
  group_by(name) |>
  mutate(
    zscore = (value - mean(value, na.rm = TRUE)) / sd(value, na.rm = TRUE)
  ) |>
  ungroup()
# Per-variable, per-outcome means: `mean` is the mean z-score (used for
# positioning on the shared x axis) and `mean_original` is the mean on the
# variable's original scale (used for the printed labels).
means_data2plot <-
  data2plot |>
  group_by(name, diabetes_5y) |>
  summarise(
    mean = mean(zscore, na.rm = TRUE),
    mean_original = mean(value, na.rm = TRUE),
    # Return an ungrouped tibble; without this the result stays grouped by
    # `name` and summarise() emits a regrouping message.
    .groups = "drop"
  )
# Rank variables by the absolute gap between the two outcome groups' mean
# z-scores, smallest gap first.
my_order <- means_data2plot |>
  select(name, diabetes_5y, mean) |>
  # One row per variable, with one column per outcome level (`neg`, `pos`).
  pivot_wider(names_from = diabetes_5y, values_from = mean) |>
  mutate(delta = abs(neg - pos)) |>
  arrange(delta) |>
  pull(name)

# Reversed order as factor levels, so the largest gap becomes the first level.
data2plot <- data2plot |>
  mutate(name = factor(name, levels = rev(my_order)))
# Markdown display labels keyed by raw column name; used as facet titles.
pretty_names <- c(
  pregnancy_num = "**Number of Pregnancies**",
  `glucose_mg-dl` = "**Glucose** (mg/dL)",
  `dbp_mm-hg` = "**Diastolic Blood Pressure** (mmHg)",
  triceps_mm = "**Triceps Skinfold** (mm)",
  `insulin_microiu-ml` = "**Insulin** (microU/mL)",
  bmi = "**Body Mass Index**",
  pedigree = "**Pedigree**",
  age = "**Age** (years)"
)

# Inspect the labels in `my_order` order (console output kept from the
# original document).
pretty_names[my_order]
#> dbp_mm-hg pedigree
#> "**Diastolic Blood Pressure** (mmHg)" "**Pedigree**"
#> pregnancy_num age
#> "**Number of Pregnancies**" "**Age** (years)"
#> triceps_mm insulin_microiu-ml
#> "**Triceps Skinfold** (mm)" "**Insulin** (microU/mL)"
#> bmi glucose_mg-dl
#> "**Body Mass Index**" "**Glucose** (mg/dL)"
4 Time to plot!
4.1 Raw chart
# Raw chart: semi-transparent z-score densities per outcome group, one facet
# per variable in two columns.
data2plot |>
  ggplot(mapping = aes(x = zscore)) +
  # NOTE(review): `color = NULL` presumably leaves the default outline in
  # place; if no outline is intended, `NA` would be the usual value - confirm.
  geom_density(
    mapping = aes(fill = diabetes_5y),
    color = NULL,
    alpha = 0.2
  ) +
  facet_wrap(facets = ~name, ncol = 2)
4.2 Final chart
# Final chart: blended z-score densities per outcome group, faceted by
# variable (largest between-group gap first), with each group's mean marked at
# the top of the panel and labelled with the mean on the original scale.
data2plot |>
  ggplot(aes(x = zscore)) +
  # `|>` binds tighter than `+`, so only the density layer is passed to
  # ggblend::blend(); the remaining layers are added normally.
  geom_density(aes(fill = diabetes_5y), linewidth = 0, show.legend = FALSE) |>
    ggblend::blend("multiply") +
  # Connector between the two group means of each variable, drawn at y = 1.
  geom_line(
    data = means_data2plot,
    aes(group = name, x = mean),
    y = 1,
    color = cool_gray4
  ) +
  geom_point(
    data = means_data2plot,
    aes(color = diabetes_5y, x = mean),
    y = 1,
    show.legend = FALSE
  ) +
  # Label for the "pos" group: left-aligned, nudged right of its mean.
  # `family` is set explicitly to match the "neg" labels below.
  geom_text(
    data = means_data2plot |> filter(diabetes_5y == "pos"),
    aes(
      color = diabetes_5y,
      x = mean + 0.2,
      label = round(mean_original, digits = 1)
    ),
    y = 1,
    size = 2.5,
    hjust = 0,
    show.legend = FALSE,
    family = "Ubuntu"
  ) +
  # Label for the "neg" group: right-aligned, nudged left of its mean.
  geom_text(
    data = means_data2plot |> filter(diabetes_5y == "neg"),
    aes(
      color = diabetes_5y,
      x = mean - 0.2,
      label = round(mean_original, digits = 1)
    ),
    y = 1,
    size = 2.5,
    hjust = 1,
    show.legend = FALSE,
    family = "Ubuntu"
  ) +
  # The facet variable is rebuilt as a factor inside the formula so that every
  # layer's data (including `means_data2plot`) gets the same level order and
  # markdown labels.
  facet_wrap(
    ~ factor(
      name,
      levels = rev(my_order),
      labels = pretty_names[rev(my_order)]
    ),
    ncol = 2
  ) +
  scale_y_continuous(limits = c(0, 1)) +
  # NOTE(review): scale limits discard z-scores outside [-2, 3] before the
  # density is estimated; coord_cartesian(xlim = ...) would zoom without
  # dropping data - confirm which behaviour is intended.
  scale_x_continuous(limits = c(-2, 3)) +
  # Colours follow the legend embedded in the subtitle: gray = "No Diabetes"
  # (neg), red = "Developed Diabetes" (pos).
  scale_fill_manual(values = c(neg = cool_gray1, pos = cool_red2)) +
  scale_color_manual(values = c(neg = cool_gray1, pos = cool_red2)) +
  theme(
    axis.text = element_blank(),
    strip.text.x = element_markdown(
      hjust = 0,
      color = cool_gray1,
      size = 8
    )
  ) +
  labs(
    y = NULL,
    title = "Pima Indian Diabetes Data",
    # str_wrap() inserts "\n"; element_markdown() needs "<br>" for line breaks.
    subtitle = str_wrap(
      'This week we are exploring type 2 diabetes data from the Pima Indian community near Phoenix, Arizona. The study includes only women aged 21 and older, all of Pima heritage, with at least five years of follow-up. Each participant underwent regular oral glucose tolerance tests, and diabetes was diagnosed using WHO criteria.',
      width = 105
    ) |>
      str_replace_all("\\n", "<br>") |>
      str_c(
        '<br><br>**Mean value** (<span style="color:#7e89bb">No Diabetes</span>, <span style="color:#F43E75">Developed Diabetes</span>)'
      ),
    caption = str_c(
      "Glucose: Plasma glucose concentration at 2 hours after administration of an oral glucose tolerance test<br><br>",
      str_wrap(
        "NOTE This visualization offers a preliminary look at the data and may not capture the full complexity of the underlying reality. SOURCE #Tidytuesday 2025-11-11 GITHUB barreiro-r",
        width = 120
      ) |>
        str_replace_all("\\n", "<br>")
    ),
    x = NULL,
    fill = NULL
  )