import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
About the Data
Note
This week we’re exploring data from the FBI Crime Data API! Specifically, we’re looking at agency-level data across all 50 states in the USA. This dataset provides details on law enforcement agencies that have submitted data to the FBI’s Uniform Crime Reporting (UCR) Program and are displayed on the Crime Data Explorer (CDE).
The Open Data Portal of Istituto Nazionale di Geofisica e Vulcanologia (INGV) gives public access to data resulting from institutional research activities in the fields of Seismology, Volcanology, and Environment.
1 Initializing
1.1 Load libraries
1.2 Set theme
# Apply the author's custom matplotlib style sheet to every figure created below.
plt.style.use('~/Documents/GitHub/tidytuesday/posts/2025-02-18/rb-style.mplstyle')
1.3 Load this week’s data
# Read this week's TidyTuesday agencies table straight from the GitHub raw URL.
agencies = pd.read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-02-18/agencies.csv')
2 Time to plot!
2.1 Before
# Count rows (agencies) per state and keep the ten states with the highest
# counts. The ascending sort means the largest count is the last row, which
# a horizontal bar chart draws at the top.
data2plot = (
    agencies.groupby("state").size().reset_index(name="n").sort_values(by="n").tail(10)
)
# Create a 5 x 3 inch figure with a single axes.
fig, ax = plt.subplots()
fig.set_figwidth(5)
fig.set_figheight(3)

# Draw vertical major grid lines and keep them behind the bars.
ax.set_axisbelow(True)
ax.grid(True, axis="x", which="major", linestyle="-", linewidth=0.7, color="#d3daed")

# One horizontal bar per state, with bar length equal to the agency count.
ax.barh(data2plot["state"], data2plot["n"], color="#495373")

# Apply the formatter to the x-axis (thousands separator, no decimals)
ax.xaxis.set_major_formatter(ticker.StrMethodFormatter("{x:,.0f}"))

# Labels
ax.set_title("Number of agencies by state")
ax.set_xlabel("Counts (n)")

plt.tight_layout()
plt.show()
2.2 After
# Apply the author's custom matplotlib style sheet before building the "After" figure.
plt.style.use("~/Documents/GitHub/tidytuesday/posts/2025-02-18/rb-style.mplstyle")
# Count rows (agencies) for every (state, agency_type) pair, in long format.
data2plot = (
    agencies.groupby(["state", "agency_type"])
    .size()
    .reset_index(name="n")
    .sort_values(by="n")
)

# Pivot wider to make the stacked bar plot: one row per state, one column per
# agency type; combinations that never occur are filled with 0.
data2plot_wide = data2plot.pivot_table(
    index="state", columns="agency_type", values="n", fill_value=0
)

# Sorting and filtering
# The row total across all agency types is used only to order and filter states.
data2plot_wide["total_agencies"] = data2plot_wide.sum(axis=1)
data2plot_wide_sorted = data2plot_wide.sort_values(by="total_agencies", ascending=True)
# Keep only states with more than 450 agencies in total.
data2plot_wide_sorted = data2plot_wide_sorted.query("total_agencies > 450")
# Drop the helper column so it is not drawn as an extra stacked segment.
# Reassigning (rather than inplace=True) avoids mutating the query() result.
data2plot_wide_sorted = data2plot_wide_sorted.drop(columns="total_agencies")
# Start plotting ------------------------------------------------------------------------
fig, ax = plt.subplots()

# Color palette (passed to the stacked bars, one color per agency-type column)
color_map = [
    "#495373",
    "#ce4441",
    "#ee8577",
    "#eb7926",
    "#ffbb44",
    "#859b6c",
    "#62929a",
    "#004f63",
    "#122451",
]

# Geom: horizontal stacked bars, one bar per state, one segment per agency type
data2plot_wide_sorted.plot(
    kind="barh", stacked=True, figsize=(5, 3), ax=ax, width=0.8, color=color_map
)

# Add grid (vertical major lines only, drawn behind the bars)
ax.set_axisbelow(True)
ax.grid(True, axis="x", which="major", linestyle="-", linewidth=0.7, color="#d3daed")

# Format x axis (add comma)
ax.xaxis.set_major_formatter(ticker.StrMethodFormatter("{x:,.0f}"))

# Labels
ax.set_title("Number of agencies by state")
ax.set_xlabel("Counts (n)")
ax.set_ylabel("")  # blank out the y-axis label

# Legend
plt.legend(
    title="Type of Agency", title_fontproperties={"weight": "bold"}, alignment="left"
)

# Plot & Pray
plt.tight_layout()
plt.show()