I am trying to reproduce a map from W.E.B DuBois with the original colors and values with thin black arrows in each state starting at the bottom right corner of the state polygon and pointing to the centroid of the state. I have 2 issues 1) ggplot2 would not plot the colors specified in the data frame. 2) The arrows pointing to the centroids are too big and would not start at the bottom right corner of the state polygon and would point to the centroid at a 45 degrees angle. I would appreciate any help to fix these issues. Here is the data
df_join <- structure(list(name = c("New Mexico", "Puerto Rico", "California",
"Alabama", "Georgia", "Arkansas", "Oregon", "Mississippi", "Colorado",
"Utah", "Oklahoma", "Tennessee", "Wyoming", "Indiana", "Massachusetts",
"Idaho", "Alaska", "Nevada", "Illinois", "Vermont", "New Jersey",
"North Dakota", "Iowa", "South Carolina", "Arizona", "Delaware",
"District of Columbia", "Guam", "American Samoa", "Connecticut",
"New Hampshire", "Nebraska", "Washington", "South Dakota", "Texas",
"Kentucky", "Ohio", "Wisconsin", "Pennsylvania", "Missouri",
"North Carolina", "Virginia", "West Virginia", "Louisiana", "New York",
"Michigan", "Kansas", "Florida", "United States Virgin Islands",
"Montana", "Minnesota", "Minnesota", "Maryland", "Maine", "Hawaii",
"Commonwealth of the Northern Mariana Islands", "Rhode Island"),
color = c("#d2b48c", "", "#ffd700", "#00aa00", "#000000",
"#dc143c", "#dc143c", "#d2b48c", "#dc143c", "#654321", "#ffd700",
"#654321", "#ffd700", "#ffb6c1", "#d2b48c", "#4682b4", "",
"#ffb6c1", "#ffd700", "#ffb6c1", "#ffd700", "#d2b48c", "#696969",
"#4682b4", "#4682b4", "#d2b48c", "", "", "", "#d2b48c", "#ffb6c1",
"#ffb6c1", "#696969", "#654321", "#696969", "#696969", "#d2b48c",
"#4682b4", "#dc143c", "#4682b4", "#ffb6c1", "#00aa00", "#ffa500",
"#ffb6c1", "#4682b4", "#654321", "#00aa00", "#ffd700", "",
"#00aa00", "#ffb6c1", "#ffb6c1", "#ffd700", "#ffd700", "",
"", "#dc143c"),
present_location = c(
38L, NA, 254L, 24556L, 798747L, NA, 32L, 589L, 285L, 9L,
68L, 9998L, 21L, 193L, 293L, 7L, 12142L, 1L, 556L, 11L,
229L, 5L, 120L, 347L, 48L, 12L, 320L, NA, NA, 97L, 14L,
121L, 44L, 18L, 12016L, 424L, 474L, 27L, 321L, 480L, 462L,
223L, 40L, 6025L, 866L, 51L, 480L, 3981L, 223L, NA, 62L,
38L, 148L, 7L, NA, 48L, 44L
) ))
And here are the codes that I have tried
library(sf)
library(ggplot2)
library(tigris)
# Read in state shapefile data
us_states <- states(cb = TRUE)
# Compute the centroid of each state
us_states_centroid <- st_centroid(us_states)
# Transform the centroid coordinates to WGS84 (lat/long)
us_states_centroid_wgs84 <- st_transform(us_states_centroid, 4326)
# Compute the convex hull of each state
us_states_hull <- st_cast(st_convex_hull(us_states), "MULTILINESTRING")
# Extract the coordinates of the bottom right corner of each state polygon
us_states_bottom_right <- st_coordinates(st_centroid(us_states_hull))
# Combine the centroid and bottom right coordinates into one dataframe
us_states_coords <- data.frame(
centroid_lon = st_coordinates(us_states_centroid_wgs84)[,1],
centroid_lat = st_coordinates(us_states_centroid_wgs84)[,2],
bottom_right_lon = us_states_bottom_right[,1],
bottom_right_lat = us_states_bottom_right[,2]
)
us_states <- cbind(us_states, us_states_coords)
# Merge data with color and present_location information
df_join$color <- factor(df_join$color, levels = unique(df_join$color))
us_states_data <- merge(us_states, df_join, by.x = "NAME", by.y = "name")
# Plot the map with state boundaries, colored by color column and labeled with present_location
ggplot() +
geom_sf(data = us_states_data, aes(fill = color), color = "black", size = 0.2) +
geom_segment(data = us_states, aes(x = bottom_right_lon, y = bottom_right_lat, xend = centroid_lon, yend = centroid_lat),
arrow = arrow(length = unit(0.15, "cm"), type = "closed", ends = "last", angle = 25),
size = 0.5) +
scale_fill_manual(values = us_states_data$color ) +
scale_color_manual(values = us_states_data$color) +
geom_text(data = us_states_data, aes(x = centroid_lon - 0.5, y = centroid_lat, label = present_location), size = 3, hjust = 1, vjust = 0.5) +
labs(title = "US State Boundaries Colored by Color Column and Labeled with Present_Location",
subtitle = "",
fill = "Color", x = "Longitude", y = "Latitude") +
theme_void() +
theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
plot.subtitle = element_text(hjust = 0.5, size = 14, face = "italic"),
legend.position = "none") +
xlim(c(-125, -66)) + ylim(c(25, 50))
I followed @Jul advice and modified the codes. I am getting an error here
# Combine the centroid coordinates and angles into one dataframe
> us_states_coords <- data.frame(
+ centroid_lon = us_states_centroid_coords[,1],
+ centroid_lat = us_states_centroid_coords[,2],
+ angle_degrees = us_states_centroid_coords$angle_degrees
+ )
Error in us_states_centroid_coords[, 1] : incorrect number of dimensions
library(sf)
library(ggplot2)
library(tigris)
# Read in state shapefile data
us_states <- states(cb = TRUE)
# Filter to the continental US states only
us_states <- us_states[us_states$REGION %in% c("3", "4", "5"),]
# Compute the centroid of each state
us_states_centroid <- st_centroid(us_states)
# Transform the centroid coordinates to WGS84 (lat/long)
us_states_centroid_wgs84 <- st_transform(us_states_centroid, 4326)
# Compute the convex hull of each state
us_states_hull <- st_cast(st_convex_hull(us_states), "MULTILINESTRING")
# Extract the coordinates of the bottom right corner of each state polygon
us_states_bottom_right <- st_coordinates(st_centroid(us_states_hull))
# Extract the centroid of Georgia
ga_centroid <- us_states_centroid_wgs84[us_states_centroid_wgs84$NAME == "Georgia",]
# Calculate the angle between the centroid of each state and Georgia
us_states_centroid_coords <- st_coordinates(us_states_centroid_wgs84)
us_states_centroid_coords$angle <- atan2(
ga_centroid$geometry[[1]][2] - us_states_centroid_coords[,2],
ga_centroid$geometry[[1]][1] - us_states_centroid_coords[,1]
)
# Convert the angle to degrees
us_states_centroid_coords$angle_degrees <- us_states_centroid_coords$angle * 180 / pi
# Convert negative angles to positive angles (for visualization purposes)
us_states_centroid_coords$angle_degrees[us_states_centroid_coords$angle_degrees < 0] <-
360 + us_states_centroid_coords$angle_degrees[us_states_centroid_coords$angle_degrees < 0]
# Combine the centroid coordinates and angles into one dataframe
us_states_coords <- data.frame(
centroid_lon = us_states_centroid_coords[,1],
centroid_lat = us_states_centroid_coords[,2],
angle_degrees = us_states_centroid_coords$angle_degrees
)
# Merge data with color and present_location information
us_states_data <- merge(us_states, df_join, by.x = "NAME", by.y = "name")
us_states_data <- us_states_data[us_states_data$color!="",]
# Plot the map with state boundaries, colored by color column and labeled with present_location
ggplot() +
geom_sf(data = us_states_data, aes(fill = color), color = "black", size = 0.2) +
geom_spoke(data = us_states_coords, aes(x0 = centroid_lon, y0 = centroid_lat,
angle = angle_degrees, radius = 0.5),
color = "black", size = 0.5) +
geom_text(data = us_states_data, aes(x = centroid_lon - 0.5, y = centroid_lat, label = present_location),
size = 3, hjust = 1, vjust = 0.5) +
labs(title = "US State Boundaries Colored by Color Column and Labeled with Present_Location",
subtitle = "",
fill = "Color", x = "Longitude", y = "Latitude") +
theme_void() +
theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
plot.subtitle = element_text(hjust = 0.5, size = 14, face = "italic"),
legend.position = "none") +
xlim(c(-125, -66)) + ylim(c(25, 50))
For 1.), there's no need to convert the colours to a factor, and in the ggplot call, it shouldn't be in an
aescall. Theaescall will tell ggplot to take care of the colour mapping. If you want ggplot to use some colours directly from a dataframe, you pass them outside of theaes. Also, for simplicity it may be best to filter out the non-continental territories.For 2.), you are currently picking up the centroid of the hull, not the bottom right of it. You could potentially get close to your need by getting the bounding box of each state
st_bboxand then picking thexmaxandymaxfigures and then taking some other actions to clip the arrow to be within the state lines. But it looks like the map you are trying to recreate actually has all of the arrows emanating from Georgia? In which case, it may be best to calculate the angle between the centroids of each state and Georgia and then utilisegeom_spoketo create the arrows. https://ggplot2.tidyverse.org/reference/geom_spoke.html2023-03-08 Extension
The new error is caused by the way objects are being assigned to the
us_states_centroid_coordsobject. You appear to be inadvertently coercing the matrix output of st_coordinates to a list when assigning$angle. If you createus_states_centroid_coordsas a data.frame, the unexpected coercion will not happen.