I have a spatial plot with x and y coordinates and regions (defined by differing gene expression). I am trying to identify the edges or borders of each region so that I can draw a contour around it. The best I could come up with is the plot I generated here: Spatial Plot.
I tried to use KNN to determine the cells at the edges of each region, but the contour line isn't precise enough. I've tried various k values, and k = 5 gives the closest match to the border that I can achieve. I used the spatstat.geom, ggplot2 and plotly packages in R. I was expecting a single contour line per region, but instead I have gaps in the identified "border" cells.
My code is as follows:
library(dplyr)
library(spatstat.geom)
library(ggplot2)
library(plotly)
library(STew)
# Load the manual region annotations and the DLPFC example data set.
annotations <- readRDS("Manual_annotation_3639cells.rds")
data(dlpfc)

# Spatial coordinates: one row per cell, with the row names kept as an
# explicit `cell_id` column so they can serve as the join key.
spatial_df <- as.data.frame(dlpfc$spatial)
spatial_df[["cell_id"]] <- rownames(spatial_df)
names(spatial_df) <- c("x_coordinate", "y_coordinate", "cell_id")

# Annotations: one row per annotated cell. The names of `annotations` carry
# a "Manual.annotation." prefix that is removed to recover the cell id.
annotations_df <- data.frame(
  cell_id = names(annotations),
  region = annotations
)
annotations_df[["cell_id"]] <- gsub(
  "Manual.annotation.", "", annotations_df[["cell_id"]]
)

# Make sure both join keys are plain character vectors before merging.
annotations_df[["cell_id"]] <- as.character(annotations_df[["cell_id"]])
spatial_df[["cell_id"]] <- as.character(spatial_df[["cell_id"]])

# Left join: every cell with coordinates is kept; cells that have no
# annotation end up with an NA `region`.
spatial_annotated_df <- merge(
  x = spatial_df,
  y = annotations_df,
  by = "cell_id",
  all.x = TRUE
)

# Quick sanity check of the merged table.
head(spatial_annotated_df)
# Build a planar point pattern ('ppp') from the annotated cells. The
# observation window is the bounding box of the coordinates, and every point
# carries its region label as a mark.
coordinates <- spatial_annotated_df[, c("x_coordinate", "y_coordinate")]
regions <- spatial_annotated_df$region
W <- owin(xrange = range(coordinates[, 1]), yrange = range(coordinates[, 2]))
ppp_data <- ppp(coordinates[, 1], coordinates[, 2], marks = regions, window = W)

# Number of nearest neighbours inspected around each cell. Smaller values give
# a thinner band of border cells, larger values a thicker one.
k_neighbours <- 5L

# nnwhich(k = 5) would return ONLY the 5th nearest neighbour of each cell.
# Requesting k = 1..k_neighbours returns one column per neighbour rank, so
# every one of the k nearest neighbours is examined. as.matrix() keeps the
# result two-dimensional even when k_neighbours is 1.
nn_index <- as.matrix(nnwhich(ppp_data, k = seq_len(k_neighbours)))

# Region label of each cell, and of each of its k nearest neighbours
# (rows = cells, columns = neighbour rank).
region_labels <- as.character(ppp_data$marks)
neighbour_labels <- matrix(region_labels[nn_index], nrow = nrow(nn_index))

# A cell is a border cell when at least one of its k nearest neighbours lies
# in a different region. `region_labels` is recycled down each column, so
# every neighbour is compared with its own centre cell. Comparisons involving
# an unannotated (NA) region are ignored via na.rm = TRUE, which keeps the
# result a strict TRUE/FALSE vector and avoids all-NA rows when subsetting.
is_border_cell <- rowSums(neighbour_labels != region_labels, na.rm = TRUE) > 0

# Add the border cell information back to the data frame
spatial_annotated_df$border_cell <- is_border_cell

# Keep only the border cells
border_cells <- spatial_annotated_df[is_border_cell, ]

# View the result
head(border_cells)
# Static plot: every cell coloured by region, with the detected border cells
# overlaid as black crosses. The x/y mapping is declared once in ggplot() and
# inherited by both point layers.
ggplot(
  data = spatial_annotated_df,
  mapping = aes(x = x_coordinate, y = y_coordinate)
) +
  # All cells, semi-transparent so overlapping points stay readable.
  geom_point(mapping = aes(color = region), alpha = 0.5) +
  # Border cells drawn on top (shape 4 is a cross).
  geom_point(data = border_cells, color = "black", shape = 4, size = 3) +
  scale_color_viridis_d() +
  labs(
    title = "Border Cells in DLPFC",
    x = "X Coordinate",
    y = "Y Coordinate"
  ) +
  # theme_minimal() must precede theme() so the legend tweaks are not reset.
  theme_minimal() +
  theme(
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 10)
  ) +
  guides(color = guide_legend(title = "Region"))
# Interactive version with plotly: a scatter of all cells coloured by region.
# The widget is stored in `fig`; this block does not display it and does not
# add the border cells to it.
fig <- plot_ly(
  data = spatial_annotated_df,
  x = ~x_coordinate,
  y = ~y_coordinate,
  color = ~region,
  type = "scatter",
  mode = "markers",
  marker = list(size = 10, opacity = 0.5)
)
fig <- layout(
  fig,
  title = "Border Cells in DLPFC",
  xaxis = list(title = "X Coordinate"),
  yaxis = list(title = "Y Coordinate")
)