How do I fix subscript out of bound error for my interaction matrices?

36 Views Asked by At

I am trying to create interaction matrices from multiple files. With some of my files, I am receiving the following error:

Error in interaction_matrix[as.numeric(pig_id), as.numeric(closest_pig)] : 
  subscript out of bounds

This is the code I am using.

create_interaction_matrices <- function(nm, behavior_code = "AFF", select_every = 10, nsel = 25) {
  NNdatafull <- read_excel(nm, sheet = 1)
  colnames(NNdatafull) <- c("Date", "Time", "Pen", "Pig_ID", "Distance", "Closest_Pig", "Behavior", "Orientation", "Location")
  datelevels <- unique(NNdatafull$Date)
  
  interaction_matrices <- list()
  
  for (date in datelevels) {
    mdat <- filter(NNdatafull, Date == date)
    mdat <- filter(mdat, Behavior == behavior_code)
    
    # Get unique pig IDs from both Pig_ID and Closest_Pig columns
    unique_pigs <- union(unique(mdat$Pig_ID), unique(mdat$Closest_Pig))
    
    # Initialize interaction matrix with the size based on the number of unique pigs
    interaction_matrix <- matrix(0, nrow = length(unique_pigs), ncol = length(unique_pigs))
    
    for (i in 1:nsel) {
      time <- min(mdat$Time) + (i - 1) * (60 * select_every)
      time_data <- filter(mdat, Time == time)
      interacting_pigs <- unique(time_data$Pig_ID)
      closest_pigs <- unique(time_data$Closest_Pig)
      
      # Update interaction matrix based on pig ID to closest pig interactions
      for (pig_id in interacting_pigs) {
        closest_pig <- time_data$Closest_Pig[time_data$Pig_ID == pig_id]
        # Increment the value in the interaction matrix
        interaction_matrix[as.numeric(pig_id), as.numeric(closest_pig)] <- interaction_matrix[as.numeric(pig_id), as.numeric(closest_pig)] + 1
      }
    }
    interaction_matrices[[as.character(date)]] <- interaction_matrix
  }
  return(interaction_matrices)
}

This is an example of data I am using: data example

I think it is because the code has the matrix as length(unique_pigs) but some of the ID numbers are greater than the number of pigs. For example, a pen has 13 pigs, but one pig has 16 as an ID number.

How would I edit the code to include only the ID numbers present in the datasheet?

Thanks!

1

There are 1 best solutions below

0
gmifflen On BEST ANSWER

You were correct in assuming that the issue arises because the pig IDs and the closest pig IDs do not necessarily correspond to the row and column numbers of the matrix. You would need to create a mapping between pig IDs and the matrix indices.

Here's the how I did it:

create_interaction_matrices <- function(nm, behavior_code = "AFF", select_every = 10,
  nsel = 25) {
  # you can keep doing as you had here, I just did this for myself
  file_path <- file.path(getwd(), paste0(nm, ".xlsx"))
  NNdatafull <- read_excel(file_path, sheet = 1)
  colnames(NNdatafull) <- c("Date", "Time", "Pen", "Pig_ID", "Distance",
    "Closest_Pig", "Behavior", "Orientation", "Location")
  datelevels <- unique(NNdatafull$Date)

  interaction_matrices <- list()

  for (date in datelevels) {
    mdat <- filter(NNdatafull, Date == date)
    mdat <- filter(mdat, Behavior == behavior_code)

    # Get unique pig IDs from both Pig_ID and Closest_Pig columns
    unique_pigs <- union(unique(mdat$Pig_ID), unique(mdat$Closest_Pig))

    # create mapping of pig IDs to matrix indices
    pig_id_to_matrix_idx <- match(unique_pigs, sort(unique_pigs))

    # Initialize interaction matrix with the size based on the number of unique pigs
    interaction_matrix <- matrix(0, nrow = length(unique_pigs),
      ncol = length(unique_pigs))

    for (i in 1:nsel) {
      time <- min(mdat$Time) + (i - 1) * (60 * select_every)
      time_data <- filter(mdat, Time == time)

      # Update interaction matrix based on pig ID to closest pig interactions
      for (row in 1:nrow(time_data)) {
        pig_id <- time_data$Pig_ID[row]
        closest_pig <- time_data$Closest_Pig[row]

        # convert pig IDs to matrix indices
        pig_idx <- which(unique_pigs == pig_id)
        closest_pig_idx <- which(unique_pigs == closest_pig)

        # check if the indices are valid; then update the interaction matrix
        if (length(pig_idx) == 1 && length(closest_pig_idx) ==
          1) {
          interaction_matrix[pig_idx, closest_pig_idx] <- interaction_matrix[pig_idx,
          closest_pig_idx] + 1
        }
      }
    }
    interaction_matrices[[as.character(date)]] <- interaction_matrix
  }
  return(interaction_matrices)
}

this doesn't return any errors on my end, and after running it through a tester function to graph it, it seems to work:

test_and_plot_interaction_matrices <- function(filename) {
  library(ggplot2)
  library(reshape2)

  interaction_matrices <- create_interaction_matrices(filename)

  some_date <- names(interaction_matrices)[1]
  matrix_to_plot <- interaction_matrices[[some_date]]

  matrix_long <- melt(matrix_to_plot)
  names(matrix_long) <- c("Pig_ID", "Closest_Pig", "Frequency")

  ggplot(matrix_long, aes(x = Pig_ID, y = Closest_Pig, fill = Frequency)) +
    geom_tile() + scale_fill_gradient(low = "white", high = "steelblue") +
    theme_minimal() + labs(title = paste("Interaction Matrix for Date:", some_date),
    x = "Pig ID", y = "Closest Pig ID", fill = "Interaction\nFrequency") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1))

}

image of heatmap on stack.imgur.com

I don't know if I graphed the data correctly, but it shouldn't matter since your function works now [I hope :)]


edit:

I thought of a way to do it with vectorized operations:

library(dplyr)

create_interaction_matrices <- function(nm, behavior_code = "AFF", select_every = 10,
  nsel = 25) {
  file_path <- file.path(getwd(), paste0(nm, ".xlsx"))
  NNdatafull <- read_excel(file_path, sheet = 1)

  # preprocess
  NNdatafull <- NNdatafull |>
    filter(Behavior == behavior_code) |>
    mutate(Date = as.character(Date))

  # store interaction matrices for each date
  interaction_matrices <- list()

  # process each date
  for (date in unique(NNdatafull$Date)) {
    mdat <- filter(NNdatafull, Date == date)

    # Get unique pig IDs and create a mapping to matrix indices
    unique_pigs <- sort(unique(c(mdat$Pig_ID, mdat$Closest_Pig)))
    pig_id_to_matrix_idx <- match(unique_pigs, unique_pigs)

    # Initialize the interaction matrix with the size based on the number of unique pigs
    interaction_matrix <- matrix(0, nrow = length(unique_pigs),
      ncol = length(unique_pigs), dimnames = list(unique_pigs,
        unique_pigs))

    # iterate through selected dates
    for (i in seq_len(nsel)) {
      time <- min(mdat$Time) + (i - 1) * (60 * select_every)
      time_data <- filter(mdat, Time == time)

      # update interaction matrix for each row in time_data
      apply(time_data, 1, function(row) {
        pig_idx <- match(row["Pig_ID"], unique_pigs)
        closest_pig_idx <- match(row["Closest_Pig"], unique_pigs)

        if (!is.na(pig_idx) && !is.na(closest_pig_idx)) {
          interaction_matrix[pig_idx, closest_pig_idx] <- interaction_matrix[pig_idx,
          closest_pig_idx] + 1
        }
      })
    }

    interaction_matrices[[date]] <- interaction_matrix
  }

  return(interaction_matrices)
}

I'm not sure if it's the best way to do it, but it doesn't give any errors either,on my end at least, we'll have to see how it goes for you :)