Replacing rows of NA with another row meeting specific column matching

39 Views Asked by At

Here's a data snippet:

Tab1 <- read.table(text = "
  nodepair ES1 ES2 ++ -- +- -+ 0+ +0 0- -0 00
1     E_A6   3   3  0  0  0  0  0  0  0  0  1
2     E_A6   4   3 NA NA NA NA NA NA NA NA NA
3     E_A6   5   3 NA NA NA NA NA NA NA NA NA
4     E_AL   3   3  0  5  0  0  0  0  0  0  1
5     E_AL   4   3 NA NA NA NA NA NA NA NA NA
6     E_AL   5   3 NA NA NA NA NA NA NA NA NA   
", header = TRUE)

For each nodepair group, I would like to replace rows when they contain NA's with the values from the row where ES2 is equal to ES1, provided that it matches with the ES2 of the NA row. And then I need to assign an NA to the ES1 column of the previously NA row.

like this:

Tab2 <- read.table(text = "
  nodepair ES1 ES2 ++ -- +- -+ 0+ +0 0- -0 00
1     E_A6   3   3  0  0  0  0  0  0  0  0  1
2     E_A6   NA  3  0  0  0  0  0  0  0  0  1
3     E_A6   NA  3  0  0  0  0  0  0  0  0  1
4     E_AL   3   3  0  5  0  0  0  0  0  0  1
5     E_AL   NA  3  0  5  0  0  0  0  0  0  1
6     E_AL   NA  3  0  5  0  0  0  0  0  0  1   
", header = TRUE)

I've been trying out this kind of logic:

Tab2 <- Tab1  %>%
  rowwise %>%
  mutate(across(4:12, ~ifelse(is.na(rowsums), ???, .))))
1

There are 1 best solutions below

0
jared_mamrot On BEST ANSWER

Maybe a for-loop would work? E.g.

Tab1 <- read.table(text = "
  nodepair ES1 ES2 ++ -- +- -+ 0+ +0 0- -0 00
1     E_A6   3   3  0  0  0  0  0  0  0  0  1
2     E_A6   4   3 NA NA NA NA NA NA NA NA NA
3     E_A6   5   3 NA NA NA NA NA NA NA NA NA
4     E_AL   3   3  0  5  0  0  0  0  0  0  1
5     E_AL   4   3 NA NA NA NA NA NA NA NA NA
6     E_AL   5   3 NA NA NA NA NA NA NA NA NA   
", header = TRUE,  check.names = FALSE)

for (i in 1:nrow(Tab1)) {
  if (Tab1[i,2] == Tab1[i,3]) {
    stored_line <- Tab1[i,4:12]
  }
  if (all(is.na(Tab1[i,4:12]))) {
    Tab1[i,4:12] <- stored_line
    Tab1[i,2] <- NA
  }
}

Tab1
#>   nodepair ES1 ES2 ++ -- +- -+ 0+ +0 0- -0 00
#> 1     E_A6   3   3  0  0  0  0  0  0  0  0  1
#> 2     E_A6  NA   3  0  0  0  0  0  0  0  0  1
#> 3     E_A6  NA   3  0  0  0  0  0  0  0  0  1
#> 4     E_AL   3   3  0  5  0  0  0  0  0  0  1
#> 5     E_AL  NA   3  0  5  0  0  0  0  0  0  1
#> 6     E_AL  NA   3  0  5  0  0  0  0  0  0  1

I tried using a join but couldn't figure out the edge-cases, e.g. if a line has NAs and values:

# add some sevens to line 2
Tab1 <- read.table(text = "
  nodepair ES1 ES2 ++ -- +- -+ 0+ +0 0- -0 00
1     E_A6   3   3  0  0  0  0  0  0  0  0  1
2     E_A6   4   3 NA NA NA NA NA NA NA  7  7
3     E_A6   5   3 NA NA NA NA NA NA NA NA NA
4     E_AL   3   3  0  5  0  0  0  0  0  0  1
5     E_AL   4   3 NA NA NA NA NA NA NA NA NA
6     E_AL   5   3 NA NA NA NA NA NA NA NA NA   
", header = TRUE,  check.names = FALSE)

# solution from @thelatemail's comment:
library(tidyverse)
Tab1 %>% select(nodepair,ES2) %>% left_join(Tab1 %>% filter(ES1 == ES2), by=c("nodepair","ES2"))
#>   nodepair ES2 ES1 ++ -- +- -+ 0+ +0 0- -0 00
#> 1     E_A6   3   3  0  0  0  0  0  0  0  0  1
#> 2     E_A6   3   3  0  0  0  0  0  0  0  0  1
#> 3     E_A6   3   3  0  0  0  0  0  0  0  0  1
#> 4     E_AL   3   3  0  5  0  0  0  0  0  0  1
#> 5     E_AL   3   3  0  5  0  0  0  0  0  0  1
#> 6     E_AL   3   3  0  5  0  0  0  0  0  0  1

# compared to the for-loop approach
for (i in 1:nrow(Tab1)) {
  if (Tab1[i,2] == Tab1[i,3]) {
    stored_line <- Tab1[i,4:12]
  }
  if (all(is.na(Tab1[i,4:12]))) {
    Tab1[i,4:12] <- stored_line
    Tab1[i,2] <- NA
  }
}
Tab1
#>   nodepair ES1 ES2 ++ -- +- -+ 0+ +0 0- -0 00
#> 1     E_A6   3   3  0  0  0  0  0  0  0  0  1
#> 2     E_A6   4   3 NA NA NA NA NA NA NA  7  7
#> 3     E_A6  NA   3  0  0  0  0  0  0  0  0  1
#> 4     E_AL   3   3  0  5  0  0  0  0  0  0  1
#> 5     E_AL  NA   3  0  5  0  0  0  0  0  0  1
#> 6     E_AL  NA   3  0  5  0  0  0  0  0  0  1

Created on 2024-02-06 with reprex v2.1.0