Mutate multiple variables using across, starts_with and ifelse statement

51 Views Asked by At

I have the following dataframe.

library(dplyr)
data_test = data.frame(va_te=c("yes", "", "no", "yes"),
           va_ti=c("no", "", "yes", "no"),
           va_ze=c("", "no", "yes", "no"),
           jk_te=c(NA, 545, 876, 987),
           jk_ti=c(876, 567, 908, 432),
           jk_ze=c(987, 988, NA, 234),
           loc=c(345, 898, 444, 321))

> data_test
  va_te va_ti va_ze jk_te jk_ti jk_ze loc
1   yes    no          NA   876   987 345
2                no   545   567   988 898
3    no   yes   yes   876   908    NA 444
4   yes    no    no   987   432   234 321

I would like to mutate the first 6 variables using two patterns (one for each group of 3 similar variables: va_ and jk_).

For the first group "va_" I used across + starts_with + ifelse.

For the second group "jk_" I would like to use the same method but as my ifelse uses both jk and va in the test, I don't know how to write this part.

data_test_2 = data_test %>%
  mutate(across(starts_with("va_"), 
                            ~ ifelse(.x == "", NA, .x), .names = "{col}")) %>%
  mutate(jk_te = ifelse(va_te != "yes", NA, 
                 ifelse(va_te == "yes" & is.na(jk_te), loc, jk_te))) %>%
  mutate(jk_ti = ifelse(va_ti != "yes", NA, 
                 ifelse(va_ti == "yes" & is.na(jk_ti), loc, jk_ti))) %>%
  mutate(jk_ze = ifelse(va_ze != "yes", NA, 
                 ifelse(va_ze == "yes" & is.na(jk_ze), loc, jk_ze))) 

> data_test_2
  va_te va_ti va_ze jk_te jk_ti jk_ze loc
1   yes    no  <NA>   345    NA    NA 345
2  <NA>  <NA>    no    NA    NA    NA 898
3    no   yes   yes    NA   908   444 444
4   yes    no    no   987    NA    NA 321
2

There are 2 best solutions below

2
benson23 On BEST ANSWER

You can use sub "jk" with "va" on the name of the current column (cur_column()), and get the columns for the actions.

library(dplyr)

data_test %>% 
  mutate(across(starts_with("va_"), ~ ifelse(.x == "", NA, .x)),
         across(starts_with("jk_"), ~ ifelse(get(sub("jk", "va", cur_column())) != "yes", NA,
                                             ifelse(get(sub("jk", "va", cur_column())) == "yes" & is.na(.x), loc, .x))))

  va_te va_ti va_ze jk_te jk_ti jk_ze loc
1   yes    no  <NA>   345    NA    NA 345
2  <NA>  <NA>    no    NA    NA    NA 898
3    no   yes   yes    NA   908   444 444
4   yes    no    no   987    NA    NA 321
0
thothal On

One way would be to transform your data first into a long format, where we have one column for va_ columns, one column for jk_ columns and a suffix column for te, ti or ze respectively.

library(tidyr)
(data_lng <- data_test %>%
  pivot_longer(-loc, 
               names_to = c(".value", "suffix"),
               names_sep  ="_"))

# # A tibble: 12 × 4
#      loc suffix va    jk   
#    <dbl> <chr>  <chr> <chr>
#  1   345 te     "yes" NA   
#  2   345 ti     "no"  876  
#  3   345 ze     ""    987  
#  4   898 te     ""    545  
#  5   898 ti     ""    567  
#  6   898 ze     "no"  988  
#  7   444 te     "no"  876  
#  8   444 ti     "yes" 908  
#  9   444 ze     "yes" NA   
# 10   321 te     "yes" 987  
# 11   321 ti     "no"  432  
# 12   321 ze     "no"  234 

Then you can easily transform va and jk and especially for the latter you can also use the corresponding va column:

(data_lng_mutated <- data_lng %>% 
  mutate(va = if_else(!nzchar(va), NA_character_, va),
         jk = case_when(coalesce(va, "") != "yes" ~ NA_real_,
                        is.na(jk) ~ loc,
                        TRUE ~ as.numeric(jk))))
# # A tibble: 12 × 4
#      loc suffix va       jk
#    <dbl> <chr>  <chr> <dbl>
#  1   345 te     yes     345
#  2   345 ti     no       NA
#  3   345 ze     NA       NA
#  4   898 te     NA       NA
#  5   898 ti     NA       NA
#  6   898 ze     no       NA
#  7   444 te     no       NA
#  8   444 ti     yes     908
#  9   444 ze     yes     444
# 10   321 te     yes     987
# 11   321 ti     no       NA
# 12   321 ze     no       NA

Last step is to transform it back to wide format:

  
data_lng_mutated %>% 
  pivot_wider(values_from = c("va", "jk"), names_from = suffix) %>% 
  relocate(loc, .after = last_col())

# # A tibble: 4 × 7
#   va_te va_ti va_ze jk_te jk_ti jk_ze   loc
#   <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
# 1 yes   no    NA      345    NA    NA   345
# 2 NA    NA    no       NA    NA    NA   898
# 3 no    yes   yes      NA   908   444   444
# 4 yes   no    no      987    NA    NA   321

Putting everything together you have:

data_test %>%
  pivot_longer(-loc, 
               names_to = c(".value", "suffix"),
               names_sep  ="_") %>% 
  mutate(va = if_else(!nzchar(va), NA_character_, va),
         jk = case_when(coalesce(va, "") != "yes" ~ NA_real_,
                        is.na(jk) ~ loc,
                        TRUE ~ as.numeric(jk))) %>% 
  pivot_wider(values_from = c("va", "jk"), names_from = suffix) %>% 
  relocate(loc, .after = last_col())