Replace values in one vector with values from other vector(s)

726 Views Asked by At

I have a dataframe something this:

id <- c(1, 2, 3, 4, 5, 6, 7)
var1 <- c(1, NA, 2, NA, 1, 1, 2) 
var2 <- c(1, 1, 2, 2, NA, 2, 2)

However, how do I manage to create a new vector, which takes the values from var2, and replace it with NAs in var1 and otherwise just takes the value (1 or 2) from var1, as long as it has one?

I'm thinking something like:

id <- c(1, 2, 3, 4, 5, 6, 7)
var1 <- c(1, NA, 2, NA, 1, 1, 2) 
var2 <- c(1, 1, 2, 2, NA, 2, 2)
newvar <- c(1, 1, 2, 2, 1, 1, 2)

The same goes for another dataframe, in which there are more vectors:

id   <- c(1, 2, 3, 4, 5, 6, 7)
var1 <- c(1, NA,2, NA,NA,1, 2) 
var2 <- c(1, 1, 2, 2, NA,2, 2)
var3 <- c(2, 1, 2, 1, 1, 1, 2)
var4 <- c(1, 1, 2, NA,2, 1, 2)

In this case, I want to create another vector "newvar", which takes the dominant value from the var2, var3 and var4, and replace it with NA in var1.

So the starting point will always be what is in var1. However for id4 and id5 fx, there is no dominant value in the other variables - then i want to replace NA with values from the first variable with values, in this to two cases values from var 2 and var3 respectively.

id   <- c(1, 2, 3, 4, 5, 6, 7)
var1 <- c(1, NA,2, NA,NA,1, 2) 
var2 <- c(1, 1, 2, 2, NA,2, 2)
var3 <- c(2, 1, 2, 1, 1, 1, 2)
var4 <- c(1, 1, 2, NA,2, 1, 2)
newvar <- c(1, 1, 2, 2, 1, 1, 2)

How can this be done in an easy way?

Thank you!

5

There are 5 best solutions below

0
Ronak Shah On

You can use coalesce from dplyr.

library(dplyr)
df$newvar <- do.call(coalesce, select(df, starts_with('var')))
df

#  id var1 var2 var3 var4 newvar
#1  1    1    1    2    1      1
#2  2   NA    1    1    1      1
#3  3    2    2    2    2      2
#4  4   NA    2    1   NA      2
#5  5   NA   NA    1    2      1
#6  6    1    2    1    1      1
#7  7    2    2    2    2      2

data

id   <- c(1, 2, 3, 4, 5, 6, 7)
var1 <- c(1, NA,2, NA,NA,1, 2) 
var2 <- c(1, 1, 2, 2, NA,2, 2)
var3 <- c(2, 1, 2, 1, 1, 1, 2)
var4 <- c(1, 1, 2, NA,2, 1, 2)
df <- data.frame(id, var1, var2, var3, var4)
2
GKi On

Its possible to use [<- in Reduce to overwrite NA with values of the next vector(s).

var1 <- c(1, NA, 2, NA, 1, 1, 2) 
var2 <- c(1, 1, 2, 2, NA, 2, 2)
#`[<-`(var1, is.na(var1), var2[is.na(var1)]) #In case of only two vectors
Reduce(function(a, b) `[<-`(a, is.na(a), b[is.na(a)]), list(var1, var2))
#[1] 1 1 2 2 1 1 2

var1 <- c(1, NA,2, NA,NA,1, 2) 
var2 <- c(1, 1, 2, 2, NA,2, 2)
var3 <- c(2, 1, 2, 1, 1, 1, 2)
var4 <- c(1, 1, 2, NA,2, 1, 2)
Reduce(function(a, b) `[<-`(a, is.na(a), b[is.na(a)]), list(var1, var2, var3, var4))
#[1] 1 1 2 2 1 1 2

What is somehow like doing:

var1 <- c(1, NA, 2, NA, 1, 1, 2) 
var2 <- c(1, 1, 2, 2, NA, 2, 2)
newvar <- var1
i <- is.na(newvar)
newvar[i] <- var2[i]
newvar
#[1] 1 1 2 2 1 1 2
1
Jason Mathews On

Try this.

df  %>% 
  mutate(newavar = coalesce(var1,var2, var3, var4))
0
akrun On

With tidyverse, we can use invoke with coalesce

library(dplyr)
library(purrr)
df %>% 
   mutate(newvar = invoke(coalesce, select(cur_data(), starts_with('var'))))
  id var1 var2 var3 var4 newvar
1  1    1    1    2    1      1
2  2   NA    1    1    1      1
3  3    2    2    2    2      2
4  4   NA    2    1   NA      2
5  5   NA   NA    1    2      1
6  6    1    2    1    1      1
7  7    2    2    2    2      2

data

df <- structure(list(id = c(1, 2, 3, 4, 5, 6, 7), var1 = c(1, NA, 2, 
NA, NA, 1, 2), var2 = c(1, 1, 2, 2, NA, 2, 2), var3 = c(2, 1, 
2, 1, 1, 1, 2), var4 = c(1, 1, 2, NA, 2, 1, 2)),
 class = "data.frame", row.names = c(NA, 
-7L))
0
ThomasIsCoding On

A base R option using pmin + col

df$newvar <- df[-1][
    cbind(
        1:nrow(df),
        do.call(
            pmin,
            data.frame(
                replace(
                    u <- (!is.na(df[-1])) * col(df[-1]),
                    u == 0, Inf
                )
            )
        )
    )
]

gives

> df
  id var1 var2 var3 var4 newvar
1  1    1    1    2    1      1
2  2   NA    1    1    1      1
3  3    2    2    2    2      2
4  4   NA    2    1   NA      2
5  5   NA   NA    1    2      1
6  6    1    2    1    1      1
7  7    2    2    2    2      2