Wilcoxon test in a loop with rstatix

48 Views Asked by At

I have a data frame like this:

library(rstatix)
# Toy data: four numeric measurement columns plus one categorical status
# column. No seed is set in this snippet, so the values differ on every run.
grp1 <- runif(10, min = 0, max = 100)  # Example numeric data
grp2 <- runif(10, min = 0, max = 100)
grp3 <- runif(10, min = 0, max = 100)
grp4 <- runif(10, min = 0, max = 100)
status <- sample(c("alive", "death", "sick"), 10, replace = TRUE)  # Example character data

# Combine the vectors into one data frame, one column per vector.
my_data_frame <- data.frame(grp1, grp2, grp3, grp4, status)

and I want to run the following test for each group column (grp1 to grp4):

  # Test one measurement column against status; the data frame is passed as
  # the first argument of wilcox_test() through the pipe.
  my_data_frame %>% wilcox_test(grp1 ~ status, p.adjust.method = "fdr")

I tried:

# NOTE(review): `for (i in 4)` iterates over the single value 4 only, not 1 to 4.
for (i in 4) {
 
  var <- names(my_data_frame)
  # `var[i] ~ status` keeps `var[i]` as a literal expression; the column name
  # stored in var[i] is not substituted into the formula, which is why the
  # error reports that column `var[i]` doesn't exist.
  # `test` is also overwritten on every iteration, so only one result is kept.
  test <- my_data_frame %>% wilcox_test(var[i] ~ status, p.adjust.method = "fdr")}

but I get an error saying that column `var[i]` doesn't exist.

With other functions, such as glm, this syntax works.

2

There are 2 best solutions below

0
margusl On BEST ANSWER

rstatix is designed to fit nicely into Tidyverse-based workflows and pipelines, so instead of a loop, you could first pivot your dataset to long format, group it by the original column name, and pass the grouped tibble to wilcox_test():

library(rstatix)
library(dplyr)
library(tidyr)

# Toy data: four numeric measurement columns plus one categorical status
# column. No seed is set in this snippet, so the values differ on every run.
grp1 <- runif(10, min = 0, max = 100)  # Example numeric data
grp2 <- runif(10, min = 0, max = 100)
grp3 <- runif(10, min = 0, max = 100)
grp4 <- runif(10, min = 0, max = 100)
status <- sample(c("alive", "death", "sick"), 10, replace = TRUE)  # Example character data
# Combine the vectors into one data frame, one column per vector.
my_data_frame <- data.frame(grp1, grp2, grp3, grp4, status)

# Reshape to long format, then run the test once per original column.
my_data_frame |>
  # Stack every column except `status`; the former column names go to `grp`
  # and the measurements to `value`.
  pivot_longer(-status, names_to = "grp") |>
  # One group per original column (grp1 ... grp4).
  group_by(grp) |>
  # Compare `value` between status levels within each group; p-values are
  # adjusted with the FDR method.
  wilcox_test(value ~ status, p.adjust.method = "fdr")
#> # A tibble: 12 × 10
#>    grp   .y.   group1 group2    n1    n2 statistic     p p.adj p.adj.signif
#>  * <chr> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl> <dbl> <chr>       
#>  1 grp1  value alive  death      3     3         3 0.7   0.857 ns          
#>  2 grp1  value alive  sick       3     4         3 0.4   0.857 ns          
#>  3 grp1  value death  sick       3     4         5 0.857 0.857 ns          
#>  4 grp2  value alive  death      3     3         3 0.7   0.7   ns          
#>  5 grp2  value alive  sick       3     4         8 0.629 0.7   ns          
#>  6 grp2  value death  sick       3     4        12 0.057 0.171 ns          
#>  7 grp3  value alive  death      3     3         7 0.4   0.629 ns          
#>  8 grp3  value alive  sick       3     4         8 0.629 0.629 ns          
#>  9 grp3  value death  sick       3     4         4 0.629 0.629 ns          
#> 10 grp4  value alive  death      3     3         3 0.7   0.7   ns          
#> 11 grp4  value alive  sick       3     4         4 0.629 0.7   ns          
#> 12 grp4  value death  sick       3     4         8 0.629 0.7   ns

Created on 2024-02-09 with reprex v2.1.0

0
Rui Barradas On

Here are two versions, one with an lapply-style loop and one with a for loop.
Both use reformulate() to build the formula from the column name.

library(rstatix)
#> 
#> Attaching package: 'rstatix'
#> The following object is masked from 'package:stats':
#> 
#>     filter
# Toy data: four numeric measurement columns plus one categorical status
# column. No seed is set in this snippet, so the values differ on every run.
grp1 <- runif(10, min = 0, max = 100)  # Example numeric data
grp2 <- runif(10, min = 0, max = 100)
grp3 <- runif(10, min = 0, max = 100)
grp4 <- runif(10, min = 0, max = 100)
status <- sample(c("alive", "death", "sick"), 10, replace = TRUE)  # Example character data

# Combine the vectors into one data frame, one column per vector.
my_data_frame <- data.frame(grp1, grp2, grp3, grp4, status)

# Names of the measurement columns to test: every column whose name
# contains "grp".
groups <- grep("grp", names(my_data_frame), value = TRUE)

# Run the test once per measurement column. Naming the input vector makes
# lapply() return a list named by column, so a result can be read as
# test_list$grp1 or test_list[["grp1"]].
test_list <- lapply(setNames(groups, groups), \(g) {
  # reformulate() builds the formula `<column> ~ status` from the column name.
  fmla <- reformulate("status", response = g)
  wilcox_test(my_data_frame, fmla, p.adjust.method = "fdr")
})

test_list[[1]]
#> # A tibble: 3 × 9
#>   .y.   group1 group2    n1    n2 statistic     p p.adj p.adj.signif
#> * <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl> <dbl> <chr>       
#> 1 grp1  alive  death      4     1         4 0.4   0.413 ns          
#> 2 grp1  alive  sick       4     5         6 0.413 0.413 ns          
#> 3 grp1  death  sick       1     5         0 0.333 0.413 ns
test_list$grp1
#> # A tibble: 3 × 9
#>   .y.   group1 group2    n1    n2 statistic     p p.adj p.adj.signif
#> * <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl> <dbl> <chr>       
#> 1 grp1  alive  death      4     1         4 0.4   0.413 ns          
#> 2 grp1  alive  sick       4     5         6 0.413 0.413 ns          
#> 3 grp1  death  sick       1     5         0 0.333 0.413 ns

# Pre-allocate one empty slot per measurement column, named after the column.
test_list_2 <- setNames(vector("list", length(groups)), groups)

# Fill each slot with the pairwise tests of that column against status.
for (g in groups) {
  # Build `<column> ~ status` from the column name held in `g`.
  fmla <- reformulate(termlabels = "status", response = g)
  test_list_2[[g]] <- wilcox_test(
    data = my_data_frame,
    formula = fmla,
    p.adjust.method = "fdr"
  )
}

test_list_2$grp1
#> # A tibble: 3 × 9
#>   .y.   group1 group2    n1    n2 statistic     p p.adj p.adj.signif
#> * <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl> <dbl> <chr>       
#> 1 grp1  alive  death      4     1         4 0.4   0.413 ns          
#> 2 grp1  alive  sick       4     5         6 0.413 0.413 ns          
#> 3 grp1  death  sick       1     5         0 0.333 0.413 ns

Created on 2024-02-09 with reprex v2.0.2