Let's consider the dataframe dat0 below.
Using pivot_wider, I can transpose it into a dat1 table and then, export a csv for each group of biomarker (link):

dat1 <- pivot_wider(dat0, names_from = analyser, values_from = c(result)) # then
dat1 %>%
  group_by(biomarker) %>%
  group_walk(~ write_csv(.x, paste0(.y$biomarker, ".csv")))

However, I would like that, in all final CSVs:

  1. rows with only one result (whatever the analyser) are also removed (so that to keep only paired results)
  2. empty columns are eliminated (including those resulting from the first step)
  3. NA cells appear empty

Edit
My actual spreadsheet includes >10,000 rows and >150 biomarkers; the idea is therefore to not have to open all the csvs to reformat them one by one.

Thanks for help

Initial data

dat0
# A tibble: 30 x 4
   id         biomarker analyser result
   <chr>      <chr>     <chr>     <dbl>
 1 0240307415 BICAR     A1           27
 2 0240307373 BICAR     A1           23
 3 0240326970 AMMO      A2          657
 4 0240326976 AMMO      A2          603
 5 0240322181 HAPTO     A3            6
 6 0240322185 HAPTO     A3            4
 7 0240322191 HAPTO     A3            4
 8 0240326976 AMMO      B1          744
 9 0240326970 AMMO      B1          623
10 0240326981 AMMO      B1          550
11 0240326975 AMMO      B1          490
12 0240307415 BICAR     B1           28
13 0240307415 BICAR     B2           27
14 0240307373 BICAR     B2           23
15 0240322181 HAPTO     B2            5
16 0240322185 HAPTO     B2            5
17 0240322191 HAPTO     B2            4
18 0240326976 AMMO      C1          772
19 0240326970 AMMO      C1          649
20 0240326981 AMMO      C1          570
21 0240326975 AMMO      C1          509
22 0240307415 BICAR     C1           29
23 0240252218 BICAR     C1           25
24 0240307417 BICAR     C1           23
25 0240307415 BICAR     C2           28
26 0240307373 BICAR     C2           23
27 0240322181 HAPTO     C2            5
28 0240322185 HAPTO     C2            5
29 0240322191 HAPTO     C2            4
30 0240322151 HAPTO     C2            4

dput

dat0 <-
structure(list(id = c("0240307415", "0240307373", "0240326970", 
"0240326976", "0240322181", "0240322185", "0240322191", "0240326976", 
"0240326970", "0240326981", "0240326975", "0240307415", "0240307415", 
"0240307373", "0240322181", "0240322185", "0240322191", "0240326976", 
"0240326970", "0240326981", "0240326975", "0240307415", "0240252218", 
"0240307417", "0240307415", "0240307373", "0240322181", "0240322185", 
"0240322191", "0240322151"), biomarker = c("BICAR", "BICAR", 
"AMMO", "AMMO", "HAPTO", "HAPTO", "HAPTO", "AMMO", "AMMO", "AMMO", 
"AMMO", "BICAR", "BICAR", "BICAR", "HAPTO", "HAPTO", "HAPTO", 
"AMMO", "AMMO", "AMMO", "AMMO", "BICAR", "BICAR", "BICAR", "BICAR", 
"BICAR", "HAPTO", "HAPTO", "HAPTO", "HAPTO"), analyser = c("A1", 
"A1", "A2", "A2", "A3", "A3", "A3", "B1", "B1", "B1", "B1", "B1", 
"B2", "B2", "B2", "B2", "B2", "C1", "C1", "C1", "C1", "C1", "C1", 
"C1", "C2", "C2", "C2", "C2", "C2", "C2"), result = c(27, 23, 
657, 603, 6, 4, 4, 744, 623, 550, 490, 28, 27, 23, 5, 5, 4, 772, 
649, 570, 509, 29, 25, 23, 28, 23, 5, 5, 4, 4)), row.names = c(NA, 
-30L), class = "data.frame")
1

There are 1 best solutions below

3
r2evans On

One option is to group_nest then pivot_wider, then iterate over each nested frame to remove all-NA columns.

dat1 <- dat0 |>
  group_nest(biomarker) |>
  mutate(data = lapply(data, function(d) pivot_wider(d, names_from = analyser, values_from = result)))
dat1 |>
  group_by(biomarker) |>
  group_walk(~ write_csv(.x$data[[1]], paste0(.y$biomarker, ".csv")))

The process produced:

list.files(pattern = "csv$")
# [1] "AMMO.csv"  "BICAR.csv" "HAPTO.csv"
lapply(list.files(pattern = "csv$"), readr::read_csv, show_col_types = FALSE)
# [[1]]
# # A tibble: 4 × 4
#   id            A2    B1    C1
#   <chr>      <dbl> <dbl> <dbl>
# 1 0240326970   657   623   649
# 2 0240326976   603   744   772
# 3 0240326981    NA   550   570
# 4 0240326975    NA   490   509
# [[2]]
# # A tibble: 4 × 6
#   id            A1    B1    B2    C1    C2
#   <chr>      <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 0240307415    27    28    27    29    28
# 2 0240307373    23    NA    23    NA    23
# 3 0240252218    NA    NA    NA    25    NA
# 4 0240307417    NA    NA    NA    23    NA
# [[3]]
# # A tibble: 4 × 4
#   id            A3    B2    C2
#   <chr>      <dbl> <dbl> <dbl>
# 1 0240322181     6     5     5
# 2 0240322185     4     5     5
# 3 0240322191     4     4     4
# 4 0240322151    NA    NA     4

(I know I'm mixing base-lapply with purrr functions ... old habits. Nothing wrong with the purrr variants :-)


Edit: in order to remove single-value rows, you can expand the lapply a little.

dat1 <- dat0 |>
  group_nest(biomarker) |>
  mutate(data = lapply(data, function(d) {
    pivot_wider(d, names_from = analyser, values_from = result) |>
      filter(rowSums(!is.na(pick(-id))) > 1)
  }))
dat1$data
# [[1]]
# # A tibble: 4 × 4
#   id            A2    B1    C1
#   <chr>      <dbl> <dbl> <dbl>
# 1 0240326970   657   623   649
# 2 0240326976   603   744   772
# 3 0240326981    NA   550   570
# 4 0240326975    NA   490   509
# [[2]]
# # A tibble: 2 × 6
#   id            A1    B1    B2    C1    C2
#   <chr>      <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 0240307415    27    28    27    29    28
# 2 0240307373    23    NA    23    NA    23
# [[3]]
# # A tibble: 3 × 4
#   id            A3    B2    C2
#   <chr>      <dbl> <dbl> <dbl>
# 1 0240322181     6     5     5
# 2 0240322185     4     5     5
# 3 0240322191     4     4     4