unnest_longer() can't recycle input

52 Views Asked by At

I ran each line individually and found out unnest_longer() is the issue. How can I fix this?

df %>%
filter(n() > 2L, .by = sn2) %>%
  pivot_wider(names_from = sn3, values_from = leisure:hyear,
              values_fn = list) %>% 
  unnest_longer(leisure_1:hyear_2)



Error in `unnest_longer()`:
! In row 1, can't recycle input of size 14 to size 12.
Backtrace:
 1. ... %>% unnest_longer(leisure_1:hyear_2)
 2. tidyr::unnest_longer(., leisure_1:hyear_2)

Data has been pivoted to wide form. I noticed there are NULL within the dataset. I suspect those NULLs are causing the problem and tried replacing them with NAs but was not able to remove them using drop_na().

  df %>%
  filter(n() > 2L, .by = sn2) %>%
  pivot_wider(names_from = sn3, values_from = leisure:hyear,
              values_fn = list) %>% 
  mutate(across(everything(), ~ifelse(. == "NULL", NA, .))) %>%
  drop_na() %>% 
  unnest_longer(leisure_1:hyear_2)

Data:

library(tidyverse)
dput(df)
structure(list(sn2 = structure(c(171, 182, 230), label = "household number", format.stata = "%8.0g"), 
    leisure_1 = list(c(180, 370, 430, 470, 560, 320, 370, 470, 
    260, 310, 260, 150, 310, 390, 350, 110, 100), c(540, 270, 
    210, 430, 270, 320, 80, 60, 180, 140, 170, 350), c(170, 290, 
    480, 560)), leisure_2 = list(c(350, 410, 410, 500, 490, 180, 
    380, 290, 270, 350, 370, 440), c(550, 380, 440, 330, 60, 
    460), NULL), hmonth_1 = list(structure(c(8, 8, 8, 8, 1, 1, 
    1, 1, 4, 4, 9, 9, 6, 1, 1, 5, 5), label = "month of household interview", format.stata = "%8.0g", labels = c(january = 1, 
    february = 2, march = 3, april = 4, may = 5, june = 6, july = 7, 
    august = 8, september = 9, october = 10, november = 11, december = 12
    ), class = c("haven_labelled", "vctrs_vctr", "double")), 
        structure(c(8, 8, 1, 1, 9, 9, 2, 2, 9, 9, 5, 5), label = "month of household interview", format.stata = "%8.0g", labels = c(january = 1, 
        february = 2, march = 3, april = 4, may = 5, june = 6, 
        july = 7, august = 8, september = 9, october = 10, november = 11, 
        december = 12), class = c("haven_labelled", "vctrs_vctr", 
        "double")), structure(c(1, 1, 6, 6), label = "month of household interview", format.stata = "%8.0g", labels = c(january = 1, 
        february = 2, march = 3, april = 4, may = 5, june = 6, 
        july = 7, august = 8, september = 9, october = 10, november = 11, 
        december = 12), class = c("haven_labelled", "vctrs_vctr", 
        "double"))), hmonth_2 = list(structure(c(8, 8, 1, 1, 
    4, 4, 9, 9, 6, 6, 5, 5), label = "month of household interview", format.stata = "%8.0g", labels = c(january = 1, 
    february = 2, march = 3, april = 4, may = 5, june = 6, july = 7, 
    august = 8, september = 9, october = 10, november = 11, december = 12
    ), class = c("haven_labelled", "vctrs_vctr", "double")), 
        structure(c(8, 8, 9, 9, 2, 2), label = "month of household interview", format.stata = "%8.0g", labels = c(january = 1, 
        february = 2, march = 3, april = 4, may = 5, june = 6, 
        july = 7, august = 8, september = 9, october = 10, november = 11, 
        december = 12), class = c("haven_labelled", "vctrs_vctr", 
        "double")), NULL), hyear_1 = list(structure(c(2001, 2001, 
    2001, 2001, 2001, 2001, 2001, 2001, 2001, 2001, 2001, 2001, 
    2001, 2001, 2001, 2001, 2001), label = "year of household interview", format.stata = "%8.0g"), 
        structure(c(2001, 2001, 2001, 2001, 2001, 2001, 2001, 
        2001, 2000, 2000, 2001, 2001), label = "year of household interview", format.stata = "%8.0g"), 
        structure(c(2001, 2001, 2000, 2000), label = "year of household interview", format.stata = "%8.0g")), 
    hyear_2 = list(structure(c(2001, 2001, 2001, 2001, 2001, 
    2001, 2001, 2001, 2001, 2001, 2001, 2001), label = "year of household interview", format.stata = "%8.0g"), 
        structure(c(2001, 2001, 2001, 2001, 2001, 2001), label = "year of household interview", format.stata = "%8.0g"), 
        NULL)), row.names = c(NA, -3L), class = c("tbl_df", "tbl", 
"data.frame"))

Desired output copied from prior question:

sn2 leisure.1 hmonth.1 hyear.1 leisure.2 hmonth.2 hyear.2
227 230 6 2000 540 6 2000
227 130 6 2000 170 6 2000
250 370 6 2000 380 6 2000
250 380 6 2000 190 6 2000
294 120 6 2000 210 6 2000
294 200 6 2000 310 6 2000
307 130 7 2000 220 7 2000
307 480 7 2000 270 7 2000
1

There are 1 best solutions below

9
Jon Spring On

Your nested data has internal misalignment where, for instance, leisure_1 has 17 values while leisure_2 has 12 values for sn2 171. I'm going to assume the first leisure_1 should be listed with the first leisure_2 etc and you'll have NAs for that when leisure_1 is on its 13th-17th values.

I bet that's not what you want though, in which case please explain further in your question.

library(tidyverse) # worked using tidyverse 1.3.2 and 2.0.0
df |>
  pivot_longer(-sn2, names_to = c("type", "obs"), names_sep = "_") |>
  unnest_longer(value) |> 
  mutate(num = row_number(), .by = c(sn2, type, obs)) |>
  pivot_wider(names_from = c(type, obs), values_from = value, names_vary = "slowest")

Result

# A tibble: 33 × 8
     sn2   num leisure_1 leisure_2    hmonth_1      hmonth_2   hyear_1   hyear_2
   <dbl> <int> <dbl+lbl> <dbl+lbl>   <dbl+lbl>     <dbl+lbl> <dbl+lbl> <dbl+lbl>
 1   171     1       180       350 8 [august]  8 [august]         2001      2001
 2   171     2       370       410 8 [august]  8 [august]         2001      2001
 3   171     3       430       410 8 [august]  1 [january]        2001      2001
 4   171     4       470       500 8 [august]  1 [january]        2001      2001
 5   171     5       560       490 1 [january] 4 [april]          2001      2001
 6   171     6       320       180 1 [january] 4 [april]          2001      2001
 7   171     7       370       380 1 [january] 9 [september]      2001      2001
 8   171     8       470       290 1 [january] 9 [september]      2001      2001
 9   171     9       260       270 4 [april]   6 [june]           2001      2001
10   171    10       310       350 4 [april]   6 [june]           2001      2001
# ℹ 23 more rows