How to nest and loop step function in R, then output into separate Excel sheets

35 Views Asked by At

I am trying to loop the step function in R through several Excel sheets of countries (AUD, EUR and SGD). I have merged the sheets into a single dataframe and I am trying to use nest and then step to do stepwise regression for each country. However, I am unable to integrate the loop into the step function. After looping the step function, I would like to output the coefficients data (including t-stats, p-value etc.) into separate Excel sheets labelled by each country. Would someone be able to help? I have included my attempt at looping, the step code without loop, and the data in dput.

### Loop with error
dfraw %>% group_by(dfraw, 'Country') %>%
          nest() %>%
          mutate(both_model = map(data, ~lm(VarY ~ ., data = dfraw))) %>%
          step(both_model, direction = "both") %>%
          group_by(Country) %>%
          summary(both_model)
### Functional Step code
both_model <- lm(VarY ~ ., data = dfSGD)
both_model <- step(both_model, direction = "both")
summary(both_model)
Out <- createWorkbook()
addWorksheet(Out, "SGD")
writeData(Out, sheet = "SGD", x = both_model$coefficients)
saveWorkbook(Out, "output.xlsx")
### dput(dfraw)
structure(list(Country = c("AUD", "AUD", "AUD", "AUD", "AUD", 
"AUD", "AUD", "AUD", "AUD", "AUD", "EUR", "EUR", "EUR", "EUR", 
"EUR", "EUR", "EUR", "EUR", "EUR", "EUR", "SGD", "SGD", "SGD", 
"SGD", "SGD", "SGD", "SGD", "SGD", "SGD", "SGD"), VarY = c(-0.0244845360824741, 
-0.0624174372523117, 0.0452624163437831, 0.0530749789385003, 
0.00624000000000002, -0.0295754491970107, 0.0327707684745209, 
-0.0182452800253847, 0.025048480930834, 0.0425666088601608, 0.0141000829416644, 
0.00154489276626668, 0.0621540695036749, 0.00905518537502115, 
-0.0209109380291228, 0.0147859922178988, -0.0318677573278799, 
-0.0293082203837354, -0.024208903799075, -0.0178405500836277, 
-0.0188187608569775, -0.0269326121253097, 0.0527939257325898, 
0.0383738835848475, -0.0163586791881248, 0.000606244316459614, 
-0.0250029554320841, -0.0177659080352996, -0.00352907144923342, 
0.0195835545331209), VarX1 = c(-0.000207994636117426, -0.14021214097687, 
0.019987000188707, 0.0905973411305785, 0.0606271485403591, 0.0460107119360711, 
0.0197995343444735, -0.0253560472301735, 0.045979341688096, 0.0432113473174549, 
-0.000207994636117426, -0.14021214097687, 0.019987000188707, 
0.0905973411305785, 0.0606271485403591, 0.0460107119360711, 0.0197995343444735, 
-0.0253560472301735, 0.045979341688096, 0.0432113473174549, -0.000207994636117426, 
-0.14021214097687, 0.019987000188707, 0.0905973411305785, 0.0606271485403591, 
0.0460107119360711, 0.0197995343444735, -0.0253560472301735, 
0.045979341688096, 0.0432113473174549), VarX2 = c(-0.04, 0.390000000000001, 
-1.015, -0.149999999999999, 0.234999999999999, 0.0650000000000004, 
0.0750000000000002, 0.22, -0.0449999999999999, 0.115, -0.115, 
-0.241, -0.252, -0.102, -0.0249999999999999, -0.0840000000000001, 
-0.47202, 0.28388, -0.21801, -0.2159, 0.43, 1.02, -0.919999999999999, 
-0.65, 0.0700000000000003, -0.63, -0.02, 0, -0.44, -0.29), VarX3 = c(0.0199999999999996, 
-0.734999999999999, 0.229, 0.220000000000001, 0.0739999999999998, 
-0.0760000000000005, 0.00800000000000001, 0.151, -0.04, -0.0739999999999998, 
0.146, -0.117999999999999, -0.234, -0.024, 0.266, 0.101999999999999, 
0.31286, 0.25693, -0.0321999999999996, 0.236699999999999, -0.0819999999999999, 
-0.778, -0.116, 1.075, -0.151, 0.073999999999999, 0.133000000000001, 
0.545999999999999, 0.175000000000001, 0.346), VarX4 = c(-0.0602317110633257, 
-0.0633604956143422, 0.0775846309806723, -0.0297417548946277, 
-0.0720518811692337, -0.0339499828708462, -0.00436185680343282, 
-0.0348429263427839, 0.0958382153499828, 0.0404280217883632, 
-0.0602317110633257, -0.0633604956143422, 0.0775846309806723, 
-0.0297417548946277, -0.0720518811692337, -0.0339499828708462, 
-0.00436185680343282, -0.0348429263427839, 0.0958382153499828, 
0.0404280217883632, -0.0602317110633257, -0.0633604956143422, 
0.0775846309806723, -0.0297417548946277, -0.0720518811692337, 
-0.0339499828708462, -0.00436185680343282, -0.0348429263427839, 
0.0958382153499828, 0.0404280217883632)), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -30L))
1

There are 1 best solutions below

2
Till On

Just as you are wrapping the lm() in a map() call, you need to wrap the step() part in map() too. Since step() expects the data used in lm() we need to make sure that we also pass the original data to each step() call. We can use map2() to pass the lm object and the data to the step() calls. Here is what that can look like:

library(tidyverse)

res <- 
  dfraw |> 
  # If you want to run one model for each country you only need to
  # `group_by()` `Country`.
  group_by(Country) |>
  nest() |>
  mutate(both_model = map(data, ~ lm(VarY ~ ., data = .x)),
         step_model = map2(data, both_model, ~step(.y, direction = "both", trace = 0)))

summary() is not that useful for programmatically evaluating model results. You can use broom::glance() and broom::tidy() instead:

library(broom)

eval_res <- 
  res |> 
  mutate(quality = map(step_model, glance),
         estimates = map(step_model, tidy)) 

eval_res |> 
  select(Country, quality) |> 
  unnest(quality)
#> # A tibble: 3 × 13
#> # Groups:   Country [3]
#>   Country r.squared adj.r.squared  sigma statistic p.value    df logLik   AIC
#>   <chr>       <dbl>         <dbl>  <dbl>     <dbl>   <dbl> <dbl>  <dbl> <dbl>
#> 1 AUD         0.728         0.650 0.0230      9.35 0.0106      2   25.3 -42.6
#> 2 EUR         0.551         0.495 0.0206      9.81 0.0140      1   25.8 -45.5
#> 3 SGD         0.628         0.582 0.0180     13.5  0.00625     1   27.1 -48.2
#> # ℹ 4 more variables: BIC <dbl>, deviance <dbl>, df.residual <int>, nobs <int>

eval_res |> 
  select(Country, estimates) |> 
  unnest(estimates)
#> # A tibble: 7 × 6
#> # Groups:   Country [3]
#>   Country term        estimate std.error statistic p.value
#>   <chr>   <chr>          <dbl>     <dbl>     <dbl>   <dbl>
#> 1 AUD     (Intercept)  0.00355   0.00773     0.459 0.660  
#> 2 AUD     VarX1        0.370     0.126       2.93  0.0219 
#> 3 AUD     VarX4        0.291     0.134       2.17  0.0670 
#> 4 EUR     (Intercept)  0.00831   0.00732     1.14  0.289  
#> 5 EUR     VarX3       -0.116     0.0370     -3.13  0.0140 
#> 6 SGD     (Intercept) -0.00519   0.00587    -0.883 0.403  
#> 7 SGD     VarX2       -0.0383    0.0104     -3.68  0.00625

NOTE: There are widely published concerns with using stepwise model selection.