Append row that sums up selected rows in R

63 Views Asked by At

Working in R, I have a dataframe df that shows the number of students enrolled in each grade.

df <- structure(list(Grade = c("PK3", "PK4", "KG", "Grade 1", "Grade 2", 
"Grade 3", "Grade 4", "Grade 5", "Grade 6", "Grade 7", "Grade 8", 
"Grade 9", "Grade 10", "Grade 11", "Grade 12", "Ungraded"), Enrolled = c(4967, 
6481, 7378, 7041, 6760, 6590, 6473, 6191, 5790, 5693, 5614, 7254, 
4951, 4250, 3792, 238)), row.names = c(NA, -16L), class = c("tbl_df", 
"tbl", "data.frame"))

I want to append two rows at the end: one which shows total enrollment in grades K-12 and the other which shows total enrollment for grades PK-12. I wrote code to do this using the {janitor} package, but this code seems unnecessarily long and complicated. Is there a better way to tell the code to append one row that sums up rows 3 to 16, and another row that sums up rows 1 to 16?

df2 = df %>% filter(Grade != "PK3" & Grade != "PK4") %>%
  adorn_totals(where="row", name="k_12_total") %>%
  filter(Grade == "k_12_total")
df = rbind(df, df2)

df2 = df %>% filter(Grade != "k_12_total") %>%
  adorn_totals(where="row", name="pk_12_total") %>%
  filter(Grade == "pk_12_total")
df = rbind(df, df2)
3

There are 3 best solutions below

2
AndS. On BEST ANSWER

What if we simply just append two 1-row dataframes with the sums you want. Assuming you already know the indices its as easy as:

rbind(df, 
      data.frame(Grade = "k_12_total", Enrolled = sum(df$Enrolled[3:16])),
      data.frame(Grade = "pk_12_total", Enrolled = sum(df$Enrolled[1:16])))
#>          Grade Enrolled
#> 1          PK3     4967
#> 2          PK4     6481
#> 3           KG     7378
#> 4      Grade 1     7041
#> 5      Grade 2     6760
#> 6      Grade 3     6590
#> 7      Grade 4     6473
#> 8      Grade 5     6191
#> 9      Grade 6     5790
#> 10     Grade 7     5693
#> 11     Grade 8     5614
#> 12     Grade 9     7254
#> 13    Grade 10     4951
#> 14    Grade 11     4250
#> 15    Grade 12     3792
#> 16    Ungraded      238
#> 17  k_12_total    78015
#> 18 pk_12_total    89463

For multiple years use colSums:

#example data
df$Enrolled2020 <- df$Enrolled + sample(-100:100, nrow(df))


rbind(df, 
      data.frame(Grade = "k_12_total", t(colSums(df[3:16,2:3])) ),
      data.frame(Grade = "pk_12_total", t(colSums(df[1:16,2:3]))))
#>          Grade Enrolled Enrolled2020
#> 1          PK3     4967         5066
#> 2          PK4     6481         6422
#> 3           KG     7378         7359
#> 4      Grade 1     7041         6977
#> 5      Grade 2     6760         6713
#> 6      Grade 3     6590         6585
#> 7      Grade 4     6473         6386
#> 8      Grade 5     6191         6232
#> 9      Grade 6     5790         5765
#> 10     Grade 7     5693         5701
#> 11     Grade 8     5614         5701
#> 12     Grade 9     7254         7198
#> 13    Grade 10     4951         4912
#> 14    Grade 11     4250         4189
#> 15    Grade 12     3792         3730
#> 16    Ungraded      238          256
#> 17  k_12_total    78015        77704
#> 18 pk_12_total    89463        89192
0
TarJae On

Something like this:

library(dplyr)
library(janitor)

df %>%
  bind_rows(
    df %>%
      filter(!grepl("PK", Grade)) %>% 
      adorn_totals(where = "row", name = "k_12_total"),
    df %>%
      filter(Grade != "k_12_total") %>%
      adorn_totals(where = "row", name = "pk_12_total")
  ) %>% 
  distinct()
A tibble: 18 × 2
   Grade       Enrolled
   <chr>          <dbl>
 1 PK3             4967
 2 PK4             6481
 3 KG              7378
 4 Grade 1         7041
 5 Grade 2         6760
 6 Grade 3         6590
 7 Grade 4         6473
 8 Grade 5         6191
 9 Grade 6         5790
10 Grade 7         5693
11 Grade 8         5614
12 Grade 9         7254
13 Grade 10        4951
14 Grade 11        4250
15 Grade 12        3792
16 Ungraded         238
17 k_12_total     78015
18 pk_12_total    89463
0
Darren Tsai On

Doing adorn_totals() twice makes your data size inflate. You need an extra distinct() to remove duplicate rows. That seems to be memory-consuming and inefficient.

You can use the following method instead:

library(dplyr)
library(tidyr)

df %>%
  summarise(k_12_total  = sum(Enrolled[!grepl("PK", Grade)]),
            pk_12_total = sum(Enrolled)) %>%
  pivot_longer(everything(), names_to = "Grade", values_to = "Enrolled") %>%
  bind_rows(df, .)

# # A tibble: 18 × 2
#    Grade       Enrolled
#    <chr>          <dbl>
#  1 PK3             4967
#  2 PK4             6481
#  3 KG              7378
#  4 Grade 1         7041
#  5 Grade 2         6760
#  6 Grade 3         6590
#  7 Grade 4         6473
#  8 Grade 5         6191
#  9 Grade 6         5790
# 10 Grade 7         5693
# 11 Grade 8         5614
# 12 Grade 9         7254
# 13 Grade 10        4951
# 14 Grade 11        4250
# 15 Grade 12        3792
# 16 Ungraded         238
# 17 k_12_total     78015
# 18 pk_12_total    89463