Calculating conditional proportions from R data.frame

30 Views Asked by At

There is a multiway crosstable saved as a frequency table like this:

# Example data: a 2 x 2 x 2 x 2 cross-table stored in long form, one row per
# cell with its count in `Freq`. The factor columns follow regular repeating
# patterns, so they are generated with rep() instead of being typed out.
df <- data.frame(
  Correct = rep(c("yes", "no"), times = 8),
  Type    = rep(c("t1", "t2"), each = 2, times = 4),
  Subtype = rep(c("st1", "st2"), each = 4, times = 2),
  Level   = rep(c("a", "b"), each = 8),
  Freq    = c(
    115, 99, 140, 81, 104, 100, 156, 52,
    61, 160, 59, 164, 41, 160, 48, 159
  )
)

How can one convert `Freq` into proportions conditioned on all variables except `Correct`, resulting in this new variable:

# Desired result, typed by hand and rounded to two decimals. Rows come in
# yes/no pairs, so each line below holds one pair.
df$Prop <- c(
  0.54, 0.46,
  0.63, 0.37,
  0.51, 0.49,
  0.75, 0.25,
  0.28, 0.72,
  0.26, 0.74,
  0.20, 0.80,
  0.23, 0.77
)
df
#    Correct Type Subtype Level Freq Prop
# 1      yes   t1     st1     a  115 0.54
# 2       no   t1     st1     a   99 0.46
# 3      yes   t2     st1     a  140 0.63
# 4       no   t2     st1     a   81 0.37
# 5      yes   t1     st2     a  104 0.51
# 6       no   t1     st2     a  100 0.49
# 7      yes   t2     st2     a  156 0.75
# 8       no   t2     st2     a   52 0.25
# 9      yes   t1     st1     b   61 0.28
# 10      no   t1     st1     b  160 0.72
# 11     yes   t2     st1     b   59 0.26
# 12      no   t2     st1     b  164 0.74
# 13     yes   t1     st2     b   41 0.20
# 14      no   t1     st2     b  160 0.80
# 15     yes   t2     st2     b   48 0.23
# 16      no   t2     st2     b  159 0.77
1

There is 1 best solution below

0
s_baldur On BEST ANSWER

In base R:

# Option 1: within each Type x Level x Subtype group, divide every count by
# the group's total. ave() returns a vector aligned with the rows of df.
df$Prop <- ave(
  df$Freq, df$Type, df$Level, df$Subtype,
  FUN = function(counts) counts / sum(counts)
)
# or
# Option 2: let ave() spread each group's total over its rows, then divide
# the raw counts by those totals.
df$Prop <- df$Freq / ave(df$Freq, df$Type, df$Level, df$Subtype, FUN = sum)
 
#    Correct Type Subtype Level Freq      Prop
# 1      yes   t1     st1     a  115 0.5373832
# 2       no   t1     st1     a   99 0.4626168
# 3      yes   t2     st1     a  140 0.6334842
# 4       no   t2     st1     a   81 0.3665158
# 5      yes   t1     st2     a  104 0.5098039
# 6       no   t1     st2     a  100 0.4901961
# 7      yes   t2     st2     a  156 0.7500000
# 8       no   t2     st2     a   52 0.2500000
# 9      yes   t1     st1     b   61 0.2760181
# 10      no   t1     st1     b  160 0.7239819
# 11     yes   t2     st1     b   59 0.2645740
# 12      no   t2     st1     b  164 0.7354260
# 13     yes   t1     st2     b   41 0.2039801
# 14      no   t1     st2     b  160 0.7960199
# 15     yes   t2     st2     b   48 0.2318841
# 16      no   t2     st2     b  159 0.7681159