Conjoint Experiment - Conditional Marginal means for categorical variable

36 Views Asked by At

I'm looking to adapt the codes shared in Andrew Heiss' amazingly detailed page on conjoint experiment, more specifically computing conditional marginal means for categorical variable using the dataset candidate.

I would like to generate the same graphs, but instead of having two dichotomous variables (military - has served/not served; and political party - Democrat/Republican), I was wondering how I could use the same code if either or both my variables aren't dichotomous, in such a way that the graph output would make sense.

For example, let's say that there are four parties (Party A, Party B, Party C and Party D) and three military status (Not served/Served a couple of years/Served for more than two years), how can I adapt the group_diffs_terms matrix to compute all potential differences, for example:

Not served - Party A minus Party B Not served - Party A minus Party C Not served - Party A minus Party D Not served - Party B minus Party C Not served - Party B minus Party D Not served - Party C minus Party D

and again for all military status, ideally all combined in the same graph. Sorry if it's an easy question, I am very very new to R.

Thank you!

##############################################################################
##############################################################################
### Load packages
library(tidyverse)        # ggplot, dplyr, and friends
library(haven)            # Read Stata files
library(broom)            # Convert model objects to tidy data frames
library(cregg)            # Automatically calculate frequentist conjoint AMCEs and MMs
library(survey)           # Panel-ish regression models
library(scales)           # Nicer labeling functions
library(marginaleffects)  # Calculate marginal effects
library(broom.helpers)    # Add empty reference categories to tidy model data frames
library(ggforce)          # For facet_col()
library(brms)             # The best formula-based interface to Stan
library(tidybayes)        # Manipulate Stan results in tidy ways
library(ggdist)           # Fancy distribution plots
library(patchwork)        # Combine ggplot plots

# Custom ggplot theme to make pretty plots
# Get the font at https://fonts.google.com/specimen/Jost
theme_nice <- function() {
  theme_minimal(base_family = "Jost") +
    theme(panel.grid.minor = element_blank(),
          plot.title = element_text(family = "Jost", face = "bold"),
          axis.title = element_text(family = "Jost Medium"),
          axis.title.x = element_text(hjust = 0),
          axis.title.y = element_text(hjust = 1),
          strip.text = element_text(family = "Jost", face = "bold",
                                    size = rel(0.75), hjust = 0),
          strip.background = element_rect(fill = "grey90", color = NA))
}

# Set default theme and font stuff
theme_set(theme_nice())
update_geom_defaults("text", list(family = "Jost", fontface = "plain"))
update_geom_defaults("label", list(family = "Jost", fontface = "plain"))

# Party colors from the Urban Institute's data visualization style guide, for fun
# http://urbaninstitute.github.io/graphics-styleguide/
parties <- c("#1696d2", "#db2b27")

# Functions for formatting things as percentage points
label_pp <- label_number(accuracy = 1, scale = 100, 
                         suffix = " pp.", style_negative = "minus")

label_amce <- label_number(accuracy = 0.1, scale = 100, suffix = " pp.", 
                           style_negative = "minus", style_positive = "plus")

# Data from https://doi.org/10.7910/DVN/THJYQR
# It's public domain
candidate <- read_stata("XXXXXXX/candidate.dta") %>% 
  as_factor()  # Convert all the Stata categories to factors

# Make a little lookup table for nicer feature labels
variable_lookup <- tribble(
  ~variable,    ~variable_nice,
  "atmilitary", "Military",
  "atreligion", "Religion",
  "ated",       "Education",
  "atprof",     "Profession",
  "atmale",     "Gender",
  "atinc",      "Income",
  "atrace",     "Race",
  "atage",      "Age"
) %>% 
  mutate(variable_nice = fct_inorder(variable_nice))

##############################################################################
##############################################################################
## Subgroup differences in AMCEs and Marginal Means
# No respondent-level characteristics in their data, so we'll invent a pretend column

## Code for generating fake respondent_party column
# Wrap this in withr::with_seed() so that the randomness is reproducible but the
# overall document seed doesn't get set or messed up
withr::with_seed(1234, {
  respondents_party_military <- candidate %>%
    group_by(resID, atmilitary) %>%
    # Find the proportion of times each respondent selected the candidate when
    # military service history was "Served"
    summarize(prob_select = mean(selected)) %>%
    filter(atmilitary == "Served") %>%
    select(-atmilitary) %>%
    ungroup() %>%
    # If a respondent selected the candidate more than 60% of the time when
    # seeing that they had served in the military, there's a 90% chance they're
    # a Republican. If they didn't select the military candidate 60% of the
    # time, there's a 75% chance they're a Democrat.
    mutate(respondent_party = case_when(
      prob_select >= 0.6 ~
        sample(
          c("Democrat", "Republican"), n(),
          replace = TRUE, prob = c(0.1, 0.9)
        ),
      prob_select < 0.6 ~
        sample(
          c("Democrat", "Republican"), n(),
          replace = TRUE, prob = c(0.75, 0.25)
        )
    )) %>%
    mutate(respondent_party = factor(respondent_party))
})

candidate_fake <- candidate %>% 
  left_join(respondents_party_military, by = join_by(resID))

model_military_party <- lm(
  selected ~ atmilitary * respondent_party, 
  data = candidate_fake
)
tidy(model_military_party)



##############################################################################
## Conditional Marginal Means

# To be thorough and allow for any type of regression family 
# (logistic, multinomial) with any other types of covariates, 
# manually with our own regression model fed through 
# marginaleffects::marginal_means()
# By specifying cross = TRUE, get all combinations of 
# military service and party 
# (without it, we'd get separate marginal means for just 
# the two levels of military and the two levels of party)
party_mms_mfx <- marginal_means(
  model_military_party,
  newdata = c("atmilitary", "respondent_party"),
  cross = TRUE,
  wts = "cells"
)
party_mms_mfx %>% as_tibble()


# To find the differences in those marginal means, Want the differences between rows 2 and 1
# (Republican - Democrat for "Not served") and between rows 4 and 3 
# (Republican - Democrat for "Served")
group_diffs_terms <- matrix(
  c(-1, 1, 0, 0,
    0, 0, -1, 1),
  ncol = 2
) %>% 
  magrittr::set_colnames(levels(candidate_fake$atmilitary))
group_diffs_terms

party_mms_mfx_diff <- marginal_means(
  model_military_party,
  newdata = c("atmilitary", "respondent_party"),
  cross = TRUE,
  wts = "cells",
  hypothesis = group_diffs_terms
) %>% 
  as_tibble()
party_mms_mfx_diff

# Can plot these conditional marginal means along with the party-based differences
mm_party_plot1 <- party_mms_mfx %>% 
  as_tibble() %>% 
  mutate(estimate_nice = case_when(
    estimate != 0 ~ label_percent()(estimate),
    estimate == 0 ~ NA
  )) %>% 
  ggplot(aes(x = estimate, y = atmilitary, color = respondent_party)) +
  geom_vline(xintercept = 0.5) +
  geom_pointrange(aes(xmin = conf.low, xmax = conf.high)) +
  geom_label(
    aes(label = estimate_nice), 
    position = position_dodge(width = -1.2),
    size = 3.5, show.legend = FALSE
  ) +
  scale_x_continuous(labels = label_percent()) +
  scale_color_manual(values = parties) +
  labs(
    x = "Marginal means",
    y = NULL,
    color = NULL,
    title = "Marginal means by respondent party"
  ) +
  facet_wrap(vars("Military")) +
  theme(
    legend.position = "bottom",
    legend.justification = "left",
    legend.margin = margin(l = -7, t = -5)
  )

mm_party_plot2 <- party_mms_mfx_diff %>% 
  mutate(estimate_nice = label_amce(estimate)) %>% 
  ggplot(aes(x = estimate, y = term)) +
  geom_vline(xintercept = 0) +
  geom_pointrange(aes(
    xmin = conf.low, xmax = conf.high, 
    color = "Republican marginal mean − Democrat marginal mean"
  )) +
  geom_label(
    aes(label = estimate_nice), size = 3.5,
    nudge_y = 0.3, color = "#85144b"
  ) +
  scale_x_continuous(labels = label_pp) +
  scale_color_manual(values = "#85144b") +
  labs(
    x = "Difference in marginal means",
    y = NULL,
    color = NULL,
    title = "Difference in marginal means by respondent party",
    subtitle = "Positive differences = Republicans prefer the level"
  ) +
  facet_wrap(vars("Military")) +
  theme(
    legend.position = "bottom",
    legend.justification = "left",
    legend.margin = margin(l = -7, t = -5)
  )

mm_party_plot1 | mm_party_plot2
0

There are 0 best solutions below