I made a function to automate the production of .tex files, it was working so far until I decided to put on the stargazer the result from a dplyr aggregation.
mean_outcome is not working if I put within it i, i, outcome or eval(i). I always get this error:
Warning message:
There was 1 warning in `summarise()`.
ℹ In argument: `mean = mean(i, na.rm = T)`.
Caused by warning in `mean.default()`:
! argument is not numeric or logical: returning NA
I want to show in stargazer in the part of add.lines.
reg_coef_tables <- function(df, y) {
### Function: Converts panel data to data usable for .tex tables
# - df = panel data for the country
# - y = outcome (fcs, fies, etc.) <
# - Full example: reg_pooled_lasso(df, fcs)
df <- df %>%
filter(round_pooled != 0)
# First, we need to get the list of lasso controls to be used for the regression
outcome = eval(`i`)
mean_outcome <- df %>%
filter(round_pooled == 11 & treatment_arm == 0) %>%
summarise(mean = mean(i, na.rm = T)) %>%
pull(mean)
# define control variable (i.e. outcome at baseline), which has the suffix _imp
control <- paste0(outcome, "_imp")
# remove rows with NAs in the outcome variable, otherwise we can't run glmnet
df <- df[complete.cases(df[, c(outcome, control)]),]
# vars
vars <- df[complete.cases(df[, c(outcome, control,
'hhh_age_imp', 'sex_hhh_imp',
'hh_total_imp', 'hasplots_imp',
'hh_own_bus_imp', 'own_livestock_imp',
'tot_lstock_count_tlu_imp', 'wage_job_any_imp',
'hh_assets_tot_imp', 'farm_assets_tot_imp',
'round_cont', 'level1')]),]
# define response variable
depvar <- data.matrix(vars[, outcome])
# define matrix of possible predictor variables
indepvars <- data.matrix(vars[, c('hhh_age_imp', 'sex_hhh_imp',
'hh_total_imp', 'hasplots_imp',
'hh_own_bus_imp', 'own_livestock_imp',
'tot_lstock_count_tlu_imp', 'wage_job_any_imp',
'hh_assets_tot_imp', 'farm_assets_tot_imp',
'round_cont', 'level1')])
# run a lasso to get a value for lambda
model <- cv.glmnet(indepvars,
depvar,
alpha = 1,
type.measure = "mse")
# get the coefficients with the best lambda
c <- coef(model, s = "lambda.1se", exact = TRUE)
# get the list of selected indicators (where coefficient is not 0, i.e. not dropped through the lasso regression)
inds <- which(c!=0)
# store the indicators in a vector
variables <- row.names(c)[inds]
# remove unnecessary variables from the lasso vector, these are the fixed effects and intercept using the function we defined above
variables <- variables[!(variables %in% c('(Intercept)', 'round_cont', 'level1'))]
# convert the character vector into one string with + in between for the regression below
controls <- paste0(variables, collapse = " + ")
controls <- paste0("+ ", controls)
control <- paste0("+ ", control)
# for some outcomes, no lasso controls will be selected, for this reason we need to adjust the vector of controls so that the regression below still runs
variables_all <- toString(variables)
controls <- ifelse(variables_all == "", "", controls)
# Now, we can run the regression with the selected lasso controls
reg1 <- felm(as.formula(paste(outcome, "~ treatment", " | level1 + round_cont | 0 | level4 + hhid")), data = df)
reg2 <- felm(as.formula(paste(outcome, "~ treatment", control, " | level1 + round_cont | 0 | level4 + hhid")), data = df)
reg3 <- felm(as.formula(paste(outcome, "~ treatment", controls, control, " | level1 + round_cont | 0 | level4 + hhid")), data = df)
reg4 <- felm(as.formula(paste(outcome, "~ treat_uct + treat_ffa", " | level1 + round_cont | 0 | level4 + hhid")), data = df)
reg5 <- felm(as.formula(paste(outcome, "~ treat_uct + treat_ffa", control, " | level1 + round_cont | 0 | level4 + hhid")), data = df)
reg6 <- felm(as.formula(paste(outcome, "~ treat_uct + treat_ffa", controls, control, " | level1 + round_cont | 0 | level4 + hhid")), data = df)
stargazer(reg1, reg2,
reg3, reg4,
reg5, reg6,
align = TRUE,
dep.var.labels = c("(1)", "(2)","(3)", "(4)", "(5)", "(6)"),
omit.stat = c("f", "adj.rsq", "ser"),
no.space = TRUE,
digits = 3, # number of decimals
add.lines = list(c('Mean outcome', rep(`mean_outcome`, 6)),
c('Lasso controls', "No", "No", "Yes", "No", "No", "Yes"),
c('County FE', rep("Yes", 6)),
c('Month FE', rep("Yes", 6))),
dep.var.caption = "", # remove dep var header
out = paste0(high_frequency_output_filepath, outcome, ".tex"))
}
liv_vars = c("hasplots")
liv_out_list = vector("list", 1)
for(i in liv_vars) {
liv_out_list[[i]] <- reg_coef_tables(df, i)
}