I have simulated some data and want to fit a non-linear mixed model to it using lme4::nlmer; however, I keep getting the following error:
Error in devfun(rho$pp$theta) : Downdated VtV is not positive definite
My code is:
library(nlme)
library(ggplot2)
library(lme4)
library(gridExtra)
library(MASS)
nparams <- c("delta1", "delta2", "delta3", "Time")
fpl1 <- deriv(as.formula(paste0("~((delta1) * exp(Time - (delta2) )) / (1 + exp((Time - (delta2)) / (delta3)))")), c("delta1", "delta2", "delta3"), function.arg = nparams, envir=environment())
createDataset <- function(nGroups, totalDaysMeasured, d1, d2, d3) {
  residualSD <- 0.939921
  nIndividuals <- 600
  # Covariance matrix of the three correlated random effects
  D <- rbind(c(51.88633,    -1.0498620, -0.05460614),
             c(-1.0498620,  15.8465000, -0.04587260),
             c(-0.05460614, -0.04587260,  0.01362791))
  random_effects <- mvrnorm(n = nIndividuals, mu = rep(0, 3), Sigma = D)
  nDays <- totalDaysMeasured
  groups <- rep(paste0("g", 1:nGroups), each = 3)
  days <- seq(0, 25, length.out = totalDaysMeasured)
  data <- data.frame(Day = rep(days, each = nIndividuals),
                     Group = rep(groups, nDays),
                     Individual = rep(1:nIndividuals, nDays))
  asymp.RNoiseByIndividual1 <- random_effects[, 1]
  asymp.RNoiseByIndividual2 <- random_effects[, 2]
  asymp.RNoiseByIndividual3 <- random_effects[, 3]
  # Response = mean function at the subject-specific parameters + residual noise
  data$X <- with(data, fpl1(d1 + asymp.RNoiseByIndividual1[Individual],
                            d2 + asymp.RNoiseByIndividual2[Individual],
                            d3 + asymp.RNoiseByIndividual3[Individual],
                            Day) +
                   rnorm(nrow(data), mean = 0, sd = residualSD))
  data
}
totalDaysMeasured <- 25
delta1 <- 5.051236
delta2 <- 13.86703
delta3 <- 0.8486555
data2Groups <- createDataset(1,totalDaysMeasured, delta1, delta2, delta3)
ggplot(data2Groups,aes(x=Day,y=X,colour=Group))+geom_point()
initialValues2Groups <- c(delta1 = delta1,
                          delta2 = delta2,
                          delta3 = delta3)
nparams <- c("A","C","D")
fpl <- deriv(as.formula(paste0("~(A*exp(x-C))/(1+exp((x-C)/D))")),
nparams,
function.arg=c("x",nparams),
envir=environment())
attr(fpl,"pnames") <- nparams
tmpstr <- deparse(fpl)
L1 <- grep("^ +\\.value +<-",tmpstr)
L2 <- grep("^ +attr\\(\\.value",tmpstr)
hej <- deparse(nparams)[1]
tmpstr2 <- c(tmpstr[1:L1],
paste0(".actualArgs <- as.list(match.call()[",
hej,"])"),
tmpstr[(L1+1):(L2-1)],
"dimnames(.grad) <- list(NULL, .actualArgs)",
tmpstr[L2:length(tmpstr)])
fpl <- eval(parse(text=tmpstr2),envir = environment())
nlmerString <- paste0("X ~ fpl(delta1, delta2, delta3, Day) ~ (delta1|Individual)+(delta3|Individual)+(delta3|Individual)")
startTimeNLMER <- Sys.time()
nlmerFit <- do.call("nlmer", list(
  as.formula(nlmerString),
  start = initialValues2Groups,
  data = data2Groups,
  control = nlmerControl(optimizer = "Nelder_Mead",
                         optCtrl = list(maxfun = 100000))
))
endTimeNLMER <- Sys.time()
#print(paste0("Time required for the ",deparse(substitute(data)),": ",endTimeNLMER-startTimeNLMER))
#nlmerFit
Anyone got any solutions to this problem?
I have also tried:
library("R.utils")
# Packages
library(parallel)
library(nlme)
library(ggplot2)
library(Matrix)
library(MASS)
library(nlraa)
library(lme4)
# Initial settings
num_subjects <- 600
num_time_points <- 25
n_sim <- 1000
N_T = num_subjects * num_time_points
# Define fixed effects
delta1 <- 5.051236
delta2 <- 13.86703
delta3 <- 0.8486555
initial_params <- c(delta1 = 5.051236, delta2 = 13.86703, delta3 = 0.8486555)
# Covariance matrix D of the random effects
D <- rbind(c(51.88633,    -1.0498620, -0.05460614),
           c(-1.0498620,  15.8465000, -0.04587260),
           c(-0.05460614, -0.04587260,  0.01362791))
# Parallel ---------------------------------------------------------------------
# Model function
nparams <- c("delta1", "delta2", "delta3", "eta1i", "eta2i","eta3i", "Time")
fpl <- deriv(as.formula(paste0("~((delta1+eta1i) * exp(Time - (delta2+eta2i) )) / (1 + exp((Time - (delta2+eta2i)) / (delta3+eta3i)))")), c("eta1i", "eta2i","eta3i"), function.arg = nparams, envir=environment())
nlmerString <- paste0("value ~ fpl(delta1, delta2, delta3, eta1i, eta2i,eta3i, Time) ~ (eta1i|Subject)+(eta2i|Subject)+(eta3i|Subject)")
random_effects <- mvrnorm(n = num_subjects, mu = rep(0, 3), Sigma = D)
eps <- c()
# Create an empty list to store the data
data_list <- list()
# Generate data for each subject
for (subject_id in 1:num_subjects) {
  # Get the random effects for the subject
  eta1i <- random_effects[subject_id, 1]
  eta2i <- random_effects[subject_id, 2]
  eta3i <- random_effects[subject_id, 3]
  eps_i <- rnorm(num_time_points, mean = 0, sd = 0.939921)
  eps <- append(eps, eps_i)
  # Create a data frame for the subject
  subject_data <- data.frame(
    Subject = factor(subject_id),
    Time = seq(0, 25, length.out = num_time_points),
    eps = eps_i,
    eta1i = eta1i,
    eta2i = eta2i,
    eta3i = eta3i
  )
  # Calculate delta values for the subject
  delta1i <- delta1 + eta1i
  delta2i <- delta2 + eta2i
  delta3i <- delta3 + eta3i
  # Generate data based on the specified model
  subject_data$value <- with(subject_data,
    (delta1i * exp(Time - delta2i)) / (1 + exp((Time - delta2i) / delta3i)) + eps_i)
  # Add the subject's data to the list
  data_list[[subject_id]] <- subject_data
}
simulated_data_normal <- do.call("rbind", data_list)
nlmerFit <- do.call("nlmer", list(
  as.formula(nlmerString),
  start = initial_params,
  data = simulated_data_normal
))
Here I instead get:
Error: step factor reduced below 0.001 without reducing pwrss
If I try to debug it and "jump over" the line that gives this error, I get the first error ("Downdated VtV is not positive definite") again.
I have tried removing two of the random effects to simplify the model, changing the starting values multiple times, and trying other optimizers via nlmerControl. Furthermore, fitting the non-linear mixed model with nlme::nlme works fine. However, I would like to compare the results of the two functions.
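The nlme fit that works for me looks roughly like this (a sketch using the objects from the first block; the random-effects structure is the one I intended for nlmer, with one random effect per delta):

nlmeFit <- nlme(X ~ fpl1(delta1, delta2, delta3, Day),
                data   = data2Groups,
                fixed  = delta1 + delta2 + delta3 ~ 1,
                random = delta1 + delta2 + delta3 ~ 1 | Individual,
                start  = initialValues2Groups)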
Summarizing the discussion in the comments:
The fpl1 data-generating function and the fpl modelling function do not accept their arguments in the same order: fpl takes Day as its first argument, whereas the nlmer formula above passes it last. A corrected call is sketched just below.
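For illustration only, a minimal sketch with the argument order matching fpl's definition (function.arg = c("x", nparams) puts the covariate first); whether this converges on a given simulated dataset is a separate matter:

nlmerString <- paste0("X ~ fpl(Day, delta1, delta2, delta3) ~ ",
                      "(delta1|Individual) + (delta2|Individual) + (delta3|Individual)")
nlmerFit <- nlmer(as.formula(nlmerString),
                  start = initialValues2Groups,
                  data  = data2Groups)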
Expanding on the data themselves: you didn't seed your simulation, but a representative dataset might look like this scatterplot of observations with the fixed-effect trend as a red line:

[figure: simulated observations against Day, with the fixed-effect trend in red]

Clearly, even the fixed part of your data-generating model is a pretty poor fit! If you were to scale down D or random_effects by an order of magnitude or two, you get something like this, where your model actually has a chance of fitting the data (a sketch of the rescaling follows below):

[figure: the same simulation with down-scaled random effects, fixed-effect trend in red]
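For instance (the factor of 100 is an arbitrary illustration, not a recommended value), shrinking the covariance matrix before drawing the random effects:

D_small <- D / 100  # dividing the variances by 100 shrinks the SDs by a factor of 10
random_effects <- mvrnorm(n = num_subjects, mu = rep(0, 3), Sigma = D_small)
# equivalently, scale the draws directly: random_effects <- random_effects / 10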
Finally, summarizing the usual source of these errors: "Downdated VtV is not positive definite" suggests (quasi-)complete separation, and "step factor reduced below 0.001 without reducing pwrss" points to starting values being too far from optimal for the model to proceed.
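On the second point, one way to get starting values closer to the optimum (my own suggestion, not something from the comment thread) is to fit the fixed part alone with nls() and reuse its estimates:

# Hypothetical seeding step: pooled fixed-effects fit, ignoring the grouping
fixedFit <- nls(value ~ (d1 * exp(Time - d2)) / (1 + exp((Time - d2) / d3)),
                data  = simulated_data_normal,
                start = c(d1 = 5, d2 = 14, d3 = 0.8))  # rough eyeballed guesses
betterStart <- setNames(coef(fixedFit), c("delta1", "delta2", "delta3"))
# ... then pass start = betterStart to nlmer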