My code was running just fine with an 80/20 training test split. For own reasons, I want to test out performance at 70/30 split but DeepSurv is giving an error. Although no errors before, and other deep survival models are running on both data splits (Deephit and coxtime).
The error is
Error in check_prediction_data.PredictionDataSurv(pdata, train_task = task) : Assertion on 'pdata$crank' failed: Contains missing values (element 1).
Any help on what is causing the error or where I should change the code is appreciated.
This is a small part of the dataframe
df <- data.frame(
AGE = c(75.6, 78.9, 63.9),
PTGENDER = c(2, 1, 1),
PTEDUCAT = c(16, 16, 18),
PTETHCAT = c(2, 2, 2),
PTRACCAT = c(4, 4, 1),
PTMARRY = c(2, 2, 2),
CDRSB.bl = c(0, 0.5, 0),
ADAS11.bl = c(11, 3, 4),
ADAS13.bl = c(16, 4, 6),
ADASQ4.bl = c(5, 1, 2),
MMSE.bl = c(27, 29, 30),
RAVLT.immediate.bl = c(30, 50, 66),
RAVLT.learning.bl = c(2, 10, 8),
RAVLT.forgetting.bl = c(4, 4, 1),
RAVLT.perc.forgetting.bl = c(57.14286, 26.66667, 6.666667),
LDELTOTAL.bl = c(14, 13, 13),
FAQ.bl = c(1, 0, 1),
Comorbidity_Alcohol.Abuse = c(0, 0, 1),
Comorbidity_Allergies.or.Drug.Sensitivities = c(1, 1, 1),
Comorbidity_Cardiovascular = c(1, 0, 0),
Comorbidity_Dermatologic.Connective.Tissue = c(1, 1, 1),
Comorbidity_Drug.Abuse = c(0, 0, 0),
Comorbidity_Renal.Genitourinary = c(1, 1, 2),
Comorbidity_Respiratory = c(0, 0, 0),
Comorbidity_Smoking = c(0, 0, 0),
time = c(72, 24, 24),
status = c(0, 0, 0)
)
This is my MLR 3 code: Define the task
task = TaskSurv$new("data",
backend = alldata,
time = "time",
event = "status")
task$set_col_roles("status", c("target", "stratum"))
split = partition(task, ratio = 0.7, stratify = TRUE)
#define learners
lrn_deepsurv = lrn("surv.deepsurv",
dropout = to_tune(0,1),
optimizer = to_tune(c("adadelta")),
lr = to_tune(0,1)
)
lrn_deephit = lrn("surv.deephit",
dropout = to_tune(0,1),
optimizer = to_tune(c("adadelta")),
lr = to_tune(0,1)
)
#Hyperparameter tuning
at_deepsurv = auto_tuner(
tuner = tuner,
learner = lrn_deepsurv,
resampling = rsmp("cv", folds = 5),
measure = msr("surv.cindex"),
terminator = trm("evals", n_evals = 100, k = 0)
)
rr_deepsurv = resample(task, at_deepsurv, resampling_outer, store_models = FALSE)
#Save model
lrn_deepsurv_tuned = at_deepsurv
saveRDS(lrn_deepsurv_tuned, "lrn_deepsurv_tuned.rds")
###########################################
at_deephit = auto_tuner(
tuner = tuner,
learner = lrn_deephit,
resampling = rsmp("cv", folds = 5),
measure = msr("surv.cindex"),
terminator = trm("evals", n_evals = 100, k = 0)
)
rr_deephit = resample(task, at_deephit, resampling_outer, store_models = FALSE)
#Save model
lrn_deephit_tuned = at_deephit
saveRDS(lrn_deephit_tuned, "lrn_deephit_tuned.rds")
###############################################
#Training and testing
lrn_deepsurv_tuned <- readRDS('lrn_deepsurv_tuned.rds')
lrn_deepsurv_tuned$train(task, split$train)
predictiontrain_deepsurv = lrn_deepsurv_tuned$predict(task, split$train)
prediction_deepsurv = lrn_deepsurv_tuned$predict(task, split$test)
lrn_deephit_tuned <- readRDS('lrn_deephit_tuned.rds')
#lrn_deephit_tuned = at_deephit
lrn_deephit_tuned$train(task, split$train)
predictiontrain_deephit = lrn_deephit_tuned$predict(task, split$train)
prediction_deephit = lrn_deephit_tuned$predict(task, split$test)