I have a bunch of predictors that could be useful for a diagnosis. Since I have more than 30 predictors, I selected only the predictors that are more useful in this diagnosis, using the AUC measure (area under the ROC curve).
library(pROC)
library(dplyr)
library(purrr)
#Sorted my variables by AUC in table.AUC (one col with AUCs, one col with variable names)
# Then extracted a list of all variables with an AUC>=0.7
list.predictors=table.auc %>% filter(AUC>=0.7) %>% dplyr::select(Variable) %>% unlist()
# Then did a purrr::map() to calculate the bests thresholds, senibility, specificity and positive and negative predictive values
map(list.predictors, .f=~{
threshold=coords(roc(data$Illness,data[[.x]], levels=c("Healthy","Sick")), "best", ret = "threshold") %>% unlist()
c2=data
c2$NM=ifelse(data[[.x]]>=threshold,"1","0")
TP=c2 %>% filter(NM!="0" & Illness=="Sick") %>% nrow()
TN= c2 %>% filter(NM=="0" & Illness=="Healthy") %>% nrow()
FP=c2 %>% filter(NM!="0" & Illness=="Healthy") %>% nrow()
FN= c2 %>% filter(NM=="0" & Illness=="Sick") %>% nrow()
tribble(~"name",~"threshold",~"Sen",~"Spe",~"PPV",~"PNV",
.x,threshold, TP/(TP+FN), TN/(TN+FP), TP/(TP+FP), TN/(TN+FN)
) %>%
mutate(across((Sen:PNV),~round(.x,2)),
threshold=round(threshold,3)) %>% return()
}) %>% bind_rows()
However, I have some weird sensibility and specificity for one predictor : Sen = 18%, Spe = 41%. Looking at the ROC curve, I think my function has failed to find the best threshold (and found one that doesn't even exist ??) .
Looking at the coords() documentation, it seems that the threshold is calculated by maxing (Sen+Spe). However, here, it is clearely possible to find a threshold that have a higher Sen+Spe value.
Is the error coming from my code ? (doing coords(roc(data$Illness,data$the_predictor, levels=c("Healthy","Sick")), "best", ret = "threshold") %>% unlist() just gives the same threshold. Is it coming from the pROC function ?
Help me, I am so lost here.
Edit : code to copy/paste
data=structure(list(Predictor = c(-30.0265168079189, 1.15099363860717,
-11.9804372907826, -5.77111036023964, -2.12543529096544, 3.92087977078597,
5.34247848261018, 4.22335287554391, -23.6137844687066, -19.9373894314097,
-7.66163269551192, 9.13278170787256, -1.36786740246491, -2.72758047450363,
-12.1955983263269, 4.12642888543807, 0.163977606901045, 1.46547604262282,
-1.8412138361035, -6.14477150770652, -3.17570617995096, -30.064951386853,
10.9975933111011, 4.69525214654113, 7.17071677098559, -42.7958812479347,
-22.0530194817619, 5.22959301386168, -5.37225791749525, -8.41992810521969,
0.214693153960777, -10.2699595330821, 7.5188942409622, -8.35948733431006,
-28.3236974772852, -0.334108822977305, 10.3570377885479, -6.21627576016198,
-1.34577632302221, -0.704958574099198, -8.21298595801565, -0.673991917300366,
10.1477239063002, -13.3154432636137, -24.7929589691277, -13.7662638672144,
-11.8388358393301, 1.2811315099285, -16.6351572937165, -11.2898600124526,
-22.8066837752388, 3.56811422380944, -23.3577893771017, -14.1165843206214,
-3.60022726353809, -3.11503373374385, -12.2597492695888, 7.31226134848711,
-11.5833482372835, -16.7535225022961, -8.56422928031759, -47.9699360179454,
-19.7539978196463, -26.3666055391003, -17.2614678085344, 13.2702046763866,
-13.2264594680634, 15.7449331695833, -5.50032293388125, 2.81599579215209,
1.92271901789842, -1.99266757116988, -27.2703581251132, 0.476318001224534,
-6.65722119385071, 2.35294755630814, 13.2977149882188, -15.6037631636617,
1.38135215321582, -9.71125135113497, -4.11971899794967, 5.21192589771364,
5.63060382748609, 3.93730115204109, 12.9318052438389, 6.16873004521188,
0.987650782375663, -11.352515305607, 13.4704068306721, -8.43764175284198,
18.5651453322091, 2.45966987595847, -42.9176544778171, -3.89773417236148,
-28.3070154341685, -13.6189396001791, -8.10091523989799, 2.45341660282897,
3.57427638074422, 16.1254084424853, -13.234491473745, -37.5823778107753,
3.89815090155948, 11.4130217096843, -26.9867338207, -21.9465772218131,
7.34257329115229, -11.3782968157392, -34.7142009728594, -49.8201162965836,
9.29909876231373, -10.0850944178935, 11.6016303844531, 5.23411199811675,
11.7946167683302, 3.28158141564784, 2.03385869625709, -1.89888171264814,
-23.3807367058501, -4.08156864378285, 9.00363191604415, 12.8526886439116,
-13.0772932176236, -8.00976153286771, 8.65199795385856, 15.0607427725054,
14.1375633988393, -8.66285501996315, 10.6594655613105, -17.5706905139599,
6.12042615000835, -3.31329113354379, -3.99151135257908, 8.04726385642244,
-0.56725238509163, -9.36165609862254, -4.62609609004956, -54.3425894866824,
-31.5916887347727, -22.8418309700193, -5.8349028529916, -24.2643112312585,
10.0928208337933, -10.663258246763, 0.154714176468552, 1.94161020180308,
1.23173520914207, -11.5359195732191, 10.4662441932512, -1.15329871483222,
2.48304873375732, 11.1247749589697, 13.600816862598, 5.4048677111595,
7.75142783090626, 12.407508070537, 14.2936521098022, 8.57492908453424,
7.78937753060301, 15.5758817729099, 12.9689634444848, 19.7716027797074,
17.3722434464835, 25.8398368056633, 22.9286256713902, 12.4332591080305,
20.1646775468313, 13.6359691315215, 22.4075156394212, 2.52052660698105,
-16.1294988287829, 24.2580803169934, 6.53302505803349, 28.0007397454365,
12.5997533567436, 19.8231609146357, 20.4758235394322, -3.14132866132124,
15.3652243575294, 5.51333807966991, 12.6701530511479, 7.76875005452657,
9.50414582122118, 7.18440942260985, 17.0577133339618, 1.75973853154878,
19.3991204516457, 17.2086372124398, 11.4946377474413, 18.1208643720225,
13.4371964205874, -1.41932411310972, -0.217642491330989, 0.559828427316018,
14.9513204550895, 8.40789113945641, 7.32690822343274, -10.6306122848244,
8.78982973978855, 19.9236377854904, -16.146128022031, -4.91319750465569,
1.94096133981224, 19.1978399847426, 5.68816798940584, 16.0286843888299,
10.8532049397931, 14.9332042247407, 7.89131242660244, 4.84817793483408,
-23.5709741092812, -5.0750006627721, 9.82990315423877, 13.445883799078,
-16.6078061062201, 8.09109746282265, 4.03027064373712, 7.04329554573276,
-21.7969169325793, 5.17566711827446, 17.7920853961723, 19.5637542942757,
-2.64115652660286, 15.8666090101656, 19.8301694557142, 6.12840423007928,
10.6956110325132, 20.6264711334891, 13.7039419874073, -5.48902063664391,
-13.5307133212758, -3.57547635643989, 8.28126733549754, -28.1686872403507,
21.1407803859056, -1.65377737760872, 0.831092132680901, 16.9496509311116,
-9.18907250852743, 14.8757097896787, 12.2295286749314), Illness = structure(c(1L,
2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L,
2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), levels = c("Healthy", "Sick"), class = "factor")), row.names = c(NA,
-241L), class = c("data.table", "data.frame"))
list.predictors=list("Predictor")
summary(data)
map(list.predictors, .f=~{
threshold=coords(roc(data$Illness,data[[.x]], levels=c("Healthy","Sick")), "best", ret = "threshold") %>% unlist()
c2=data
c2$NM=ifelse(data[[.x]]>=threshold,"1","0")
TP=c2 %>% filter(NM!="0" & Illness=="Sick") %>% nrow()
TN= c2 %>% filter(NM=="0" & Illness=="Healthy") %>% nrow()
FP=c2 %>% filter(NM!="0" & Illness=="Healthy") %>% nrow()
FN= c2 %>% filter(NM=="0" & Illness=="Sick") %>% nrow()
tribble(~"name",~"threshold",~"Sen",~"Spe",~"PPV",~"PNV",
.x,threshold, TP/(TP+FN), TN/(TN+FP), TP/(TP+FP), TN/(TN+FN)
) %>%
mutate(across((Sen:PNV),~round(.x,2)),
threshold=round(threshold,3)) %>% return()
}) %>% bind_rows()

I believe pROC gives the correct optimal threshold. The problem lies in your code which calculates the parameters incorrectly.
Notice the following output line when running the code:
This indicates that
Predictortakes lower values inSickcases. As a consequence, you are thresholding the data incorrectly. If you don't know the direction your predictor goes a priori, you should replace:with:
which produces the correct output:
In addition you should be able to get the results you want much more easily and without the need to compute the performance values yourself with a pipeline like this:
This pipeline produces the same output as yours, using the performance values computed by the coords function.