foreach %dopar% does not produce all results

60 Views Asked by At

I am trying to use parallel computing with foreach %dopar%. In short, when I set nCores > 1, I get incomplete results; when I set nCores = 1, the results are fine. Here are the code and its output.

# Per-row mean and standard deviation of geData, used below to standardise
# each sample (column) row by row.
means <- rowMeans(geData)
sds <- apply(geData, 1, sd)

# Number of workers to register; 7 was an alternative value that was tried.
# nCores <- 7
nCores <- 15
registerDoParallel(nCores)

# One task per column of geData. foreach gathers the value of the loop body
# for every task into the list `ans`. seq_len() is used instead of
# 1:ncol(geData) so that a zero-column input yields no tasks rather than the
# bogus indices 1 and 0.
ans <- foreach(ss = seq_len(ncol(geData))) %dopar% {
  # Standardise the current sample and rank its values from high to low.
  currentSample <- (geData[, ss] - means) / sds
  rankedList <- sort(currentSample, decreasing = TRUE)

  # Run mwwGST once for every gene set against this sample's ranked list.
  aMwwGST <- lapply(
    geneSet,
    function(x) {
      mwwGST(
        rankedList = rankedList,
        geneSet = x,
        minLenGeneSet = 20,
        alternative = "two.sided",
        verbose = FALSE
      )
    }
  )

  # Drop gene sets for which mwwGST returned an empty result.
  aMwwGST <- aMwwGST[lengths(aMwwGST) != 0]

  tmp_NES <- sapply(aMwwGST, function(x) x$log.pu)
  tmp_pValue <- sapply(aMwwGST, function(x) x$p.value)

  # NOTE(review): this print() runs inside a worker process, so its output
  # may not all reach the console -- TODO confirm for the registered backend.
  # Inspect length(ans) to check that every iteration returned a result.
  print(ss)

  # Value of this task. Named differently from the outer `ans` to avoid
  # shadowing the variable that receives the foreach() result.
  list(tmp_NES = tmp_NES, tmp_pValue = tmp_pValue)
}

Here is the output of print(ss):

[1] 14
[1] 15
[1] 13
[1] 30
[1] 28
[1] 29
[1] 45
[1] 43
[1] 44
[1] 60
[1] 58
[1] 59
[1] 75
[1] 73
[1] 74
[1] 90
[1] 88
[1] 89
[1] 105
[1] 103
[1] 104
[1] 120
[1] 118
[1] 119
...

As you can see, not all iterations appear to have run. I am not sure what the reason is.

1

There is 1 best solution below

0
M.Viking On

Consider explicitly creating a cluster object (cl) with the desired number of cores using makePSOCKcluster, and registering that object with registerDoParallel:

library(doParallel)

# Sample data: the first four columns of the built-in iris data set.
iris2 <- iris[1:4]

# Worker count. Swap in one of the commented alternatives to compare runs.
nCores <- 1
# nCores <- 7
# nCores <- 15

# Build the PSOCK cluster explicitly and register that object as the
# %dopar% backend.
cl <- makePSOCKcluster(nCores)
registerDoParallel(cl)

# One task per column of the sample data. The loop body from the question is
# kept commented out; only print(ss) is active, and its value becomes the
# result of each task.
foreach(ss = seq_len(ncol(iris2))) %dopar% {
  # currentSample <- (geData[, ss] - means)/sds
  # rankedList <- sort(currentSample, decreasing = T)
  # aMwwGST <- lapply(geneSet, function(x) mwwGST(rankedList = rankedList, geneSet = x, minLenGeneSet = 20, alternative = "two.sided", verbose = F))
  # aMwwGST <- aMwwGST[sapply(aMwwGST, length) != 0]
  # tmp_NES <- sapply(aMwwGST, function(x) x$log.pu)
  # tmp_pValue <- sapply(aMwwGST, function(x) x$p.value)
  #
  # ans <- list(tmp_NES = tmp_NES, tmp_pValue = tmp_pValue)
  print(ss)
  # return(ans)
}

# Shut the worker processes down once the loop has finished.
stopCluster(cl)