Get leading element for the lexicographical order

96 Views Asked by At

If M is a numeric matrix, I can order its rows with respect to the lexicographical order by running lexsort(M) where

lexorder <- function(M) {
  do.call(order, lapply(seq_len(ncol(M)), function(i) M[, i]))
}
lexsort <- function(M) {
  M[lexorder(M), ]
}

But I'm only interested in getting the greater row (the last one of the ordered matrix). Can we avoid ordering everything to more efficiently extract the last row?

2

There are 2 best solutions below

4
Onyambu On BEST ANSWER

You couls write a recursive function that works faster:

lex_max <- function(M,i=1){
  d <- dim(M)
  if(d[1] == 1 | i > d[2])M[1,]
  else lex_max(M[max(M[,i]) == M[,i],,drop = FALSE], i+1)
}

a <- matrix(sample(500, 1e5, TRUE), 10)

The timings:

Large number of columns:

microbenchmark::microbenchmark(lex_max(a), 
        OP=lexsort(a)[nrow(a),],lexmaxrow(a), check = 'equal')

Unit: microseconds
         expr     min       lq      mean   median       uq     max neval
   lex_max(a)    45.7    57.95    80.442    65.55    87.45   306.4   100
           OP 15819.5 19437.25 25579.100 22246.55 29002.80 68948.8   100
 lexmaxrow(a) 16393.9 18739.65 25210.846 22022.40 29098.75 47731.1   100

a <- matrix(sample(500, 1e5, TRUE), 500)

Unit: microseconds
         expr   min     lq    mean median      uq     max neval
   lex_max(a)   5.7   9.90  93.152  12.85   19.70  7124.0   100
           OP 577.0 629.75 907.524 699.20 1017.70  7771.4   100
 lexmaxrow(a) 470.5 521.05 875.526 619.45  908.85 10049.1   100

Large number of rows

a <- matrix(sample(500, 1e5, TRUE), ncol=10)

Unit: microseconds
         expr   min    lq     mean median      uq     max neval
   lex_max(a)  60.2  97.5  137.462  120.0  164.40   650.5   100
           OP 594.0 775.9 1359.959  966.8 1251.35 14719.9   100
 lexmaxrow(a) 475.1 624.1 1013.927  769.5  936.60 11775.1   100

In all the instances the lex_max function performs >~10x faster

Edit:

If you need the position, you could simply do:

which_lexmax <- function(M,i=1, b = seq_len(nrow(M))){
  d <- dim(M)
  if(d[1] == 1 | i > d[2])b[1]
  else lex_max(M[mx <- max(M[,i]) == M[,i],,drop = FALSE], i+1, b[mx])
}
 which_lexmax(a)
9
ThomasIsCoding On

Update

If you want both the row with highest lexicographical order and the row index, you can try the code below

lex_max2 <- function(M) {
    i <- 1
    nc <- ncol(M)
    idx <- 1:nrow(M)
    repeat {
        p <- max(M[, i]) == M[, i]
        idx <- idx[p]
        M <- M[p, ]
        if (length(idx) == 1) {
            return(list(lexmaxval = M, index = idx))
        } else {
            i <- i + 1
        }
    }
}

and an example is

> M <- rbind(c(1, 2, 3), c(1, 2, 2), c(2, 3, 2), c(2, 2, 3))

> lex_max2(M)
$lexmaxval
[1] 2 3 2

$index
[1] 3

Previous Solution

Similar idea to Onyambu's answer but using repeat rather than recursion

lex_max2 <- function(M) {
    i <- 1
    nc <- ncol(M)
    repeat {
        M <- M[max(M[, i]) == M[, i], ]
        if (length(M) == nc) {
            return(M)
        } else {
            i <- i + 1
        }
    }
}

and you can see a bit speed improvement

> set.seed(0)

> M1 <- matrix(sample(500, 1e6, TRUE), ncol = 100)

> microbenchmark(
+     lex_max(M1),
+     lex_max2(M1),
+     check = "equal"
+ )
Unit: microseconds
         expr  min    lq    mean median     uq    max neval
  lex_max(M1) 67.3 88.10 154.193  90.45 101.30 5700.1   100
 lex_max2(M1) 60.0 85.75  94.461  87.80  97.15  158.9   100

> M2 <- matrix(sample(500, 1e7, TRUE), 500)

> microbenchmark(
+     lex_max(M2),
+     lex_max2(M2),
+     check = "equal"
+ )
Unit: microseconds
         expr   min     lq    mean median     uq   max neval
  lex_max(M2) 135.9 187.00 232.651 200.25 254.95 597.1   100
 lex_max2(M2)  89.6 113.15 152.559 126.20 159.40 528.7   100