Edit:
See below for details, but you can do this in a single apply loop:
apply( mat , 2 , function(x) colSums( x > mat ) )
apply is fast here because it is optimised to work on matrices. A lot of the time spent using apply is usually in the conversion of a data.frame to a matrix, which is not needed here.
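To see that conversion cost directly, here is a minimal sketch on made-up data (the matrix m and data.frame df below are hypothetical stand-ins for the OP's data): the same apply call is timed on a matrix and on its data.frame equivalent, where apply must first coerce with as.matrix.
set.seed(1)
m  <- matrix(rnorm(5000), ncol = 5)   # made-up example matrix
df <- as.data.frame(m)                # the same data as a data.frame
require(microbenchmark)
microbenchmark(
  mat = apply(m , 2, function(x) colSums(x > m)),  # no conversion needed
  df  = apply(df, 2, function(x) colSums(x > m))   # coerced to a matrix first
)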
Original:
It's possible to do this entirely vectorised because > has a method for matrices and data.frames. Therefore you can get the indices of the columns to compare using expand.grid(), use these to subset the matrix, do the logical comparison, then use colSums to get the result and matrix to wrap it back up to the correct size. All this in 4 lines (really it could be 2):
n <- 1:ncol(mat)
ind <- expand.grid(n,n)  # every pair of column indices
out <- colSums( mat[,c(ind[,1])] > mat[,c(ind[,2])] )  # all pairwise comparisons at once
matrix( out , ncol(mat) , byrow = TRUE )  # reshape back to a ncol x ncol matrix
# [,1] [,2] [,3] [,4] [,5]
#[1,] 0 1 0 0 0
#[2,] 2 0 1 1 2
#[3,] 3 2 0 2 2
#[4,] 2 3 1 0 1
#[5,] 3 2 1 1 0
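For intuition, here is what the index pairs look like in a 3-column case (the column count is just for illustration): each row gives one (Var1, Var2) pair of columns to compare, and because expand.grid() varies its first argument fastest, byrow = TRUE puts the counts back in the right cells.
expand.grid(1:3, 1:3)
#  Var1 Var2
#1    1    1
#2    2    1
#3    3    1
#4    1    2
#5    2    2
#6    3    2
#7    1    3
#8    2    3
#9    3    3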
Update:
apply seems even faster, and combining apply with @Ricardo's comparison of the whole matrix leads us to a single, fastest (?) apply solution which is approximately 4 times quicker than the OP's double loop:
# Single apply loop
f1 <- function(mat) apply( mat , 2 , function(x) colSums( x > mat ) )
# OP double apply loop
f2 <- function(mat) {
  apply(mat, MARGIN = 2, function(x) {
    return(apply(mat, MARGIN = 2, function(y) {
      return(sum(x > y))
    }))
  })
}
require(microbenchmark)
microbenchmark( f1(mat) , f2(mat) )
#Unit: microseconds
# expr min lq median uq max neval
# f1(mat) 95.190 97.6405 102.7145 111.4635 159.584 100
# f2(mat) 361.862 370.7860 398.7830 418.3810 1336.506 100
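As a quick sanity check that the single loop really matches the OP's double loop, run both on made-up data (any numeric matrix, such as the OP's mat, would do). Note that colSums() returns doubles while sum() returns integers, so compare values rather than using identical():
set.seed(1)
mat <- matrix(rnorm(25), ncol = 5)  # hypothetical stand-in for the OP's mat
all(f1(mat) == f2(mat))
# [1] TRUE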