Vectorisation of for-loops for matrix subset comparisons

Question 1

First, you should really not rbind the labels c('A', 'B', 'C') to your dataframe, because this causes all the numbers in foo to become strings, not numbers! Keep them separate (you never use that first row of foo in your code, anyway).

I can think of a few ways to do this, and I'm sure there are others I haven't thought of too.

First I'll make a matrix that's like yours but just without the c('A', 'B', 'C') so that my numbers are actually numbers, not characters.

foo <- data.frame(matrix(rexp(150, rate=.1), ncol=15))
labels <- c('A', 'B', 'C')
colnames(foo) <- make.unique(rep(paste("ID", c(1:5), sep=""), rep(3, 5)))

First method I can think of (pretty direct) - flatten your dataframe to a vector and find the max of every 3 values, then reshape back into the shape you wanted res to be.

foo.flat <- as.vector(t(foo)) # transpose as R is column-wise and I want row-wise
# split(foo.flat, ceiling(1:length(foo.flat)/3)) # splits into chunks of 3, so:
ms <- vapply(split(foo.flat, ceiling(1:length(foo.flat)/3)),
             which.max, # function to apply to each chunk of 3
             -1, # template value for vapply
             USE.NAMES=F)

Now just convert 1 to A, 2 to B, 3 to C and reshape back into matrix (res):

res <- matrix(labels[ms], byrow=T, ncol=ncol(foo)/3)

Second method I can think of - reshape your matrix to long form (reshape2) and use plyr to do the calculation for each (row, ID). (maybe more elegant but more confusing?, up to you)

foo$observation <- 1:nrow(foo)
library(reshape2)
foo.long <- melt(foo, id='observation', variable.name='ID')
# fix IDs, i.e. ID1.2 --> ID1 etc
foo.long$ID <- gsub('\\.[1-9]+$', '', foo.long$ID)
# > head(foo.long[order(foo.long$observation, foo.long$ID),])
#    observation  ID     value
# 1            1 ID1 15.751959
# 11           1 ID1 20.386724
# 21           1 ID1  9.423799
# 31           1 ID2  4.560623
# 41           1 ID2  1.140642
# 51           1 ID2 37.009728

observation is just the row each number came from, with ID being the ID. Now for each (observation, variable) find the index of the maximum.

library(plyr)
intermediate <- ddply(foo.long, .(observation, ID), function (x) which.max(x$value))
> head(intermediate)
#  observation  ID V1
# 1           1 ID1  2
# 2           1 ID2  3
# 3           1 ID3  3
# 4           1 ID4  2
# 5           1 ID5  3
# 6           2 ID1  1

Now just reshape the V1 column back to a matrix (converting indices to your labels)

res <- matrix(labels[intermediate$V1], byrow=T, ncol=floor(ncol(foo)/3)))

You could do something similar with data.table as well which could be faster depending on the size of your matrix.

Question 2

Suggested data structure

First, having headings and sub-headings isn't really ideal for automation and is prone to problems. I'd break it up to a list of IDs, each of which is a dataframe of the three runs. (We'll add the ID1 naming convention at the end, if it's necessary.)

set.seed(1234)
foo1 <- lapply(1:5,
               function(id) data.frame(matrix(rexp(30, rate=.1), ncol=3)))
head(foo1[[1]], n=3)
##           X1       X2        X3
## 1 25.01758605 18.80077 19.962787
## 2  2.46758883 15.96105  7.283865
## 3  0.06581957 16.58662  3.835416

This greatly facilitates the *apply family of functions. This first batch operates on each list element and converts it into a single column

foo2 <- lapply(foo1, function(ff) apply(ff, 1, which.max))
head(foo2, n=2)
## [[1]]
##  [1] 1 2 2 2 2 3 2 3 2 3
## 
## [[2]]
##  [1] 3 2 2 3 3 2 1 1 3 3

Now it's easy enough to combine these into one data.frame:

foo3 <- Reduce(cbind, foo2)
head(foo3, n=3)
##      init        
## [1,]    1 3 1 2 3
## [2,]    2 2 3 2 3
## [3,]    2 2 2 2 2

Lastly, let's put it back into letter-mode (if you must), and add the column names (again, if you must):

foo4 <- apply(foo3, c(1,2), function(x) c('A','B','C')[x])
colnames(foo4) <- paste0('ID', seq.int(ncol(foo4)))
head(foo4, n=3)
##      ID1 ID2 ID3 ID4 ID5
## [1,] "A" "C" "A" "B" "C"
## [2,] "B" "B" "C" "B" "C"
## [3,] "B" "B" "B" "B" "B"

Your data structure

Assuming we must use it, I'll still break it up neatly into element-size, and continue with the *apply stuff:

# Mock data
set.seed(1234)
foo5 <- data.frame(matrix(rexp(150, rate=.1), ncol=15))
head(foo5[,1:5], n=3)
##            X1       X2        X3          X4        X5
## 1 25.01758605 18.80077 19.962787  4.34543487  1.291397
## 2  2.46758883 15.96105  7.283865  0.09091824 20.895804
## 3  0.06581957 16.58662  3.835416 16.10286033 25.188229

Instead of trying to loop through everything, how about subsetting the data.frame into smaller chunks:

foo6 <- lapply(seq(1, ncol(foo5), by=3),
               function(ii) foo5[,ii:(ii+2)])

... and then use the rest of the code above to do the rest.

foo7 <- Reduce(cbind,
               lapply(foo6, function(ff) apply(ff, 1, which.max)))
foo8 <- apply(foo7, c(1,2), function(x) c('A','B','C')[x])
colnames(foo8) <- paste0('ID', seq.int(ncol(foo8)))
head(foo8, n=3)
##      ID1 ID2 ID3 ID4 ID5
## [1,] "A" "C" "A" "B" "C"
## [2,] "B" "B" "C" "B" "C"
## [3,] "B" "B" "B" "B" "B"

(The way I work some of these problems, I'd really like it if SO allowed Rmd files or at least full-up markdown.)

Question 3

I think this problem seems hard because you have your data in wide-wide format. I would use reshape2 first, then it doesn't seem so hard, we can just use which.max to do the work:

foo <- data.frame(matrix(rexp(150, rate=.1), ncol=15))
foo <- rbind(c("A","B","C"), foo)
colnames(foo) <- paste0("ID", rep(1:5, each=3), rep(LETTERS[1:3], times=5))

require(reshape2)

#make an id variable
foo$id <- 1:nrow(foo)

foo.melt <- melt(foo, "id")

#take apart ID1A into two seperate variables
foo.melt$num <- rep(1:5, each=3)[foo.melt$variable]
foo.melt$rep <- rep(1:3, times=5)[foo.melt$variable]

res <- do.call(rbind, by(foo.melt, interaction(foo.melt$id, foo.melt$num),
       function(x) {
           id <- x[1,"id"]
           num <- x[1,"num"]
           #which.max gets us the index of the max, look it up and get a letter.
           type <- LETTERS[x[which.max(x$value), "rep"]]
           data.frame(id=id, num=num, type=type);
       }
       )
)
dcast(res, id~num)

Giving us:

R>dcast(res, id~num)
Using type as value column: use value.var to override.
   id 1 2 3 4 5
1   1 A C A A B
2   2 C A B C C
3   3 C B A A B
4   4 B C C A C
5   5 A C B B C
6   6 A B A C B
7   7 B B B A A
8   8 A C A A B
9   9 A B C C B
10 10 A B C A B