If you have multiple cores available, why not take advantage of how quickly you can filter and group rows in a data.table using its key:
library(data.table)
library(foreach)
library(doMC)
registerDoMC(cores = 4)

setkey(dt, a)   # key on the grouping column so dt[.(x)] is a fast binary-search subset
finalRowOrderMatters = FALSE # FALSE can be faster
foreach(x = unique(dt[["a"]]), .combine = "rbind", .inorder = finalRowOrderMatters) %dopar%
  dt[.(x), list(b = paste(b, collapse = ""), d = paste(d, collapse = ""), e = e[[1]])]
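(For reference, and as a quick way to sanity-check the parallel output, the same collapse can be written as one ordinary single-threaded data.table call with by=. This is just a sketch against the dt created at the bottom of this answer; note it also returns the grouping column a, which the foreach version drops.)

# single-threaded equivalent of the foreach loop above (keeps column a)
seqResult <- dt[, list(b = paste(b, collapse = ""),
                       d = paste(d, collapse = ""),
                       e = e[[1]]),
                by = a]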
Note that if the number of unique groups (i.e. length(unique(a))) is relatively small, it will be faster to drop the .combine argument, get the results back in a list, and then call rbindlist on the results. In my testing on two cores & 8GB RAM, the threshold was at about 9,000 unique values. Here is what I used to benchmark:
# (option a)
round(rowMeans(replicate(3, system.time({
  # ------- #
  foreach(x = unique(dt[["a"]]), .combine = "rbind", .inorder = FALSE) %dopar%
    dt[.(x), list(b = paste(b, collapse = ""), d = paste(d, collapse = ""), e = e[[1]])]
  # ------- #
}))), 3)
# [1] 1.243 elapsed for N == 1,000
# [1] 11.540 elapsed for N == 10,000, length(unique(dt[["a"]])) == 8617
# [1] 57.404 elapsed for N == 50,000
# (option b)
round(rowMeans(replicate(3, system.time({
  # ------- #
  results <-
    foreach(x = unique(dt[["a"]])) %dopar%
      dt[.(x), list(b = paste(b, collapse = ""), d = paste(d, collapse = ""), e = e[[1]])]
  rbindlist(results)
  # ------- #
}))), 3)
# [1] 1.117 elapsed for N == 1,000
# [1] 10.567 elapsed for N == 10,000, length(unique(dt[["a"]])) == 8617
# [1] 76.613 elapsed for N == 50,000
## And I used the following to create dt:
N <- 5e4  # 50,000; the timings above also used N = 1e3 and N = 1e4
set.seed(1)
a = sample(1:N, N*2, replace = TRUE)
b = sample(c("3m","2m2d2m","3m2d1i3s2d","5m","4m","9m","1m"), N*2, replace = TRUE)
d = sample(c("3m","2m2d2m","3m2d1i3s2d","5m","4m","9m","1m"), N*2, replace = TRUE)
e = a
dt = data.table(a = a, b = b, d = d, e = e, key="a")
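If you want to fold the ~9,000-group rule of thumb into a single call, something like the hypothetical helper below works (collapse_groups and its threshold argument are my own names, not anything from data.table or foreach, and the cutoff is just what I observed on this machine; it assumes dt is keyed on a and has the columns above):

collapse_groups <- function(dt, threshold = 9000) {
  groups <- unique(dt[["a"]])
  if (length(groups) < threshold) {
    # few groups: gather a list and bind once with rbindlist (option b)
    rbindlist(foreach(x = groups) %dopar%
      dt[.(x), list(b = paste(b, collapse = ""), d = paste(d, collapse = ""), e = e[[1]])])
  } else {
    # many groups: let foreach rbind as it goes (option a)
    foreach(x = groups, .combine = "rbind", .inorder = FALSE) %dopar%
      dt[.(x), list(b = paste(b, collapse = ""), d = paste(d, collapse = ""), e = e[[1]])]
  }
}
ans <- collapse_groups(dt)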