It turns out that the fastest of the eval-based solutions mentioned above
is:
# Build the j-expression as text so the time column can be chosen at run time,
# then evaluate the parsed expression inside the data.table call.
# NOTE(review): grouping is by `ID` here but by `Instrument` in the benchmark
# functions — presumably `ID` is the grouping column of the original question;
# confirm.
e <- parse(text = paste0("list(", time.col,",", "Value=cumsum(Close))"))
DT[, eval(e),by=ID]
However, the :=
solution is even faster. See also Arun's note regarding copying.
Dataset
# Inspect the benchmark table: dimensions, estimated memory use, then a preview.
dim(DT)
object.size(DT)
DT
[1] 1354402 8
81291568 bytes
Instrument Date Open High Low Close Volume Adjusted Close
1: GOOG/AMEX_ABI 1981-03-11 NA NA 6.56 6.75 217200 NA
2: GOOG/AMEX_ABI 1981-03-12 NA NA 6.66 6.88 616400 NA
3: GOOG/AMEX_ABI 1981-03-13 NA NA 6.81 6.84 462000 NA
4: GOOG/AMEX_ABI 1981-03-16 NA NA 6.81 7.00 306400 NA
5: GOOG/AMEX_ABI 1981-03-17 NA NA 6.88 6.88 925600 NA
---
1354398: YAHOO/TSX_AMM_TO 2014-04-24 1.56 1.58 1.56 1.58 2700 1.58
1354399: YAHOO/TSX_AMM_TO 2014-04-25 1.60 1.62 1.59 1.62 11000 1.62
1354400: YAHOO/TSX_AMM_TO 2014-04-28 1.59 1.61 1.54 1.54 7200 1.54
1354401: YAHOO/TSX_AMM_TO 2014-04-29 1.58 1.60 1.58 1.59 500 1.59
1354402: YAHOO/TSX_AMM_TO 2014-04-30 1.55 1.55 1.50 1.52 36800 1.52
Benchmarking
# Name of the time column, held as a string; the functions below read it as a
# global.
time.col <- "Date"
# Baseline: fetch the time column by name with get() and compute the per-group
# cumulative sum of Close, then rename the unnamed "V1" column to the value of
# time.col. Reads the globals DT and time.col; the result of setnames() is the
# function's return value.
fun <- function() {
  res <- DT[, list(get(time.col), Value = cumsum(Close)),
            by = Instrument]
  setnames(res, "V1", time.col)
}
# `:=` variant with a copy: adds Value to the global DT by reference, copies
# out the grouping, time and Value columns, then drops the temporary Value
# column from DT again before returning the copy.
fun2 <- function() {
  DT[, Value := cumsum(Close), by = Instrument]

  # The `..` prefix makes data.table look `keep` up in this function's scope
  # rather than treating it as a column name.
  keep <- c("Instrument", time.col, "Value")
  res <- DT[, ..keep]

  DT[, Value := NULL]  # cleanup
  res
}
# `:=` variant without the copy: only adds Value to the global DT by reference.
# Unlike fun2, it neither extracts the Instrument/time/Value columns into a new
# table nor removes Value afterwards, so the column stays in DT.
fun2. <- function() {
  DT[, `:=`(Value = cumsum(Close)), by = Instrument]
}
# eval() variant: turns the column name in time.col into a symbol and evaluates
# it against .SD for each group, alongside the cumulative sum of Close.
# Note that, unlike fun, the time column is not renamed afterwards.
fun3 <- function() {
  DT[
    ,
    list(
      eval(as.symbol(time.col), envir = .SD),
      Value = cumsum(Close)
    ),
    by = Instrument
  ]
}
# parse() variant: assembles the whole j-expression as text (so the time column
# appears in it as a bare name), parses it once, and evaluates the parsed
# expression inside the grouped data.table call.
fun4 <- function() {
  j_src <- paste0("list(", time.col, ",", "Value=cumsum(Close))")
  j_expr <- parse(text = j_src)
  DT[, eval(j_expr), by = Instrument]
}
Result
library(rbenchmark)
# Time each candidate over 200 replications.
# NOTE(review): the results table also lists fun2.(), which is not part of this
# call — presumably it was timed in a separate run; confirm.
benchmark(
  fun(),
  fun2(),
  fun3(),
  fun4(),
  replications = 200
)
test replications elapsed relative user.self sys.self user.child sys.child
1 fun() 200 5.40 1.327 5.29 0.11 NA NA
2 fun2() 200 5.18 1.273 4.72 0.45 NA NA
3 fun2.() 200 2.70 1.000 2.70 0.00 NA NA
3 fun3() 200 4.12 1.012 3.90 0.22 NA NA
4 fun4() 200 4.07 1.000 3.91 0.16 NA NA