If you are looking for something like rbind.fill
but for ffdf objects, maybe this is what you are looking for. This worked for me without memory issues on the test example Jan prepared.
require(ff)
require(ffbase)
#' Row-bind ffdf objects, filling missing columns with NA
#'
#' Behaves like an `rbind.fill` for `ffdf` objects: the result carries the
#' union of all input column names (in order of first appearance). Any column
#' an input lacks is added to that input as an all-`NA` logical `ff` vector
#' before the inputs are appended one after another with `ffdfappend()`.
#'
#' @param ... One or more `ffdf` objects to stack on top of each other.
#' @param clone If `TRUE` (the default), the column selection of the first
#'   input is passed through `clone()` before anything is appended to it; if
#'   `FALSE`, appending starts directly from that column selection.
#' @return The object produced by the successive `ffdfappend()` calls (or the
#'   first input's column selection when only one input is given).
smartffdfbind <- function(..., clone = TRUE) {
  x <- list(...)

  # Fail early with a clear message instead of the "subscript out of bounds"
  # error that an empty argument list would otherwise trigger further down.
  if (length(x) == 0L) {
    stop("smartffdfbind() needs at least one ffdf to bind", call. = FALSE)
  }

  # Union of column names across all inputs, in order of first appearance.
  columns <- unique(unlist(lapply(x, colnames), use.names = FALSE))

  # Pad every input with an all-NA logical column for each name it lacks.
  for (element in seq_along(x)) {
    missingcolumns <- setdiff(columns, colnames(x[[element]]))
    for (missingcolumn in missingcolumns) {
      x[[element]][[missingcolumn]] <- ff(
        NA,
        vmode = "logical",
        length = nrow(x[[element]])
      )
    }
  }

  # `clone` names both the logical argument and the function being called; R
  # skips non-function bindings when resolving a call, so clone(...) still
  # reaches the function.
  if (clone) {
    result <- clone(x[[1]][columns])
  } else {
    result <- x[[1]][columns]
  }

  # Append the remaining inputs with their columns in the common order.
  for (l in tail(x, -1)) {
    result <- ffdfappend(result[columns], l[columns], recode = TRUE)
  }
  result
}
# Example: build two ffdf objects that share only column `b`; every column is
# generated with ffrandom(1E8, rnorm).
ffdf1 <- ffdf(a = ffrandom(1E8, rnorm), b = ffrandom(1E8, rnorm))
ffdf2 <- ffdf(b = ffrandom(1E8, rnorm), c = ffrandom(1E8, rnorm))
# Bind them with the default clone = TRUE.
x <- smartffdfbind(ffdf1, ffdf2)
# Row count of the combined result.
nrow(x)
[1] 200000000
class(x)
[1] "ffdf"