This is a tricky problem. I have found that most base R solutions for these "rolling calc" type of problems are way way too slow for data of any significant size. I have had lots of luck by using the data.table
package (especially if speed is an issue). I included the parallel
package just in case you have a ton of observations and you need to do this faster. It is set to mc.cores=1
right now, but if you are running Mac or Linux, you can obviously up it.
lanec <- structure(list(vehicle.id = c(2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L,
5L, 5L), frame.id = c(1L, 2L, 3L, 4L, 5L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 6L, 7L, 8L, 9L, 10L, 11L,
12L), svel = c(55, 75, 53, 50, 32, 49, 55, 55, 43, 45, 52, 50,
38, 42, 45, 48, 50, 52, 55, 49, 52, 54, 58, 60, 63, 70), PrecVehVel = c(59,
59, 57, 54, 52, 53, 59, 59, 47, 49, 56, 54, 42, 46, 49, 52, 54,
56, 59, 53, 56, 58, 62, 64, 67, 74)), .Names = c("vehicle.id",
"frame.id", "svel", "PrecVehVel"), class = "data.frame", row.names = c(NA,
-26L))
#Load data.table package
require("data.table")
require("parallel")
data <- data.table(lanec)
#What length of correlation vector do you want?
cor.vec <- 2
##Assign each customer an ID
data[,ID:=.GRP,by=c("vehicle.id")]
##Group values at the list level
Ref <- data[,list(frame.id=list(I(frame.id)),svel.list=list(I(svel)),PrecVehVel.list=list(I(PrecVehVel))),by=list(vehicle.id)]
#Calculate rolling calculation
data$Roll.Corr <- mcmapply(FUN = function(RD, NUM) {
#mcmapply is a multicore version of mapply. If running Linux or Mac, you can up the amount of cores and have the code run faster
#d is the difference between the current frame.id and all other frame.id's within the same vehicle id.
d <- (Ref$frame.id[[NUM]] - RD)
#The following checks whether d is within the "window" you want. If not in the desired "window", then svel1 and prec1 will have zero values. If in desired "window", then its value will be the respective "svel" and "prec" value in original data.
svel1 <- (d >= 0 & d <= cor.vec)*Ref$svel.list[[NUM]]
prec1 <- (d >= 0 & d <= cor.vec)*Ref$PrecVehVel.list[[NUM]]
#Following discards all data points not in sliding "window" (deletes all of the zeros)
keep <- which(d >= 0 & d <= cor.vec)
svel1 <- svel1[keep]
prec1 <- prec1[keep]
#Following makes sure a correlation value is only provided if the number of points within the window is larger than the correlation "window" length
if (length(svel1)>cor.vec){
cor(svel1,prec1)
} else {
NA
}
}, RD = data$frame.id,NUM=data$ID,mc.cores=1)
#Print data
data[,frame.start:=ifelse(is.na(Roll.Corr),NA,frame.id)]
data[,frame.end:=ifelse(is.na(Roll.Corr),NA,frame.id+cor.vec)]
head(data,10)
vehicle.id frame.id svel PrecVehVel ID Roll.Corr frame.start frame.end
1: 2 1 55 59 1 0.5694948 1 3
2: 2 2 75 59 1 0.8635894 2 4
3: 2 3 53 57 1 0.8746393 3 5
4: 2 4 50 54 1 NA NA NA
5: 2 5 32 52 1 NA NA NA
6: 3 3 49 53 2 1.0000000 3 5
7: 3 4 55 59 2 1.0000000 4 6
8: 3 5 55 59 2 1.0000000 5 7
9: 3 6 43 47 2 1.0000000 6 8
10: 3 7 45 49 2 1.0000000 7 9