How to find correlation for every n rows in R?

Question 1

This is a tricky problem. I have found that most base R solutions for these "rolling calc" type of problems are way way too slow for data of any significant size. I have had lots of luck by using the data.table package (especially if speed is an issue). I included the parallel package just in case you have a ton of observations and you need to do this faster. It is set to mc.cores=1 right now, but if you are running Mac or Linux, you can obviously up it.

lanec <- structure(list(vehicle.id = c(2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L), frame.id = c(1L, 2L, 3L, 4L, 5L, 3L, 4L, 5L, 6L, 7L, 
8L, 9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 6L, 7L, 8L, 9L, 10L, 11L, 
12L), svel = c(55, 75, 53, 50, 32, 49, 55, 55, 43, 45, 52, 50, 
38, 42, 45, 48, 50, 52, 55, 49, 52, 54, 58, 60, 63, 70), PrecVehVel = c(59, 
59, 57, 54, 52, 53, 59, 59, 47, 49, 56, 54, 42, 46, 49, 52, 54, 
56, 59, 53, 56, 58, 62, 64, 67, 74)), .Names = c("vehicle.id", 
"frame.id", "svel", "PrecVehVel"), class = "data.frame", row.names = c(NA, 
-26L))

#Load data.table package
require("data.table")
require("parallel")
data <- data.table(lanec)

#What length of correlation vector do you want?
cor.vec <- 2

##Assign each customer an ID
data[,ID:=.GRP,by=c("vehicle.id")]

##Group values at the list level
Ref <- data[,list(frame.id=list(I(frame.id)),svel.list=list(I(svel)),PrecVehVel.list=list(I(PrecVehVel))),by=list(vehicle.id)]

#Calculate rolling calculation
data$Roll.Corr <- mcmapply(FUN = function(RD, NUM) {

  #mcmapply is a multicore version of mapply. If running Linux or Mac, you can up the amount of cores and have the code run faster

  #d is the difference between the current frame.id and all other frame.id's within the   same vehicle id.
  d <- (Ref$frame.id[[NUM]] - RD)

  #The following checks whether d is within the "window" you want. If not in the desired "window", then svel1 and prec1 will have zero values. If in desired "window", then its value will be the respective "svel" and "prec" value in original data.
  svel1 <- (d >= 0 & d <= cor.vec)*Ref$svel.list[[NUM]]
  prec1 <- (d >= 0 & d <= cor.vec)*Ref$PrecVehVel.list[[NUM]]

  #Following discards all data points not in sliding "window" (deletes all of the zeros)
  keep <- which(d >= 0 & d <= cor.vec)
  svel1 <- svel1[keep]
  prec1 <- prec1[keep]

  #Following makes sure a correlation value is only provided if the number of points within the window is larger than the correlation "window" length
  if (length(svel1)>cor.vec){
      cor(svel1,prec1) 
    } else {
      NA
    }   
}, RD = data$frame.id,NUM=data$ID,mc.cores=1)

#Print data
data[,frame.start:=ifelse(is.na(Roll.Corr),NA,frame.id)]
data[,frame.end:=ifelse(is.na(Roll.Corr),NA,frame.id+cor.vec)]
head(data,10)

    vehicle.id frame.id svel PrecVehVel ID Roll.Corr frame.start frame.end
 1:          2        1   55         59  1 0.5694948           1         3
 2:          2        2   75         59  1 0.8635894           2         4
 3:          2        3   53         57  1 0.8746393           3         5
 4:          2        4   50         54  1        NA          NA        NA
 5:          2        5   32         52  1        NA          NA        NA
 6:          3        3   49         53  2 1.0000000           3         5
 7:          3        4   55         59  2 1.0000000           4         6
 8:          3        5   55         59  2 1.0000000           5         7
 9:          3        6   43         47  2 1.0000000           6         8
10:          3        7   45         49  2 1.0000000           7         9

Question 2

You may try a rolling correlation using rollapply from package zoo:

library(zoo)
ddply(lanec, 'vehicle.id', function(dat){
  dat$speed.cor = rollapply(data = dat, width = 3,
                            FUN = function(x) cor(x[ , "svel"], x[ , "PrecVehVel"]),
                            by.column = FALSE, fill = NA)
  dat
})

Note that the window width is fixed to 3. Thus, this alternative will not give you the last '2 frame' correlation.

Edit following comment from OP. This may be closer to your desired output. I keep the first and last frame in separate columns. Easy to paste together if you wish.

ddply(lanec, 'vehicle.id', function(dat){
  speed.cor <- rollapply(data = dat, width = 3,
                         FUN = function(x) cor(x[ , "svel"], x[ , "PrecVehVel"]),
                         by.column = FALSE)
  len <- length(speed.cor)
  vehicle.id <- head(dat$vehicle.id, len)
  first.frame.id <- head(dat$frame.id, len)
  last.frame.id <- tail(dat$frame.id, len)
  data.frame(vehicle.id, first.frame.id, last.frame.id, speed.cor)
})

#    vehicle.id first.frame.id last.frame.id speed.cor
# 1           2              1             3         1
# 2           2              2             4         1
# 3           2              3             5         1
# 4           3              3             5         1
# 5           3              4             6         1
# 6           3              5             7         1
# 7           3              6             8         1
# 8           3              7             9         1
# 9           4              1             3         1
# 10          4              2             4         1
# 11          4              3             5         1
# 12          4              4             6         1
# 13          4              5             7         1
# 14          5              6             8         1
# 15          5              7             9         1
# 16          5              8            10         1
# 17          5              9            11         1
# 18          5             10            12         1

Question 3

This makes a matrix of which rows should get used in each group:

> bandSparse(5,4,-2:0)
5 x 4 sparse Matrix of class "ngCMatrix"

[1,] | . . .
[2,] | | . .
[3,] | | | .
[4,] . | | |
[5,] . . | |

We can apply over it's columns for each vehicle:

> vehicle2 <- subset(lanec, vehicle.id == 2)
> apply(bandSparse(5,4,-2:0), 2, function(i) list(r= cor(vehicle2[i,3:4]) ) )

Then we can do it for each vehicle:

by(lanec vehicle$vehicle.id, function(x) apply(bandSparse(nrow(x),nrow(x)-1,-2:0), 2, function(i) list(r= cor(x[i,3:4]) ) ) )