Running a time series lag in for loops in R

Question

I think some of your cv lags have all missing values (at the landcover level of detail). This code requires that the cv.lags (within the landcover class) have at least 3 observations.

#Create fake dataset in your same mold
set.seed(1)
d <- data.frame(row.names=1:40,timestamp=rep(1:10,4),station=c(rep("A",10),rep("B",10),rep("C",10),rep("D",10)),
                year=rep(c(2000,2000,2000,2001,2001,2001,2001,2002,2002,2002),4),
                month=rep(c("jan","feb","mar","april","may","jun","jul","aug","sep","oct"),4),ndvi=runif(40,min=0.3,max=0.6),
                landcover=c(rep("Mixed Forest",10),rep("Sand",10),rep("other1",10),rep("other2",10)),altitude=runif(40,min=1500,max=5000), 
                precipitation=runif(40,min=2,max=20))

#Convert to data.table
require("data.table")
d <- data.table(d)

##Create your desired variables
r <- d[,list(altitude=mean(altitude,na.rm=T),
             max.ndvi=max(ndvi,na.rm=T),
             max.month=month[ndvi==max(ndvi,na.rm=T)],
             max.timestamp=timestamp[ndvi==max(ndvi,na.rm=T)],
             max.precipitation=precipitation[ndvi==max(ndvi,na.rm=T)],
             cv=sd(precipitation,na.rm=T)/mean(precipitation,na.rm=T)
),by=c("station","landcover","year")]

##Create lagged variables
r[, c(paste("cv.lag", 1:10, sep="")) := lapply(1:10, function(i) c(rep(NA, i), head(cv, -i))), by=list(station,landcover)]

#Create list of the cv variable positions plus station,landcover, and year positions
pos <- grep("cv", names(r))
pos <- c(1:3,pos)

#Melt lagged variables
require("reshape2")
r.long <- melt(r[,pos,with=F],id.vars=c("station","landcover","year"),variable.name="cv",value.name="cv.val")

#Merge back on max ndvi
r.long <- merge(r.long,r[,list(station,landcover,year,max.ndvi)],by=c("station","landcover","year"),all.x=T)

#Only use landcovers and lags with at least 3 non-missing observations
r.ref <- r.long[,list(Obs=sum(is.na(cv.val)==0)),by=c("landcover","cv")][Obs>2]
r.long <- merge(r.ref,r.long,by=c("landcover","cv"))

#Print out anovas
r.long[,print(summary(aov(max.ndvi~cv.val))),by=c("landcover","cv")]

#If you just care about pvalues, use this code
lmp <- function (modelobject) {
  if (class(modelobject) != "lm") stop("Not an object of class 'lm' ")
  f <- summary(modelobject)$fstatistic
  p <- pf(f[1],f[2],f[3],lower.tail=F)
  attributes(p) <- NULL
  return(p)
}

#Print out results
r.long[,list(PVAL=lmp(lm(max.ndvi~cv.val))),by=c("landcover","cv")]