سؤال

I am having issues with for loops that calculate and run lags for the coefficient of variation of precipitation. I'm not quite sure how to generalize the question so I've added all the steps I've taken so far.

My main dataset "d" looks like this:

      row.names timestamp  station      year   month   ndvi  landcover    altitude precipitation
1         1         1      A            2000   jan   0.4138  Mixed forest     2143          16.0
2      1769         2      A            2000   feb   0.4396  Mixed forest     2143           4.0

I would like to find the effects of lag 0:10 of the coefficient of variation of precipitation on the max ndvi of a year per station. Basically my code looks like this:

r <- aggr(d,c("station","landcover","year"), c("altitude=mean(altitude)","max.ndvi=NA","max.month=NA","max.timestamp=NA","max.precipitation=NA", "cv=NA"))


head(r)
    station    landcover year altitude max.ndvi max.month max.timestamp max.precipitation cv
1     A      Mixed forest 2000     2143       NA        NA            NA          NA     NA
2     A      Mixed forest 2001     2143       NA        NA            NA          NA     NA

for(i in 1:nrow(r)) {
  tmp <- d[d$station==r$station[i] & d$year==r$year[i],]
  idx <- which.max(tmp$ndvi);
  r$max.month[i] <- as.character(tmp$month[idx]);   
  r$max.ndvi[i] <- tmp$ndvi[idx];
  r$max.timestamp[i] <- tmp$timestamp[idx];
  r$max.precipitation[i] <- tmp$precipitation[idx];
  r$cv[i] <- sd(tmp$precipitation, na.rm = TRUE)/mean(tmp$precipitation, na.rm = TRUE) 
}

for(lag in 0:10) {
  cat("\n\n***** lag =",lag,"*****\n\n");
  for(i in 1:nrow(r)) {
    timestamp <- r$max.timestamp[i]-lag;
    if(timestamp>0){
    r$cv[i] <- r$cv[d$station==r$station[i] & d$timestamp==timestamp];
    }
  }
  r <- na.omit(r)
  print(summary(aov(max.ndvi~cv, data=r)));
  for(lu in sort(unique(as.character(r$landcover)))) {
  cat("\n----------------- Analysis for LU =",lu,"\n\n");
  print(summary(aov(max.ndvi~cv,data=r[r$landcover==lu,])));
  }
}

The problem I am getting is with the last part which assigns/loops the lags for every max.ndvi value. I would like a summary for each lag over all rows as well as a summary per land cover type.

I have tried various different combinations, but I keep getting errors. For the above code I get this error:

Error in lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) : 
  0 (non-NA) cases 

Can anyone offer any advice?

Thanks a lot.

هل كانت مفيدة؟

المحلول

I think some of your cv lags have all missing values (at the landcover level of detail). This code requires that the cv.lags (within the landcover class) have at least 3 observations.

#Create fake dataset in your same mold
set.seed(1)
d <- data.frame(row.names=1:40,timestamp=rep(1:10,4),station=c(rep("A",10),rep("B",10),rep("C",10),rep("D",10)),
                year=rep(c(2000,2000,2000,2001,2001,2001,2001,2002,2002,2002),4),
                month=rep(c("jan","feb","mar","april","may","jun","jul","aug","sep","oct"),4),ndvi=runif(40,min=0.3,max=0.6),
                landcover=c(rep("Mixed Forest",10),rep("Sand",10),rep("other1",10),rep("other2",10)),altitude=runif(40,min=1500,max=5000), 
                precipitation=runif(40,min=2,max=20))

#Convert to data.table
require("data.table")
d <- data.table(d)

##Create your desired variables
r <- d[,list(altitude=mean(altitude,na.rm=T),
             max.ndvi=max(ndvi,na.rm=T),
             max.month=month[ndvi==max(ndvi,na.rm=T)],
             max.timestamp=timestamp[ndvi==max(ndvi,na.rm=T)],
             max.precipitation=precipitation[ndvi==max(ndvi,na.rm=T)],
             cv=sd(precipitation,na.rm=T)/mean(precipitation,na.rm=T)
),by=c("station","landcover","year")]

##Create lagged variables
r[, c(paste("cv.lag", 1:10, sep="")) := lapply(1:10, function(i) c(rep(NA, i), head(cv, -i))), by=list(station,landcover)]

#Create list of the cv variable positions plus station,landcover, and year positions
pos <- grep("cv", names(r))
pos <- c(1:3,pos)

#Melt lagged variables
require("reshape2")
r.long <- melt(r[,pos,with=F],id.vars=c("station","landcover","year"),variable.name="cv",value.name="cv.val")

#Merge back on max ndvi
r.long <- merge(r.long,r[,list(station,landcover,year,max.ndvi)],by=c("station","landcover","year"),all.x=T)

#Only use landcovers and lags with at least 3 non-missing observations
r.ref <- r.long[,list(Obs=sum(is.na(cv.val)==0)),by=c("landcover","cv")][Obs>2]
r.long <- merge(r.ref,r.long,by=c("landcover","cv"))

#Print out anovas
r.long[,print(summary(aov(max.ndvi~cv.val))),by=c("landcover","cv")]

#If you just care about pvalues, use this code
lmp <- function (modelobject) {
  if (class(modelobject) != "lm") stop("Not an object of class 'lm' ")
  f <- summary(modelobject)$fstatistic
  p <- pf(f[1],f[2],f[3],lower.tail=F)
  attributes(p) <- NULL
  return(p)
}

#Print out results
r.long[,list(PVAL=lmp(lm(max.ndvi~cv.val))),by=c("landcover","cv")]
مرخصة بموجب: CC-BY-SA مع الإسناد
لا تنتمي إلى StackOverflow
scroll top