individual random effects model with standard errors clustered on a different variable in R (R-project)

Question

You may be interested in this: https://stats.stackexchange.com/questions/85909/why-does-a-fixed-effect-ols-need-unique-time-elements

Here's my solution for "within" models:

  #' Fixed effect cluster regression, estimated efficiently using plm()
  #' @param form The model formula.
  #' @param data The data.frame.
  #' @param index Character vector giving the column name indexing individual units.
  #' @param cluster Character vector giving the column name indexing clusters, or "robust" to avoid the bootstrap and just return robust SE.
  #' @param param A list of control parameters, with named elements as follows:  R is the number of bootstrap replicates. 
  #' @return Coefficients plus clustered standard errors
  feClusterRegress <- function( form, data, index, cluster = "robust", param = list( R = 30 ) ) {
    if( "data.table" %in% class(data) )  data <- as.data.frame(data) # Not ideal efficiency-wise since I re-convert it later but necessary until I generalize the code to data.tables (the plm call doesn't work with them, for instance)
    stopifnot( class(form)=="formula" )
    mdl <- plm( form, data = data, model = "within", effect="individual", index = index )
    if( cluster=="robust" ) {
      res <- summary( mdl, robust=TRUE )
    } else { # Bootstrap
      require(foreach)
      require(data.table)
      # Prepare data structures for efficient sampling
      clusters <- unique( data[[cluster]] )
      if( is.null(clusters) )  stop("cluster must describe a column name that exists!")
      clusterList <- lapply( clusters, function(x) which( data[[cluster]] == x ) )
      names(clusterList) <- clusters
      progressBar <- txtProgressBar( 0, param$R )
      # Convert to data.table and drop extraneous variables
      data <- as.data.table( data[ , c( all.vars(form), index ) ] ) # For faster sub-setting
      # Sample repeatedly
      coefList <- foreach( i = seq( param$R ) ) %dopar% {
        setTxtProgressBar( progressBar, i )
        clusterSample <- sample( as.character( clusters ), replace=TRUE )
        indexSample <- unlist( clusterList[ clusterSample ], use.names=FALSE )
        dataSample <- data[ indexSample, ]
        dataSample[ , fakeTime := seq(.N), by = index ] # fakeTime is necessary due to a potential bug in plm.  See https://stats.stackexchange.com/questions/85909/why-does-a-fixed-effect-ols-need-unique-time-elements
        try( coefficients( plm( form, data = as.data.frame(dataSample), model = "within", effect="individual", index = c( index, "fakeTime") ) ) )
      }
      failed <- vapply( coefList, function(x) class(x) == "try-error", FUN.VALUE=NA )
      if( any(failed) ) {
        warning( "Some runs of the regression function failed" )
        coefList <- coefList[ !failed ]
      }
      coefMat <- stack( coefList )
      SE <- apply( coefMat, 2, sd )
      res <- structure( 
        list( 
          cbind( coefficients( mdl ), SE ),
          model = mdl
        ),
        class = "feClusterPLM",
        R = param$R
      )
    }
    res         
  }

I suspect you actually need the variables, so instead of generating a fake time generate a "fake" group--just make up a new group identifier right after you grab each bootstrap sample.