Here is a slightly convoluted solution using cumsum() and data.table, making use of the .SD object to flag only those "Flag" rows that are followed by a color. I'm sure it could be made more concise with a bit of thought.
Timing: 6.24 seconds for 650k rows.
require(data.table)
# Build the start-marker vector for one group of rows.
#
# x: the number of rows in the group (a single count, 1 or more).
#
# Returns a numeric vector of length x:
#   - a lone 0 when the group has a single row (no 1 is emitted for it);
#   - otherwise a leading 1 followed by x - 1 zeros.
get_s <- function(x) {
  # x is a scalar, so branch with if/else. ifelse() is meant for vectors and
  # here would only work by assigning the result from inside its lazily
  # evaluated arguments, which leaves nothing assigned when the test is NA.
  if (x == 1) {
    0
  } else {
    c(1, rep(0, x - 1))
  }
}
system.time({
  # Synthetic input: 650k rows sampled with replacement from five colors
  # plus "Flag". Note that generating the data is part of the timed region.
  df <- data.frame(
    V1 = sample(
      c("red", "white", "blue", "Flag", "yellow", "black"),
      650000,
      TRUE
    )
  )

  # Running count of "Flag" rows: each "Flag" and the rows after it (up to
  # the next "Flag") share one id. Rows before the first "Flag" get id 0.
  df$V2 <- cumsum(df$V1 == "Flag")

  # For every id, get_s() turns the group's row count into a marker vector
  # (1 then 0s for groups with more than one row, a single 0 otherwise).
  # The cumulative sum of those markers replaces V2 as the final index, so
  # only a "Flag" that has at least one row after it advances the index.
  # The unnamed result column of the grouped call is extracted as V1.
  # NOTE(review): rows before the first "Flag" form group 0; when that group
  # has more than one row its first row is also marked 1 although it is not
  # a "Flag" -- confirm this is intended.
  # NOTE(review): data.table's `.N` should give the same per-group count as
  # nrow(.SD) without building .SD -- worth confirming and benchmarking.
  df$V2 <- cumsum(
    data.table(df, key = "V2")[, list(get_s(nrow(.SD))), by = "V2"][, V1]
  )
})
#   user  system elapsed
#   6.16    0.06    6.24