Use `rle`, `rep`, and `split`:
# Encode the consecutive runs of equal values in `x`.
a <- rle(x)
# Tag every element with the index of the run it belongs to, then split on
# that tag so each run comes back as its own list element.
split(x, f = rep(seq_along(a$lengths), times = a$lengths))
# $`1`
# [1] 1 1 1
#
# $`2`
# [1] 2 2 2 2 2
#
# $`3`
# [1] 4 4
#
# $`4`
# [1] 2 2 2 2
Here, `rle` computes the "run lengths" of the input vector. The result is a list with two components, `lengths` and `values`. We only need `lengths`: repeating each run's index by that run's length gives a "grouping" variable on which we can `split` the original vector.
Update: Benchmarks on a larger vector
I didn't benchmark the `while` loop because it took too long to complete on a vector this long.
library(microbenchmark)
# Fix the RNG seed so the sampled benchmark vector is reproducible.
set.seed(1)
# Benchmark input: 1e5 values drawn with replacement from 1..5.
x <- sample(1:5, 1e5, replace = TRUE)
# rle-based approach: run-length encode the global vector `x`, build a run
# index that repeats each run's number once per element, and split on it.
# Returns a list named "1", "2", ... with one element per run.
fun1 <- function() {
  runs <- rle(x)
  run_id <- rep(seq_along(runs$lengths), times = runs$lengths)
  split(x, run_id)
}
# Index-based approach: split the global vector `x` into its runs of
# consecutive equal values by locating the positions where the value changes.
#
# Takes no arguments; reads `x` from the enclosing environment.
# Returns an unnamed list with one element per run, in order of appearance,
# and an empty list when `x` has length zero.
fun2 <- function() {
  n <- length(x)
  # An empty `x` would otherwise produce the index range 1:0 and therefore a
  # spurious NA element in the result.
  if (n == 0L) {
    return(list())
  }
  # Position of the last element of every run except the final one.
  splits <- which(diff(x) != 0)
  run_starts <- c(1L, splits + 1L)
  run_ends <- c(splits, n)
  # Map() always returns a list; apply() would collapse equal-length runs
  # into a matrix (or a plain vector when every run has length one).
  Map(function(from, to) x[from:to], run_starts, run_ends)
}
# cumsum-based approach: flag every position where the global vector `x`
# changes value, accumulate those flags into a run id starting at 0, and
# split on that id. Returns a list named "0", "1", ... with one element
# per run.
fun3 <- function() {
  is_break <- as.logical(diff(x))
  run_id <- c(0, cumsum(is_break))
  split(x, run_id)
}
# Time the three implementations, evaluating each expression 20 times.
microbenchmark(fun1(), fun2(), fun3(), times = 20)
# Unit: milliseconds
# expr min lq median uq max neval
# fun1() 142.0386 147.7061 154.2853 158.0239 196.4665 20
# fun2() 363.5707 386.0575 423.1791 444.4695 543.9427 20
# fun3() 305.5331 316.0356 320.5203 329.7177 376.3236 20