Here is an implementation based on your findInterval suggestion;
it is 5-6 times faster than the classical cut:
#' Fast replacement for cut() built on findInterval()
#'
#' Bins `x` into the right-closed intervals `(breaks[i], breaks[i + 1]]`,
#' the same convention as `cut()` with its default `right = TRUE`. Labels are
#' built directly from the break values with `paste0()`, without rounding.
#'
#' @param x Numeric vector of values to bin.
#' @param breaks Numeric vector of at least two cut points, sorted in
#'   increasing order (a requirement of `findInterval()`).
#' @return A factor the same length as `x` whose levels are the interval
#'   labels `"(a,b]"`. Missing values, and values outside
#'   `(breaks[1], breaks[length(breaks)]]`, are `NA`.
cut2 <- function(x, breaks) {
  labels <- paste0("(", breaks[-length(breaks)], ",", breaks[-1L], "]")
  # left.open = TRUE makes the bins (a, b], matching the labels. The default
  # [a, b) would put a value equal to a break into the next bin up.
  idx <- findInterval(x, breaks, left.open = TRUE)
  # Index 0 means x <= breaks[1]. A zero subscript is silently dropped by `[`,
  # which would shorten the result, so convert it to NA explicitly.
  idx[which(idx == 0L)] <- NA_integer_
  # Index length(breaks) means x is above the last break; it points past the
  # end of `labels`, so subsetting yields NA for it.
  factor(labels[idx], levels = labels)
}
library(microbenchmark)
# Draw a reproducible sample of 10,000 standard-normal values.
set.seed(1)
data <- rnorm(1e4, mean = 0, sd = 1)

# Time base R's cut.default() against cut2() on the same data and breaks.
microbenchmark(
  cut.default(data, my_breaks),
  cut2(data, my_breaks)
)
# Unit: microseconds
# expr min lq median uq max neval
# cut.default(data, my_breaks) 3011.932 3031.1705 3046.5245 3075.3085 4119.147 100
# cut2(data, my_breaks) 453.761 459.8045 464.0755 469.4605 1462.020 100

# Check that both functions produce exactly the same factor for this input.
identical(cut(data, my_breaks), cut2(data, my_breaks))
# TRUE