Speed up `strsplit` when the possible outputs are known

Question 1

This should probably offer quite an increase:

library(data.table)
DT <- data.table(data.df)

# Split each distinct `Class` value on "." and spread the three pieces over
# new columns. Grouping by `Class` means strsplit() runs once per group
# rather than once per row.
# `Class` may be a factor (e.g. after melting); strsplit() rejects
# non-character input, so convert explicitly before splitting.
DT[, c("Species", "SizeClass", "Infected") :=
      as.list(strsplit(as.character(Class), "\\.")[[1]]), by = Class]

The reasons for the increase:

data.table pre-allocates memory for columns.
Every column assignment in a data.frame reassigns the entirety of the data (data.table, in contrast, does not).
The `by` statement lets you perform the strsplit task only once per unique value.

Here is a nice quick method for the whole process.

# Names of the three columns that will replace `Class`
newCols <- c("Species", "SizeClass", "Infected")

# Split every distinct `Class` value on "." (one strsplit() call per group)
# and assign the pieces to the new columns by reference
DT[, (newCols) := as.list(strsplit(as.character(Class), "\\.")[[1]]), by = Class]

# Turn the freshly created character columns into factors
DT[, (newCols) := lapply(.SD, factor), .SDcols = newCols]

# Drop the original column; `:= NULL` removes it by reference
DT[, Class := NULL]

## Inspect the class of each remaining column:
DT[, lapply(.SD, class)]
#       Time Location Replicate Population Species SizeClass Infected
# 1: integer  integer   integer    numeric  factor    factor   factor

DT

Question 2

You could get a decent increase in speed by just extracting the parts of the string you need using gsub instead of splitting everything up and trying to put it back together:

# Load the raw object and melt it into a long data frame that has a `Class`
# column holding "Species.SizeClass.Infected" strings.
data <- readRDS("~/Downloads/data.rds")
data.df <- reshape2:::melt.array(data)

# using `strsplit`
system.time({
  # Position of the column that is replaced by its three components.
  cl <- which(names(data.df) == "Class")
  Classes <- do.call(rbind, strsplit(as.character(data.df$Class), "\\."))
  colnames(Classes) <- c("Species", "SizeClass", "Infected")
  # seq_len() gives an empty selection when `Class` is the first or last
  # column (1:(cl - 1) and (cl + 1):ncol would count backwards instead), and
  # drop = FALSE keeps a single neighbouring column as a data frame so its
  # name survives cbind().
  before <- seq_len(cl - 1L)
  after <- cl + seq_len(ncol(data.df) - cl)
  data.df <- cbind(
    data.df[, before, drop = FALSE],
    Classes,
    data.df[, after, drop = FALSE]
  )
})

user  system elapsed 
3.349   0.062   3.411 

#using `gsub`
system.time({
data.df$Class <- as.character(data.df$Class)
data.df$SizeClass <- gsub("(\\w+)\\.(\\d+)\\.(\\w+)", "\\2", data.df$Class,
  perl = TRUE)
data.df$Infected  <- gsub("(\\w+)\\.(\\d+)\\.(\\w+)", "\\3", data.df$Class, 
  perl = TRUE)
data.df$Class  <- gsub("(\\w+)\\.(\\d+)\\.(\\w+)", "\\1", data.df$Class, 
  perl = TRUE)
})

user  system elapsed 
0.812   0.037   0.848

Question 3

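It looks like `Class` is a factor, so split only its levels and then map the result back to the rows through the factor codes. Also pass fixed=TRUE to strsplit, which means the split pattern becomes the literal "." instead of the regular expression "\\.".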

# Split each factor level once; the result has one row per level of `Class`.
Classes <- do.call(rbind, strsplit(levels(data.df$Class), ".", fixed = TRUE))
colnames(Classes) <- c("Species", "SizeClass", "Infected")
# Map the per-level rows back to the data through the integer factor codes.
# drop = FALSE keeps the result a matrix even when only one row is selected;
# without it the single row collapses to a plain vector and df0 is malformed.
# row.names = NULL requests automatic row names (NA would be taken as a
# literal row name in the one-row case).
df0 <- as.data.frame(
  Classes[as.integer(data.df$Class), , drop = FALSE],
  row.names = NULL
)
cbind(data.df, df0)