## My simplistic approach
# Millisecond precision when printing sub-second timestamps
options(digits.secs = 3)

# Load file; use TRUE, not T (T can be reassigned)
log.file <- read.csv("data/raw.csv", header = TRUE)
log.file$StartTime <- as.POSIXct(log.file$StartTime,
                                 format = "%Y-%m-%d %H:%M:%OS")
log.file$StopTime <- as.POSIXct(log.file$StopTime,
                                format = "%Y-%m-%d %H:%M:%OS")

# One-second grid covering the full observation window.
# Renamed from `range` so base::range() is not masked.
time.grid <- seq(min(log.file$StartTime), max(log.file$StopTime), by = 1)

# Number of queries active at a given timestamp: a query is active
# when it started at or before `tstamp` and stopped at or after it.
getsum <- function(tstamp) {
  sum(log.file$StartTime <= tstamp & log.file$StopTime >= tstamp)
}

# Build the result directly with data.frame(); the original called
# frame(), which is a graphics function and does not construct a
# data frame. vapply() (not lapply()) yields the numeric vector
# that plot() needs, rather than a list column.
dset <- data.frame(
  TIME = time.grid,
  COUNT = vapply(time.grid, getsum, numeric(1))
)
plot(dset$TIME, dset$COUNT)
## A better solution

While this doesn't exactly meet the criteria I laid out earlier (i.e., it doesn't fill in time gaps), it does give me a graph close to what I'm looking for. However, if there is a solution that hits both of the points I asked for earlier, I'm still interested.
# Millisecond precision when printing sub-second timestamps
options(digits.secs = 3)

# Load file; use TRUE, not T (T can be reassigned)
log.file <- read.csv("data/raw.csv", header = TRUE)
log.file$StartTime <- as.POSIXct(log.file$StartTime,
                                 format = "%Y-%m-%d %H:%M:%OS")
log.file$StopTime <- as.POSIXct(log.file$StopTime,
                                format = "%Y-%m-%d %H:%M:%OS")

# Event-sweep approach: each query contributes +1 at its start time
# and -1 at its stop time, so a running sum over the chronologically
# ordered events gives the number of concurrent queries.
queries.start <- data.frame(Time = log.file$StartTime, Value = 1)
queries.stop <- data.frame(Time = log.file$StopTime, Value = -1)

# Merge the two event sets and order by time; order() on the column
# directly is simpler than the with(...) indirection.
queries.both <- rbind(queries.start, queries.stop)
queries.both <- queries.both[order(queries.both$Time), ]

# Cumulative sum of the +1/-1 events = concurrency at each event time
queries.sum <- data.frame(Time = queries.both$Time,
                          Queries = cumsum(queries.both$Value))
plot(queries.sum, type = "l")