How to condense a data frame based on nearest matching times in R

Question 1

Here are a few solutions.

1) ave. This uses chron times together with subset-style filtering and ave from base R:

library(chron)

# Signed gap time1 - time2 for every row of df, as a plain numeric vector.
delta <- as.vector(times(df$time1) - times(df$time2))

# Keep only the rows where time1 is strictly after time2. which() drops NA
# comparisons, the same way subset() would. The gap vector is filtered with
# the same index so it stays aligned row-for-row with df2; passing the
# unfiltered delta to ave() alongside df2$id would mismatch lengths.
keep <- which(delta > 0)
df2 <- df[keep, , drop = FALSE]
delta_kept <- delta[keep]

# Within each id, flag the row(s) whose gap equals that id's smallest gap
# (ave() returns the logical flags as 0/1), then keep the flagged rows.
is_nearest <- ave(delta_kept, df2$id, FUN = function(d) d == min(d)) == 1
df2[is_nearest, , drop = FALSE]

2) dplyr This uses chron times and the dplyr package:

library(chron)
library(dplyr) 

# For each id, keep the row(s) where time1 is after time2 by the smallest
# margin. Uses %>% because the old %.% chaining operator is no longer
# provided by dplyr.
df %>%
  # Signed gap time1 - time2 as a plain numeric column.
  mutate(delta = as.vector(times(time1) - times(time2))) %>%
  # Discard rows where time1 is not strictly after time2.
  filter(delta > 0) %>%
  group_by(id) %>%
  # Within each id, keep the row(s) attaining the minimum gap.
  filter(delta == min(delta)) %>%
  # Drop the grouping so later code is not silently applied per id.
  ungroup() %>%
  # Remove the helper column so only the original columns are returned.
  select(-delta)

3) sqldf

library(sqldf)

# Inner query: keep rows whose time1 (converted to seconds) exceeds time2.
# Outer query: per id, take the minimum time1 - time2 difference in seconds.
# NOTE(review): the non-aggregated columns are presumably taken from the row
# attaining that minimum -- confirm against the SQLite documentation.
nearest_sql <- "select *, min(strftime('%s', time1) - strftime('%s', time2)) delta
  from (select * from df where strftime('%s', time1) > strftime('%s', time2))
  group by id"
nearest <- sqldf(nearest_sql)

# The first ncol(df) columns are df's own; this drops the trailing delta.
nearest[seq_along(df)]

or perhaps this variation where we calculate delta in R and then use sqldf:

library(sqldf)
library(chron)

# Compute the signed time1 - time2 gap in R so the SQL only has to filter
# and aggregate on a ready-made numeric column.
df2 <- transform(df, delta = as.vector(times(time1) - times(time2)))

# Inner query keeps rows with a positive gap; outer query takes the minimum
# gap per id. "select *" already carries df2's delta column and min(delta)
# appends a second one, so the result ends with two delta columns. Keeping
# only the first ncol(df) columns removes both and returns df's original
# columns, matching the output of the strftime-based variation.
sqldf("select *, min(delta) delta
  from (select * from df2 where delta > 0)
  group by id")[seq_along(df)]

4) data.table

library(data.table)
library(chron)

# Convert df to a data.table so the := and by= syntax below is available.
DT <- data.table(df)

# Add the signed time1 - time2 gap as a new column of DT.
DT[, delta := times(time1) - times(time2)]

# Rows where time1 is strictly after time2.
later <- DT[delta > 0]

# Within each id, keep the row(s) whose gap equals that id's minimum gap.
nearest <- later[, .SD[delta == min(delta)], by = id]

# Keep the first ncol(df) columns, which leaves out the trailing delta column.
nearest[, seq_along(df), with = FALSE]

ADDED additional solutions. Corrected library and subset statements. Minor improvements.

Question 2

Here's an approach with the powerful dplyr package:

library(dplyr)

# For each id, keep the row(s) where time1 is at or after time2 by the
# smallest margin, returning only df's original columns.
# Uses %>% because the old %.% chaining operator is no longer provided by
# dplyr.
(df %>%
   # Gap in whole seconds. The units are fixed explicitly: subtracting two
   # date-times picks units automatically (secs/mins/hours/days), and
   # truncating a gap measured in hours or days to an integer would make
   # different gaps look tied.
   mutate(timeDiff = as.integer(difftime(strptime(time1, "%X"),
                                         strptime(time2, "%X"),
                                         units = "secs")),
          posDiff = timeDiff >= 0) %>%
   # Keep rows where time1 is not before time2.
   filter(posDiff) %>%
   group_by(id) %>%
   # Within each id, keep the row(s) attaining the minimum gap.
   filter(min(timeDiff) == timeDiff) %>%
   # Drop the grouping before selecting the original columns.
   ungroup())[names(df)]

#   id count    time1    time2  afn    dfn
# 1  1    23 00:13:00 00:00:00 0.63 157.27
# 2  2    45 01:13:00 00:00:00 0.63 157.27
# 3  3    67 18:14:00 18:00:00 3.36 201.67
# 4  4    88 07:18:00 06:00:00 1.77 103.55

Question 3

An approach using ddply and merge. (Assuming that the "nearest match times" are the minimum absolute values of the difftimes)

# ddply(), .() and summarize() used below come from plyr.
library(plyr)

# Parse both time columns as HH:MM:SS.
t1 <- strptime(df$time1, "%H:%M:%S")
t2 <- strptime(df$time2, "%H:%M:%S")

# Absolute gap between the two times, in minutes, stored as a new column.
df$min.diff <- abs(as.numeric(difftime(t1, t2, units = "mins")))

# One row per id holding that id's smallest absolute gap; merging this back
# onto df by id and min.diff selects the nearest-match rows.
d1 <- ddply(df, .(id), summarize, min.diff = min(min.diff))

> merge(df, d1, by = c("id", "min.diff"))
  id min.diff count    time1    time2  afn    dfn
1  1       13    23 00:13:00 00:00:00 0.63 157.27
2  2       73    45 01:13:00 00:00:00 0.63 157.27
3  3       14    67 18:14:00 18:00:00 3.36 201.67
4  4       78    88 07:18:00 06:00:00 1.77 103.55