Question

I have the following data frame called workfile:

   head(workfile)  
                 times        users     signal log  
    14 2014-01-13 00:00:16 00250902DC7D   true  ON  
    28 2014-01-13 00:00:47 00250902DC7D   true  ON  
    42 2014-01-13 00:01:18 00250902DC7D   true  ON  
    56 2014-01-13 00:01:48 00250902DC7D   true  ON  
    70 2014-01-13 00:02:19 00250902DC7D   true  ON  
    84 2014-01-13 00:02:50 00250902DC7D   true  ON 

I have 14 different users, and I would like to compute the time differences between consecutive rows for each user. I am using ddply, but it doesn't work and I don't understand why:

hope<-ddply(workfile,.
.(users),transform,diff=c(difftime(tail(workfile$times,-1),head(workfile$times,-1)), NA)) 

here is the error message:
*Error in attributes(out) <- attributes(col) : 'names' attribute [9] must be the same length as the vector [8]*

In addition, if I attach my data frame and run the same command, the error message changes:
attach(workfile)

hope<-ddply(workfile, .(users), transform, diff = c(difftime(tail(times, -1),head(times,-1)), NA))

*Error in as.POSIXct.POSIXlt(time1) : invalid 'x' argument*
which is quite strange too...

I have noticed that, as long as the user (the users column) is the same in every row, there is no error. The error only occurs when I try with a data frame containing different users... Evidently my code is not correct in the tail(...), head(...) part. My goal is to compute the time differences row by row, separately for each user...

I have used dput on a sample of my data frame:

head(pino)  
> pino
                     times        users signal log  
319001 2014-01-16 21:57:46 00250902FA92   true  ON  
319006 2014-01-16 21:57:46 002509030E53   true  ON  
319007 2014-01-16 21:57:46 002509030C41   true  ON  
319011 2014-01-16 21:57:46 00250902DC7D   true  ON  
319014 2014-01-16 21:57:49 00250902FB05   true  ON  
319015 2014-01-16 21:57:49 00250902FA92   true  ON  
319020 2014-01-16 21:57:49 002509030E53   true  ON  
319021 2014-01-16 21:57:49 002509030C41   true  ON  
319025 2014-01-16 21:57:49 00250902DC7D   true  ON  
319028 2014-01-16 21:57:58 00250902FB05   true  ON  
319029 2014-01-16 21:57:58 00250902FA92   true  ON  
319034 2014-01-16 21:57:58 002509030E53   true  ON  
319035 2014-01-16 21:57:58 002509030C41   true  ON  
319039 2014-01-16 21:57:58 00250902DC7D   true  ON  
319042 2014-01-16 21:58:04 00250902FB05   true  ON  
319043 2014-01-16 21:58:04 00250902FA92   true  ON

> dput(pino)
 structure(list(times = structure(list(sec = c(46, 46, 46, 46, 
 49, 49, 49, 49, 49, 58, 58, 58, 58, 58, 4, 4), min = c(57L, 57L, 
 57L, 57L, 57L, 57L, 57L, 57L, 57L, 57L, 57L, 57L, 57L, 57L, 58L, 
 58L), hour = c(21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 
 21L, 21L, 21L, 21L, 21L, 21L), mday = c(16L, 16L, 16L, 16L, 16L, 
 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L), mon = c(0L, 
 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), 
 year = c(114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L, 
 114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L), wday = c(4L, 
 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L
 ), yday = c(15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 
 15L, 15L, 15L, 15L, 15L, 15L, 15L), isdst = c(0L, 0L, 0L, 
 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)), .Names = c("sec", 
 "min", "hour", "mday", "mon", "year", "wday", "yday", "isdst"
 ), class = c("POSIXlt", "POSIXt")), users = structure(c(3L, 13L, 
 10L, 1L, 4L, 3L, 13L, 10L, 1L, 4L, 3L, 13L, 10L, 1L, 4L, 3L), .Label = c("00250902DC7D",
 "00250902FA91", "00250902FA92", "00250902FB05", "00250902FB2E", 
 "00250902FE0A", "00250902FE63", "002509030AD2", "002509030B9D", 
 "002509030C41", "002509030C8D", "002509030CE4", "002509030E53", 
 "002509030E63"), class = "factor"), signal = structure(c(2L, 
 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("false", 
 "true"), class = "factor"), log = structure(c(2L, 2L, 2L, 2L, 
 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("OFF", 
 "ON"), class = "factor")), .Names = c("times", "users", "signal", 
 "log"), row.names = c(319001L, 319006L, 319007L, 319011L, 319014L, 
 319015L, 319020L, 319021L, 319025L, 319028L, 319029L, 319034L, 
 319035L, 319039L, 319042L, 319043L), class = "data.frame")

Now I have the following error message:

Error in attributes(out) <- attributes(col) : 
  'names' attribute [9] must be the same length as the vector [3]

What do you think about it?

Was it helpful?

Solution

In short, if you convert your times column from POSIXlt to POSIXct, your function will work:

# Fails
ddply(pino, .(users), transform, diff = c(difftime(tail(times, -1),head(times,-1)), NA))
# Convert time format
pino$times<-as.POSIXct(pino$times)  
# Works as expected
ddply(pino, .(users), transform, diff = c(difftime(tail(times, -1),head(times,-1)), NA))

#                  times        users signal log diff
# 1  2014-01-16 21:57:46 00250902DC7D   true  ON    3
# 2  2014-01-16 21:57:49 00250902DC7D   true  ON    9
# 3  2014-01-16 21:57:58 00250902DC7D   true  ON   NA
# 4  2014-01-16 21:57:46 00250902FA92   true  ON    3
# 5  2014-01-16 21:57:49 00250902FA92   true  ON    9
# 6  2014-01-16 21:57:58 00250902FA92   true  ON    6
# 7  2014-01-16 21:58:04 00250902FA92   true  ON   NA
# 8  2014-01-16 21:57:49 00250902FB05   true  ON    9
# 9  2014-01-16 21:57:58 00250902FB05   true  ON    6
# 10 2014-01-16 21:58:04 00250902FB05   true  ON   NA
# 11 2014-01-16 21:57:46 002509030C41   true  ON    3
# 12 2014-01-16 21:57:49 002509030C41   true  ON    9
# 13 2014-01-16 21:57:58 002509030C41   true  ON   NA
# 14 2014-01-16 21:57:46 002509030E53   true  ON    3
# 15 2014-01-16 21:57:49 002509030E53   true  ON    9
# 16 2014-01-16 21:57:58 002509030E53   true  ON   NA

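I am not entirely sure why this works. This has come up before in another question, where @hadley simply stated that you should use POSIXct and not POSIXlt in a data.frame. A likely explanation is that a POSIXlt object is stored internally as a list of components (sec, min, hour, mday, mon, year, wday, yday, isdst — the nine names visible in the dput output above), which matches the "'names' attribute [9]" in the error message, whereas a POSIXct object is a plain numeric vector with one value per row, so it can be split and recombined by ddply like any other column.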

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top