Question

I have a time series in R. I want to construct a matrix where each row corresponds to the current observation and each column represents a future value of the series, starting from that point. For example:

# Example series: 25 monthly observations starting in January 2000.
x <- ts(1:25, start = 2000, frequency = 12)
maxHorizon <- 12

# `st` is the time of the period just before the series starts, so that
# `st + k / freq` is the time of the k-th observation.
freq <- frequency(x)
st <- tsp(x)[1] - (1 / freq)

# One row per forecast origin (every observation except the last),
# one column per step ahead. Cells are NA until filled in below.
n_origins <- length(x) - 1
actuals <- matrix(NA, n_origins, maxHorizon)
for (origin in seq(1, n_origins)) {
    # Observations origin + 1 ... origin + maxHorizon; near the end of the
    # series fewer values come back, so only the leading columns are filled.
    future <- window(
        x,
        start = st + (origin + 1) / freq,
        end = st + (origin + maxHorizon) / freq
    )
    actuals[origin, seq_along(future)] <- future
}
actuals

In this case, we've got a time series with 25 observations, so our final matrix has 24 rows. Starting with row 1, the next 12 observations are 2-13. Row 2 is 3-14, etc. At the end of the matrix we fill it in with NA values.

> x
     Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
2000   1   2   3   4   5   6   7   8   9  10  11  12
2001  13  14  15  16  17  18  19  20  21  22  23  24
2002  25

> actuals
      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
 [1,]    2    3    4    5    6    7    8    9   10    11    12    13
 [2,]    3    4    5    6    7    8    9   10   11    12    13    14
 [3,]    4    5    6    7    8    9   10   11   12    13    14    15
 [4,]    5    6    7    8    9   10   11   12   13    14    15    16
 [5,]    6    7    8    9   10   11   12   13   14    15    16    17
 [6,]    7    8    9   10   11   12   13   14   15    16    17    18
 [7,]    8    9   10   11   12   13   14   15   16    17    18    19
 [8,]    9   10   11   12   13   14   15   16   17    18    19    20
 [9,]   10   11   12   13   14   15   16   17   18    19    20    21
[10,]   11   12   13   14   15   16   17   18   19    20    21    22
[11,]   12   13   14   15   16   17   18   19   20    21    22    23
[12,]   13   14   15   16   17   18   19   20   21    22    23    24
[13,]   14   15   16   17   18   19   20   21   22    23    24    25
[14,]   15   16   17   18   19   20   21   22   23    24    25    NA
[15,]   16   17   18   19   20   21   22   23   24    25    NA    NA
[16,]   17   18   19   20   21   22   23   24   25    NA    NA    NA
[17,]   18   19   20   21   22   23   24   25   NA    NA    NA    NA
[18,]   19   20   21   22   23   24   25   NA   NA    NA    NA    NA
[19,]   20   21   22   23   24   25   NA   NA   NA    NA    NA    NA
[20,]   21   22   23   24   25   NA   NA   NA   NA    NA    NA    NA
[21,]   22   23   24   25   NA   NA   NA   NA   NA    NA    NA    NA
[22,]   23   24   25   NA   NA   NA   NA   NA   NA    NA    NA    NA
[23,]   24   25   NA   NA   NA   NA   NA   NA   NA    NA    NA    NA
[24,]   25   NA   NA   NA   NA   NA   NA   NA   NA    NA    NA    NA

How can I do this without using the ugly for loop?

edit: it would be ok if the data were returned in another format, such as a data.frame or even a list of rows.

edit: here's some code to compare the 3 functions we have so far:

# Start from an empty workspace so the comparison only sees the objects
# defined below. Caution: this removes every object, including hidden
# (dot-prefixed) ones, from the environment it is run in.
rm(list = ls(all.names = TRUE))

# Loop-based construction of the matrix of future values.
#
# Args:
#   x:          a univariate time series (ts object).
#   maxHorizon: number of steps ahead to collect for each observation.
#
# Returns:
#   A matrix with length(x) - 1 rows and maxHorizon columns. Row i holds the
#   observations that follow observation i; cells past the end of the series
#   stay NA.
zach1 <- function(x,maxHorizon) {
    freq <- frequency(x)
    # Time of the period just before the first observation, so that
    # st + k/freq is the time of the k-th observation.
    st <- tsp(x)[1]-(1/freq)

    actuals <- matrix(NA,length(x)-1,maxHorizon)

    # seq_len() gives an empty sequence for a one-observation series, whereas
    # seq(1, 0) would count down through 1, 0 and make window() fail.
    for(i in seq_len(length(x)-1)) {
        xnext <- window(x, start=st+(i+1)/freq, end=st+(i+maxHorizon)/freq)
        # Near the end of the series xnext is shorter than maxHorizon, so only
        # the leading columns of the row are overwritten.
        actuals[i,seq_along(xnext)] <- xnext
    }

    actuals
}

# Loop-free construction of the matrix of future values, based on embed().
#
# Args:
#   x:          a univariate time series (ts object) or plain vector.
#   maxHorizon: number of steps ahead to collect for each observation (>= 1).
#
# Returns:
#   A matrix with length(x) - 1 rows and maxHorizon columns. Row i holds the
#   observations that follow observation i, padded with NA past the end of x.
#   The result is always a matrix, even with a single row or a single column.
zach2 <- function(x,maxHorizon) {
    n <- length(x)
    # Pad with maxHorizon NAs so rows near the end of the series are complete.
    padded <- c(x, rep(NA, maxHorizon))
    # embed() lists each window newest-first; reversing the columns restores
    # chronological order. drop = FALSE keeps a matrix when maxHorizon is 1.
    windows <- embed(padded, maxHorizon)[, maxHorizon:1, drop = FALSE]
    # The first window starts at x[1] itself, so the rows wanted are 2..n.
    # seq_len() makes that selection empty (not 2:1) for a one-element series.
    windows[seq_len(n - 1) + 1, , drop = FALSE]
}

# Loop-free construction of the matrix of future values, based on an index
# matrix built with outer().
#
# Args:
#   x:          a univariate time series (ts object) or plain vector.
#   maxHorizon: number of steps ahead to collect for each observation.
#
# Returns:
#   A matrix with length(x) - 1 rows and maxHorizon columns. Row i holds the
#   observations that follow observation i; cells past the end of x are NA.
#   The result is always a matrix, even with a single row or a single column.
josh1 <- function(x,maxHorizon) {
    n <- length(x)
    # Cell (i, h) holds the position i + h of the value wanted in x. Building
    # the rows with seq_len(n - 1) avoids the 1:(n - 1) subset, which both
    # dropped dimensions and counted down through 1, 0 when n was 1.
    idx <- outer(seq_len(n - 1), seq_len(maxHorizon), FUN = "+")
    # Positions beyond the end of the series have no observation.
    idx[idx > n] <- NA
    # Look the values up in one go and restore the shape of the index matrix;
    # NA positions come back as NA values.
    matrix(x[as.vector(idx)], nrow = nrow(idx), ncol = ncol(idx))
}

# Benchmark input: 10000 random normal draws as a monthly series from 2000.
draws <- rnorm(10000)
x <- ts(draws, start = 2000, frequency = 12)

> system.time(actuals1 <- zach1(x, 6))
   user  system elapsed 
  11.81    0.00   11.93 

> system.time(actuals2 <- zach2(x, 6))
   user  system elapsed 
   0.15    0.00    0.16 

> system.time(actuals3 <- josh1(x, 6))
   user  system elapsed 
      0       0       0 

> all.equal(actuals1,actuals2)
[1] TRUE
> all.equal(actuals1,actuals3)
[1] TRUE
Was it helpful?

Solution

EDIT: To fill the matrix with the elements of x (rather than their indices), you can pass outer() an 'anonymous function' of your own devising. This should do the trick:

# Demo on a simulated monthly series
x <- ts(rnorm(25), start = 2000, frequency = 12)
maxHorizon <- 12

# Cell (i, h) of the result is x[i + h]; positions beyond the end of x
# come back as NA.
origins <- seq_along(x)
steps <- seq_len(maxHorizon)
actuals <- outer(origins, steps, FUN = function(from, ahead) x[from + ahead])

tail(actuals)
#             [,1]       [,2]       [,3]       [,4]      [,5] [,6] [,7] [,8] [,9]
# [20,] -1.2729640 -0.4983060  0.6199497 -2.0999648 0.1673402   NA   NA   NA   NA
# [21,] -0.4983060  0.6199497 -2.0999648  0.1673402        NA   NA   NA   NA   NA
# [22,]  0.6199497 -2.0999648  0.1673402         NA        NA   NA   NA   NA   NA
# [23,] -2.0999648  0.1673402         NA         NA        NA   NA   NA   NA   NA
# [24,]  0.1673402         NA         NA         NA        NA   NA   NA   NA   NA
# [25,]         NA         NA         NA         NA        NA   NA   NA   NA   NA
#       [,10] [,11] [,12]
# [20,]    NA    NA    NA
# [21,]    NA    NA    NA
# [22,]    NA    NA    NA
# [23,]    NA    NA    NA
# [24,]    NA    NA    NA
# [25,]    NA    NA    NA

OTHER TIPS

This gets rid of the for loop, but I'm not sure if it's any more elegant: t(apply(embed(c(x,rep(NA,maxHorizon)),maxHorizon),1,rev))[2:length(x),]

edit: it's a lot faster though.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top