Question

I'm trying to create a 30-day forecast using auto.arima from the forecast package. I want to capture the long-term trend, so I passed a time index to the xreg argument.

The data:

dput(data)
structure(list(TKDate = structure(c(15706, 15707, 15708, 15709, 
15710, 15711, 15712, 15713, 15714, 15715, 15716, 15717, 15718, 
15719, 15720, 15721, 15722, 15723, 15724, 15725, 15726, 15727, 
15728, 15729, 15730, 15731, 15732, 15733, 15734, 15735, 15736, 
15737, 15738, 15739, 15740, 15741, 15742, 15743, 15744, 15745, 
15746, 15747, 15748, 15749, 15750, 15751, 15752, 15753, 15754, 
15755, 15756, 15757, 15758, 15759, 15760, 15761, 15762, 15763, 
15764, 15765, 15766, 15767, 15768, 15769, 15770, 15771, 15772, 
15773, 15774, 15775, 15776, 15777, 15778, 15779, 15780, 15781, 
15782, 15783, 15784, 15785, 15786, 15787, 15788, 15789, 15790, 
15791, 15792, 15793, 15794, 15795, 15796, 15797, 15798, 15799, 
15800, 15801, 15802, 15803, 15804, 15805, 15806, 15807, 15808, 
15809, 15810, 15811, 15812, 15813, 15814, 15815, 15816, 15817, 
15818, 15819, 15820, 15821, 15822, 15823, 15824, 15825, 15826, 
15827, 15828, 15829, 15830, 15831, 15832, 15833, 15834, 15835, 
15836, 15837, 15838, 15839, 15840, 15841, 15842, 15843, 15844, 
15845, 15846, 15847, 15848, 15849, 15850, 15851, 15852, 15853, 
15854, 15855, 15856, 15857, 15858, 15859, 15860, 15861, 15862, 
15863, 15864, 15865, 15866, 15867, 15868, 15869, 15870, 15871, 
15872, 15873, 15874, 15875, 15876, 15877, 15878, 15879, 15880, 
15881, 15882, 15883, 15884, 15885, 15886, 15887, 15888, 15889, 
15890, 15891, 15892, 15893, 15894, 15895, 15896, 15897, 15898, 
15899, 15900, 15901, 15902, 15903, 15904, 15905, 15906, 15907, 
15908, 15909, 15910, 15911, 15912, 15913, 15914, 15915, 15916, 
15917, 15918, 15919, 15920, 15921, 15922, 15923, 15924, 15925, 
15926, 15927, 15928, 15929, 15930, 15931, 15932, 15933, 15934, 
15935, 15936, 15937, 15938, 15939, 15940, 15941, 15942, 15943, 
15944, 15945, 15946, 15947, 15948, 15949, 15950, 15951, 15952, 
15953, 15954, 15955, 15956, 15957, 15958, 15959, 15960, 15961, 
15962, 15963, 15964, 15965, 15966, 15967, 15968, 15969, 15970, 
15971, 15972, 15973, 15974, 15975, 15976, 15977, 15978, 15979, 
15980, 15981, 15982, 15983, 15984, 15985, 15986, 15987, 15988, 
15989, 15990, 15991, 15992, 15993, 15994, 15995, 15996, 15997, 
15998, 15999, 16000, 16001, 16002, 16003, 16004, 16005, 16006, 
16007, 16008, 16009, 16010, 16011, 16012, 16013, 16014, 16015, 
16016, 16017, 16018, 16019, 16020, 16021, 16022, 16023, 16024, 
16025, 16026, 16027, 16028, 16029, 16030, 16031, 16032, 16033, 
16034, 16035, 16036, 16037, 16038, 16039, 16040, 16041, 16042, 
16043, 16044, 16045, 16046, 16047, 16048, 16049, 16050, 16051, 
16052, 16053, 16054, 16055, 16056, 16057, 16058, 16059, 16060, 
16061, 16062, 16063, 16064, 16065, 16066, 16067, 16068, 16069, 
16070, 16071, 16072, 16073, 16074, 16075, 16076, 16077, 16078, 
16079, 16080, 16081, 16082, 16083, 16084, 16085, 16086, 16087, 
16088, 16089, 16090, 16091, 16092, 16093, 16094, 16095, 16096, 
16097, 16098, 16099, 16100, 16101, 16102, 16103, 16104, 16105, 
16106, 16107, 16108, 16109, 16110, 16111, 16112, 16113, 16114, 
16115, 16116, 16117, 16118), class = "Date"), spend = c(7984.39, 
11476.06, 6555.57, 3981.45, 3963.83, 4827.72, 6309.32, 13503.36, 
17075.89, 33353.71, 29324.34, 7968.68, 5540.63, 12113.45, 15596.38, 
19328.67, 20224.68, 18977.55, 16128.27, 10633.56, 11887.79, 17881.11, 
12613.46, 11607.55, 38232.11, 7861.25, 9397.88, 12056.02, 15115.87, 
12275.93, 14537.35, 9594.26, 8215.83, 9632.52, 9993.15, 13478.37, 
28509.38, 12016.33, 8907.76, 8757.43, 9513.09, 10299.5, 10385.03, 
12515.62, 9008.95, 17825.68, 9320.47, 11189.58, 12902.31, 13341.35, 
18675.32, 16989.53, 10114.53, 9876.65, 11203.39, 11718.73, 26264.95, 
12414.19, 12275.16, 9242.85, 8883.97, 10095.72, 11581.55, 14815.78, 
25064.12, 9297.07, 8047.91, 6876.37, 8881.63, 10982.85, 9975.33, 
24124.62, 8514.66, 15719.84, 5807.39, 8422.38, 15184.95, 14757.58, 
11087.61, 11070.78, 10425.67, 15517.8, 11257.69, 11915.47, 11720.37, 
34064.62, 6493.41, 5757.4, 4387.54, 6520.58, 7806.81, 6356.63, 
10916.36, 9013.43, 9722.41, 6044.25, 7971.7, 23933.54, 8627.85, 
9722.77, 18660.13, 13011.36, 11445.11, 14219.2, 17138.92, 16016.68, 
11434, 31379.03, 8494.25, 12493.85, 7708.1, 21583.05, 9026.17, 
9379.35, 8287.13, 7298.16, 6097.03, 8076.57, 12871.87, 11346.89, 
9115.82, 7737.98, 15065.38, 5262.73, 6522.58, 12743.94, 23945.16, 
16109.26, 6985.89, 6345.08, 6246.93, 6824.66, 8491.42, 9654.99, 
18976.58, 19565.68, 8075.47, 7219.79, 8629.04, 12491.64, 11915.89, 
27533.16, 13554.35, 10102.21, 20029.15, 11641.82, 15855.19, 14139.17, 
15376.63, 14625.99, 9098.87, 9396.64, 12015.84, 17532.75, 15131.65, 
15815.5, 16048.65, 9769.63, 9582.12, 11201.8, 12810, 18857.38, 
11822.71, 19289.08, 8911.29, 9437.55, 10987.14, 12995.65, 16675.26, 
9741.82, 9723.57, 10328.24, 7738.04, 8432.16, 23021.73, 10367.28, 
8210.53, 10468.4, 8024.25, 7296.25, 7445.34, 8539.59, 12386.23, 
15335.72, 9013.49, 7994.95, 7759.46, 8789.38, 11242.38, 28653.23, 
9750.96, 14398.62, 9248.74, 6766.08, 8159.14, 9899.38, 9453.35, 
17588.96, 8958.16, 8256.61, 6240.4, 7235.24, 23841.62, 9002.73, 
11839.47, 8693.31, 7161.37, 7046.39, 9221.53, 10004.93, 8698.76, 
7948.68, 9013.27, 18536.68, 7980.38, 8968.95, 23594.14, 17744.66, 
12615.73, 13646.05, 10512.58, 9066.02, 9665.15, 13183.2, 23864.45, 
12017.52, 10831.07, 8954.76, 7276.41, 7882.9, 16616.41, 15384.68, 
11046.53, 10621.01, 8094.74, 5451.26, 6237.79, 10717.69, 7076.38, 
7044.62, 7047.45, 7774.77, 6496.21, 6340.9, 7110.53, 7691.28, 
17482.02, 5576.19, 3763.79, 11477.68, 5710.5, 6519.51, 20022.61, 
13153.68, 6526.28, 5885.28, 5656.17, 6270.04, 9795.38, 6320.95, 
5741.98, 10808.72, 5150.87, 5416.52, 6305.05, 20953.12, 6569.02, 
6360.21, 9376.68, 4973.93, 5034.48, 6380.45, 15307.28, 14386.65, 
17705.88, 4779.52, 4784.79, 4737.05, 5350.28, 12112.11, 13153.72, 
6049.69, 5430.46, 4627.59, 3637.2, 5482.43, 16705.15, 12221.16, 
13198.88, 6484.54, 5590.86, 4979.09, 5771.75, 7311.92, 16111.86, 
8047.77, 11706.91, 6042.14, 5670.74, 6905.07, 11261.89, 9700.4, 
6643.03, 5693.85, 14778.67, 9128.14, 3682.01, 7911.5, 17742.85, 
5093.31, 7867.97, 3202.78, 2843.35, 2598.77, 10930.81, 11204.67, 
7289.62, 4000.17, 4178.89, 4507.33, 6671.48, 10317.48, 9368.98, 
6156.41, 8375.24, 2762.76, 2457.59, 4707.51, 4584.52, 3749.82, 
11667.82, 4271.67, 3614.3, 3715.83, 4510.57, 4872.36, 21805.71, 
4757.04, 6515.92, 2834.25, 2685.19, 3509.28, 4479.35, 17817.99, 
10357.67, 3412.15, 3044.95, 2840.24, 3348.91, 13671.68, 2027.42, 
1616.25, 1177.73, 995.25, 1062.25, 1578.07, 1649.8, 1410.06, 
1592.03, 3995.24, 6489.87, 6895.21, 8298.58, 7698.68, 5782.07, 
7671.08, 19539.4, 7023.84, 6509.9, 6643.28, 19850.3, 6856.67, 
13142.15, 5524.75, 5063.2, 4916.81, 6117.54, 6717.86, 9393.95, 
10462.44, 10511.15, 4497.94, 4038.31, 5503.91, 5554.82, 5801.11, 
12992.82, 4778.61, 4067.41, 4359.53, 6148.1, 9236.51, 5773.16, 
11313.13, 4702.37, 4167.3, 4067.75, 4469.11, 9278.41, 9911.18, 
5161.13, 4477.78, 4459.53, 4080.14, 5084.67, 7735.34, 10676.6, 
5507.86, 8286.12, 4332.23, 4737.52, 5952.09, 7134.44)), .Names = c("TKDate", 
"spend"), row.names = c(NA, 413L), class = "data.frame")

The code:

# Load the forecast package, which provides auto.arima() and forecast().
library(forecast)
# Time index 1..n over the observed rows, used as the trend regressor.
# (rep() with a single argument returns its input unchanged.)
explaining<-rep(1:length(data$TKDate))
# Time index for the future periods: n+1 .. n+31.
# Note: this is 31 values, while h=30 is requested in the forecast() call below.
predic<-rep((length(data$TKDate)+1):(length(data$TKDate)+31))
# Fit an ARIMA model to the spend column (column 2 of data), with the
# time index supplied as an external regressor.
modArima <- auto.arima(data[,2],xreg=explaining)
# NOTE(review): this is the call the question is about. The in-sample index
# `explaining` is passed as xreg, while the future index `predic` is passed
# through `newdata` -- confirm against the forecast.Arima documentation
# whether `newdata` is used at all.
fit<-forecast(modArima,h=30,xreg=explaining,newdata=predic)
# Plot the resulting forecast object.
plot(fit)

I get this weird jump: enter image description here

Can anyone explain this weird jump to me? Why doesn't the forecast continue from the last observed data point (or at least close to it)?

Was it helpful?

Solution

This is a hard-to-find error, I'll admit.

forecast.Arima() takes the new values of the external regressors not in a newdata parameter (as does predict.lm()), but in the xreg parameter. So instead of

fit <- forecast(modArima,h=30,xreg=explaining,newdata=predic)

where you forecasted using the values of explaining, not those of predic (unfortunately, forecast.Arima() does not throw a warning if you feed data to the nonexistent newdata parameter), do this:

# Pass the future regressor values (predic) through xreg.
# NOTE(review): predic holds 31 values while h=30; the forecast.Arima
# documentation states that h is ignored when xreg is supplied -- confirm
# the resulting forecast horizon.
fit <- forecast(modArima,h=30,xreg=predic)

and plot (with in-sample fits thrown in for good measure - EDIT: somewhat confusingly, the in-sample fits are not returned by auto.arima() or arima() as they are by lm(), but by forecast.Arima()):

# Plot the forecast object.
plot(fit)
# Overlay the in-sample fitted values (fit$fitted) as a red line.
lines(fit$fitted,col="red")

enter image description here

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top