Monthly CO2 levels at Alert, Northwest Territories, Canada, from Jan 1994 through Dec 2004 (132 observations).
We use the co2 data from the TSA package (note that base R also ships a different dataset named co2).
# load TSA package while protecting the original acf()
acf1 = acf
library(TSA)
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
acf = acf1
data(co2)
library(forecast)
## Warning: package 'forecast' was built under R version 4.1.2
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Registered S3 methods overwritten by 'forecast':
## method from
## fitted.Arima TSA
## plot.Arima TSA
source('https://nmimoto.github.io/R/TS-00.txt')
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
co2
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov
## 1994 363.05 364.18 364.87 364.47 364.32 362.13 356.72 350.88 350.69 356.06 360.09
## 1995 363.49 364.94 366.72 366.33 365.75 364.32 358.59 352.06 353.45 357.27 362.34
## 1996 366.93 366.71 367.63 368.15 369.14 367.33 361.53 356.11 354.51 360.12 363.85
## 1997 367.72 369.08 368.17 368.83 369.49 367.57 360.79 355.16 356.01 360.71 364.77
## 1998 369.40 370.12 370.88 370.53 371.56 369.28 364.50 357.46 360.54 364.04 368.74
## 1999 372.60 373.85 373.75 374.10 374.50 372.04 364.81 359.11 359.65 364.94 369.82
## 2000 373.23 375.13 374.83 375.42 376.18 374.01 366.54 360.78 361.77 367.51 370.58
## 2001 375.49 375.94 376.42 377.48 377.67 374.78 367.38 361.67 363.39 367.74 373.18
## 2002 376.68 377.42 378.27 378.73 379.01 375.95 370.78 364.07 365.36 370.25 374.04
## 2003 379.03 379.36 380.90 381.39 382.38 381.02 373.78 367.97 368.55 372.28 377.75
## 2004 382.44 382.36 381.58 383.21 383.58 382.59 374.58 368.69 368.55 373.39 378.49
## Dec
## 1994 363.27
## 1995 365.65
## 1996 365.52
## 1997 367.81
## 1998 371.58
## 1999 372.62
## 2000 373.37
## 2001 374.41
## 2002 377.99
## 2003 379.99
## 2004 381.62
is.ts(co2) # is it Time Series object?
## [1] TRUE
frequency(co2) # is frequency set?
## [1] 12
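For reference, if the series were only available as a plain numeric vector, a monthly ts object could be built by hand; a minimal sketch, with the start date and frequency taken from the printed data above:
co2.vec = as.numeric(co2)                      # pretend we only had the raw numbers
co2.ts  = ts(co2.vec, start=c(1994, 1), frequency=12)
identical(c(co2.ts), c(co2))                   # same values, now with ts attributes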
plot(co2, type="o", main='CO2 Data')
layout(matrix(1:2, 1,2))
acf(co2, lag.max=36)
pacf(co2, lag.max=36)
layout(1)
#--- Take Monthly Averages
Y = co2
Mav1     = aggregate(c(Y), list(month=cycle(Y)), mean)$x             #- 1yr long Mtly Av
Y.MtlyAv = ts(Mav1[cycle(Y)], start=start(Y), freq=frequency(Y))     #- Mtly Av
Y.DS     = Y - Y.MtlyAv                                              # Deseasonalized Y
plot(Y, type="o") # original
lines(Y.MtlyAv, col="blue") # seasonal averages
co2.DS = Y.DS   # Deseasonalized
plot(co2.DS, type="o", main="De-seasonalized CO2")
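As a cross-check, the 12 monthly averages in Mav1 can also be computed with tapply(); this is only a sketch to verify the aggregate() call above:
Mav.check = tapply(c(co2), cycle(co2), mean)           # 12 monthly means, Jan through Dec
all.equal(as.numeric(Mav.check), as.numeric(Mav1))     # should be TRUE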
#--- Fit line
Reg1 = lm(co2.DS ~ time(co2.DS))
summary(Reg1)
##
## Call:
## lm(formula = co2.DS ~ time(co2.DS))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.22331 -0.69056 -0.01021 0.64085 2.36388
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.633e+03 5.112e+01 -71.07 <2e-16 ***
## time(co2.DS) 1.817e+00 2.557e-02 71.07 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9327 on 130 degrees of freedom
## Multiple R-squared: 0.9749, Adjusted R-squared: 0.9747
## F-statistic: 5052 on 1 and 130 DF, p-value: < 2.2e-16
plot(co2.DS, type="o")       # de-seasonalized CO2
abline(Reg1, col="red")      # overlay fitted regression line
plot(Reg1$resid, type="o")   # regression residuals
Stationarity.tests(Reg1$resid)
##
## 'tseries' version: 0.10-49
##
## 'tseries' is a package for time series analysis and computational
## finance.
##
## See 'library(help="tseries")' for details.
## Warning in pp.test(A): p-value smaller than printed p-value
## Warning in kpss.test(A): p-value greater than printed p-value
## KPSS ADF PP
## p-val: 0.1 0.02 0.01
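Stationarity.tests() is a helper from the TS-00.txt file sourced above; judging from the warnings, it wraps the three tests in the tseries package, which can also be called directly (a sketch, not the helper's actual code):
library(tseries)
A = Reg1$resid
kpss.test(A)$p.value   # H0: the series is level-stationary
adf.test(A)$p.value    # H0: the series has a unit root
pp.test(A)$p.value     # H0: the series has a unit root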
layout(matrix(1:2, 1,2))
acf(Reg1$resid)
pacf(Reg1$resid)
layout(1)
Fit11 = auto.arima(co2.DS, d=0, D=0, xreg=time(co2.DS),
                   stepwise=FALSE, approximation=FALSE)
Fit11
## Series: co2.DS
## Regression with ARIMA(2,0,0)(2,0,0)[12] errors
##
## Coefficients:
## ar1 ar2 sar1 sar2 intercept xreg
## 0.3855 0.2622 0.2462 0.2003 -3624.1376 1.8126
## s.e. 0.0852 0.0891 0.0901 0.0927 150.3886 0.0752
##
## sigma^2 = 0.5676: log likelihood = -148.18
## AIC=310.36 AICc=311.26 BIC=330.54
Randomness.tests(Fit11$resid)
## B-L test H0: the series is uncorrelated
## M-L test H0: the square of the series is uncorrelated
## J-B test H0: the series came from Normal distribution
## SD : Standard Deviation of the series
## BL15 BL20 BL25 ML15 ML20 JB SD
## [1,] 0.757 0.423 0.485 0.649 0.62 0.905 0.739
With observation \(Y_t\), \[ Y_t \hspace{3mm} = \hspace{3mm} L_t + S_t + X_t \]
We first removed the seasonal component \(S_t\) by subtracting the monthly averages.
The linear trend is \[ L_t = a + bt \]
Then \(X_t\) is modeled by ARMA(2,0) \(\times\) (2,0): \[ (1-\phi_1 B-\phi_2 B^2)(1-\Phi_1 B^{12} - \Phi_2 B^{24}) X_t = \epsilon_t, \] where \(\epsilon_t \sim WN(0,\sigma^2).\)
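As a sanity check on the notation, the estimated trend \(L_t = a + bt\) can be read off from the regression part of Fit11; a minimal sketch, where the coefficient names "intercept" and "xreg" follow the Fit11 output shown below:
a.hat = coef(Fit11)["intercept"]       # estimate of a
b.hat = coef(Fit11)["xreg"]            # estimate of b (per year, since xreg = time in years)
L.hat = a.hat + b.hat * time(co2.DS)   # fitted linear trend L_t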
Fit11
## Series: co2.DS
## Regression with ARIMA(2,0,0)(2,0,0)[12] errors
##
## Coefficients:
## ar1 ar2 sar1 sar2 intercept xreg
## 0.3855 0.2622 0.2462 0.2003 -3624.1376 1.8126
## s.e. 0.0852 0.0891 0.0901 0.0927 150.3886 0.0752
##
## sigma^2 = 0.5676: log likelihood = -148.18
## AIC=310.36 AICc=311.26 BIC=330.54
#- 12-step prediction (without seasonality)
h     = 12
Y     = co2.DS
Fit00 = Fit11

Y.pred = forecast(Fit00, h, xreg=last(time(Y))+(1:h)/frequency(Y))
Y.pred
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2005 10.56596 9.600437 11.53149 9.089320 12.04260
## Feb 2005 10.40272 9.367926 11.43751 8.820142 11.98529
## Mar 2005 10.44763 9.339424 11.55583 8.752776 12.14248
## Apr 2005 10.87979 9.743621 12.01595 9.142171 12.61740
## May 2005 11.07817 9.924432 12.23190 9.313683 12.84265
## Jun 2005 11.58095 10.418388 12.74351 9.802965 13.35893
## Jul 2005 11.13942 9.971883 12.30696 9.353824 12.92502
## Aug 2005 11.30046 10.130245 12.47067 9.510772 13.09014
## Sep 2005 11.12820 9.956513 12.29988 9.336261 12.92013
## Oct 2005 11.05218 9.879690 12.22466 9.259014 12.84534
## Nov 2005 11.48858 10.315654 12.66150 9.694746 13.28241
## Dec 2005 11.56966 10.396491 12.74282 9.775455 13.36386
plot(Y.pred)
#--- 12-step prediction w Seasonal average ---
Y   = co2      # original TS
Mav = Mav1     # 1yr worth of seasonal average (non ts obj)
Y.h = Y.pred   # forecast() object

Yhat     = ts(Y.h$mean       + Mav1, start=last(time(Y))+(1)/frequency(Y), freq=frequency(Y))
Yhat.CIl = ts(Y.h$lower[,2]  + Mav1, start=last(time(Y))+(1)/frequency(Y), freq=frequency(Y))
Yhat.CIu = ts(Y.h$upper[,2]  + Mav1, start=last(time(Y))+(1)/frequency(Y), freq=frequency(Y))
#- Plot the forecast with original data
ts.plot(Y,Yhat)
lines( Yhat, type="l", col="blue", lwd=2)
lines( Yhat.CIu, type="l", col="gray30", lty=1)
lines( Yhat.CIl, type="l", col="gray30", lty=1)
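Because the monthly averages are added back as a deterministic shift, the point forecast and both interval bounds move by the same amount, so the interval width is unchanged; a quick check (a sketch using the objects defined above):
all.equal(as.numeric(Yhat.CIu - Yhat.CIl),
          as.numeric(Y.h$upper[,2] - Y.h$lower[,2]))   # widths identical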
#--- This line will load it to R
source('https://nmimoto.github.io/R/TS-00.txt')
Pred = Rolling1step.forecast(co2.DS,
                             window.size=100,
                             Arima.order=c(2,0,0),
                             include.mean=TRUE,     # need this for intercept
                             include.drift=FALSE,
                             xreg=TRUE,             # NULL = no xreg. TRUE = Linear Trend is present
                             seasonal=c(2, 0, 0))
##
## Total length 132 , window size 100 .
## Last 32 obs retrospectively forecasted with Rolling 1-step
## prediction using same order and fized window size.
##
## Average Prediction Error: 0.2389
## root Mean Squared Error of Prediction: 0.8759
# Pred
Fit11
## Series: co2.DS
## Regression with ARIMA(2,0,0)(2,0,0)[12] errors
##
## Coefficients:
## ar1 ar2 sar1 sar2 intercept xreg
## 0.3855 0.2622 0.2462 0.2003 -3624.1376 1.8126
## s.e. 0.0852 0.0891 0.0901 0.0927 150.3886 0.0752
##
## sigma^2 = 0.5676: log likelihood = -148.18
## AIC=310.36 AICc=311.26 BIC=330.54
sqrt(Fit11$sigma2)
## [1] 0.7534028
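The rolling one-step RMSE (0.8759) can be put next to the model's in-sample innovation standard deviation; a small sketch using the numbers reported above:
rmse.roll = 0.8759               # from the Rolling1step.forecast() output above
sd.innov  = sqrt(Fit11$sigma2)   # about 0.7534
rmse.roll / sd.innov             # about 1.16: out-of-sample errors are somewhat larger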
Next, we fit the same CO2 data with a SARIMA model.
plot(co2, type="o", main='CO2 Data')
# If you take a difference (Del)
plot(diff(co2), type="o")
# If you take a seasonal difference (Del_12)
plot(diff(co2, 12), type="o")
Stationarity.tests( diff(co2,12) )
## Warning in pp.test(A): p-value smaller than printed p-value
## Warning in kpss.test(A): p-value greater than printed p-value
## KPSS ADF PP
## p-val: 0.1 0.157 0.01
# If you take (Del)(Del_12)
plot(diff(diff(co2, 12)), type="o")
Stationarity.tests( diff(diff(co2,12)) )
## Warning in adf.test(A): p-value smaller than printed p-value
## Warning in pp.test(A): p-value smaller than printed p-value
## Warning in kpss.test(A): p-value greater than printed p-value
## KPSS ADF PP
## p-val: 0.1 0.01 0.01
Taking \(\bigtriangledown_{12}\) eliminates the seasonality, but the stationarity of the result is questionable (the ADF test does not reject the unit root).
Taking \(\bigtriangledown \bigtriangledown_{12}\) eliminates the seasonality and produces a stationary series.
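For a quick numerical look at the two schemes, the sample mean and variance of each differenced series can be inspected (a sketch; the mean of \(\bigtriangledown_{12}\)co2 estimates the annual increase, roughly 12 times the drift fitted below):
c(mean(diff(co2, 12)),       var(diff(co2, 12)))        # Del_12 only
c(mean(diff(diff(co2, 12))), var(diff(diff(co2, 12))))  # (Del)(Del_12)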
plot(diff(co2, 12), type="o")
layout(matrix(1:2, 1, 2))
acf(diff(co2, 12) , lag.max=36)
pacf(diff(co2, 12) , lag.max=36)
layout(1)
Fit21 = auto.arima(co2, d=0, D=1, stepwise=FALSE, approximation=FALSE)
Fit21
## Series: co2
## ARIMA(1,0,1)(0,1,1)[12] with drift
##
## Coefficients:
## ar1 ma1 sma1 drift
## 0.8349 -0.4630 -0.8487 0.1520
## s.e. 0.0819 0.1246 0.1274 0.0052
##
## sigma^2 = 0.5288: log likelihood = -136.09
## AIC=282.18 AICc=282.7 BIC=296.11
Randomness.tests(Fit21$residuals)
## B-L test H0: the series is uncorrelated
## M-L test H0: the square of the series is uncorrelated
## J-B test H0: the series came from Normal distribution
## SD : Standard Deviation of the series
## BL15 BL20 BL25 ML15 ML20 JB SD
## [1,] 0.554 0.575 0.432 0.679 0.617 0.319 0.684
# Same as above
Fit21b = Arima(co2, order=c(1,0,1), seasonal=c(0,1,1), include.drift=TRUE)
Fit21b
## Series: co2
## ARIMA(1,0,1)(0,1,1)[12] with drift
##
## Coefficients:
## ar1 ma1 sma1 drift
## 0.8349 -0.4630 -0.8487 0.1520
## s.e. 0.0819 0.1246 0.1274 0.0052
##
## sigma^2 = 0.5288: log likelihood = -136.09
## AIC=282.18 AICc=282.7 BIC=296.11
The drift estimate is 0.1520 (per month).
Since 12(0.1520) = 1.824, which is very close to the slope estimate (1.8126 per year) in the Linear Trend model, the two models are consistent.
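This consistency can be checked directly from the two fitted objects (a sketch; the coefficient names follow the Fit21 and Fit11 outputs):
12 * coef(Fit21)["drift"]   # drift is per month; times 12 gives about 1.824 per year
coef(Fit11)["xreg"]         # slope of the linear trend, about 1.813 per year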
In the Linear Trend model, after the regression, the residual model was ARMA(2,0) \(\times\) (2,0): \[ (1-\phi_1 B-\phi_2 B^2)(1-\Phi_1 B^{12} - \Phi_2 B^{24}) X_t = \epsilon_t. \]
If this model is true, taking \(\bigtriangledown_{12}\) of the original series over-differences and creates an SMA(1) with \(\Theta_1=1\): \[ \epsilon_t - \epsilon_{t-12}. \]
Arima(co2, order=c(0,0,0), seasonal=c(0,1,1), include.drift=TRUE)   # now the SMA(1) estimate is close to -1 (unit root)
## Series: co2
## ARIMA(0,0,0)(0,1,1)[12] with drift
##
## Coefficients:
## sma1 drift
## -0.9998 0.1527
## s.e. 0.1653 0.0018
##
## sigma^2 = 0.6634: log likelihood = -157.82
## AIC=321.64 AICc=321.85 BIC=330
The drift has to be included, since it is significant.
This confirms the right-hand side of the Linear Trend model.
If co2 had a random walk trend instead of a linear trend, fitting MA(11) after differencing with (d=0, D=1) should result in all \(\theta_i\) close to 1.
Arima(co2, order=c(0,0,11), seasonal=c(0,1,0), include.drift=TRUE)
## Series: co2
## ARIMA(0,0,11)(0,1,0)[12] with drift
##
## Coefficients:
## ma1 ma2 ma3 ma4 ma5 ma6 ma7 ma8 ma9 ma10
## 0.3972 0.6207 0.2726 0.7195 0.4868 0.7640 0.4740 0.8061 0.1842 0.5929
## s.e. 0.1611 0.1247 0.1690 0.0886 0.1660 0.1241 0.1696 0.1164 0.1883 0.1446
## ma11 drift
## 0.3300 0.1430
## s.e. 0.2036 0.0371
##
## sigma^2 = 0.6598: log likelihood = -145.02
## AIC=316.04 AICc=319.48 BIC=352.28
The drift has to be included, since it is significant. This analysis shows no sign that the original co2 series has a random walk trend: \[
Y_t = W_t + S_t + \epsilon_t,
\] where \(W_t\) is a random walk. This further confirms the linear trend model.
Y.pred = forecast(Fit21, 12)
Y.pred
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2005 382.8990 381.9631 383.8349 381.4677 384.3304
## Feb 2005 383.6062 382.6078 384.6046 382.0793 385.1331
## Mar 2005 383.9971 382.9574 385.0367 382.4070 385.5871
## Apr 2005 384.5739 383.5064 385.6415 382.9413 386.2066
## May 2005 385.0562 383.9697 386.1427 383.3946 386.7179
## Jun 2005 383.0545 381.9549 384.1540 381.3728 384.7361
## Jul 2005 376.3810 375.2725 377.4895 374.6856 378.0763
## Aug 2005 370.3794 369.2647 371.4942 368.6746 372.0843
## Sep 2005 371.1485 370.0295 372.2675 369.4371 372.8599
## Oct 2005 375.8221 374.7002 376.9441 374.1063 377.5380
## Nov 2005 380.4135 379.2896 381.5374 378.6946 382.1324
## Dec 2005 383.1663 382.0411 384.2916 381.4454 384.8872
plot(Y.pred)
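To see how close the two models' forecasts are, the two 12-step predictions for 2005 can be overlaid; a sketch using objects defined earlier (Yhat is model (1)'s forecast with the seasonal averages added back, and Y.pred is the SARIMA forecast just computed):
ts.plot(Yhat, Y.pred$mean, col=c("blue", "red"), lty=1:2,
        main="Model (1) vs SARIMA 12-step forecasts for 2005")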
#--- This line will load it to R
source('https://nmimoto.github.io/R/TS-00.txt')
Pred = Rolling1step.forecast(co2,
                             window.size=100,
                             Arima.order=c(1,0,1),
                             include.drift=TRUE,
                             seasonal=c(0, 1, 1))
##
## Total length 132 , window size 100 .
## Last 32 obs retrospectively forecasted with Rolling 1-step
## prediction using same order and fized window size.
##
## Average Prediction Error: -0.0304
## root Mean Squared Error of Prediction: 2.6058
# Pred
Fit11
## Series: co2.DS
## Regression with ARIMA(2,0,0)(2,0,0)[12] errors
##
## Coefficients:
## ar1 ar2 sar1 sar2 intercept xreg
## 0.3855 0.2622 0.2462 0.2003 -3624.1376 1.8126
## s.e. 0.0852 0.0891 0.0901 0.0927 150.3886 0.0752
##
## sigma^2 = 0.5676: log likelihood = -148.18
## AIC=310.36 AICc=311.26 BIC=330.54
sqrt(Fit11$sigma2)
## [1] 0.7534028
After fitting the two models, model (1) is the better one: \[ \mbox{ Linear Trend + Seasonality + ARMA(2,0)x(2,0)} \]
After the seasonality (monthly averages) was removed and a regression line was fit, the residuals were stationary.
After the seasonal difference, force-fitting an SMA(1) produced a parameter estimate very close to 1 in magnitude (a unit root). This suggests that before the seasonal difference, \(\epsilon_t\) was already present on its own.
If \(\epsilon_t\) is present by itself, we should not difference the series any further.
There was no indication of a random walk trend in co2 in the seasonal-difference analysis.
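The out-of-sample comparison that supports this conclusion can be collected in one place; a minimal sketch, with the RMSE values copied from the two Rolling1step.forecast() outputs above (they are comparable because co2.DS differs from co2 only by a deterministic seasonal shift):
rmse = c(LinearTrend.ARMA = 0.8759,   # model (1): rolling 1-step RMSE on co2.DS
         SARIMA.drift     = 2.6058)   # model (2): rolling 1-step RMSE on co2
rmse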