Monthly CO2 levels at Alert, Northwest Territories, Canada, from Jan 1994 through Dec 2004 (132 observations).
We use the co2 data from the TSA package (note that base R also ships a different dataset named co2).
# load TSA package while protecting the original acf()
acf1 = acf
library(TSA)
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
acf = acf1
data(co2)
library(forecast)
## Warning: package 'forecast' was built under R version 4.1.2
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Registered S3 methods overwritten by 'forecast':
## method from
## fitted.Arima TSA
## plot.Arima TSA
source('https://nmimoto.github.io/R/TS-00.txt')
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
co2
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov
## 1994 363.05 364.18 364.87 364.47 364.32 362.13 356.72 350.88 350.69 356.06 360.09
## 1995 363.49 364.94 366.72 366.33 365.75 364.32 358.59 352.06 353.45 357.27 362.34
## 1996 366.93 366.71 367.63 368.15 369.14 367.33 361.53 356.11 354.51 360.12 363.85
## 1997 367.72 369.08 368.17 368.83 369.49 367.57 360.79 355.16 356.01 360.71 364.77
## 1998 369.40 370.12 370.88 370.53 371.56 369.28 364.50 357.46 360.54 364.04 368.74
## 1999 372.60 373.85 373.75 374.10 374.50 372.04 364.81 359.11 359.65 364.94 369.82
## 2000 373.23 375.13 374.83 375.42 376.18 374.01 366.54 360.78 361.77 367.51 370.58
## 2001 375.49 375.94 376.42 377.48 377.67 374.78 367.38 361.67 363.39 367.74 373.18
## 2002 376.68 377.42 378.27 378.73 379.01 375.95 370.78 364.07 365.36 370.25 374.04
## 2003 379.03 379.36 380.90 381.39 382.38 381.02 373.78 367.97 368.55 372.28 377.75
## 2004 382.44 382.36 381.58 383.21 383.58 382.59 374.58 368.69 368.55 373.39 378.49
## Dec
## 1994 363.27
## 1995 365.65
## 1996 365.52
## 1997 367.81
## 1998 371.58
## 1999 372.62
## 2000 373.37
## 2001 374.41
## 2002 377.99
## 2003 379.99
## 2004 381.62
is.ts(co2) # is it Time Series object?
## [1] TRUE
frequency(co2) # is frequency set?
## [1] 12
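For reference, if the series were only available as a plain numeric vector, a monthly ts object could be built by hand; a minimal sketch, with the start date and frequency taken from the printed data above:
co2.vec = as.numeric(co2)                      # pretend we only had the raw numbers
co2.ts  = ts(co2.vec, start=c(1994, 1), frequency=12)
identical(c(co2.ts), c(co2))                   # same values, now with ts attributes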
plot(co2, type="o", main='CO2 Data')
layout(matrix(1:2, 1,2))
acf(co2, lag.max=36)
pacf(co2, lag.max=36)
layout(1)
#--- Take Monthly Averages
Y = co2
Mav1     = aggregate(c(Y), list(month=cycle(Y)), mean)$x             #- 1yr long Mtly Av
Y.MtlyAv = ts(Mav1[cycle(Y)], start=start(Y), freq=frequency(Y))     #- Mtly Av
Y.DS     = Y - Y.MtlyAv                                              # Deseasonalized Y
plot(Y, type="o") # original
lines(Y.MtlyAv, col="blue") # seasonal averages
co2.DS = Y.DS   # Deseasonalized
plot(co2.DS, type="o", main="De-seasonalized CO2")
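As a cross-check, the 12 monthly averages in Mav1 can also be computed with tapply(); this is only a sketch to verify the aggregate() call above:
Mav.check = tapply(c(co2), cycle(co2), mean)           # 12 monthly means, Jan through Dec
all.equal(as.numeric(Mav.check), as.numeric(Mav1))     # should be TRUE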
#--- Fit line
Reg1 = lm(co2.DS ~ time(co2.DS))
summary(Reg1)
##
## Call:
## lm(formula = co2.DS ~ time(co2.DS))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.22331 -0.69056 -0.01021 0.64085 2.36388
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.633e+03 5.112e+01 -71.07 <2e-16 ***
## time(co2.DS) 1.817e+00 2.557e-02 71.07 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9327 on 130 degrees of freedom
## Multiple R-squared: 0.9749, Adjusted R-squared: 0.9747
## F-statistic: 5052 on 1 and 130 DF, p-value: < 2.2e-16
plot(co2.DS, type="o")       # de-seasonalized CO2
abline(Reg1, col="red")      # overlay fitted regression line
plot(Reg1$resid, type="o")   # regression residuals
Stationarity.tests(Reg1$resid)
##
## 'tseries' version: 0.10-49
##
## 'tseries' is a package for time series analysis and computational
## finance.
##
## See 'library(help="tseries")' for details.
## Warning in pp.test(A): p-value smaller than printed p-value
## Warning in kpss.test(A): p-value greater than printed p-value
## KPSS ADF PP
## p-val: 0.1 0.02 0.01
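Stationarity.tests() is a helper from the TS-00.txt file sourced above; judging from the warnings, it wraps the three tests in the tseries package, which can also be called directly (a sketch, not the helper's actual code):
library(tseries)
A = Reg1$resid
kpss.test(A)$p.value   # H0: the series is level-stationary
adf.test(A)$p.value    # H0: the series has a unit root
pp.test(A)$p.value     # H0: the series has a unit root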
layout(matrix(1:2, 1,2))
acf(Reg1$resid)
pacf(Reg1$resid)
layout(1)
Fit11 = auto.arima(co2.DS, d=0, D=0, xreg=time(co2.DS),
                   stepwise=FALSE, approximation=FALSE)
Fit11
## Series: co2.DS
## Regression with ARIMA(2,0,0)(2,0,0)[12] errors
##
## Coefficients:
## ar1 ar2 sar1 sar2 intercept xreg
## 0.3855 0.2622 0.2462 0.2003 -3624.1376 1.8126
## s.e. 0.0852 0.0891 0.0901 0.0927 150.3886 0.0752
##
## sigma^2 = 0.5676: log likelihood = -148.18
## AIC=310.36 AICc=311.26 BIC=330.54
Randomness.tests(Fit11$resid)
## B-L test H0: the series is uncorrelated
## M-L test H0: the square of the series is uncorrelated
## J-B test H0: the series came from Normal distribution
## SD : Standard Deviation of the series
## BL15 BL20 BL25 ML15 ML20 JB SD
## [1,] 0.757 0.423 0.485 0.649 0.62 0.905 0.739
With observation \(Y_t\), \[ Y_t \hspace{3mm} = \hspace{3mm} L_t + S_t + X_t \]
We first removed the seasonal component \(S_t\) by subtracting the monthly averages.
The linear trend is \[ L_t = a + bt \]
Then \(X_t\) is modeled by ARMA(2,0) \(\times\) (2,0): \[ (1-\phi_1 B-\phi_2 B^2)(1-\Phi_1 B^{12} - \Phi_2 B^{24}) X_t = \epsilon_t, \] where \(\epsilon_t \sim WN(0,\sigma^2).\)
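As a sanity check on the notation, the estimated trend \(L_t = a + bt\) can be read off from the regression part of Fit11; a minimal sketch, where the coefficient names "intercept" and "xreg" follow the Fit11 output shown below:
a.hat = coef(Fit11)["intercept"]       # estimate of a
b.hat = coef(Fit11)["xreg"]            # estimate of b (per year, since xreg = time in years)
L.hat = a.hat + b.hat * time(co2.DS)   # fitted linear trend L_t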
Fit11
## Series: co2.DS
## Regression with ARIMA(2,0,0)(2,0,0)[12] errors
##
## Coefficients:
## ar1 ar2 sar1 sar2 intercept xreg
## 0.3855 0.2622 0.2462 0.2003 -3624.1376 1.8126
## s.e. 0.0852 0.0891 0.0901 0.0927 150.3886 0.0752
##
## sigma^2 = 0.5676: log likelihood = -148.18
## AIC=310.36 AICc=311.26 BIC=330.54
#- 12-step prediction (without seasonality)
h     = 12
Y     = co2.DS
Fit00 = Fit11

Y.pred = forecast(Fit00, h, xreg=last(time(Y))+(1:h)/frequency(Y))
Y.pred
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2005 10.56596 9.600437 11.53149 9.089320 12.04260
## Feb 2005 10.40272 9.367926 11.43751 8.820142 11.98529
## Mar 2005 10.44763 9.339424 11.55583 8.752776 12.14248
## Apr 2005 10.87979 9.743621 12.01595 9.142171 12.61740
## May 2005 11.07817 9.924432 12.23190 9.313683 12.84265
## Jun 2005 11.58095 10.418388 12.74351 9.802965 13.35893
## Jul 2005 11.13942 9.971883 12.30696 9.353824 12.92502
## Aug 2005 11.30046 10.130245 12.47067 9.510772 13.09014
## Sep 2005 11.12820 9.956513 12.29988 9.336261 12.92013
## Oct 2005 11.05218 9.879690 12.22466 9.259014 12.84534
## Nov 2005 11.48858 10.315654 12.66150 9.694746 13.28241
## Dec 2005 11.56966 10.396491 12.74282 9.775455 13.36386
plot(Y.pred)
#--- 12-step prediction w Seasonal average ---
Y   = co2      # original TS
Mav = Mav1     # 1yr worth of seasonal average (non ts obj)
Y.h = Y.pred   # forecast() object

Yhat     = ts(Y.h$mean       + Mav1, start=last(time(Y))+(1)/frequency(Y), freq=frequency(Y))
Yhat.CIl = ts(Y.h$lower[,2]  + Mav1, start=last(time(Y))+(1)/frequency(Y), freq=frequency(Y))
Yhat.CIu = ts(Y.h$upper[,2]  + Mav1, start=last(time(Y))+(1)/frequency(Y), freq=frequency(Y))
#- Plot the forecast with original data
ts.plot(Y,Yhat)
lines( Yhat, type="l", col="blue", lwd=2)
lines( Yhat.CIu, type="l", col="gray30", lty=1)
lines( Yhat.CIl, type="l", col="gray30", lty=1)
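Because the monthly averages are added back as a deterministic shift, the point forecast and both interval bounds move by the same amount, so the interval width is unchanged; a quick check (a sketch using the objects defined above):
all.equal(as.numeric(Yhat.CIu - Yhat.CIl),
          as.numeric(Y.h$upper[,2] - Y.h$lower[,2]))   # widths identical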
#--- This line will load it to R
source('https://nmimoto.github.io/R/TS-00.txt')
Pred = Rolling1step.forecast(co2.DS,
                             window.size=100,
                             Arima.order=c(2,0,0),
                             include.mean=TRUE,     # need this for intercept
                             include.drift=FALSE,
                             xreg=TRUE,             # NULL = no xreg. TRUE = Linear Trend is present
                             seasonal=c(2, 0, 0))
##
## Total length 132 , window size 100 .
## Last 32 obs retrospectively forecasted with Rolling 1-step
## prediction using same order and fized window size.
##
## Average Prediction Error: 0.2389
## root Mean Squared Error of Prediction: 0.8759
# Pred
Fit11
## Series: co2.DS
## Regression with ARIMA(2,0,0)(2,0,0)[12] errors
##
## Coefficients:
## ar1 ar2 sar1 sar2 intercept xreg
## 0.3855 0.2622 0.2462 0.2003 -3624.1376 1.8126
## s.e. 0.0852 0.0891 0.0901 0.0927 150.3886 0.0752
##
## sigma^2 = 0.5676: log likelihood = -148.18
## AIC=310.36 AICc=311.26 BIC=330.54
sqrt(Fit11$sigma2)
## [1] 0.7534028
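The rolling one-step RMSE (0.8759) can be put next to the model's in-sample innovation standard deviation; a small sketch using the numbers reported above:
rmse.roll = 0.8759               # from the Rolling1step.forecast() output above
sd.innov  = sqrt(Fit11$sigma2)   # about 0.7534
rmse.roll / sd.innov             # about 1.16: out-of-sample errors are somewhat larger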
Next, we fit the same CO2 data with a SARIMA model.
plot(co2, type="o", main='CO2 Data')
# If you take a difference (Del)
plot(diff(co2), type="o")
# If you take a seasonal difference (Del_12)
plot(diff(co2, 12), type="o")
Stationarity.tests( diff(co2,12) )
## Warning in pp.test(A): p-value smaller than printed p-value
## Warning in kpss.test(A): p-value greater than printed p-value
## KPSS ADF PP
## p-val: 0.1 0.157 0.01
# If you take (Del)(Del_12)
plot(diff(diff(co2, 12)), type="o")
Stationarity.tests( diff(diff(co2,12)) )
## Warning in adf.test(A): p-value smaller than printed p-value
## Warning in pp.test(A): p-value smaller than printed p-value
## Warning in kpss.test(A): p-value greater than printed p-value
## KPSS ADF PP
## p-val: 0.1 0.01 0.01
Taking \(\bigtriangledown_{12}\) eliminates the seasonality, but the stationarity of the result is questionable (the ADF test does not reject the unit root).
Taking \(\bigtriangledown \bigtriangledown_{12}\) eliminates the seasonality and produces a stationary series.
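For a quick numerical look at the two schemes, the sample mean and variance of each differenced series can be inspected (a sketch; the mean of \(\bigtriangledown_{12}\)co2 estimates the annual increase, roughly 12 times the drift fitted below):
c(mean(diff(co2, 12)),       var(diff(co2, 12)))        # Del_12 only
c(mean(diff(diff(co2, 12))), var(diff(diff(co2, 12))))  # (Del)(Del_12)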
plot(diff(co2, 12), type="o")
layout(matrix(1:2, 1, 2))
acf(diff(co2, 12) , lag.max=36)
pacf(diff(co2, 12) , lag.max=36)
layout(1)
Fit21 = auto.arima(co2, d=0, D=1, stepwise=FALSE, approximation=FALSE)
Fit21
## Series: co2
## ARIMA(1,0,1)(0,1,1)[12] with drift
##
## Coefficients:
## ar1 ma1 sma1 drift
## 0.8349 -0.4630 -0.8487 0.1520
## s.e. 0.0819 0.1246 0.1274 0.0052
##
## sigma^2 = 0.5288: log likelihood = -136.09
## AIC=282.18 AICc=282.7 BIC=296.11
Randomness.tests(Fit21$residuals)
## B-L test H0: the series is uncorrelated
## M-L test H0: the square of the series is uncorrelated
## J-B test H0: the series came from Normal distribution
## SD : Standard Deviation of the series
## BL15 BL20 BL25 ML15 ML20 JB SD
## [1,] 0.554 0.575 0.432 0.679 0.617 0.319 0.684
# Same as above
Fit21b = Arima(co2, order=c(1,0,1), seasonal=c(0,1,1), include.drift=TRUE)
Fit21b
## Series: co2
## ARIMA(1,0,1)(0,1,1)[12] with drift
##
## Coefficients:
## ar1 ma1 sma1 drift
## 0.8349 -0.4630 -0.8487 0.1520
## s.e. 0.0819 0.1246 0.1274 0.0052
##
## sigma^2 = 0.5288: log likelihood = -136.09
## AIC=282.18 AICc=282.7 BIC=296.11
The drift estimate is 0.1520 (per month).
Since 12(0.1520) = 1.824, which is very close to the slope estimate (1.8126 per year) in the Linear Trend model, the two models are consistent.
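This consistency can be checked directly from the two fitted objects (a sketch; the coefficient names follow the Fit21 and Fit11 outputs):
12 * coef(Fit21)["drift"]   # drift is per month; times 12 gives about 1.824 per year
coef(Fit11)["xreg"]         # slope of the linear trend, about 1.813 per year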
In the Linear Trend model, after the regression, the residual model was ARMA(2,0) \(\times\) (2,0): \[ (1-\phi_1 B-\phi_2 B^2)(1-\Phi_1 B^{12} - \Phi_2 B^{24}) X_t = \epsilon_t. \]
If this model is true, taking \(\bigtriangledown_{12}\) of the original series over-differences and creates an SMA(1) with \(\Theta_1=1\): \[ \epsilon_t - \epsilon_{t-12}. \]
Arima(co2, order=c(0,0,0), seasonal=c(0,1,1), include.drift=TRUE)   # now the SMA(1) estimate is close to -1 (unit root)
## Series: co2
## ARIMA(0,0,0)(0,1,1)[12] with drift
##
## Coefficients:
## sma1 drift
## -0.9998 0.1527
## s.e. 0.1653 0.0018
##
## sigma^2 = 0.6634: log likelihood = -157.82
## AIC=321.64 AICc=321.85 BIC=330
The drift has to be included, since it is significant.
This confirms the right-hand side of the Linear Trend model.
If co2 had a random walk trend instead of a linear trend, fitting MA(11) after differencing with (d=0, D=1) should result in all \(\theta_i\) close to 1.
Arima(co2, order=c(0,0,11), seasonal=c(0,1,0), include.drift=TRUE)
## Series: co2
## ARIMA(0,0,11)(0,1,0)[12] with drift
##
## Coefficients:
## ma1 ma2 ma3 ma4 ma5 ma6 ma7 ma8 ma9 ma10
## 0.3972 0.6207 0.2726 0.7195 0.4868 0.7640 0.4740 0.8061 0.1842 0.5929
## s.e. 0.1611 0.1247 0.1690 0.0886 0.1660 0.1241 0.1696 0.1164 0.1883 0.1446
## ma11 drift
## 0.3300 0.1430
## s.e. 0.2036 0.0371
##
## sigma^2 = 0.6598: log likelihood = -145.02
## AIC=316.04 AICc=319.48 BIC=352.28
The drift has to be included, since it is significant. This analysis shows no sign that the original co2 series has a random walk trend: \[
Y_t = W_t + S_t + \epsilon_t,
\] where \(W_t\) is a random walk. This further confirms the linear trend model.
Y.pred = forecast(Fit21, 12)
Y.pred
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2005 382.8990 381.9631 383.8349 381.4677 384.3304
## Feb 2005 383.6062 382.6078 384.6046 382.0793 385.1331
## Mar 2005 383.9971 382.9574 385.0367 382.4070 385.5871
## Apr 2005 384.5739 383.5064 385.6415 382.9413 386.2066
## May 2005 385.0562 383.9697 386.1427 383.3946 386.7179
## Jun 2005 383.0545 381.9549 384.1540 381.3728 384.7361
## Jul 2005 376.3810 375.2725 377.4895 374.6856 378.0763
## Aug 2005 370.3794 369.2647 371.4942 368.6746 372.0843
## Sep 2005 371.1485 370.0295 372.2675 369.4371 372.8599
## Oct 2005 375.8221 374.7002 376.9441 374.1063 377.5380
## Nov 2005 380.4135 379.2896 381.5374 378.6946 382.1324
## Dec 2005 383.1663 382.0411 384.2916 381.4454 384.8872
plot(Y.pred)
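To see how close the two models' forecasts are, the two 12-step predictions for 2005 can be overlaid; a sketch using objects defined earlier (Yhat is model (1)'s forecast with the seasonal averages added back, and Y.pred is the SARIMA forecast just computed):
ts.plot(Yhat, Y.pred$mean, col=c("blue", "red"), lty=1:2,
        main="Model (1) vs SARIMA 12-step forecasts for 2005")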
#--- This line will load it to R
source('https://nmimoto.github.io/R/TS-00.txt')
Pred = Rolling1step.forecast(co2,
                             window.size=100,
                             Arima.order=c(1,0,1),
                             include.drift=TRUE,
                             seasonal=c(0, 1, 1))
##
## Total length 132 , window size 100 .
## Last 32 obs retrospectively forecasted with Rolling 1-step
## prediction using same order and fized window size.
##
## Average Prediction Error: -0.0304
## root Mean Squared Error of Prediction: 2.6058
# Pred
Fit11
## Series: co2.DS
## Regression with ARIMA(2,0,0)(2,0,0)[12] errors
##
## Coefficients:
## ar1 ar2 sar1 sar2 intercept xreg
## 0.3855 0.2622 0.2462 0.2003 -3624.1376 1.8126
## s.e. 0.0852 0.0891 0.0901 0.0927 150.3886 0.0752
##
## sigma^2 = 0.5676: log likelihood = -148.18
## AIC=310.36 AICc=311.26 BIC=330.54
sqrt(Fit11$sigma2)
## [1] 0.7534028
After fitting the two models, model (1) is the better one: \[ \mbox{ Linear Trend + Seasonality + ARMA(2,0)x(2,0)} \]
After the seasonality (monthly averages) was removed and a regression line was fit, the residuals were stationary.
After the seasonal difference, force-fitting an SMA(1) produced a parameter estimate very close to 1 in magnitude (a unit root). This suggests that before the seasonal difference, \(\epsilon_t\) was already present on its own.
If \(\epsilon_t\) is present by itself, we should not difference the series any further.
There was no indication of a random walk trend in co2 in the seasonal-difference analysis.
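The out-of-sample comparison that supports this conclusion can be collected in one place; a minimal sketch, with the RMSE values copied from the two Rolling1step.forecast() outputs above (they are comparable because co2.DS differs from co2 only by a deterministic seasonal shift):
rmse = c(LinearTrend.ARMA = 0.8759,   # model (1): rolling 1-step RMSE on co2.DS
         SARIMA.drift     = 2.6058)   # model (2): rolling 1-step RMSE on co2
rmse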