Content
1. Warmup
- ggfortify is useful for time series object ploting
require(ggfortify)
## Loading required package: ggfortify
## Loading required package: ggplot2
y1 <- arima.sim(model=list(ar=c(1.3,-.4)),1000) #Simulation of AR(2)
y2 <- arima.sim(model=list(ar=c(.8,-.7)),1000)
y <- ts(cumsum(c(0,rnorm(999)))) #Simulation of a random walk process
- An example of AR(2)-1
autoplot(y1, ts.colour = 'red', ts.linetype = 'dashed', main = 'Simulation of AR(2) ar=c(1.3,-.4)')
- An example of AR(2)-2
autoplot(y2, ts.colour = 'darkred', ts.linetype = 'dashed', main = 'Simulation of AR(2) ar=c(.3,-.7)')
- Random Walk
autoplot(y, ts.colour = 'darkblue', ts.linetype = 'dashed', main = "Random walk")
- ACF Plot
acf(y,main="ACF of a random walk process")
2. AR Models
- Load data and create a time-series object
gnp <- scan(file="q-dgnp82.txt")
gnp1 <- ts(gnp,frequency=4,start=c(1970,2))
autoplot(gnp1, ts.colour = 'darkblue', ts.geom = 'ribbon', main = "GNP")
* Load data in a different way
da <- read.table("q-dgnp82.txt")
head(da)
## V1
## 1 0.00632
## 2 0.00366
## 3 0.01202
## 4 0.00627
## 5 0.01761
## 6 0.00918
x <- da[,1]
head(x)
## [1] 0.00632 0.00366 0.01202 0.00627 0.01761 0.00918
- Basic statistics and data visualization
require(fBasics)
## Loading required package: fBasics
## Warning: package 'fBasics' was built under R version 3.4.4
## Loading required package: timeDate
## Loading required package: timeSeries
## Warning: package 'timeSeries' was built under R version 3.4.4
basicStats(x)
## x
## nobs 176.000000
## NAs 0.000000
## Minimum -0.023910
## Maximum 0.039890
## 1. Quartile 0.001050
## 3. Quartile 0.014020
## Mean 0.007741
## Median 0.008570
## Sum 1.362460
## SE Mean 0.000809
## LCL Mean 0.006145
## UCL Mean 0.009337
## Variance 0.000115
## Stdev 0.010728
## Skewness -0.152248
## Kurtosis 0.412962
autoplot(ts(x), ts.colour = 'blue', ts.geom = 'ribbon', main = "GNP")
qplot(x[1:175],x[2:176], xlab = "index", ylab = "return", geom = "point")
qplot(x[1:174],x[3:176], xlab = "index", ylab = "return", geom = "point")
acf(x,lag=12)
- Ljung-Box testing: mean = 0.
pacf(x,lag.max=12,main="PACF lag=12")
Box.test(x,lag=10,type='Ljung')
##
## Box-Ljung test
##
## data: x
## X-squared = 43.234, df = 10, p-value = 4.515e-06
t.test(x)
##
## One Sample t-test
##
## data: x
## t = 9.5734, df = 175, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.00614535 0.00933715
## sample estimates:
## mean of x
## 0.00774125
- AR(1) roughly based on PACF
m0 <- arima(gnp,order=c(1,0,0))
#Usual estimation of ARIMA models
m0
##
## Call:
## arima(x = gnp, order = c(1, 0, 0))
##
## Coefficients:
## ar1 intercept
## 0.3787 0.0077
## s.e. 0.0698 0.0012
##
## sigma^2 estimated as 9.801e-05: log likelihood = 562.47, aic = -1118.94
- Model checking: possible serial correlations exist in residual
tsdiag(m0,gof=20)
- Refine the model. In general, we can start directly from here.
m1=ar(x,method="mle") #Find the AR order
m1
##
## Call:
## ar(x = x, method = "mle")
##
## Coefficients:
## 1 2 3
## 0.3480 0.1793 -0.1423
##
## Order selected 3 sigma^2 estimated as 9.427e-05
names(m1)
## [1] "order" "ar" "var.pred" "x.mean"
## [5] "aic" "n.used" "order.max" "partialacf"
## [9] "resid" "method" "series" "frequency"
## [13] "call" "asy.var.coef"
m1$aic #AIC values for different order p
## 0 1 2 3 4 5
## 27.8466897 2.7416324 1.6032416 0.0000000 0.3027852 2.2426608
## 6 7 8 9 10 11
## 4.0520840 6.0254750 5.9046676 7.5718635 7.8953337 9.6788727
## 12
## 7.1975452
#The minimum AIC is adjusted to zero
m1$order #An AR(3) is selected based on AIC
## [1] 3
- view residual series and conduct model checking
autoplot(ts(m1$resid),ts.colour = "blue", ts.geom = 'ribbon', main="AR(3) fit for GNP")
Box.test(m1$resid,lag=10,type="Ljung") #Model checking
##
## Box-Ljung test
##
## data: m1$resid
## X-squared = 7.0808, df = 10, p-value = 0.7178
(pv <- 1-pchisq(7.0808,10)) #R uses 10 degrees of freedom
## [1] 0.7177956
(pv1=1-pchisq(7.0808,10-3)) #It should be m-g (10-3) =7
## [1] 0.4205158
- Fit a AR(3)
(m2 <- arima(gnp,order=c(3,0,0)))# Usual estimation of ARIMA models
##
## Call:
## arima(x = gnp, order = c(3, 0, 0))
##
## Coefficients:
## ar1 ar2 ar3 intercept
## 0.3480 0.1793 -0.1423 0.0077
## s.e. 0.0745 0.0778 0.0745 0.0012
##
## sigma^2 estimated as 9.427e-05: log likelihood = 565.84, aic = -1121.68
# In R, 'intercept' denotes the mean of the series.
mean(gnp)
## [1] 0.00774125
- Compute the constant term phi(0): (1-.348-.1793+.1423)*0.0077
phi = 1
for (i in 1:length(m2$coef)) {
if (i == length(m2$coef)) {
(phi = phi * m2$coef[i])
} else {
phi = phi-m2$coef[i]
}
}
- Residual standard error
names(m2)
## [1] "coef" "sigma2" "var.coef" "mask" "loglik"
## [6] "aic" "arma" "residuals" "call" "series"
## [11] "code" "n.cond" "nobs" "model"
sqrt(m2$sigma2)
## [1] 0.009709322
- Characteristic equation and solutions
(p1 <- c(1,-m2$coef[1:3]))
## ar1 ar2 ar3
## 1.0000000 -0.3480209 -0.1793027 0.1422638
(roots=polyroot(p1)) #Find solutions
## [1] 1.590253+1.063882i -1.920152+0.000000i 1.590253-1.063882i
Mod(roots) #Compute the modulus of the solutions
## [1] 1.913308 1.920152 1.913308
- To compute average length of business cycles
(k=2*pi/acos(1.590253/1.913308))
## [1] 10.65638
- Model Checking
tsdiag(m2,gof=20) # Model checking
Box.test(m2$resid,lag=10,type="Ljung")
##
## Box-Ljung test
##
## data: m2$resid
## X-squared = 7.0169, df = 10, p-value = 0.7239
(pv2=1-pchisq(7.0169,10)) #R uses 10 degrees of freedom
## [1] 0.7238485
(pv3=1-pchisq(7.0169,10-3)) #It should be m-g (10-3) =7
## [1] 0.4271224
require(forecast)
## Loading required package: forecast
pre <- forecast(m2,level = c(95), h = 9) # Prediction
dev.off() # make sure no graphics error
## null device
## 1
autoplot(pre)
- Remove the insignificant coefficients
(m3 <- arima(gnp,order=c(2,0,0))) # lag-3 coeff of m2 insignificant at 5% level
##
## Call:
## arima(x = gnp, order = c(2, 0, 0))
##
## Coefficients:
## ar1 ar2 intercept
## 0.3288 0.1331 0.0076
## s.e. 0.0746 0.0748 0.0014
##
## sigma^2 estimated as 9.626e-05: log likelihood = 564.04, aic = -1120.08
p2 = c(1,-m3$coef[1:2]) #Characteristic equation
(roots=polyroot(p2)) #Find solutions
## [1] 1.771472+0i -4.242453+0i
# No complex solutions, no business cycles
Mod(roots) #Compute the modulus of the solutions
## [1] 1.771472 4.242453
tsdiag(m3,gof=20) # Model checking: AR(3) may be better
Box.test(m3$resid,lag=10,type="Ljung")
##
## Box-Ljung test
##
## data: m3$resid
## X-squared = 9.0263, df = 10, p-value = 0.5296
(pv4 = 1-pchisq(9.0263,10)) #R uses 10 degrees of freedom
## [1] 0.5296094
(pv5 = 1-pchisq(9.0263,10-2)) #It should be m-g (10-2) =8
## [1] 0.3400822
- In general, to deal with insignificant coeffs, we use the the subcommand ‘fixed’
(m4 <- arima(gnp,order=c(3,0,0),fixed=c(NA,NA,0,NA)))
## Warning in arima(gnp, order = c(3, 0, 0), fixed = c(NA, NA, 0, NA)): some
## AR parameters were fixed: setting transform.pars = FALSE
##
## Call:
## arima(x = gnp, order = c(3, 0, 0), fixed = c(NA, NA, 0, NA))
##
## Coefficients:
## ar1 ar2 ar3 intercept
## 0.3288 0.1330 0 0.0076
## s.e. 0.0746 0.0748 0 0.0014
##
## sigma^2 estimated as 9.626e-05: log likelihood = 564.04, aic = -1120.08
# The subcommand 'fixed' is used to fix parameter values,
# where NA denotes estimation and 0 means fixing the parameter to 0.
# The ordering of the parameters can be found using m2$coef.