Important: You only need to type the quoted commands in R console

Content

  1. Warmup
  2. AR Models

1. Warmup

  • ggfortify is useful for time series object ploting
require(ggfortify) 
## Loading required package: ggfortify
## Loading required package: ggplot2
y1 <- arima.sim(model=list(ar=c(1.3,-.4)),1000) #Simulation of AR(2)
y2 <- arima.sim(model=list(ar=c(.8,-.7)),1000)
y <- ts(cumsum(c(0,rnorm(999)))) #Simulation of a random walk process
  • An example of AR(2)-1
autoplot(y1, ts.colour = 'red', ts.linetype = 'dashed', main = 'Simulation of AR(2) ar=c(1.3,-.4)')

  • An example of AR(2)-2
autoplot(y2, ts.colour = 'darkred', ts.linetype = 'dashed', main = 'Simulation of AR(2) ar=c(.3,-.7)')

  • Random Walk
autoplot(y, ts.colour = 'darkblue', ts.linetype = 'dashed', main = "Random walk")

  • ACF Plot
acf(y,main="ACF of a random walk process")

2. AR Models

  • Load data and create a time-series object
gnp <- scan(file="q-dgnp82.txt") 
gnp1 <- ts(gnp,frequency=4,start=c(1970,2))
autoplot(gnp1, ts.colour = 'darkblue', ts.geom = 'ribbon', main = "GNP")

* Load data in a different way

da <- read.table("q-dgnp82.txt") 
head(da)
##        V1
## 1 0.00632
## 2 0.00366
## 3 0.01202
## 4 0.00627
## 5 0.01761
## 6 0.00918
x <- da[,1]
head(x)
## [1] 0.00632 0.00366 0.01202 0.00627 0.01761 0.00918
  • Basic statistics and data visualization
require(fBasics)
## Loading required package: fBasics
## Warning: package 'fBasics' was built under R version 3.4.4
## Loading required package: timeDate
## Loading required package: timeSeries
## Warning: package 'timeSeries' was built under R version 3.4.4
basicStats(x)
##                      x
## nobs        176.000000
## NAs           0.000000
## Minimum      -0.023910
## Maximum       0.039890
## 1. Quartile   0.001050
## 3. Quartile   0.014020
## Mean          0.007741
## Median        0.008570
## Sum           1.362460
## SE Mean       0.000809
## LCL Mean      0.006145
## UCL Mean      0.009337
## Variance      0.000115
## Stdev         0.010728
## Skewness     -0.152248
## Kurtosis      0.412962
autoplot(ts(x), ts.colour = 'blue', ts.geom = 'ribbon', main = "GNP")

qplot(x[1:175],x[2:176], xlab = "index", ylab = "return", geom = "point")

qplot(x[1:174],x[3:176], xlab = "index", ylab = "return", geom = "point")

acf(x,lag=12)

  • Ljung-Box testing: mean = 0.
pacf(x,lag.max=12,main="PACF lag=12")

Box.test(x,lag=10,type='Ljung')
## 
##  Box-Ljung test
## 
## data:  x
## X-squared = 43.234, df = 10, p-value = 4.515e-06
t.test(x)  
## 
##  One Sample t-test
## 
## data:  x
## t = 9.5734, df = 175, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.00614535 0.00933715
## sample estimates:
##  mean of x 
## 0.00774125
  • AR(1) roughly based on PACF
m0 <- arima(gnp,order=c(1,0,0))
#Usual estimation of ARIMA models
m0
## 
## Call:
## arima(x = gnp, order = c(1, 0, 0))
## 
## Coefficients:
##          ar1  intercept
##       0.3787     0.0077
## s.e.  0.0698     0.0012
## 
## sigma^2 estimated as 9.801e-05:  log likelihood = 562.47,  aic = -1118.94
  • Model checking: possible serial correlations exist in residual
tsdiag(m0,gof=20) 

  • Refine the model. In general, we can start directly from here.
m1=ar(x,method="mle") #Find the AR order
m1
## 
## Call:
## ar(x = x, method = "mle")
## 
## Coefficients:
##       1        2        3  
##  0.3480   0.1793  -0.1423  
## 
## Order selected 3  sigma^2 estimated as  9.427e-05
names(m1)
##  [1] "order"        "ar"           "var.pred"     "x.mean"      
##  [5] "aic"          "n.used"       "order.max"    "partialacf"  
##  [9] "resid"        "method"       "series"       "frequency"   
## [13] "call"         "asy.var.coef"
m1$aic #AIC values for different order p
##          0          1          2          3          4          5 
## 27.8466897  2.7416324  1.6032416  0.0000000  0.3027852  2.2426608 
##          6          7          8          9         10         11 
##  4.0520840  6.0254750  5.9046676  7.5718635  7.8953337  9.6788727 
##         12 
##  7.1975452
#The minimum AIC is adjusted to zero
m1$order #An AR(3) is selected based on AIC
## [1] 3
  • view residual series and conduct model checking
autoplot(ts(m1$resid),ts.colour = "blue",  ts.geom = 'ribbon', main="AR(3) fit for GNP")

Box.test(m1$resid,lag=10,type="Ljung") #Model checking
## 
##  Box-Ljung test
## 
## data:  m1$resid
## X-squared = 7.0808, df = 10, p-value = 0.7178
(pv <- 1-pchisq(7.0808,10)) #R uses 10 degrees of freedom
## [1] 0.7177956
(pv1=1-pchisq(7.0808,10-3)) #It should be m-g (10-3) =7
## [1] 0.4205158
  • Fit a AR(3)
(m2 <- arima(gnp,order=c(3,0,0)))# Usual estimation of ARIMA models
## 
## Call:
## arima(x = gnp, order = c(3, 0, 0))
## 
## Coefficients:
##          ar1     ar2      ar3  intercept
##       0.3480  0.1793  -0.1423     0.0077
## s.e.  0.0745  0.0778   0.0745     0.0012
## 
## sigma^2 estimated as 9.427e-05:  log likelihood = 565.84,  aic = -1121.68
# In R, 'intercept' denotes the mean of the series. 
mean(gnp)
## [1] 0.00774125
  • Compute the constant term phi(0): (1-.348-.1793+.1423)*0.0077
phi = 1
for (i in 1:length(m2$coef)) {
  if (i == length(m2$coef)) {
    (phi = phi * m2$coef[i])
  } else {
    phi = phi-m2$coef[i]
  }
}
  • Residual standard error
names(m2)
##  [1] "coef"      "sigma2"    "var.coef"  "mask"      "loglik"   
##  [6] "aic"       "arma"      "residuals" "call"      "series"   
## [11] "code"      "n.cond"    "nobs"      "model"
sqrt(m2$sigma2) 
## [1] 0.009709322
  • Characteristic equation and solutions
(p1 <- c(1,-m2$coef[1:3])) 
##                   ar1        ar2        ar3 
##  1.0000000 -0.3480209 -0.1793027  0.1422638
(roots=polyroot(p1)) #Find solutions
## [1]  1.590253+1.063882i -1.920152+0.000000i  1.590253-1.063882i
Mod(roots) #Compute the modulus of the solutions
## [1] 1.913308 1.920152 1.913308
  • To compute average length of business cycles
(k=2*pi/acos(1.590253/1.913308)) 
## [1] 10.65638
  • Model Checking
tsdiag(m2,gof=20)  # Model checking

Box.test(m2$resid,lag=10,type="Ljung") 
## 
##  Box-Ljung test
## 
## data:  m2$resid
## X-squared = 7.0169, df = 10, p-value = 0.7239
(pv2=1-pchisq(7.0169,10)) #R uses 10 degrees of freedom
## [1] 0.7238485
(pv3=1-pchisq(7.0169,10-3)) #It should be m-g (10-3) =7
## [1] 0.4271224
require(forecast)
## Loading required package: forecast
pre <- forecast(m2,level = c(95), h = 9) # Prediction
dev.off() # make sure no graphics error 
## null device 
##           1
autoplot(pre)
  • Remove the insignificant coefficients
(m3 <- arima(gnp,order=c(2,0,0))) # lag-3 coeff of m2 insignificant at 5% level
## 
## Call:
## arima(x = gnp, order = c(2, 0, 0))
## 
## Coefficients:
##          ar1     ar2  intercept
##       0.3288  0.1331     0.0076
## s.e.  0.0746  0.0748     0.0014
## 
## sigma^2 estimated as 9.626e-05:  log likelihood = 564.04,  aic = -1120.08
p2 = c(1,-m3$coef[1:2]) #Characteristic equation

(roots=polyroot(p2)) #Find solutions
## [1]  1.771472+0i -4.242453+0i
# No complex solutions, no business cycles
Mod(roots) #Compute the modulus of the solutions
## [1] 1.771472 4.242453
tsdiag(m3,gof=20)  # Model checking: AR(3) may be better

Box.test(m3$resid,lag=10,type="Ljung") 
## 
##  Box-Ljung test
## 
## data:  m3$resid
## X-squared = 9.0263, df = 10, p-value = 0.5296
(pv4 = 1-pchisq(9.0263,10)) #R uses 10 degrees of freedom
## [1] 0.5296094
(pv5 = 1-pchisq(9.0263,10-2)) #It should be m-g (10-2) =8
## [1] 0.3400822
  • In general, to deal with insignificant coeffs, we use the the subcommand ‘fixed’
(m4 <- arima(gnp,order=c(3,0,0),fixed=c(NA,NA,0,NA)))
## Warning in arima(gnp, order = c(3, 0, 0), fixed = c(NA, NA, 0, NA)): some
## AR parameters were fixed: setting transform.pars = FALSE
## 
## Call:
## arima(x = gnp, order = c(3, 0, 0), fixed = c(NA, NA, 0, NA))
## 
## Coefficients:
##          ar1     ar2  ar3  intercept
##       0.3288  0.1330    0     0.0076
## s.e.  0.0746  0.0748    0     0.0014
## 
## sigma^2 estimated as 9.626e-05:  log likelihood = 564.04,  aic = -1120.08
# The subcommand 'fixed' is used to fix parameter values, 
# where NA denotes estimation and 0 means fixing the parameter to 0.
# The ordering of the parameters can be found using m2$coef.