Important: You only need to type the quoted commands in R console

Content

  1. AIC
  2. Create Moving-average process
  3. Building an MA model
  4. Forecast with fitted model
  5. Consider the drift
  6. Handling unit root
  7. Unitroot Test
  8. Handling outliers
  9. Build seasonal time series model
  10. Regression model with time series errors

1. AIC

  • Use log likelihood, AIC and Ljung-Box statistics to compare different AR time series models.
gnp <- scan(file='dgnp82.txt')
m1 <- ar(gnp,method="mle")
m1$order
## [1] 3
(m2 <- arima(gnp,order=c(3,0,0)))
## 
## Call:
## arima(x = gnp, order = c(3, 0, 0))
## 
## Coefficients:
##          ar1     ar2      ar3  intercept
##       0.3480  0.1793  -0.1423     0.0077
## s.e.  0.0745  0.0778   0.0745     0.0012
## 
## sigma^2 estimated as 9.427e-05:  log likelihood = 565.84,  aic = -1121.68
tsdiag(m2,gof=24) #from Ljung-Box statistics, m2 is better comparing with m3 below

pacf(gnp)

m3 <- arima(gnp,order=c(1,0,0))
tsdiag(m3,gof=24)

2. Create Moving-average process

  • Generate 200 data points from standard Normal distribution
x <- rnorm(200) 
x1 = c(0,x[-200])  # add "0" at the beginning and remove x_{200} from the data
yt <- (x+x1)/2  # compute the mean of the two series
yt = yt[-1]  # remove the first data point.
acf(yt)  # ACF plot shows a significant lag-1 ACF

3. Building an MA model

da <- read.table("m-dec125910-5112.txt",header=T)
dim(da)  
## [1] 3720    3
head(da)
##      caldt prtnam    totret
## 1 19510131      1  0.051262
## 2 19510228      1  0.016479
## 3 19510331      1 -0.015760
## 4 19510430      1  0.053317
## 5 19510531      1 -0.023707
## 6 19510629      1 -0.017999
tail(da)
##         caldt prtnam    totret
## 3715 20120731      9 -0.023162
## 3716 20120831      9  0.030083
## 3717 20120928      9  0.043477
## 3718 20121031      9 -0.035877
## 3719 20121130      9  0.006842
## 3720 20121231      9  0.041697
  • Locate the returns of Decile 5 portfolio
idx = c(1:3720)[da[,2]==5]  
rt5 = da[idx,3]
head(rt5)
## [1]  0.086797  0.005635 -0.041380  0.037564 -0.018345 -0.048787
acf(rt5)

  • Fit a MA(1) and do prediction
m1 <- arima(rt5,order=c(0,0,1)) 
tsdiag(m1) #check the fitted model

da <- read.table("d-ibmvwew6202.txt",header=T)
dim(da)
## [1] 10194     4
vw=log(1+da[,3])*100  # Compute percentage log returns of the vw index.
acf(vw,lag.max=10) 

m1 <- arima(vw,order=c(0,0,1))  # fits an MA(1) model
tsdiag(m1)

predict(m1,5)
## $pred
## Time Series:
## Start = 10195 
## End = 10199 
## Frequency = 1 
## [1] 0.05036298 0.03960887 0.03960887 0.03960887 0.03960887
## 
## $se
## Time Series:
## Start = 10195 
## End = 10199 
## Frequency = 1 
## [1] 0.8823290 0.8917523 0.8917523 0.8917523 0.8917523

9. Build seasonal time series model

da = read.table("m-hstarts-5912.txt",header=T)
dim(da)
## [1] 638   4
head(da)
##   year mon day hstarts
## 1 1959   1   1    96.2
## 2 1959   2   1    99.0
## 3 1959   3   1   127.7
## 4 1959   4   1   150.8
## 5 1959   5   1   152.5
## 6 1959   6   1   147.8
lh = log(da$hstarts)
x1 = ts(lh,frequency=12,start=c(1959,1)) ## create a time series object in R
autoplot(x1) ## Obtain time plot of the data

acf(lh)

dlh=diff(lh)  # regular difference
acf(dlh,lag=48)

ddlh=diff(dlh,12)  # seasonal difference
acf(ddlh)

(m1 = arima(lh,order=c(0,1,1),seasonal=list(order=c(0,1,1),period=12))) ## Use log housing series
## 
## Call:
## arima(x = lh, order = c(0, 1, 1), seasonal = list(order = c(0, 1, 1), period = 12))
## 
## Coefficients:
##          ma1     sma1
##       -0.318  -0.8819
## s.e.   0.035   0.0227
## 
## sigma^2 estimated as 0.008141:  log likelihood = 607.48,  aic = -1208.97
tsdiag(m1,gof=24)

(m1a = arima(lh,order=c(0,1,3),seasonal=list(order=c(0,1,1),period=12))) ## Take care of lag-3 ACF in residuals
## 
## Call:
## arima(x = lh, order = c(0, 1, 3), seasonal = list(order = c(0, 1, 1), period = 12))
## 
## Coefficients:
##           ma1     ma2     ma3     sma1
##       -0.3482  0.0239  0.1034  -0.8700
## s.e.   0.0401  0.0410  0.0395   0.0236
## 
## sigma^2 estimated as 0.008042:  log likelihood = 611.83,  aic = -1213.65
tsdiag(m1a,gof=24)

f1=c(NA,0,NA,NA)
(m1b = arima(lh,order=c(0,1,3),seasonal=list(order=c(0,1,1),period=12),fixed=f1)) ## lag-2 coef is insignificant. Fix lag-2 coef = 0.
## 
## Call:
## arima(x = lh, order = c(0, 1, 3), seasonal = list(order = c(0, 1, 1), period = 12), 
##     fixed = f1)
## 
## Coefficients:
##           ma1  ma2     ma3     sma1
##       -0.3409    0  0.1108  -0.8715
## s.e.   0.0374    0  0.0376   0.0234
## 
## sigma^2 estimated as 0.008044:  log likelihood = 611.66,  aic = -1215.31
tsdiag(m1b,gof=24) #Other seasonal ARMA models may fit the data better.

10. Regression model with time series errors

da = read.table("w-gs1n36299.txt",header=T)
head(da)
##     y1   y3     date
## 1 3.24 3.70 19620104
## 2 3.32 3.75 19620112
## 3 3.29 3.80 19620120
## 4 3.26 3.77 19620126
## 5 3.29 3.80 19620202
## 6 3.29 3.76 19620208
y3 = da$y3
y1 = da$y1
qplot(y1, y3, geom = "point", colour = "red", xlab = "one-year", ylab = "three-year")

m1=lm(y3~y1)  ## linear model (linear regression analysis)
summary(m1)
## 
## Call:
## lm(formula = y3 ~ y1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8121 -0.4023  0.0031  0.4026  1.3388 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.910687   0.032250   28.24   <2e-16 ***
## y1          0.923854   0.004389  210.51   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.538 on 1965 degrees of freedom
## Multiple R-squared:  0.9575, Adjusted R-squared:  0.9575 
## F-statistic: 4.431e+04 on 1 and 1965 DF,  p-value: < 2.2e-16
autoplot(ts(m1$residuals), main = "residual", ylab = "residual")

acf(m1$residuals)

r3 = diff(y3)
r1 = diff(y1)
qplot(r1, r3, colour = "red")

m2 = lm(r3~r1)
summary(m2)
## 
## Call:
## lm(formula = r3 ~ r1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.38060 -0.03338 -0.00054  0.03437  0.47418 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0002475  0.0015380   0.161    0.872    
## r1          0.7810590  0.0074651 104.628   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06819 on 1964 degrees of freedom
## Multiple R-squared:  0.8479, Adjusted R-squared:  0.8478 
## F-statistic: 1.095e+04 on 1 and 1964 DF,  p-value: < 2.2e-16
m2 = lm(r3 ~ -1+r1) # remove the insignificant constant term
summary(m2)
## 
## Call:
## lm(formula = r3 ~ -1 + r1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.38036 -0.03314 -0.00030  0.03462  0.47444 
## 
## Coefficients:
##    Estimate Std. Error t value Pr(>|t|)    
## r1 0.781065   0.007463   104.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06818 on 1965 degrees of freedom
## Multiple R-squared:  0.8479, Adjusted R-squared:  0.8478 
## F-statistic: 1.095e+04 on 1 and 1965 DF,  p-value: < 2.2e-16
acf(m2$residuals)

(m3 = arima(r3,order=c(0,0,1),xreg=r1,include.mean=F))
## 
## Call:
## arima(x = r3, order = c(0, 0, 1), xreg = r1, include.mean = F)
## 
## Coefficients:
##          ma1      r1
##       0.2115  0.7824
## s.e.  0.0224  0.0077
## 
## sigma^2 estimated as 0.004456:  log likelihood = 2531.84,  aic = -5057.67
acf(m3$residuals) #A regression model with other ARMA errors may fit the data better.