Content
- AIC
- Create Moving-average process
- Building an MA model
- Forecast with fitted model
- Consider the drift
- Handling unit root
- Unitroot Test
- Handling outliers
- Build seasonal time series model
- Regression model with time series errors
1. AIC
- Use log likelihood, AIC and Ljung-Box statistics to compare different AR time series models.
gnp <- scan(file='dgnp82.txt')
m1 <- ar(gnp,method="mle")
m1$order
## [1] 3
(m2 <- arima(gnp,order=c(3,0,0)))
##
## Call:
## arima(x = gnp, order = c(3, 0, 0))
##
## Coefficients:
## ar1 ar2 ar3 intercept
## 0.3480 0.1793 -0.1423 0.0077
## s.e. 0.0745 0.0778 0.0745 0.0012
##
## sigma^2 estimated as 9.427e-05: log likelihood = 565.84, aic = -1121.68
tsdiag(m2,gof=24) #from Ljung-Box statistics, m2 is better comparing with m3 below
pacf(gnp)
m3 <- arima(gnp,order=c(1,0,0))
tsdiag(m3,gof=24)
2. Create Moving-average process
- Generate 200 data points from standard Normal distribution
x <- rnorm(200)
x1 = c(0,x[-200]) # add "0" at the beginning and remove x_{200} from the data
yt <- (x+x1)/2 # compute the mean of the two series
yt = yt[-1] # remove the first data point.
acf(yt) # ACF plot shows a significant lag-1 ACF
3. Building an MA model
da <- read.table("m-dec125910-5112.txt",header=T)
dim(da)
## [1] 3720 3
head(da)
## caldt prtnam totret
## 1 19510131 1 0.051262
## 2 19510228 1 0.016479
## 3 19510331 1 -0.015760
## 4 19510430 1 0.053317
## 5 19510531 1 -0.023707
## 6 19510629 1 -0.017999
tail(da)
## caldt prtnam totret
## 3715 20120731 9 -0.023162
## 3716 20120831 9 0.030083
## 3717 20120928 9 0.043477
## 3718 20121031 9 -0.035877
## 3719 20121130 9 0.006842
## 3720 20121231 9 0.041697
- Locate the returns of Decile 5 portfolio
idx = c(1:3720)[da[,2]==5]
rt5 = da[idx,3]
head(rt5)
## [1] 0.086797 0.005635 -0.041380 0.037564 -0.018345 -0.048787
acf(rt5)
- Fit a MA(1) and do prediction
m1 <- arima(rt5,order=c(0,0,1))
tsdiag(m1) #check the fitted model
da <- read.table("d-ibmvwew6202.txt",header=T)
dim(da)
## [1] 10194 4
vw=log(1+da[,3])*100 # Compute percentage log returns of the vw index.
acf(vw,lag.max=10)
m1 <- arima(vw,order=c(0,0,1)) # fits an MA(1) model
tsdiag(m1)
predict(m1,5)
## $pred
## Time Series:
## Start = 10195
## End = 10199
## Frequency = 1
## [1] 0.05036298 0.03960887 0.03960887 0.03960887 0.03960887
##
## $se
## Time Series:
## Start = 10195
## End = 10199
## Frequency = 1
## [1] 0.8823290 0.8917523 0.8917523 0.8917523 0.8917523
4. Forecast with fitted model
- Fit an AR(3) model for time series “gnp”
require(ggplot2)
## Loading required package: ggplot2
require(ggfortify)
## Loading required package: ggfortify
require(forecast)
## Loading required package: forecast
gnp=scan(file = "dgnp82.txt")
m1 <- arima(gnp,order=c(3,0,0))
pre <- forecast(m1,level = c(95), h = 9) # Prediction
dev.off() # make sure no graphics error
## null device
## 1
autoplot(pre)
5. Consider the drift
- Compute log returns of Decile 5 portfolio in (2)
xt <- log(rt5+1)
Pt <- cumsum(xt) # accumulate the log returns
mm <- mean(xt)
- Remove mean from the log returns and compute cumulative partial sum
xt1 <- xt-mm
Xt <- cumsum(xt1)
yu <- max(Pt,Xt)
ylow <- min(Pt,Xt)
plot(ts(Pt),type='l',ylim=c(ylow,yu), ylab = "Pt")
lines(Xt,lty=2)
abline(0,mm) # add the drift on the plot.
6. Handling unit root: Differencing
da = read.table("q-earn-jnj.txt")
head(da)
## V1
## 1 0.71
## 2 0.63
## 3 0.85
## 4 0.44
## 5 0.61
## 6 0.69
jnj=log(da[,1])
dx=diff(jnj) # Regular difference
dx=diff(jnj,4) # Seasonal difference, i.e. dx=x(t)-x(t-4).
7. Unitroot Test
require(fUnitRoots) # Load library
## Loading required package: fUnitRoots
## Loading required package: timeDate
## Loading required package: timeSeries
## Loading required package: fBasics
da = read.table("q-gdpc96.txt",header=T) # Load data U.S. quaterly real GDP series from 1947.I to 2009.IV.
gdp=log(da[,4]) # Take log transformation
adfTest(gdp,lags=3,type=c("c")) # unit-root test with a constant.
##
## Title:
## Augmented Dickey-Fuller Test
##
## Test Results:
## PARAMETER:
## Lag Order: 3
## STATISTIC:
## Dickey-Fuller: -1.798
## P VALUE:
## 0.3873
##
## Description:
## Sun Apr 14 00:17:32 2019 by user: Sirius
8. Handling outliers
(m1=arima(rt5,order=c(0,0,1)))
##
## Call:
## arima(x = rt5, order = c(0, 0, 1))
##
## Coefficients:
## ma1 intercept
## 0.1564 0.0110
## s.e. 0.0385 0.0022
##
## sigma^2 estimated as 0.002629: log likelihood = 1154.42, aic = -2302.83
tsdiag(m1)
which.max(abs(m1$residuals)) # identifies the location of outlier at t=442
## [1] 442
I442=rep(0,744) # create a vector with 744 '0's
I442[442] = 1 #set the 442th element as "1"
plot(I442) # dummy variable for data point t=442
m2=arima(rt5,order=c(0,0,1),xreg=I442)
tsdiag(m2)
which.max(m2$residuals) # shows t=289
## [1] 289
I289=rep(0,744)
I289[289] = 1
X = cbind(I442,I289)
dim(X)
## [1] 744 2
(m3=arima(rt5,order=c(0,0,1),xreg=X)) # handle two outliers.
##
## Call:
## arima(x = rt5, order = c(0, 0, 1), xreg = X)
##
## Coefficients:
## ma1 intercept I442 I289
## 0.1539 0.0110 -0.2657 0.2452
## s.e. 0.0381 0.0021 0.0491 0.0489
##
## sigma^2 estimated as 0.002449: log likelihood = 1180.78, aic = -2351.57
9. Build seasonal time series model
da = read.table("m-hstarts-5912.txt",header=T)
dim(da)
## [1] 638 4
head(da)
## year mon day hstarts
## 1 1959 1 1 96.2
## 2 1959 2 1 99.0
## 3 1959 3 1 127.7
## 4 1959 4 1 150.8
## 5 1959 5 1 152.5
## 6 1959 6 1 147.8
lh = log(da$hstarts)
x1 = ts(lh,frequency=12,start=c(1959,1)) ## create a time series object in R
autoplot(x1) ## Obtain time plot of the data
acf(lh)
dlh=diff(lh) # regular difference
acf(dlh,lag=48)
ddlh=diff(dlh,12) # seasonal difference
acf(ddlh)
(m1 = arima(lh,order=c(0,1,1),seasonal=list(order=c(0,1,1),period=12))) ## Use log housing series
##
## Call:
## arima(x = lh, order = c(0, 1, 1), seasonal = list(order = c(0, 1, 1), period = 12))
##
## Coefficients:
## ma1 sma1
## -0.318 -0.8819
## s.e. 0.035 0.0227
##
## sigma^2 estimated as 0.008141: log likelihood = 607.48, aic = -1208.97
tsdiag(m1,gof=24)
(m1a = arima(lh,order=c(0,1,3),seasonal=list(order=c(0,1,1),period=12))) ## Take care of lag-3 ACF in residuals
##
## Call:
## arima(x = lh, order = c(0, 1, 3), seasonal = list(order = c(0, 1, 1), period = 12))
##
## Coefficients:
## ma1 ma2 ma3 sma1
## -0.3482 0.0239 0.1034 -0.8700
## s.e. 0.0401 0.0410 0.0395 0.0236
##
## sigma^2 estimated as 0.008042: log likelihood = 611.83, aic = -1213.65
tsdiag(m1a,gof=24)
f1=c(NA,0,NA,NA)
(m1b = arima(lh,order=c(0,1,3),seasonal=list(order=c(0,1,1),period=12),fixed=f1)) ## lag-2 coef is insignificant. Fix lag-2 coef = 0.
##
## Call:
## arima(x = lh, order = c(0, 1, 3), seasonal = list(order = c(0, 1, 1), period = 12),
## fixed = f1)
##
## Coefficients:
## ma1 ma2 ma3 sma1
## -0.3409 0 0.1108 -0.8715
## s.e. 0.0374 0 0.0376 0.0234
##
## sigma^2 estimated as 0.008044: log likelihood = 611.66, aic = -1215.31
tsdiag(m1b,gof=24) #Other seasonal ARMA models may fit the data better.
10. Regression model with time series errors
da = read.table("w-gs1n36299.txt",header=T)
head(da)
## y1 y3 date
## 1 3.24 3.70 19620104
## 2 3.32 3.75 19620112
## 3 3.29 3.80 19620120
## 4 3.26 3.77 19620126
## 5 3.29 3.80 19620202
## 6 3.29 3.76 19620208
y3 = da$y3
y1 = da$y1
qplot(y1, y3, geom = "point", colour = "red", xlab = "one-year", ylab = "three-year")
m1=lm(y3~y1) ## linear model (linear regression analysis)
summary(m1)
##
## Call:
## lm(formula = y3 ~ y1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8121 -0.4023 0.0031 0.4026 1.3388
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.910687 0.032250 28.24 <2e-16 ***
## y1 0.923854 0.004389 210.51 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.538 on 1965 degrees of freedom
## Multiple R-squared: 0.9575, Adjusted R-squared: 0.9575
## F-statistic: 4.431e+04 on 1 and 1965 DF, p-value: < 2.2e-16
autoplot(ts(m1$residuals), main = "residual", ylab = "residual")
acf(m1$residuals)
r3 = diff(y3)
r1 = diff(y1)
qplot(r1, r3, colour = "red")
m2 = lm(r3~r1)
summary(m2)
##
## Call:
## lm(formula = r3 ~ r1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.38060 -0.03338 -0.00054 0.03437 0.47418
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0002475 0.0015380 0.161 0.872
## r1 0.7810590 0.0074651 104.628 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06819 on 1964 degrees of freedom
## Multiple R-squared: 0.8479, Adjusted R-squared: 0.8478
## F-statistic: 1.095e+04 on 1 and 1964 DF, p-value: < 2.2e-16
m2 = lm(r3 ~ -1+r1) # remove the insignificant constant term
summary(m2)
##
## Call:
## lm(formula = r3 ~ -1 + r1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.38036 -0.03314 -0.00030 0.03462 0.47444
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## r1 0.781065 0.007463 104.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06818 on 1965 degrees of freedom
## Multiple R-squared: 0.8479, Adjusted R-squared: 0.8478
## F-statistic: 1.095e+04 on 1 and 1965 DF, p-value: < 2.2e-16
acf(m2$residuals)
(m3 = arima(r3,order=c(0,0,1),xreg=r1,include.mean=F))
##
## Call:
## arima(x = r3, order = c(0, 0, 1), xreg = r1, include.mean = F)
##
## Coefficients:
## ma1 r1
## 0.2115 0.7824
## s.e. 0.0224 0.0077
##
## sigma^2 estimated as 0.004456: log likelihood = 2531.84, aic = -5057.67
acf(m3$residuals) #A regression model with other ARMA errors may fit the data better.