Content
- Linear and nonlinear Market Model
- Fit all possible models.
- Regression model for US weekly interest rates
1. Linear and nonlinear Market Model
We download the required dataset from IVLE and load the required libraries.
require(data.table)
## Loading required package: data.table
require(ggplot2)
## Loading required package: ggplot2
require(MASS)
## Loading required package: MASS
- Load GM and SP daily log returns and plot
# Read the GM and S&P daily log returns; fread() comes from data.table.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
da <- fread("d-gmsp2010.csv", header = TRUE)
# Scatter plot with GM on the x-axis and SP on the y-axis (the same axes as
# qplot(da$GM, da$SP); qplot() is deprecated in current ggplot2 releases).
ggplot(da, aes(x = GM, y = SP)) + geom_point()
- Fit the Market Model
# Market model with intercept: regress the GM returns on the S&P returns.
m1 <- lm(GM ~ SP, data = da)
summary(m1)
##
## Call:
## lm(formula = GM ~ SP, data = da)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.097311 -0.009406 0.000137 0.009085 0.121047
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.891e-05 3.724e-04 0.131 0.896
## SP 3.211e-01 2.085e-02 15.400 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01706 on 2098 degrees of freedom
## Multiple R-squared: 0.1016, Adjusted R-squared: 0.1011
## F-statistic: 237.2 on 1 and 2098 DF, p-value: < 2.2e-16
# Akaike information criterion of the market model with intercept.
AIC(m1)
## [1] -11133.63
- Confidence intervals for estimated regression parameters
# Confidence intervals (2.5% and 97.5% bounds) for the intercept and SP slope.
confint(m1)
## 2.5 % 97.5 %
## (Intercept) -0.0006814663 0.0007792855
## SP 0.2802470871 0.3620391875
- Remove the nonsignificant Intercept (constant) term
# Market model through the origin: the "-1" term drops the intercept.
# NOTE: for a fit without intercept, summary() computes R-squared about zero
# rather than about the mean, so it is not directly comparable with m1's.
m11 <- lm(GM ~ -1 + SP, data = da)
summary(m11)
##
## Call:
## lm(formula = GM ~ -1 + SP, data = da)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.097259 -0.009358 0.000186 0.009133 0.121096
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## SP 0.32119 0.02085 15.41 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01706 on 2099 degrees of freedom
## Multiple R-squared: 0.1016, Adjusted R-squared: 0.1012
## F-statistic: 237.4 on 1 and 2099 DF, p-value: < 2.2e-16
# Akaike information criterion of the no-intercept market model.
AIC(m11)
## [1] -11135.61
- Locate the time points of non-positive market returns and build the indicator c1 and the truncated return nsp used by the nonlinear models.
# Build the extra regressors for the nonlinear market models.
# idx: positions of the non-positive S&P returns. which() skips NA entries,
# whereas logical subsetting would put NA into idx and make the subscripted
# assignments below fail.
idx <- which(da$SP <= 0)
# nsp: the S&P return where it is non-positive, 0 elsewhere
# (one entry per observation, i.e. length(da$SP) values).
nsp <- numeric(length(da$SP))
nsp[idx] <- da$SP[idx]
# c1: 0/1 indicator of a non-positive S&P return.
c1 <- numeric(length(da$SP))
c1[idx] <- 1
xx <- data.frame(gm = da$GM, sp = da$SP, c1, nsp)
head(xx) # show the resulting variables
## gm sp c1 nsp
## 1 -0.024656038 -0.008401001 1 -0.008401001
## 2 0.006893351 0.009515884 0 0.000000000
## 3 0.009512750 -0.007268749 1 -0.007268749
## 4 0.000000000 -0.001684864 1 -0.001684864
## 5 0.011764602 -0.001687820 1 -0.001687820
## 6 0.016816927 0.048361018 0 0.000000000
2. Fit all possible models
- With different intercepts (alpha) for positive and negative SP log returns
# Regime-specific intercept: c1 shifts alpha when the S&P return is
# non-positive; the slope on sp is shared by both regimes.
m2 <- lm(gm ~ c1 + sp, data = xx)
summary(m2)
##
## Call:
## lm(formula = gm ~ c1 + sp, data = xx)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.097518 -0.009445 0.000111 0.009068 0.121202
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0002085 0.0006637 0.314 0.753
## c1 -0.0003147 0.0010830 -0.291 0.771
## sp 0.3147464 0.0303246 10.379 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01707 on 2097 degrees of freedom
## Multiple R-squared: 0.1016, Adjusted R-squared: 0.1007
## F-statistic: 118.6 on 2 and 2097 DF, p-value: < 2.2e-16
- With different coefficients (beta) for positive and negative SP log returns
# Regime-specific slope: the coefficient of nsp is the additional beta that
# applies when the S&P return is non-positive; the intercept is shared.
m3 <- lm(gm ~ nsp + sp, data = xx)
summary(m3)
##
## Call:
## lm(formula = gm ~ nsp + sp, data = xx)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.096173 -0.009499 0.000076 0.009166 0.120713
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0003833 0.0005413 0.708 0.479
## nsp 0.0516089 0.0606215 0.851 0.395
## sp 0.2955340 0.0366035 8.074 1.14e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01707 on 2097 degrees of freedom
## Multiple R-squared: 0.1019, Adjusted R-squared: 0.101
## F-statistic: 118.9 on 2 and 2097 DF, p-value: < 2.2e-16
- With different intercepts (alpha) and coefficients (beta) for positive and negative SP log returns
# Full model: both the intercept (through c1) and the slope (through nsp)
# are allowed to differ when the S&P return is non-positive.
m4 <- lm(gm ~ sp + c1 + nsp, data = xx)
summary(m4)
##
## Call:
## lm(formula = gm ~ sp + c1 + nsp, data = xx)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.096386 -0.009500 0.000071 0.009182 0.120877
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0005593 0.0007792 0.718 0.473
## sp 0.2883550 0.0431574 6.681 3.02e-11 ***
## c1 -0.0003404 0.0010835 -0.314 0.753
## nsp 0.0521342 0.0606576 0.859 0.390
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01707 on 2096 degrees of freedom
## Multiple R-squared: 0.1019, Adjusted R-squared: 0.1006
## F-statistic: 79.28 on 3 and 2096 DF, p-value: < 2.2e-16
# Sequential ANOVA table: each term is tested after the terms listed before it
# in the formula (sp, then c1, then nsp).
anova(m4)
## Analysis of Variance Table
##
## Response: gm
## Df Sum Sq Mean Sq F value Pr(>F)
## sp 1 0.06906 0.069059 237.0218 <2e-16 ***
## c1 1 0.00002 0.000025 0.0844 0.7714
## nsp 1 0.00022 0.000215 0.7387 0.3902
## Residuals 2096 0.61069 0.000291
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m3,m4) # partial F-test, H0: in the full model the coefficient of c1 (the intercept shift) is zero
## Analysis of Variance Table
##
## Model 1: gm ~ nsp + sp
## Model 2: gm ~ sp + c1 + nsp
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 2097 0.61072
## 2 2096 0.61069 1 2.8758e-05 0.0987 0.7534
anova(m2,m4) # partial F-test, H0: in the full model the coefficient of nsp (the slope shift) is zero
## Analysis of Variance Table
##
## Model 1: gm ~ c1 + sp
## Model 2: gm ~ sp + c1 + nsp
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 2097 0.61091
## 2 2096 0.61069 1 0.00021523 0.7387 0.3902
# Backward elimination with stepAIC() from MASS: drop one term at a time for as
# long as doing so lowers the AIC (selection is by AIC, not by p-values).
# NOTE: the AIC values printed by stepAIC() omit an additive constant, so they
# are not on the same scale as the values returned by AIC().
m5 <- stepAIC(m4)
## Start: AIC=-17092
## gm ~ sp + c1 + nsp
##
## Df Sum of Sq RSS AIC
## - c1 1 0.0000288 0.61072 -17094
## - nsp 1 0.0002152 0.61091 -17093
## <none> 0.61069 -17092
## - sp 1 0.0130069 0.62370 -17050
##
## Step: AIC=-17093.9
## gm ~ sp + nsp
##
## Df Sum of Sq RSS AIC
## - nsp 1 0.0002111 0.61093 -17095
## <none> 0.61072 -17094
## - sp 1 0.0189852 0.62971 -17032
##
## Step: AIC=-17095.17
## gm ~ sp
##
## Df Sum of Sq RSS AIC
## <none> 0.61093 -17095
## - sp 1 0.069059 0.67999 -16872
# The selected model is gm ~ sp, i.e. the plain market model with intercept.
summary(m5)
##
## Call:
## lm(formula = gm ~ sp, data = xx)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.097311 -0.009406 0.000137 0.009085 0.121047
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.891e-05 3.724e-04 0.131 0.896
## sp 3.211e-01 2.085e-02 15.400 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01706 on 2098 degrees of freedom
## Multiple R-squared: 0.1016, Adjusted R-squared: 0.1011
## F-statistic: 237.2 on 1 and 2098 DF, p-value: < 2.2e-16
3. Regression model for US weekly interest rates
# Read the weekly interest-rate data; fread() comes from data.table.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
rate <- fread("w-gs1n36299.txt", header = TRUE)
head(rate)
## y1 y3 date
## 1: 3.24 3.70 19620104
## 2: 3.32 3.75 19620112
## 3: 3.29 3.80 19620120
## 4: 3.26 3.77 19620126
## 5: 3.29 3.80 19620202
## 6: 3.29 3.76 19620208
# Scatter plot of y3 (y-axis) against y1 (x-axis); ggplot() replaces the
# deprecated qplot() shortcut.
ggplot(rate, aes(x = y1, y = y3)) + geom_point()
* Linear model (linear regression analysis)
# Regress the y3 rate on the y1 rate, both in levels.
ratelm <- lm(y3 ~ y1, data = rate)
summary(ratelm)
##
## Call:
## lm(formula = y3 ~ y1, data = rate)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8121 -0.4023 0.0031 0.4026 1.3388
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.910687 0.032250 28.24 <2e-16 ***
## y1 0.923854 0.004389 210.51 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.538 on 1965 degrees of freedom
## Multiple R-squared: 0.9575, Adjusted R-squared: 0.9575
## F-statistic: 4.431e+04 on 1 and 1965 DF, p-value: < 2.2e-16
anova(ratelm) # ANOVA table (sums of squares and F-test) for the levels regression
## Analysis of Variance Table
##
## Response: y3
## Df Sum Sq Mean Sq F value Pr(>F)
## y1 1 12828.1 12828.1 44314 < 2.2e-16 ***
## Residuals 1965 568.8 0.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
aov(ratelm) # the same sums of squares, printed in the aov() layout
## Call:
## aov(formula = ratelm)
##
## Terms:
## y1 Residuals
## Sum of Squares 12828.102 568.835
## Deg. of Freedom 1 1965
##
## Residual standard error: 0.5380365
## Estimated effects may be unbalanced
# Residuals of the levels regression against time. rate$date holds integers of
# the form yyyymmdd, so they are converted to Date first; plotting the raw
# integers would leave artificial gaps at every month and year boundary.
resid_df <- data.frame(
  date = as.Date(as.character(rate$date), format = "%Y%m%d"),
  resid = residuals(ratelm)
)
ggplot(resid_df, aes(x = date, y = resid)) + geom_point()
acf(residuals(ratelm)) # autocorrelations of the residuals
# First differences (week-to-week changes) of the two rates; these are plain
# changes in the rate levels, not log returns.
r3 <- diff(rate$y3)
r1 <- diff(rate$y1)
ggplot(data.frame(r1, r3), aes(x = r1, y = r3)) + geom_point()
# Regress the changes in y3 on the changes in y1, with an intercept.
lm2 <- lm(r3 ~ r1)
summary(lm2)
##
## Call:
## lm(formula = r3 ~ r1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.38060 -0.03338 -0.00054 0.03437 0.47418
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0002475 0.0015380 0.161 0.872
## r1 0.7810590 0.0074651 104.628 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06819 on 1964 degrees of freedom
## Multiple R-squared: 0.8479, Adjusted R-squared: 0.8478
## F-statistic: 1.095e+04 on 1 and 1964 DF, p-value: < 2.2e-16
# Refit without the constant term ("-1"), which was insignificant in the
# previous fit. NOTE: this overwrites the earlier lm2 object.
lm2 <- lm(r3 ~ -1 + r1)
summary(lm2)
##
## Call:
## lm(formula = r3 ~ -1 + r1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.38036 -0.03314 -0.00030 0.03462 0.47444
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## r1 0.781065 0.007463 104.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06818 on 1965 degrees of freedom
## Multiple R-squared: 0.8479, Adjusted R-squared: 0.8478
## F-statistic: 1.095e+04 on 1 and 1965 DF, p-value: < 2.2e-16
# Autocorrelations of the residuals from the no-intercept regression on changes.
acf(lm2$residuals)