Important: You only need to type the R commands into the R console. Lines beginning with `##` are the output that R prints; do not type them.

Content

  1. Linear and nonlinear Market Model
  2. Fit all possible models.
  3. Regression model for US weekly interest rates

1. Linear and nonlinear Market Model


We download the required dataset from IVLE and load the required libraries.

require(data.table)  
## Loading required package: data.table
require(ggplot2)
## Loading required package: ggplot2
require(MASS)
## Loading required package: MASS
  • Load GM and SP daily log returns and plot
# Read the daily GM and SP log returns; the first row of the file holds the
# column names. TRUE is spelled out because T is an ordinary variable that can
# be reassigned.
da <- fread("d-gmsp2010.csv", header = TRUE)
# Scatter plot with GM on the horizontal axis and SP on the vertical axis.
qplot(da$GM, da$SP)

  • Fit the Market Model
# Market model: least-squares regression of GM on SP with an intercept.
m1 <- lm(GM~SP, da)  
summary(m1)
## 
## Call:
## lm(formula = GM ~ SP, data = da)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.097311 -0.009406  0.000137  0.009085  0.121047 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 4.891e-05  3.724e-04   0.131    0.896    
## SP          3.211e-01  2.085e-02  15.400   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01706 on 2098 degrees of freedom
## Multiple R-squared:  0.1016, Adjusted R-squared:  0.1011 
## F-statistic: 237.2 on 1 and 2098 DF,  p-value: < 2.2e-16
# Akaike information criterion of m1 (smaller is better); compared with m11 below.
AIC(m1)
## [1] -11133.63
  • Confidence intervals for estimated regression parameters
# Two-sided 95% confidence intervals (2.5% and 97.5% limits) for the intercept
# and the SP slope.
confint(m1)
##                     2.5 %       97.5 %
## (Intercept) -0.0006814663 0.0007792855
## SP           0.2802470871 0.3620391875
  • Remove the nonsignificant Intercept (constant) term
# Same regression forced through the origin: "-1" drops the intercept.
# NOTE: summary() computes R-squared differently when there is no intercept
# (the total sum of squares is not centred on the mean), so it is not directly
# comparable with the R-squared of m1.
m11 <- lm(GM ~ -1 + SP, da) 
summary(m11)
## 
## Call:
## lm(formula = GM ~ -1 + SP, data = da)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.097259 -0.009358  0.000186  0.009133  0.121096 
## 
## Coefficients:
##    Estimate Std. Error t value Pr(>|t|)    
## SP  0.32119    0.02085   15.41   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01706 on 2099 degrees of freedom
## Multiple R-squared:  0.1016, Adjusted R-squared:  0.1012 
## F-statistic: 237.4 on 1 and 2099 DF,  p-value: < 2.2e-16
# AIC of the no-intercept model, on the same scale as AIC(m1).
AIC(m11)
## [1] -11135.61
  • Locate the time points of non-positive market returns.
# Positions of the observations whose market return is zero or negative.
# which() replaces the c(1:length(x))[mask] construction and skips NA returns.
idx <- which(da$SP <= 0)
# nsp: the market return when it is non-positive, 0 otherwise.
nsp <- rep(0, length(da$SP)) # vector of zeros, one entry per observation
nsp[idx] <- da$SP[idx]
# c1: indicator equal to 1 when the market return is non-positive, 0 otherwise.
c1 <- rep(0, length(da$SP))
c1[idx] <- 1

xx <- data.frame(gm = da$GM, sp = da$SP, c1, nsp)   # Show the resulting variables
head(xx)
##             gm           sp c1          nsp
## 1 -0.024656038 -0.008401001  1 -0.008401001
## 2  0.006893351  0.009515884  0  0.000000000
## 3  0.009512750 -0.007268749  1 -0.007268749
## 4  0.000000000 -0.001684864  1 -0.001684864
## 5  0.011764602 -0.001687820  1 -0.001687820
## 6  0.016816927  0.048361018  0  0.000000000

2. Fit all possible models

  • With different intercepts (alpha) for positive and negative SP log returns
# c1 shifts the intercept: it is (Intercept) when sp > 0 and
# (Intercept) + the c1 coefficient when sp <= 0; the slope on sp is shared.
m2 <- lm(gm ~ c1+sp, xx) 
summary(m2)
## 
## Call:
## lm(formula = gm ~ c1 + sp, data = xx)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.097518 -0.009445  0.000111  0.009068  0.121202 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.0002085  0.0006637   0.314    0.753    
## c1          -0.0003147  0.0010830  -0.291    0.771    
## sp           0.3147464  0.0303246  10.379   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01707 on 2097 degrees of freedom
## Multiple R-squared:  0.1016, Adjusted R-squared:  0.1007 
## F-statistic: 118.6 on 2 and 2097 DF,  p-value: < 2.2e-16
  • With different coefficients (beta) for positive and negative SP log returns
# nsp equals sp when sp <= 0 and 0 otherwise, so the slope is the sp
# coefficient when sp > 0 and the sum of the sp and nsp coefficients when
# sp <= 0; the intercept is shared.
m3 <- lm(gm ~ nsp+sp, xx)
summary(m3)
## 
## Call:
## lm(formula = gm ~ nsp + sp, data = xx)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.096173 -0.009499  0.000076  0.009166  0.120713 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0003833  0.0005413   0.708    0.479    
## nsp         0.0516089  0.0606215   0.851    0.395    
## sp          0.2955340  0.0366035   8.074 1.14e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01707 on 2097 degrees of freedom
## Multiple R-squared:  0.1019, Adjusted R-squared:  0.101 
## F-statistic: 118.9 on 2 and 2097 DF,  p-value: < 2.2e-16
  • With different intercepts (alpha) and coefficients (beta) for positive and non-positive SP log returns
# Full model: both the intercept (via c1) and the slope (via nsp) may differ
# between positive and non-positive market returns.
m4 <- lm(gm ~ sp+c1+nsp, xx) 
summary(m4)
## 
## Call:
## lm(formula = gm ~ sp + c1 + nsp, data = xx)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.096386 -0.009500  0.000071  0.009182  0.120877 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.0005593  0.0007792   0.718    0.473    
## sp           0.2883550  0.0431574   6.681 3.02e-11 ***
## c1          -0.0003404  0.0010835  -0.314    0.753    
## nsp          0.0521342  0.0606576   0.859    0.390    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01707 on 2096 degrees of freedom
## Multiple R-squared:  0.1019, Adjusted R-squared:  0.1006 
## F-statistic: 79.28 on 3 and 2096 DF,  p-value: < 2.2e-16
# Sequential ANOVA table: each term is tested after the terms listed above it,
# so the rows depend on the order sp, c1, nsp in the formula.
anova(m4)
## Analysis of Variance Table
## 
## Response: gm
##             Df  Sum Sq  Mean Sq  F value Pr(>F)    
## sp           1 0.06906 0.069059 237.0218 <2e-16 ***
## c1           1 0.00002 0.000025   0.0844 0.7714    
## nsp          1 0.00022 0.000215   0.7387 0.3902    
## Residuals 2096 0.61069 0.000291                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m3,m4)    #F-test: H0: in the three-variable model, the coefficient of c1 (intercept shift) is zero
## Analysis of Variance Table
## 
## Model 1: gm ~ nsp + sp
## Model 2: gm ~ sp + c1 + nsp
##   Res.Df     RSS Df  Sum of Sq      F Pr(>F)
## 1   2097 0.61072                            
## 2   2096 0.61069  1 2.8758e-05 0.0987 0.7534
anova(m2,m4)    #F-test: H0: in the three-variable model, the coefficient of nsp (slope shift) is zero
## Analysis of Variance Table
## 
## Model 1: gm ~ c1 + sp
## Model 2: gm ~ sp + c1 + nsp
##   Res.Df     RSS Df  Sum of Sq      F Pr(>F)
## 1   2097 0.61091                            
## 2   2096 0.61069  1 0.00021523 0.7387 0.3902
# Backward elimination by AIC: at each step drop the term whose removal gives
# the lowest AIC, and stop when no removal lowers it. Selection is by AIC, not
# by significance tests. The AIC values in the trace differ from those of
# AIC() by an additive constant, so compare them only within this trace.
m5 <- stepAIC(m4)
## Start:  AIC=-17092
## gm ~ sp + c1 + nsp
## 
##        Df Sum of Sq     RSS    AIC
## - c1    1 0.0000288 0.61072 -17094
## - nsp   1 0.0002152 0.61091 -17093
## <none>              0.61069 -17092
## - sp    1 0.0130069 0.62370 -17050
## 
## Step:  AIC=-17093.9
## gm ~ sp + nsp
## 
##        Df Sum of Sq     RSS    AIC
## - nsp   1 0.0002111 0.61093 -17095
## <none>              0.61072 -17094
## - sp    1 0.0189852 0.62971 -17032
## 
## Step:  AIC=-17095.17
## gm ~ sp
## 
##        Df Sum of Sq     RSS    AIC
## <none>              0.61093 -17095
## - sp    1  0.069059 0.67999 -16872
# The selected model keeps only sp, i.e. the same fit as m1 above.
summary(m5)
## 
## Call:
## lm(formula = gm ~ sp, data = xx)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.097311 -0.009406  0.000137  0.009085  0.121047 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 4.891e-05  3.724e-04   0.131    0.896    
## sp          3.211e-01  2.085e-02  15.400   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01706 on 2098 degrees of freedom
## Multiple R-squared:  0.1016, Adjusted R-squared:  0.1011 
## F-statistic: 237.2 on 1 and 2098 DF,  p-value: < 2.2e-16

3. Regression model for US weekly interest rates

# Read the weekly interest-rate data; the first row holds the column names
# (y1, y3, date). TRUE is used instead of the reassignable shorthand T.
rate <- fread("w-gs1n36299.txt", header = TRUE)
head(rate)
##      y1   y3     date
## 1: 3.24 3.70 19620104
## 2: 3.32 3.75 19620112
## 3: 3.29 3.80 19620120
## 4: 3.26 3.77 19620126
## 5: 3.29 3.80 19620202
## 6: 3.29 3.76 19620208
# Scatter plot of y3 (vertical axis) against y1 (horizontal axis).
qplot(rate$y1,rate$y3)

* Linear model (linear regression analysis)

# Least-squares regression of y3 on y1 with an intercept, using the rate levels.
ratelm <- lm(y3 ~ y1, rate)  
summary(ratelm)
## 
## Call:
## lm(formula = y3 ~ y1, data = rate)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8121 -0.4023  0.0031  0.4026  1.3388 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.910687   0.032250   28.24   <2e-16 ***
## y1          0.923854   0.004389  210.51   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.538 on 1965 degrees of freedom
## Multiple R-squared:  0.9575, Adjusted R-squared:  0.9575 
## F-statistic: 4.431e+04 on 1 and 1965 DF,  p-value: < 2.2e-16
anova(ratelm) # analysis-of-variance table with the F-test for y1
## Analysis of Variance Table
## 
## Response: y3
##             Df  Sum Sq Mean Sq F value    Pr(>F)    
## y1           1 12828.1 12828.1   44314 < 2.2e-16 ***
## Residuals 1965   568.8     0.3                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
aov(ratelm) # same sums of squares as anova(ratelm), printed in aov's layout
## Call:
##    aov(formula = ratelm)
## 
## Terms:
##                        y1 Residuals
## Sum of Squares  12828.102   568.835
## Deg. of Freedom         1      1965
## 
## Residual standard error: 0.5380365
## Estimated effects may be unbalanced
# Residuals of the levels regression against calendar time. rate$date is stored
# as a number of the form YYYYMMDD, which leaves artificial gaps at every month
# and year change when plotted directly, so it is parsed into a Date first.
qplot(as.Date(as.character(rate$date), format = "%Y%m%d"), ratelm$residuals,
      geom = "point", xlab = "date", ylab = "ratelm$residuals")

acf(ratelm$residuals) # autocorrelations of the residuals from the levels regression

# First differences (changes between consecutive observations) of the two rate
# series; diff() on the raw rates gives changes in level, not log returns.
r3 <- diff(rate$y3)
r1 <- diff(rate$y1)
# Scatter plot of the changes: r1 on the horizontal axis, r3 on the vertical.
qplot(r1,r3)

# Regress the change in y3 on the change in y1, with an intercept.
lm2 <- lm(r3 ~ r1)
summary(lm2)
## 
## Call:
## lm(formula = r3 ~ r1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.38060 -0.03338 -0.00054  0.03437  0.47418 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0002475  0.0015380   0.161    0.872    
## r1          0.7810590  0.0074651 104.628   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06819 on 1964 degrees of freedom
## Multiple R-squared:  0.8479, Adjusted R-squared:  0.8478 
## F-statistic: 1.095e+04 on 1 and 1964 DF,  p-value: < 2.2e-16
# Refit without the intercept; this overwrites the previous lm2.
lm2 <- lm(r3 ~ -1 + r1) # remove the insignificant constant term
summary(lm2)
## 
## Call:
## lm(formula = r3 ~ -1 + r1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.38036 -0.03314 -0.00030  0.03462  0.47444 
## 
## Coefficients:
##    Estimate Std. Error t value Pr(>|t|)    
## r1 0.781065   0.007463   104.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06818 on 1965 degrees of freedom
## Multiple R-squared:  0.8479, Adjusted R-squared:  0.8478 
## F-statistic: 1.095e+04 on 1 and 1965 DF,  p-value: < 2.2e-16
# Autocorrelations of the residuals from the no-intercept regression on changes.
acf(lm2$residuals)