Content

Basic statistics review
QQ-Plot
KS-Test

1. Basic statistics review

We download the required dataset from IVLE and load the required libraries.

# fBasics supplies basicStats() and normalTest() used below. library() stops
# with an error right here if the package is missing, whereas require() would
# only warn and let a later call fail.
library(fBasics)

## Loading required package: timeDate

## Loading required package: timeSeries

# data.table supplies fread(); library() fails fast if it is not installed.
library(data.table)

# Read the IBM price/return data. "ibm.csv" is resolved relative to the
# current working directory, so run this from the folder containing the file.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
da <- data.table::fread("ibm.csv", header = TRUE)
# Number of rows and columns that were read.
dim(da)

## [1] 3079    8

# Show the first six rows (the default for head()).
head(da, n = 6L)

##          date IBM.Open IBM.High IBM.Low IBM.Close IBM.Volume IBM.Adjusted
## 1: 2007-01-04    97.25    98.79   96.88     98.31   10524500     70.86525
## 2: 2007-01-05    97.60    97.95   96.91     97.42    7221300     70.22369
## 3: 2007-01-08    98.50    99.50   98.35     98.90   10340000     71.29050
## 4: 2007-01-09    99.08   100.33   99.07    100.07   11108200     72.13392
## 5: 2007-01-10    98.50    99.05   97.93     98.89    8744800     71.28333
## 6: 2007-01-11    99.00    99.90   98.50     98.65    8000700     71.11033
##          Return
## 1:  0.010635145
## 2: -0.009094223
## 3:  0.015077751
## 4:  0.011760682
## 5: -0.011861830
## 6: -0.002429858

# Show the last six rows (the default for tail()).
tail(da, n = 6L)

##          date IBM.Open IBM.High IBM.Low IBM.Close IBM.Volume IBM.Adjusted
## 1: 2019-03-21   139.10   142.12  138.88    141.44    3605400       141.44
## 2: 2019-03-22   140.97   141.44  138.90    139.45    3877200       139.45
## 3: 2019-03-25   139.06   139.91  138.35    139.18    2839800       139.18
## 4: 2019-03-26   139.93   141.02  139.42    140.22    2553700       140.22
## 5: 2019-03-27   140.41   140.49  138.40    139.24    3098200       139.24
## 6: 2019-03-28   139.91   140.44  139.10    139.92    2541800       139.92
##          Return
## 1:  0.013094380
## 2: -0.014169520
## 3: -0.001938083
## 4:  0.007444616
## 5: -0.007013526
## 6:  0.004871718

# Summary statistics of the daily returns, rendered as a table.
# NOTE(review): the column header in the output below looks derived from the
# text of the `da$Return` expression, so the call is kept exactly as written.
kableExtra::kable(basicStats(da$Return))

	X..da.Return
nobs	3079.000000
NAs	0.000000
Minimum	-0.086419
Maximum	0.108989
1. Quartile	-0.006422
3. Quartile	0.007192
Mean	0.000118
Median	0.000298
Sum	0.363580
SE Mean	0.000253
LCL Mean	-0.000378
UCL Mean	0.000614
Variance	0.000197
Stdev	0.014041
Skewness	-0.180826
Kurtosis	5.989009

Null hypothesis: the true mean is equal to 0

# One-sample t-test of H0: mean return = 0 against a two-sided alternative
# (both arguments below are the t.test() defaults, written out for clarity).
t.test(da$Return, alternative = "two.sided", mu = 0)

## 
##  One Sample t-test
## 
## data:  da$Return
## t = 0.46664, df = 3078, p-value = 0.6408
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.0003780805  0.0006142482
## sample estimates:
##    mean of x 
## 0.0001180839

Change the alternative hypothesis to a one-sided test (true mean is greater than 0)

# Same test, but one-sided: H1 is that the mean return is greater than 0.
t.test(da$Return, alternative = "greater", mu = 0)

## 
##  One Sample t-test
## 
## data:  da$Return
## t = 0.46664, df = 3078, p-value = 0.3204
## alternative hypothesis: true mean is greater than 0
## 95 percent confidence interval:
##  -0.000298272          Inf
## sample estimates:
##    mean of x 
## 0.0001180839

# Kernel density estimate of the daily returns, drawn as a line.
# The plotted expressions are kept as d1$x / d1$y because the default axis
# labels are taken from them.
d1 <- density(da$Return)
plot(d1$x, d1$y, type = "l")

* Jarque-Bera Normality Test

# Jarque-Bera test of normality on the daily returns (fBasics::normalTest).
normalTest(da$Return, method = "jb")

## 
## Title:
##  Jarque - Bera Normalality Test
## 
## Test Results:
##   STATISTIC:
##     X-squared: 4627.3716
##   P VALUE:
##     Asymptotic p Value: < 2.2e-16 
## 
## Description:
##  Fri Mar 29 18:22:24 2019 by user: Sirius

2. QQ-Plot

# Normal QQ-plots of the daily log returns, one panel per index column of the
# built-in EuStockMarkets data set.
data(EuStockMarkets)
logR <- diff(log(EuStockMarkets))
index.names <- dimnames(logR)[[2]]
# Draw on a 2 x 2 grid; remember the previous layout so it can be restored
# and later plots are not squeezed into the same grid.
old_par <- par(mfrow = c(2, 2))
for (i in seq_along(index.names)) {
  # datax = TRUE puts the sample on the x-axis and the normal quantiles on y.
  qqnorm(logR[, i], datax = TRUE, main = index.names[i])
  qqline(logR[, i], datax = TRUE)
}
par(old_par)

3. KS-Test

# graphics is needed for the ECDF plots further down; library() errors out
# immediately if it cannot be attached.
library(graphics)
# Two simulated samples from different distributions: 50 standard-normal
# draws and 30 uniform(0, 1) draws. No seed is set in this section, so the
# numbers in the printed results change from run to run.
x <- rnorm(50)
y <- runif(30)

Do x and y come from the same distribution?

# Two-sample KS test; "two.sided" is the default alternative, written out.
ks.test(x, y, alternative = "two.sided")

## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  x and y
## D = 0.46, p-value = 0.0004387
## alternative hypothesis: two-sided

Does x come from a shifted gamma distribution with shape 3 and rate 2?

# One-sample KS test of x + 2 against the gamma CDF (two-sided, exact).
# The extra arguments are forwarded to pgamma(); naming them makes the
# shape/rate roles explicit.
ks.test(x + 2, "pgamma", shape = 3, rate = 2)

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  x + 2
## D = 0.35571, p-value = 3.626e-06
## alternative hypothesis: two-sided

# One-sided version: the alternative is that the CDF of x + 2 lies above the
# gamma CDF. The alternative is spelled out in full instead of abbreviated.
ks.test(x + 2, "pgamma", shape = 3, rate = 2, alternative = "greater")

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  x + 2
## D^+ = 0.039677, p-value = 0.8324
## alternative hypothesis: the CDF of x lies above the null hypothesis

Test if x is stochastically larger than x2

# Second sample: 50 normal draws with mean -1, i.e. shifted left relative to x.
x2 <- rnorm(50, mean = -1)
# Overlay both empirical CDFs on a common x-range; x2 is drawn dashed.
plot(ecdf(x), xlim = range(x, x2))
plot(ecdf(x2), add = TRUE, lty = "dashed")

# Welch two-sample t-test; H1: mean of x minus mean of x2 is greater than 0.
t.test(x, x2, alternative = "greater")

## 
##  Welch Two Sample t-test
## 
## data:  x and x2
## t = 6.4362, df = 92.895, p-value = 2.648e-09
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  0.8539445       Inf
## sample estimates:
##  mean of x  mean of y 
##  0.0505031 -1.1005768

# Two-sample KS test with H1: the CDF of x lies below that of x2, which is
# what "x is stochastically larger than x2" means in terms of CDFs.
ks.test(x, x2, alternative = "less")

## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  x and x2
## D^- = 0.5, p-value = 3.727e-06
## alternative hypothesis: the CDF of x lies below that of y

Lecture 2: Hypothesis Testing

Zhou Chao

Content

1. Basic statistics review

2. QQ-Plot

3. KS-Test