Content
1. Basic statistics review
We download the required dataset from IVLE and load the required libraries.
require(fBasics)
## Loading required package: fBasics
## Loading required package: timeDate
## Loading required package: timeSeries
require(data.table)
## Loading required package: data.table
# Read the IBM data set; the working directory must be the folder that
# contains ibm.csv.
da <- data.table::fread("ibm.csv", header = TRUE)
dim(da) # number of rows and columns
## [1] 3079 8
head(da) # preview the first rows of the data
## date IBM.Open IBM.High IBM.Low IBM.Close IBM.Volume IBM.Adjusted
## 1: 2007-01-04 97.25 98.79 96.88 98.31 10524500 70.86525
## 2: 2007-01-05 97.60 97.95 96.91 97.42 7221300 70.22369
## 3: 2007-01-08 98.50 99.50 98.35 98.90 10340000 71.29050
## 4: 2007-01-09 99.08 100.33 99.07 100.07 11108200 72.13392
## 5: 2007-01-10 98.50 99.05 97.93 98.89 8744800 71.28333
## 6: 2007-01-11 99.00 99.90 98.50 98.65 8000700 71.11033
## Return
## 1: 0.010635145
## 2: -0.009094223
## 3: 0.015077751
## 4: 0.011760682
## 5: -0.011861830
## 6: -0.002429858
tail(da) # preview the last rows of the data
## date IBM.Open IBM.High IBM.Low IBM.Close IBM.Volume IBM.Adjusted
## 1: 2019-03-21 139.10 142.12 138.88 141.44 3605400 141.44
## 2: 2019-03-22 140.97 141.44 138.90 139.45 3877200 139.45
## 3: 2019-03-25 139.06 139.91 138.35 139.18 2839800 139.18
## 4: 2019-03-26 139.93 141.02 139.42 140.22 2553700 140.22
## 5: 2019-03-27 140.41 140.49 138.40 139.24 3098200 139.24
## 6: 2019-03-28 139.91 140.44 139.10 139.92 2541800 139.92
## Return
## 1: 0.013094380
## 2: -0.014169520
## 3: -0.001938083
## 4: 0.007444616
## 5: -0.007013526
## 6: 0.004871718
# Summary statistics of the Return column, rendered as a table.
kableExtra::kable(basicStats(da$Return))
| | X..da.Return |
---|---|
nobs | 3079.000000 |
NAs | 0.000000 |
Minimum | -0.086419 |
Maximum | 0.108989 |
| 1. Quartile | -0.006422 |
| 3. Quartile | 0.007192 |
Mean | 0.000118 |
Median | 0.000298 |
Sum | 0.363580 |
SE Mean | 0.000253 |
LCL Mean | -0.000378 |
UCL Mean | 0.000614 |
Variance | 0.000197 |
Stdev | 0.014041 |
Skewness | -0.180826 |
Kurtosis | 5.989009 |
- Null hypothesis: the true mean is equal to 0
t.test(da$Return) # one-sample, two-sided t-test of the mean return against 0
##
## One Sample t-test
##
## data: da$Return
## t = 0.46664, df = 3078, p-value = 0.6408
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.0003780805 0.0006142482
## sample estimates:
## mean of x
## 0.0001180839
- Change the alternative hypothesis to a one-sided one: the true mean is greater than 0
# One-sided version of the same t-test (alternative: mean > 0).
t.test(da$Return, alternative = "greater")
##
## One Sample t-test
##
## data: da$Return
## t = 0.46664, df = 3078, p-value = 0.3204
## alternative hypothesis: true mean is greater than 0
## 95 percent confidence interval:
## -0.000298272 Inf
## sample estimates:
## mean of x
## 0.0001180839
# Kernel density estimate of the returns, drawn as a line.
d1 <- density(da$Return)
plot(d1$x, d1$y, type = "l")
* Jarque-Bera Normality Test
# Jarque-Bera test applied to the return series.
normalTest(da$Return, method = "jb")
##
## Title:
## Jarque - Bera Normalality Test
##
## Test Results:
## STATISTIC:
## X-squared: 4627.3716
## P VALUE:
## Asymptotic p Value: < 2.2e-16
##
## Description:
## Fri Mar 29 18:22:24 2019 by user: Sirius
2. QQ-Plot
# Normal QQ-plots of the log returns of every index in EuStockMarkets.
data(EuStockMarkets)
logR <- diff(log(EuStockMarkets)) # log returns, one column per index
index.names <- colnames(logR)
old_par <- par(mfrow = c(2, 2)) # 2 x 2 grid: one panel per index
for (i in seq_along(index.names)) {
  # datax = TRUE puts the data on the x-axis and the normal quantiles on the
  # y-axis; qqline() needs the same setting to match qqnorm().
  qqnorm(logR[, i], datax = TRUE, main = index.names[i])
  qqline(logR[, i], datax = TRUE)
}
par(old_par) # restore the previous layout so later plots are not squeezed
3. KS-Test
require(graphics)
# NOTE(review): no seed is set in this section, so the samples (and the test
# results below) differ from run to run.
x <- rnorm(50) # 50 draws from the standard normal distribution
y <- runif(30) # 30 draws from the uniform distribution on [0, 1]
- Do x and y come from the same distribution?
ks.test(x, y) # two-sample, two-sided Kolmogorov-Smirnov test
##
## Two-sample Kolmogorov-Smirnov test
##
## data: x and y
## D = 0.46, p-value = 0.0004387
## alternative hypothesis: two-sided
- Does x come from a shifted gamma distribution with shape 3 and rate 2?
# Extra arguments are forwarded to pgamma(); two-sided, exact.
ks.test(x + 2, "pgamma", shape = 3, rate = 2)
##
## One-sample Kolmogorov-Smirnov test
##
## data: x + 2
## D = 0.35571, p-value = 3.626e-06
## alternative hypothesis: two-sided
# Same test against the one-sided "greater" alternative.
ks.test(x + 2, "pgamma", 3, 2, alternative = "greater")
##
## One-sample Kolmogorov-Smirnov test
##
## data: x + 2
## D^+ = 0.039677, p-value = 0.8324
## alternative hypothesis: the CDF of x lies above the null hypothesis
- Test if x is stochastically larger than x2
# Second sample: 50 normal draws with mean -1.
x2 <- rnorm(50, mean = -1)
# Overlay both empirical CDFs on a common x-range (x solid, x2 dashed).
plot(ecdf(x), xlim = range(x, x2))
plot(ecdf(x2), add = TRUE, lty = "dashed")
# One-sided Welch t-test: is the mean of x greater than the mean of x2?
t.test(x, x2, alternative = "greater")
##
## Welch Two Sample t-test
##
## data: x and x2
## t = 6.4362, df = 92.895, p-value = 2.648e-09
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 0.8539445 Inf
## sample estimates:
## mean of x mean of y
## 0.0505031 -1.1005768
# One-sided two-sample KS test: the CDF of x lies below that of x2.
ks.test(x, x2, alternative = "less")
##
## Two-sample Kolmogorov-Smirnov test
##
## data: x and x2
## D^- = 0.5, p-value = 3.727e-06
## alternative hypothesis: the CDF of x lies below that of y