# 9章
# 9.4.1項
library(evd)
t <- 2:4
dorder(2:4, distn = "norm", mean = 0.5, sd = 1.2, mlen = 5, j = 2)
## [1] 0.2300687782 0.0133524232 0.0001663078
4* choose(5,4)*dnorm(t,0.5,1.2)*pnorm(t,0.5,1.2)^3*(1-pnorm(t,0.5,1.2))
## [1] 0.2300687782 0.0133524232 0.0001663078
porder(3, distn = "exp", rate = 1.2, mlen = 3, j = 2, largest=F)
## [1] 0.997801
sum(choose(3, 2:3) * pexp(3, rate=1.2)^(2:3) *(1-pexp(3, rate=1.2))^(3-2:3) )
## [1] 0.997801
rorder(5, distn="gamma", shape = 1, mlen = 10, j = 2)
## [1] 1.8315229 2.2791424 0.9962047 0.7622386 2.6352645
dextreme(2:4, distn = "norm", mean = 0.5, sd = 1.2, mlen = 5)
## [1] 0.48689660 0.17602941 0.02346192
dorder(2:4, distn = "norm", mean = 0.5, sd = 1.2, mlen = 5, j=1)
## [1] 0.48689660 0.17602941 0.02346192
pextreme(2:4, distn = "exp", rate = 1.2, mlen = 2)
## [1] 0.8267938 0.9460991 0.9836082
porder(2:4, distn = "exp", rate = 1.2, mlen = 2, j=1)
## [1] 0.8267938 0.9460991 0.9836082
qextreme(seq(0.9, 0.6, -0.1), distn = "exp", rate = 1.2, mlen = 2)
## [1] 2.474783 1.873629 1.509935 1.241553
# 9.4.2項
library(evd)
n <- 10; m <- 5
os <- rorder(1000, distn="exp", mlen=n, j=m, largest=F) # 順序統計量の乱数
oldpar <- par()
par(mfrow=c(2,1))
hist(os)
xmat <- rexp(m*1000) # m*1000個の指数乱数
dim(xmat) <- c(m,1000) # m*1000型行列に成型
x1 <- xmat/((n-m+1):10) # 指数乱数の割り算
x2 <- apply(x1, 2, sum) # 各列の和
hist(x2)

(x <- matrix(12, 4,4))
## [,1] [,2] [,3] [,4]
## [1,] 12 12 12 12
## [2,] 12 12 12 12
## [3,] 12 12 12 12
## [4,] 12 12 12 12
x/(1:4)
## [,1] [,2] [,3] [,4]
## [1,] 12 12 12 12
## [2,] 6 6 6 6
## [3,] 4 4 4 4
## [4,] 3 3 3 3
x/(1:5) # 警告が出る
## [,1] [,2] [,3] [,4]
## [1,] 12 2.4 3.0 4.0
## [2,] 6 12.0 2.4 3.0
## [3,] 4 6.0 12.0 2.4
## [4,] 3 4.0 6.0 12.0
par(oldpar)
# 9.4.3項
eu <- function(n){sum(1/(1:n))-log(n)}
eu(10^8)
## [1] 0.5772157
pi6 <- function(n){sqrt(6*sum(1/(1:n)^2))}
pi6(10^8)
## [1] 3.141593
# 9.5.2項
library(evd)
# 指数分布FについてF^n(an x + bn)を与える関数
pnexp <- function(x,n,lambda=1){
bn <- -1/lambda*(log(1-exp(-1/n)))
an <- 1/(n*lambda*(exp(1/n)-1))
pextreme(an*x+bn, distn="exp", rate=lambda, mlen=n)
}
n<- 100; x<-1:4; lambda<-2
pnexp(x,n,lambda) # x=1:4, n=100, 平均2として計算
## [1] 0.6917323 0.8727514 0.9509450 0.9815782
pgumbel(x) # 収束先の分布関数
## [1] 0.6922006 0.8734230 0.9514320 0.9818511
# 両者の差と上からの評価の比較
abs(pnexp(x,n,lambda)-pgumbel(x)); 1/(exp(1)*(2*n-1))
## [1] 0.0004683243 0.0006716232 0.0004869786 0.0002729085
## [1] 0.00184864
# 指数分布の最大値のグンベル分布への収束
curve(pnexp(x,10,lambda), xlim=c(-3,3))
curve(pnexp(x,100,lambda), add=T)
curve(pnexp(x,1000,lambda), add=T)
curve(pgumbel, add=T)

# 両者の差のグラフ
curve(pnexp(x,10,lambda)-pgumbel(x), xlim=c(-3,3))
curve(pnexp(x,100,lambda)-pgumbel(x), lty=2, add=T)
curve(pnexp(x,1000,lambda)-pgumbel(x), lty=3, add=T)

# 10章
# 10.3.2項
# install.packages("tidyverse")
library(tidyverse)
## ─ Attaching packages ──────────────────────────── tidyverse 1.3.0 ─
## <U+2713> ggplot2 3.2.1 <U+2713> purrr 0.3.3
## <U+2713> tibble 2.1.3 <U+2713> dplyr 0.8.3
## <U+2713> tidyr 1.0.0 <U+2713> stringr 1.4.0
## <U+2713> readr 1.3.1 <U+2713> forcats 0.4.0
## ─ Conflicts ────────────────────────────── tidyverse_conflicts() ─
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# 10.3.3項
?iris
## starting httpd help server ...
## done
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
library(ggplot2)
ggplot(data=iris) +
geom_point(mapping=aes(x=Sepal.Length, y=Sepal.Width)) # 散布図

ggplot(data=iris) + geom_point(mapping=aes(x=Sepal.Length,
y=Sepal.Width, color=Species)) # 種で色分け

ggplot(data=iris) +
geom_point(mapping=aes(x=Sepal.Length, y=Sepal.Width))+
facet_wrap(~Species, nrow=2) # 種で散布図を分ける

ggplot(data=iris) +
geom_point(mapping=aes(x=Sepal.Length, y=Sepal.Width))+
geom_smooth(mapping=aes(x=Sepal.Length, y=Sepal.Width)) # 平滑化する
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# 10.3.4項
library(dplyr)
head(trees)
## Girth Height Volume
## 1 8.3 70 10.3
## 2 8.6 65 10.3
## 3 8.8 63 10.2
## 4 10.5 72 16.4
## 5 10.7 81 18.8
## 6 10.8 83 19.7
filter(trees, Girth>10, Volume<20)
## Girth Height Volume
## 1 10.5 72 16.4
## 2 10.7 81 18.8
## 3 10.8 83 19.7
## 4 11.0 66 15.6
## 5 11.0 75 18.2
## 6 11.2 75 19.9
## 7 12.0 75 19.1
arrange(trees, Volume, Height) # Volumeを第1基準にHeightを第2基準で昇順に並べる
## Girth Height Volume
## 1 8.8 63 10.2
## 2 8.6 65 10.3
## 3 8.3 70 10.3
## 4 11.0 66 15.6
## 5 10.5 72 16.4
## 6 11.0 75 18.2
## 7 10.7 81 18.8
## 8 12.0 75 19.1
## 9 10.8 83 19.7
## 10 11.2 75 19.9
## 11 11.4 76 21.0
## 12 11.7 69 21.3
## 13 11.4 76 21.4
## 14 12.9 74 22.2
## 15 11.1 80 22.6
## 16 11.3 79 24.2
## 17 13.8 64 24.9
## 18 13.7 71 25.7
## 19 13.3 86 27.4
## 20 14.2 80 31.7
## 21 12.9 85 33.8
## 22 14.0 78 34.5
## 23 14.5 74 36.3
## 24 16.0 72 38.3
## 25 16.3 77 42.6
## 26 18.0 80 51.0
## 27 18.0 80 51.5
## 28 17.3 81 55.4
## 29 17.5 82 55.7
## 30 17.9 80 58.3
## 31 20.6 87 77.0
select(trees, Girth, Volume)
## Girth Volume
## 1 8.3 10.3
## 2 8.6 10.3
## 3 8.8 10.2
## 4 10.5 16.4
## 5 10.7 18.8
## 6 10.8 19.7
## 7 11.0 15.6
## 8 11.0 18.2
## 9 11.1 22.6
## 10 11.2 19.9
## 11 11.3 24.2
## 12 11.4 21.0
## 13 11.4 21.4
## 14 11.7 21.3
## 15 12.0 19.1
## 16 12.9 22.2
## 17 12.9 33.8
## 18 13.3 27.4
## 19 13.7 25.7
## 20 13.8 24.9
## 21 14.0 34.5
## 22 14.2 31.7
## 23 14.5 36.3
## 24 16.0 38.3
## 25 16.3 42.6
## 26 17.3 55.4
## 27 17.5 55.7
## 28 17.9 58.3
## 29 18.0 51.5
## 30 18.0 51.0
## 31 20.6 77.0
mutate(trees, Volume/Height)
## Girth Height Volume Volume/Height
## 1 8.3 70 10.3 0.1471429
## 2 8.6 65 10.3 0.1584615
## 3 8.8 63 10.2 0.1619048
## 4 10.5 72 16.4 0.2277778
## 5 10.7 81 18.8 0.2320988
## 6 10.8 83 19.7 0.2373494
## 7 11.0 66 15.6 0.2363636
## 8 11.0 75 18.2 0.2426667
## 9 11.1 80 22.6 0.2825000
## 10 11.2 75 19.9 0.2653333
## 11 11.3 79 24.2 0.3063291
## 12 11.4 76 21.0 0.2763158
## 13 11.4 76 21.4 0.2815789
## 14 11.7 69 21.3 0.3086957
## 15 12.0 75 19.1 0.2546667
## 16 12.9 74 22.2 0.3000000
## 17 12.9 85 33.8 0.3976471
## 18 13.3 86 27.4 0.3186047
## 19 13.7 71 25.7 0.3619718
## 20 13.8 64 24.9 0.3890625
## 21 14.0 78 34.5 0.4423077
## 22 14.2 80 31.7 0.3962500
## 23 14.5 74 36.3 0.4905405
## 24 16.0 72 38.3 0.5319444
## 25 16.3 77 42.6 0.5532468
## 26 17.3 81 55.4 0.6839506
## 27 17.5 82 55.7 0.6792683
## 28 17.9 80 58.3 0.7287500
## 29 18.0 80 51.5 0.6437500
## 30 18.0 80 51.0 0.6375000
## 31 20.6 87 77.0 0.8850575
summarize(group_by(iris, Species), mean(Sepal.Length)) # 種別に平均をとる
## # A tibble: 3 x 2
## Species `mean(Sepal.Length)`
## <fct> <dbl>
## 1 setosa 5.01
## 2 versicolor 5.94
## 3 virginica 6.59
trees %>% filter(Girth>17) %>% select(Height) # パイプを使う
## Height
## 1 81
## 2 82
## 3 80
## 4 80
## 5 80
## 6 87
# 表
# anova
library(evd)
x <- rgev(100)
m1 <- fgev(x)
m2 <- fgev(x, shape=0)
anova(m1,m2)
## Analysis of Deviance Table
##
## M.Df Deviance Df Chisq Pr(>chisq)
## m1 3 309.32
## m2 2 311.72 1 2.402 0.1212
# mtransform
library(evd)
x <- 1:10
p <- c(1,2,3)
mtransform(x,p)-log(pgev(x,loc=p[1], scale=p[2], shape=p[3]))
## [1] 2.0000000 1.4736126 1.2599210 1.1330327 1.0455159 0.9799946 0.9283178
## [8] 0.8860619 0.8505807 0.8201765