# 9章


# 9.4.1項
library(evd)
t <- 2:4
dorder(2:4, distn = "norm", mean = 0.5, sd = 1.2, mlen = 5, j = 2)
## [1] 0.2300687782 0.0133524232 0.0001663078
4* choose(5,4)*dnorm(t,0.5,1.2)*pnorm(t,0.5,1.2)^3*(1-pnorm(t,0.5,1.2))
## [1] 0.2300687782 0.0133524232 0.0001663078
porder(3, distn = "exp", rate = 1.2, mlen = 3, j = 2, largest=F)
## [1] 0.997801
sum(choose(3, 2:3) * pexp(3, rate=1.2)^(2:3) *(1-pexp(3, rate=1.2))^(3-2:3) )
## [1] 0.997801
rorder(5, distn="gamma", shape = 1, mlen = 10, j = 2)
## [1] 1.8315229 2.2791424 0.9962047 0.7622386 2.6352645
dextreme(2:4, distn = "norm", mean = 0.5, sd = 1.2, mlen = 5)
## [1] 0.48689660 0.17602941 0.02346192
dorder(2:4, distn = "norm", mean = 0.5, sd = 1.2, mlen = 5, j=1)
## [1] 0.48689660 0.17602941 0.02346192
pextreme(2:4, distn = "exp", rate = 1.2, mlen = 2)
## [1] 0.8267938 0.9460991 0.9836082
porder(2:4, distn = "exp", rate = 1.2, mlen = 2, j=1)
## [1] 0.8267938 0.9460991 0.9836082
qextreme(seq(0.9, 0.6, -0.1), distn = "exp", rate = 1.2, mlen = 2)
## [1] 2.474783 1.873629 1.509935 1.241553
# 9.4.2項
library(evd)
n <- 10; m <- 5
os <- rorder(1000, distn="exp", mlen=n, j=m, largest=F) # 順序統計量の乱数
oldpar <- par()
par(mfrow=c(2,1))
hist(os)
xmat <- rexp(m*1000)    # m*1000個の指数乱数
dim(xmat) <- c(m,1000)  # m*1000型行列に成型
x1 <- xmat/((n-m+1):10) # 指数乱数の割り算
x2 <- apply(x1, 2, sum) # 各列の和
hist(x2)

(x <- matrix(12, 4,4))
##      [,1] [,2] [,3] [,4]
## [1,]   12   12   12   12
## [2,]   12   12   12   12
## [3,]   12   12   12   12
## [4,]   12   12   12   12
x/(1:4)
##      [,1] [,2] [,3] [,4]
## [1,]   12   12   12   12
## [2,]    6    6    6    6
## [3,]    4    4    4    4
## [4,]    3    3    3    3
x/(1:5)     # 警告が出る
##      [,1] [,2] [,3] [,4]
## [1,]   12  2.4  3.0  4.0
## [2,]    6 12.0  2.4  3.0
## [3,]    4  6.0 12.0  2.4
## [4,]    3  4.0  6.0 12.0
par(oldpar)


# 9.4.3項
eu <- function(n){sum(1/(1:n))-log(n)}
eu(10^8)
## [1] 0.5772157
pi6 <- function(n){sqrt(6*sum(1/(1:n)^2))}
pi6(10^8)
## [1] 3.141593
# 9.5.2項
library(evd)
# 指数分布FについてF^n(an x + bn)を与える関数
pnexp <- function(x,n,lambda=1){
  bn <- -1/lambda*(log(1-exp(-1/n)))
  an <- 1/(n*lambda*(exp(1/n)-1))
  pextreme(an*x+bn, distn="exp", rate=lambda, mlen=n)
}
n<- 100; x<-1:4; lambda<-2
pnexp(x,n,lambda)   # x=1:4, n=100, 平均2として計算
## [1] 0.6917323 0.8727514 0.9509450 0.9815782
pgumbel(x)      # 収束先の分布関数
## [1] 0.6922006 0.8734230 0.9514320 0.9818511
# 両者の差と上からの評価の比較
abs(pnexp(x,n,lambda)-pgumbel(x)); 1/(exp(1)*(2*n-1))
## [1] 0.0004683243 0.0006716232 0.0004869786 0.0002729085
## [1] 0.00184864
# 指数分布の最大値のグンベル分布への収束
curve(pnexp(x,10,lambda), xlim=c(-3,3))
curve(pnexp(x,100,lambda), add=T)
curve(pnexp(x,1000,lambda), add=T)
curve(pgumbel, add=T)

# 両者の差のグラフ
curve(pnexp(x,10,lambda)-pgumbel(x), xlim=c(-3,3))
curve(pnexp(x,100,lambda)-pgumbel(x), lty=2, add=T)
curve(pnexp(x,1000,lambda)-pgumbel(x), lty=3, add=T)

  # 10章


# 10.3.2項
# install.packages("tidyverse")
library(tidyverse)
## ─ Attaching packages ──────────────────────────── tidyverse 1.3.0 ─
## <U+2713> ggplot2 3.2.1     <U+2713> purrr   0.3.3
## <U+2713> tibble  2.1.3     <U+2713> dplyr   0.8.3
## <U+2713> tidyr   1.0.0     <U+2713> stringr 1.4.0
## <U+2713> readr   1.3.1     <U+2713> forcats 0.4.0
## ─ Conflicts ────────────────────────────── tidyverse_conflicts() ─
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# 10.3.3項
?iris
## starting httpd help server ...
##  done
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
library(ggplot2)
ggplot(data=iris) + 
  geom_point(mapping=aes(x=Sepal.Length, y=Sepal.Width)) # 散布図

ggplot(data=iris) + geom_point(mapping=aes(x=Sepal.Length,
   y=Sepal.Width, color=Species)) # 種で色分け

ggplot(data=iris) + 
  geom_point(mapping=aes(x=Sepal.Length, y=Sepal.Width))+
  facet_wrap(~Species, nrow=2) # 種で散布図を分ける

ggplot(data=iris) + 
  geom_point(mapping=aes(x=Sepal.Length, y=Sepal.Width))+
  geom_smooth(mapping=aes(x=Sepal.Length, y=Sepal.Width)) # 平滑化する
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# 10.3.4項
library(dplyr)
head(trees)
##   Girth Height Volume
## 1   8.3     70   10.3
## 2   8.6     65   10.3
## 3   8.8     63   10.2
## 4  10.5     72   16.4
## 5  10.7     81   18.8
## 6  10.8     83   19.7
filter(trees, Girth>10, Volume<20)
##   Girth Height Volume
## 1  10.5     72   16.4
## 2  10.7     81   18.8
## 3  10.8     83   19.7
## 4  11.0     66   15.6
## 5  11.0     75   18.2
## 6  11.2     75   19.9
## 7  12.0     75   19.1
arrange(trees, Volume, Height) # Volumeを第1基準にHeightを第2基準で昇順に並べる
##    Girth Height Volume
## 1    8.8     63   10.2
## 2    8.6     65   10.3
## 3    8.3     70   10.3
## 4   11.0     66   15.6
## 5   10.5     72   16.4
## 6   11.0     75   18.2
## 7   10.7     81   18.8
## 8   12.0     75   19.1
## 9   10.8     83   19.7
## 10  11.2     75   19.9
## 11  11.4     76   21.0
## 12  11.7     69   21.3
## 13  11.4     76   21.4
## 14  12.9     74   22.2
## 15  11.1     80   22.6
## 16  11.3     79   24.2
## 17  13.8     64   24.9
## 18  13.7     71   25.7
## 19  13.3     86   27.4
## 20  14.2     80   31.7
## 21  12.9     85   33.8
## 22  14.0     78   34.5
## 23  14.5     74   36.3
## 24  16.0     72   38.3
## 25  16.3     77   42.6
## 26  18.0     80   51.0
## 27  18.0     80   51.5
## 28  17.3     81   55.4
## 29  17.5     82   55.7
## 30  17.9     80   58.3
## 31  20.6     87   77.0
select(trees, Girth, Volume)
##    Girth Volume
## 1    8.3   10.3
## 2    8.6   10.3
## 3    8.8   10.2
## 4   10.5   16.4
## 5   10.7   18.8
## 6   10.8   19.7
## 7   11.0   15.6
## 8   11.0   18.2
## 9   11.1   22.6
## 10  11.2   19.9
## 11  11.3   24.2
## 12  11.4   21.0
## 13  11.4   21.4
## 14  11.7   21.3
## 15  12.0   19.1
## 16  12.9   22.2
## 17  12.9   33.8
## 18  13.3   27.4
## 19  13.7   25.7
## 20  13.8   24.9
## 21  14.0   34.5
## 22  14.2   31.7
## 23  14.5   36.3
## 24  16.0   38.3
## 25  16.3   42.6
## 26  17.3   55.4
## 27  17.5   55.7
## 28  17.9   58.3
## 29  18.0   51.5
## 30  18.0   51.0
## 31  20.6   77.0
mutate(trees, Volume/Height) 
##    Girth Height Volume Volume/Height
## 1    8.3     70   10.3     0.1471429
## 2    8.6     65   10.3     0.1584615
## 3    8.8     63   10.2     0.1619048
## 4   10.5     72   16.4     0.2277778
## 5   10.7     81   18.8     0.2320988
## 6   10.8     83   19.7     0.2373494
## 7   11.0     66   15.6     0.2363636
## 8   11.0     75   18.2     0.2426667
## 9   11.1     80   22.6     0.2825000
## 10  11.2     75   19.9     0.2653333
## 11  11.3     79   24.2     0.3063291
## 12  11.4     76   21.0     0.2763158
## 13  11.4     76   21.4     0.2815789
## 14  11.7     69   21.3     0.3086957
## 15  12.0     75   19.1     0.2546667
## 16  12.9     74   22.2     0.3000000
## 17  12.9     85   33.8     0.3976471
## 18  13.3     86   27.4     0.3186047
## 19  13.7     71   25.7     0.3619718
## 20  13.8     64   24.9     0.3890625
## 21  14.0     78   34.5     0.4423077
## 22  14.2     80   31.7     0.3962500
## 23  14.5     74   36.3     0.4905405
## 24  16.0     72   38.3     0.5319444
## 25  16.3     77   42.6     0.5532468
## 26  17.3     81   55.4     0.6839506
## 27  17.5     82   55.7     0.6792683
## 28  17.9     80   58.3     0.7287500
## 29  18.0     80   51.5     0.6437500
## 30  18.0     80   51.0     0.6375000
## 31  20.6     87   77.0     0.8850575
summarize(group_by(iris, Species), mean(Sepal.Length)) # 種別に平均をとる
## # A tibble: 3 x 2
##   Species    `mean(Sepal.Length)`
##   <fct>                     <dbl>
## 1 setosa                     5.01
## 2 versicolor                 5.94
## 3 virginica                  6.59
trees %>% filter(Girth>17) %>% select(Height) # パイプを使う
##   Height
## 1     81
## 2     82
## 3     80
## 4     80
## 5     80
## 6     87
  # 表

# anova
library(evd)
x <- rgev(100)
m1 <- fgev(x)
m2 <- fgev(x, shape=0)
anova(m1,m2)
## Analysis of Deviance Table
## 
##    M.Df Deviance Df Chisq Pr(>chisq)
## m1    3   309.32                    
## m2    2   311.72  1 2.402     0.1212
# mtransform
library(evd)
x <- 1:10
p <- c(1,2,3)
mtransform(x,p)-log(pgev(x,loc=p[1], scale=p[2], shape=p[3]))
##  [1] 2.0000000 1.4736126 1.2599210 1.1330327 1.0455159 0.9799946 0.9283178
##  [8] 0.8860619 0.8505807 0.8201765