# 第 56 章 貝葉斯多層回歸模型2 隨機斜率模型 multilevel models – varying slopes models

We are always forced to analyze data with a model that is MISSPECIFIED: the true data-generating process is different than the model.
Richard McElreath

## 56.1 模擬隨機斜率數據

# Population-level parameters for the simulated cafe data.
a <- 3.5        # average morning wait time
b <- -1         # average difference in afternoon wait time
sigma_a <- 1    # standard deviation among intercepts
sigma_b <- 0.5  # standard deviation among slopes
rho <- -0.7     # correlation between intercepts and slopes

# Vector of means for the bivariate normal population of effects
Mu <- c(a, b)

$\begin{pmatrix} \text{variance of intercepts} & \text{covariance of intercepts \& slopes} \\ \text{covariance of intercepts \& slopes} & \text{variance of slopes} \end{pmatrix}$

$\begin{pmatrix} \sigma_\alpha^2 & \sigma_\alpha\sigma_\beta\rho \\ \sigma_\alpha\sigma_\beta\rho & \sigma_\beta^2 \end{pmatrix}$

# Build the 2 x 2 covariance matrix element by element:
# off-diagonal entries are sigma_a * sigma_b * rho.
cov_ab <- sigma_a * sigma_b * rho
Sigma <- matrix(
  c(sigma_a^2, cov_ab,
    cov_ab,    sigma_b^2),
  nrow = 2
)
Sigma
##       [,1]  [,2]
## [1,]  1.00 -0.35
## [2,] -0.35  0.25

# Equivalent construction: scale a correlation matrix by the
# standard deviations, Sigma = diag(s) %*% Rho %*% diag(s).
sigmas <- c(sigma_a, sigma_b)               # standard deviations
Rho <- matrix(c(1, rho, rho, 1), ncol = 2)  # 2 x 2 correlation matrix

# Matrix-multiply to recover the same covariance matrix as above

Sigma <- diag(sigmas) %*% Rho %*% diag(sigmas)
Sigma
##       [,1]  [,2]
## [1,]  1.00 -0.35
## [2,] -0.35  0.25

N_cafes <- 20  # number of cafes to simulate

# Draw each cafe's (intercept, slope) pair from the bivariate
# normal population defined by Mu and Sigma above.
library(MASS)
set.seed(5)  # fixed seed so the printed draws below are reproducible
vary_effects <- mvrnorm( N_cafes, Mu, Sigma )

# One row per cafe: column 1 = intercept, column 2 = slope
vary_effects
##            [,1]        [,2]
##  [1,] 4.2239624 -1.60935645
##  [2,] 2.0104980 -0.75177037
##  [3,] 4.5658109 -1.94826458
##  [4,] 3.3436353 -1.19265388
##  [5,] 1.7009705 -0.58556181
##  [6,] 4.1343727 -1.14445388
##  [7,] 3.7944692 -1.62646613
##  [8,] 3.9465976 -1.71527939
##  [9,] 3.8642671 -0.90716770
## [10,] 3.4676137 -0.68040541
## [11,] 2.2428755 -0.61815155
## [12,] 4.1595055 -1.65921204
## [13,] 4.3002830 -2.11254740
## [14,] 3.5069480 -1.44064299
## [15,] 4.3820864 -1.87989834
## [16,] 3.5211329 -1.35069860
## [17,] 4.2167127 -0.91927985
## [18,] 5.9130025 -1.23136238
## [19,] 3.4773057 -0.35703411
## [20,] 3.7748988 -1.05704571

# Column 1 holds the per-cafe intercepts, column 2 the slopes.
a_cafe <- vary_effects[, 1]
b_cafe <- vary_effects[, 2]

# Scatter the simulated cafe effects
# (rangi2 and col.alpha come from the rethinking package)
plot(a_cafe, b_cafe,
     col = rangi2,
     xlab = "intercepts (a_cafe)",
     ylab = "slopes (b_cafe)")

# Overlay contour ellipses of the population distribution
# at several confidence levels

# library(ellipse)
for (lvl in c(0.1, 0.3, 0.5, 0.8, 0.99)) {
  lines(ellipse::ellipse(Sigma, centre = Mu, level = lvl),
        col = col.alpha('black', 0.2))
}

### 56.1.1 模擬觀察值 simulate observations

set.seed(22)   # reproducible visit-level noise
N_visits <- 10 # visits recorded per cafe

# 0/1 afternoon indicator, alternating across all N_visits * N_cafes rows
afternoon <- rep(0:1, N_visits * N_cafes / 2)

# cafe index for each visit: N_visits consecutive copies of each id
cafe_id <- rep( 1:N_cafes, each = N_visits )

# Linear predictor: cafe-specific intercept plus slope * afternoon
mu <- a_cafe[cafe_id] + b_cafe[cafe_id] * afternoon
sigma <- 0.5 # std dev within cafes

# Observed waiting times around each cafe's expected value
wait <- rnorm( N_visits*N_cafes, mu, sigma )

# Long-format data frame: one row per visit
d <- data.frame( cafe = cafe_id, afternoon = afternoon, wait = wait)

head(d, 20)
##    cafe afternoon       wait
## 1     1         0 3.96789287
## 2     1         1 3.85719780
## 3     1         0 4.72787549
## 4     1         1 2.76101325
## 5     1         0 4.11948274
## 6     1         1 3.54365216
## 7     1         0 4.19094922
## 8     1         1 2.53322349
## 9     1         0 4.12403208
## 10    1         1 2.76488683
## 11    2         0 1.62854439
## 12    2         1 1.29970861
## 13    2         0 2.38201217
## 14    2         1 1.21671656
## 15    2         0 1.61405077
## 16    2         1 0.79765084
## 17    2         0 2.44127922
## 18    2         1 2.26019875
## 19    2         0 2.47877354
## 20    2         1 0.45086022

### 56.1.2 隨機斜率模型 the varying slopes model

\begin{aligned} W_i & \sim \text{Normal}(\mu_i, \sigma) & \text{[likelihood]} \\ \mu_i & = \alpha_{\text{CAFE}[i]} + \beta_{\text{CAFE}[i]} A_i & \text{[linear model]} \end{aligned}

\begin{aligned} \left[ \begin{array}{c} \alpha_{\text{CAFE}[i]} \\ \beta_{\text{CAFE}[i]} \end{array} \right] & \sim \text{MVNormal}\left( \left[ \begin{array}{c} \alpha \\ \beta \end{array} \right], \textbf{S} \right) & \text{[population of varying effects]} \\ \textbf{S} & = \left( \begin{array}{cc} \sigma_\alpha & 0\\ 0 & \sigma_\beta \end{array} \right) \textbf{R} \left( \begin{array}{cc} \sigma_\alpha & 0\\ 0 & \sigma_\beta \end{array} \right) & \text{[construct covariance matrix]} \end{aligned}

\begin{aligned} \alpha & \sim \text{Normal}(5, 2) & \text{[prior for average intercept]} \\ \beta & \sim \text{Normal}(-1, 0.5) & \text{[prior for average slope]} \\ \sigma & \sim \text{Exponential}(1) & \text{[prior stddev within cafes]} \\ \sigma_\alpha & \sim \text{Exponential}(1) & \text{[prior stddev among intercepts]} \\ \sigma_\beta & \sim \text{Exponential}(1) & \text{[prior stddev among slopes]} \\ \textbf{R} & \sim \text{LKJcorr} (2) & \text{[prior for correlation matrix]} \end{aligned}

\begin{aligned} \textbf{R} = \left( \begin{array}{cc} 1 & \rho \\ \rho & 1 \end{array} \right) \end{aligned}

# Visualize the LKJcorr prior for a 2 x 2 correlation matrix at
# several values of eta (larger eta concentrates mass near zero).
# NOTE(review): the dens() calls for eta = 1 and eta = 2 were
# syntactically truncated in the source (dangling commas);
# reconstructed here as overlays (add = TRUE) with distinct line
# types, consistent with the three text() labels drawn below —
# confirm against the original figure.
R <- rlkjcorr( 10000, K = 2, eta = 4 )
dens( R[, 1, 2], xlab = "correlation",
bty = "n")
R <- rlkjcorr( 10000, K = 2, eta = 1 )
dens( R[, 1, 2], add = TRUE, lty = 2)
R <- rlkjcorr( 10000, K = 2, eta = 2 )
dens( R[, 1, 2], add = TRUE, lty = 3)

# Label each density curve with its eta value
text( 0, 0.45, 'eta = 1')
text( 0, 0.8, "eta = 2")
text( 0.5, 1, "eta = 4")

set.seed(867530)
# Varying-slopes model fit with rethinking::ulam (Stan backend):
#   wait_i ~ Normal(mu_i, sigma)
#   mu_i   = a_cafe[cafe] + b_cafe[cafe] * afternoon
# The per-cafe (intercept, slope) pairs are drawn jointly from a
# bivariate normal with means (a, b), correlation matrix Rho, and
# scale vector sigma_cafe; Rho gets an LKJ(2) prior.
m14.1 <- ulam(
alist(
wait  ~ normal( mu, sigma ),
mu <- a_cafe[cafe] + b_cafe[cafe] * afternoon,
# varying effects: one intercept and one slope per cafe
c(a_cafe, b_cafe)[cafe] ~ multi_normal(c(a, b), Rho,
sigma_cafe),
a ~ normal(5, 2),
b ~ normal(-1, 0.5),
sigma_cafe ~ exponential(1),
sigma ~ exponential(1),
Rho ~ lkj_corr(2)
), data = d, chains = 4, cores = 4
)
# Cache the fitted model so later chunks can reload it instead of
# re-running the MCMC sampler
saveRDS(m14.1, "../Stanfits/m14_1.rds")

m14.1 <- readRDS("../Stanfits/m14_1.rds")

post <- extract.samples( m14.1 )

dens( post$Rho[,1,2], xlim = c(-1, 1), bty = "n") # posterior
R <- rlkjcorr(10000, K = 2, eta = 2) # prior
dens( R[,1,2], add = TRUE, lty = 2, col = rangi2, lwd = 3)
text( 0.45, 0.8 , "prior", col = rangi2)
text( -0.25, 2.0, "posterior")
# precis( m14.1, depth = 2 )

我們發現事後的相關係數集中在小於零的部分。因爲模型認爲斜率和截距之間的相關性是負的，符合我們產生數據時使用的設定，如圖 56.1 所示。接下來，繪製一下隨機效應部分的示意圖，並且和原始數據做一個比較。

# compute unpooled estimates directly from data
a1 <- sapply( 1:N_cafes, function(i) mean(wait[cafe_id == i & afternoon == 0]))
b1 <- sapply( 1:N_cafes, function(i) mean(wait[cafe_id == i & afternoon == 1])) - a1

# extract posterior means of partially pooled estimates
post <- extract.samples( m14.1 )
a2 <- apply( post$a_cafe, 2, mean )
b2 <- apply( post$b_cafe, 2, mean )

# plot both and connect with lines
plot(a1, b1, xlab = "intercept", ylab = "slope",
pch = 16, col = rangi2,
ylim = c( min(b1) - 0.1, max(b1) + 0.1),
xlim = c(min(a1) - 0.1, max(a1) + 0.1) , bty = "n")
points( a2, b2, pch = 1)
for ( i in 1:N_cafes ) lines( c(a1[i], a2[i]), c(b1[i], b2[i]) )

# superimpose the contours of the population
# compute the posterior mean bivariate Gaussian
Mu_est <- c( mean(post$a), mean(post$b) )
rho_est <- mean(post$Rho[,1,2])
sa_est <- mean( post$sigma_cafe[,1] )
sb_est <- mean( post$sigma_cafe[,2] )
# Posterior-mean covariance matrix of the varying effects,
# rebuilt from the estimated scales and correlation
cov_ab <- sa_est * sb_est * rho_est
Sigma_est <- matrix( c(sa_est^2 , cov_ab, cov_ab, sb_est^2), ncol = 2 )

# draw contours
library(ellipse)
##
## Attaching package: 'ellipse'
## The following object is masked from 'package:rethinking':
##
##     pairs
## The following object is masked from 'package:car':
##
##     ellipse
## The following object is masked from 'package:graphics':
##
##     pairs
# Overlay contour ellipses of the estimated population
# distribution at several confidence levels
for ( l in c(0.1, 0.3, 0.5, 0.8, 0.99) )
lines(ellipse(Sigma_est, centre = Mu_est, level = l),
col = col.alpha("black", 0.2))

圖 56.4 中藍色的點表示的是原始數據直接取平均值計算獲得的結果。空心點則是根據隨機效應模型的估計給出的各個餐廳的截距和斜率的事後概率分佈的估計平均值。二者之間用實線連接。我們發現每個空心點（模型估計值）的兩個維度都比直接用原始數據計算的估計值更加靠近這個二維高斯分佈的中心點位置（這個現象被叫做塌陷 shrinkage）。距離中心點越遠的藍點，它的塌陷程度 (shrinkage) 越大。