# 第 53 章 評價模型的表現 Assessing model performance

$\hat\pi_i = \frac{\text{exp}(\hat\alpha + \hat\beta_1x_{i1} + \cdots + \hat\beta_px_{ip})}{1+\text{exp}(\hat\alpha + \hat\beta_1x_{i1} + \cdots + \hat\beta_px_{ip})}$

## 53.1 精準度 calibration

$E(Y|\hat\pi = p) = p$

# Read the NHANES extract and recode gender (1 = Male) into a labelled
# factor with "Male" as the reference level for the regression below.
NHANES <- read_dta("backupfiles/nhanesglm.dta")
NHANES <- NHANES %>%
mutate(Gender = ifelse(gender == 1, "Male", "Female")) %>%
mutate(Gender = factor(Gender, levels = c("Male", "Female")))
with(NHANES, table(Gender))
## Gender
##   Male Female
##   1391   1157
# Define heavy drinking as ALQ130 > 5 (presumably average drinks per day;
# verify against the NHANES codebook), then fit a logistic regression of
# heavy drinking on gender alone.
NHANES <- mutate(NHANES, Heavydrinker = ALQ130 > 5)
Model_perf <- glm(Heavydrinker ~ Gender, data = NHANES, family = binomial(link = "logit"))
logistic.display(Model_perf)
##
## Logistic regression predicting Heavydrinker
##
##                        OR(95%CI)         P(Wald's test) P(LR-test)
## Gender: Female vs Male 0.17 (0.12,0.24)  < 0.001        < 0.001
##
## Log-likelihood = -834.1079
## No. of observations = 2548
## AIC value = 1672.2158

# Observed (y) and expected (yhat) counts for each covariate pattern.
# With gender as the only predictor there are exactly two patterns,
# so the model reproduces the observed counts perfectly (y == yhat).
LogisticDx::dx(Model_perf)[, 2:6]
##    GenderFemale   y           P    n yhat
## 1:            0 249 0.179007908 1391  249
## 2:            1  42 0.036300778 1157   42
# obtain Pearson's test statistics
chi2 <- sum((LogisticDx::dx(Model_perf)$sPr)^2)
pval <- pchisq(chi2, 1, lower.tail=FALSE)
data.frame(test="Pearson chi2(1)",chi.sq=chi2,pvalue=pval)
##              test        chi.sq pvalue
## 1 Pearson chi2(1) 6.2582271e-25      1

在這個只有性別作預測變量的邏輯迴歸模型裏，當然只有男，女，兩種共變量模式 (covariate patterns)。此時，模型的精準度100% (y 是觀測值， yhat 是期待值)。接下來，再在模型中加入一個是否體重超重的二分類變量。再獲取其觀測值和期待值表格如下：

NHANES <- mutate(NHANES, highbmi = bmi > 25)
Model_perf <- glm(Heavydrinker ~ Gender + highbmi, data = NHANES, family = binomial)
logistic.display(Model_perf)
##
## Logistic regression predicting Heavydrinker
##
##                        crude OR(95%CI)  adj. OR(95%CI)   P(Wald's test) P(LR-test)
## Gender: Female vs Male 0.17 (0.12,0.24) 0.17 (0.12,0.24) < 0.001        < 0.001
##
## highbmi                1.21 (0.93,1.58) 1.11 (0.84,1.46) 0.456          0.453
##
## Log-likelihood = -833.8269
## No. of observations = 2548
## AIC value = 1673.6539

LogisticDx::dx(Model_perf)[,2:7]
##    GenderFemale highbmiTRUE   y           P   n       yhat
## 1:            0           1 175 0.183662501 961 176.499664
## 2:            0           0  74 0.168605433 430  72.500336
## 3:            1           1  29 0.037620159 731  27.500336
## 4:            1           0  13 0.034036770 426  14.499664

# obtain Pearson's test statistics
chi2 <- sum((LogisticDx::dx(Model_perf)$sPr)^2)
# Pearson chi-squared test on 1 degree of freedom; the large p-value
# gives no evidence against the model's calibration.
pval <- pchisq(chi2, 1, lower.tail=FALSE)
data.frame(test="Pearson chi2(1)",chi.sq=chi2,pvalue=pval)
##              test     chi.sq     pvalue
## 1 Pearson chi2(1) 0.29881856 0.58462403

## 53.2 可解釋因變量的變異度及 $R^2$ 決定係數

$R^2 = \frac{SS_{REG}}{SS_{yy}} = \frac{\sum_{i=1}^n(\hat{y}_i-\bar{y})^2}{\sum_{i=1}^n(y_i-\bar{y})^2} = 1-\frac{\sum_{i=1}^n(y_i-\hat{y}_i)^2}{\sum_{i=1}^n(y_i-\bar{y})^2}$

$R^2_{\text{McFadden}} = 1 - \frac{\ell_c}{\ell_\text{null}}$

# McFadden's pseudo R-squared for the fitted logistic model.
PseudoR2(Model_perf)
##    McFadden
## 0.078752188

## 53.3 分辨能力 discrimination

### 53.3.1 敏感度和特異度

53.1 所示的是，將性別， BMI，和年齡三個變量放入邏輯迴歸模型之後，模型對於觀察對象的分辨能力的 ROC 示意圖。計算所得的 ROC 曲線下面積爲 0.7484。如果一個模型是失敗的，那麼其曲線下面積爲 (接近) 0.5。也就是會十分貼近 $y=x$ 的直線。

# Refit with gender, BMI and age as predictors, then draw the ROC curve;
# lroc also returns the area under the curve (AUC).
Model_perf <- glm(Heavydrinker ~ Gender + bmi + ageyrs, data = NHANES, family = binomial)
ROC_graph <- lroc(Model_perf, grid.col = "grey", lwd = 3, frame = FALSE)
ROC_graph$auc
## [1] 0.74835297

ROC 曲線本身有自己的優點，也有許多侷限性。最近有另外一個用於診斷的新型曲線–預測曲線(Pepe et al. 2007)。預測曲線繪製的是，觀測對象的擬合後概率 $\hat\pi_i$ 和這個概率在所有觀察對象的擬合後概率的百分位數 (percentile) 之間的曲線。一個模型，如果給許多對象相似的概率，那麼不能說這個模型的分辨能力足夠好。同時，此圖也能一目瞭然讓人看到大概多少對象的患病概率是大於一定水平的。

# Predictiveness curve (Pepe et al. 2007): each observation's fitted
# probability plotted against that probability's percentile among all
# fitted values. The percentile denominator is taken from the number of
# fitted values rather than a hard-coded sample size (2548), so the code
# stays correct if the analysis data set changes.
Predictive <- data.frame(fitted(Model_perf),
                         rank(fitted(Model_perf)) / length(fitted(Model_perf)))
names(Predictive) <- c("hatpi", "percentile")
ggplot(Predictive, aes(x = percentile, y = hatpi)) + geom_line() +
ylim(0, 0.4) +
labs(x = "Risk percentile", y = "Heavy drinker risk")  + theme_bw() +
theme(axis.title = element_text(size = 17),
axis.text.x = element_text(size = 15),
axis.text.y = element_text(size = 15))

53.2 所示的是，性別，年齡，BMI作爲共變量的邏輯迴歸模型的預測變量，預測重度飲酒概率的模型給出的預測曲線。從圖中可見，大多數人的概率值各不相同。而且，圖中也能告訴我們大約 20% 的觀測對象其重度飲酒的概率大於 0.2。

### References

Pepe, Margaret S, Ziding Feng, Ying Huang, Gary Longton, Ross Prentice, Ian M Thompson, and Yingye Zheng. 2007. “Integrating the Predictiveness of a Marker with Its Performance as a Classifier.” American Journal of Epidemiology 167 (3). Oxford University Press: 362–68.