This report was automatically generated with the R package knitr (version 1.5).
# faraway provides sumary(), the compact regression summary used below.
library(faraway)
# Load the built-in US state data sets (state.x77, state.abb, ...).
data(state)
# One row per state, labelled with the two-letter abbreviation.
statedata <- data.frame(state.x77, row.names = state.abb)
# Full model: life expectancy regressed on every other column.
lmod <- lm(Life.Exp ~ ., statedata)
sumary(lmod)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7.09e+01 1.75e+00 40.59 < 2e-16
Population 5.18e-05 2.92e-05 1.77 0.083
Income -2.18e-05 2.44e-04 -0.09 0.929
Illiteracy 3.38e-02 3.66e-01 0.09 0.927
Murder -3.01e-01 4.66e-02 -6.46 8.7e-08
HS.Grad 4.89e-02 2.33e-02 2.10 0.042
Frost -5.74e-03 3.14e-03 -1.82 0.075
Area -7.38e-08 1.67e-06 -0.04 0.965
n = 50, p = 8, Residual SE = 0.74, R-Squared = 0.74
# Backward elimination, step 1: drop Area, the term with the largest
# p-value (0.965) in the full model, and refit.
lmod <- update(lmod, . ~ . - Area)
sumary(lmod)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7.10e+01 1.39e+00 51.17 < 2e-16
Population 5.19e-05 2.88e-05 1.80 0.079
Income -2.44e-05 2.34e-04 -0.10 0.917
Illiteracy 2.85e-02 3.42e-01 0.08 0.934
Murder -3.02e-01 4.33e-02 -6.96 1.5e-08
HS.Grad 4.85e-02 2.07e-02 2.35 0.024
Frost -5.78e-03 2.97e-03 -1.94 0.058
n = 50, p = 7, Residual SE = 0.74, R-Squared = 0.74
# Backward elimination, step 2: Illiteracy now has the largest p-value
# (0.934), so remove it and refit.
lmod <- update(lmod, . ~ . - Illiteracy)
sumary(lmod)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7.11e+01 1.03e+00 69.07 < 2e-16
Population 5.11e-05 2.71e-05 1.89 0.066
Income -2.48e-05 2.32e-04 -0.11 0.915
Murder -3.00e-01 3.70e-02 -8.10 2.9e-10
HS.Grad 4.78e-02 1.86e-02 2.57 0.014
Frost -5.91e-03 2.47e-03 -2.39 0.021
n = 50, p = 6, Residual SE = 0.73, R-Squared = 0.74
# Backward elimination, step 3: Income has the largest remaining p-value
# (0.915), so remove it and refit.
lmod <- update(lmod, . ~ . - Income)
sumary(lmod)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7.10e+01 9.53e-01 74.54 < 2e-16
Population 5.01e-05 2.51e-05 2.00 0.052
Murder -3.00e-01 3.66e-02 -8.20 1.8e-10
HS.Grad 4.66e-02 1.48e-02 3.14 0.003
Frost -5.94e-03 2.42e-03 -2.46 0.018
n = 50, p = 5, Residual SE = 0.72, R-Squared = 0.74
# Backward elimination, step 4: Population is borderline (p = 0.052, just
# above 0.05); drop it to see the effect on the fit.
lmod <- update(lmod, . ~ . - Population)
sumary(lmod)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 71.03638 0.98326 72.25 <2e-16
Murder -0.28307 0.03673 -7.71 8e-10
HS.Grad 0.04995 0.01520 3.29 0.002
Frost -0.00691 0.00245 -2.82 0.007
n = 50, p = 4, Residual SE = 0.74, R-Squared = 0.71
# Alternative three-predictor model that uses Illiteracy in place of HS.Grad,
# for comparison with the model reached by backward elimination.
sumary(lm(Life.Exp ~ Illiteracy + Murder + Frost, statedata))
Estimate Std. Error t value Pr(>|t|)
(Intercept) 74.55672 0.58425 127.61 <2e-16
Illiteracy -0.60176 0.29893 -2.01 0.0500
Murder -0.28005 0.04339 -6.45 6e-08
Frost -0.00869 0.00296 -2.94 0.0052
n = 50, p = 4, Residual SE = 0.79, R-Squared = 0.67
# leaps provides regsubsets(). Use library() so that a missing package stops
# the script here instead of failing later at the first regsubsets() call.
library(leaps)
# Best-subsets search over all seven predictors.
b <- regsubsets(Life.Exp ~ ., data = statedata)
rs <- summary(b)
# Logical matrix: row k flags the terms included in the selected k-predictor
# model.
rs$which
(Intercept) Population Income Illiteracy Murder HS.Grad Frost Area
1 TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
2 TRUE FALSE FALSE FALSE TRUE TRUE FALSE FALSE
3 TRUE FALSE FALSE FALSE TRUE TRUE TRUE FALSE
4 TRUE TRUE FALSE FALSE TRUE TRUE TRUE FALSE
5 TRUE TRUE TRUE FALSE TRUE TRUE TRUE FALSE
6 TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
7 TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# AIC (up to an additive constant) for the selected model of each size:
#   n * log(RSS / n) + 2 * p
# where p is the number of parameters (intercept plus predictors).
# The sample size and the range of model sizes are derived from the data and
# from rs$rss rather than hard-coded as 50 and 2:8.
n <- nrow(statedata)
n_pred <- seq_along(rs$rss)
AIC <- n * log(rs$rss / n) + 2 * (n_pred + 1)
plot(AIC ~ n_pred, ylab = "AIC", xlab = "Number of Predictors")
# Adjusted R-squared against the number of parameters (2:8 = intercept plus
# one to seven predictors).
plot(2:8, rs$adjr2, xlab = "No. of Parameters", ylab = "Adjusted R-square")
# Index (= number of predictors) of the model with the highest adjusted
# R-squared.
which.max(rs$adjr2)
[1] 4
# Mallows' Cp against the number of parameters p.
plot(2:8, rs$cp, xlab = "No. of Parameters", ylab = "Cp Statistic")
# Reference line Cp = p (intercept 0, slope 1).
abline(0, 1)
# Refit the full model and run AIC-based stepwise selection from it.
lmod <- lm(Life.Exp ~ ., data = statedata)
# The result of step() is only printed, not assigned, so lmod remains the
# full model afterwards.
step(lmod)
Start: AIC=-22.18
Life.Exp ~ Population + Income + Illiteracy + Murder + HS.Grad +
Frost + Area
Df Sum of Sq RSS AIC
- Area 1 0.00 23.3 -24.2
- Income 1 0.00 23.3 -24.2
- Illiteracy 1 0.00 23.3 -24.2
<none> 23.3 -22.2
- Population 1 1.75 25.0 -20.6
- Frost 1 1.85 25.1 -20.4
- HS.Grad 1 2.44 25.7 -19.2
- Murder 1 23.14 46.4 10.3
Step: AIC=-24.18
Life.Exp ~ Population + Income + Illiteracy + Murder + HS.Grad +
Frost
Df Sum of Sq RSS AIC
- Illiteracy 1 0.00 23.3 -26.2
- Income 1 0.01 23.3 -26.2
<none> 23.3 -24.2
- Population 1 1.76 25.1 -22.5
- Frost 1 2.05 25.3 -22.0
- HS.Grad 1 2.98 26.3 -20.2
- Murder 1 26.27 49.6 11.6
Step: AIC=-26.17
Life.Exp ~ Population + Income + Murder + HS.Grad + Frost
Df Sum of Sq RSS AIC
- Income 1 0.0 23.3 -28.2
<none> 23.3 -26.2
- Population 1 1.9 25.2 -24.3
- Frost 1 3.0 26.3 -22.1
- HS.Grad 1 3.5 26.8 -21.2
- Murder 1 34.7 58.0 17.5
Step: AIC=-28.16
Life.Exp ~ Population + Murder + HS.Grad + Frost
Df Sum of Sq RSS AIC
<none> 23.3 -28.2
- Population 1 2.1 25.4 -25.9
- Frost 1 3.1 26.4 -23.9
- HS.Grad 1 5.1 28.4 -20.2
- Murder 1 34.8 58.1 15.5
Call:
lm(formula = Life.Exp ~ Population + Murder + HS.Grad + Frost,
data = statedata)
Coefficients:
(Intercept) Population Murder HS.Grad Frost
7.10e+01 5.01e-05 -3.00e-01 4.66e-02 -5.94e-03
# Leverages (hat values) of the full model stored in lmod.
h <- lm.influence(lmod)$hat
names(h) <- state.abb
# List states from highest to lowest leverage.
rev(sort(h))
AK CA HI NV NM TX NY WA OR
0.80952 0.40886 0.37876 0.36525 0.32472 0.28416 0.25695 0.22268 0.22183
ND LA CT UT RI MD AL AZ MS
0.21969 0.19495 0.19363 0.19098 0.17082 0.16407 0.16110 0.15891 0.15572
FL IL PA NJ SD ME SC MI WY
0.14857 0.13743 0.13210 0.12993 0.12528 0.12182 0.11758 0.11725 0.11638
VT MA GA MO KY AR CO WV NC
0.11504 0.11274 0.11029 0.11027 0.10909 0.10448 0.10251 0.09965 0.09362
DE NH ID OH MT MN TN WI VA
0.09322 0.08981 0.08756 0.08751 0.08639 0.07617 0.07006 0.06835 0.06394
OK IA NE KS IN
0.06350 0.06200 0.05749 0.05538 0.05198
# Repeat the best-subsets search with Alaska (AK) excluded, to see how much
# that single state influences the selected model.
b <- regsubsets(Life.Exp ~ ., data = statedata, subset = (state.abb != "AK"))
rs <- summary(b)
# Terms in the model with the highest adjusted R-squared. The component is
# named in full (`adjr2`); `rs$adjr` only worked through `$` partial matching.
rs$which[which.max(rs$adjr2), ]
(Intercept) Population Income Illiteracy Murder HS.Grad
TRUE TRUE FALSE FALSE TRUE TRUE
Frost Area
TRUE TRUE
# Jittered vertical strip chart of every variable after scale(), so all
# columns share a common axis; las = 2 sets the axis labels perpendicular
# to the axis.
stripchart(data.frame(scale(statedata)), method = "jitter", las = 2, vertical = TRUE)
# Best-subsets search again, with Population and Area entered on the log
# scale.
b <- regsubsets(Life.Exp ~ log(Population) + Income + Illiteracy + Murder +
    HS.Grad + Frost + log(Area), statedata)
rs <- summary(b)
# Terms in the model with the highest adjusted R-squared. The component is
# named in full (`adjr2`); `rs$adjr` only worked through `$` partial matching.
rs$which[which.max(rs$adjr2), ]
(Intercept) log(Population) Income Illiteracy
TRUE TRUE FALSE FALSE
Murder HS.Grad Frost log(Area)
TRUE TRUE TRUE FALSE
The R session information (including the OS info, R version and all packages used):
sessionInfo()
R version 3.1.0 (2014-04-10)
Platform: x86_64-apple-darwin13.1.0 (64-bit)
locale:
[1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
attached base packages:
[1] graphics grDevices utils datasets methods stats base
other attached packages:
[1] leaps_2.9 faraway_1.0.6 knitr_1.5 ggplot2_0.9.3.1
loaded via a namespace (and not attached):
[1] colorspace_1.2-4 dichromat_2.0-0 digest_0.6.4
[4] evaluate_0.5.3 formatR_0.10 grid_3.1.0
[7] gtable_0.1.2 labeling_0.2 MASS_7.3-31
[10] munsell_0.4.2 plyr_1.8.1 proto_0.3-10
[13] RColorBrewer_1.0-5 Rcpp_0.11.1 reshape2_1.2.2
[16] scales_0.2.3 stringr_0.6.2 tools_3.1.0
Sys.time()
[1] "2014-06-16 14:02:13 BST"