This report was automatically generated with the R package knitr (version 1.5).
library(faraway)
data(eco, package = "faraway")
plot(income ~ usborn, data = eco, xlab = "Proportion US born", ylab = "Mean Annual Income")
lmod <- lm(income ~ usborn, eco)
sumary(lmod)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 68642 8739 7.85 3.2e-10
usborn -46019 9279 -4.96 8.9e-06
n = 51, p = 2, Residual SE = 3489.54, R-Squared = 0.33
plot(income ~ usborn, data = eco, xlab = "Proportion US born", ylab = "Mean Annual Income",
xlim = c(0, 1), ylim = c(15000, 70000), xaxs = "i")
abline(coef(lmod))
data(chredlin, package = "faraway")
head(chredlin)
race fire theft age involact income side
60626 10.0 6.2 29 60.4 0.0 11.744 n
60640 22.2 9.5 44 76.5 0.1 9.323 n
60613 19.6 10.5 36 73.5 1.2 9.948 n
60657 17.3 7.7 37 66.9 0.5 10.656 n
60614 24.5 8.6 53 81.4 0.7 9.730 n
60610 54.0 34.1 68 52.6 0.3 8.231 n
summary(chredlin)
race fire theft age
Min. : 1.00 Min. : 2.00 Min. : 3.0 Min. : 2.0
1st Qu.: 3.75 1st Qu.: 5.65 1st Qu.: 22.0 1st Qu.:48.6
Median :24.50 Median :10.40 Median : 29.0 Median :65.0
Mean :34.99 Mean :12.28 Mean : 32.4 Mean :60.3
3rd Qu.:57.65 3rd Qu.:16.05 3rd Qu.: 38.0 3rd Qu.:77.3
Max. :99.70 Max. :39.70 Max. :147.0 Max. :90.1
involact income side
Min. :0.000 Min. : 5.58 n:25
1st Qu.:0.000 1st Qu.: 8.45 s:22
Median :0.400 Median :10.69
Mean :0.615 Mean :10.70
3rd Qu.:0.900 3rd Qu.:11.99
Max. :2.200 Max. :21.48
require(ggplot2)
ggplot(chredlin, aes(race, involact)) + geom_point() + stat_smooth(method = "lm")
ggplot(chredlin, aes(fire, involact)) + geom_point() + stat_smooth(method = "lm")
ggplot(chredlin, aes(theft, involact)) + geom_point() + stat_smooth(method = "lm")
ggplot(chredlin, aes(age, involact)) + geom_point() + stat_smooth(method = "lm")
ggplot(chredlin, aes(income, involact)) + geom_point() + stat_smooth(method = "lm")
ggplot(chredlin, aes(side, involact)) + geom_point(position = position_jitter(width = 0.2,
height = 0))
sumary(lm(involact ~ race, chredlin))
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.12922 0.09661 1.34 0.19
race 0.01388 0.00203 6.84 1.8e-08
n = 47, p = 2, Residual SE = 0.45, R-Squared = 0.51
ggplot(chredlin, aes(race, fire)) + geom_point() + stat_smooth(method = "lm")
ggplot(chredlin, aes(race, theft)) + geom_point() + stat_smooth(method = "lm")
lmod <- lm(involact ~ race + fire + theft + age + log(income), chredlin)
sumary(lmod)
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.18554 1.10025 -1.08 0.28755
race 0.00950 0.00249 3.82 0.00045
fire 0.03986 0.00877 4.55 4.8e-05
theft -0.01029 0.00282 -3.65 0.00073
age 0.00834 0.00274 3.04 0.00413
log(income) 0.34576 0.40012 0.86 0.39254
n = 47, p = 6, Residual SE = 0.33, R-Squared = 0.75
plot(lmod, 1:2)
termplot(lmod, partial.resid = TRUE, terms = 1:2)
listcombo <- unlist(sapply(0:4, function(x) combn(4, x, simplify = FALSE)),
recursive = FALSE)
predterms <- lapply(listcombo, function(x) paste(c("race", c("fire", "theft",
"age", "log(income)")[x]), collapse = "+"))
coefm <- matrix(NA, 16, 2)
for (i in 1:16) {
lmi <- lm(as.formula(paste("involact ~ ", predterms[[i]])), data = chredlin)
coefm[i, ] <- summary(lmi)$coef[2, c(1, 4)]
}
rownames(coefm) <- predterms
colnames(coefm) <- c("beta", "pvalue")
round(coefm, 4)
beta pvalue
race 0.0139 0.0000
race+fire 0.0089 0.0002
race+theft 0.0141 0.0000
race+age 0.0123 0.0000
race+log(income) 0.0082 0.0087
race+fire+theft 0.0082 0.0002
race+fire+age 0.0089 0.0001
race+fire+log(income) 0.0070 0.0160
race+theft+age 0.0128 0.0000
race+theft+log(income) 0.0084 0.0083
race+age+log(income) 0.0099 0.0017
race+fire+theft+age 0.0081 0.0001
race+fire+theft+log(income) 0.0073 0.0078
race+fire+age+log(income) 0.0085 0.0041
race+theft+age+log(income) 0.0106 0.0010
race+fire+theft+age+log(income) 0.0095 0.0004
diags <- data.frame(lm.influence(lmod)$coef)
ggplot(diags, aes(row.names(diags), race)) + geom_linerange(aes(ymax = 0, ymin = race)) +
theme(axis.text.x = element_text(angle = 90)) + xlab("ZIP") + geom_hline(yint = 0)
Error: object 'diags' not found
ggplot(diags, aes(x = fire, y = theft)) + geom_text(label = row.names(diags))
plot(lmod, 5)
chredlin[c("60607", "60610"), ]
race fire theft age involact income side
60607 50.2 39.7 147 83.0 0.9 7.459 n
60610 54.0 34.1 68 52.6 0.3 8.231 n
match(c("60607", "60610"), row.names(chredlin))
[1] 24 6
lmode <- lm(involact ~ race + fire + theft + age + log(income), chredlin, subset = -c(6,
24))
sumary(lmode)
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.57674 1.08005 -0.53 0.596
race 0.00705 0.00270 2.62 0.013
fire 0.04965 0.00857 5.79 1e-06
theft -0.00643 0.00435 -1.48 0.147
age 0.00517 0.00289 1.79 0.082
log(income) 0.11570 0.40111 0.29 0.775
n = 45, p = 6, Residual SE = 0.30, R-Squared = 0.8
modalt <- lm(involact ~ race + fire + log(income), chredlin, subset = -c(6,
24))
sumary(modalt)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.75326 0.83588 0.90 0.373
race 0.00421 0.00228 1.85 0.072
fire 0.05102 0.00845 6.04 3.8e-07
log(income) -0.36238 0.31916 -1.14 0.263
n = 45, p = 4, Residual SE = 0.31, R-Squared = 0.79
lmod <- lm(involact ~ race + fire + theft + age, subset = (side == "s"), chredlin)
sumary(lmod)
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.23441 0.23774 -0.99 0.338
race 0.00595 0.00328 1.81 0.087
fire 0.04839 0.01689 2.87 0.011
theft -0.00664 0.00844 -0.79 0.442
age 0.00501 0.00505 0.99 0.335
n = 22, p = 5, Residual SE = 0.35, R-Squared = 0.74
lmod <- lm(involact ~ race + fire + theft + age, subset = (side == "n"), chredlin)
sumary(lmod)
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.31857 0.22702 -1.40 0.176
race 0.01256 0.00448 2.81 0.011
fire 0.02313 0.01398 1.65 0.114
theft -0.00758 0.00366 -2.07 0.052
age 0.00820 0.00346 2.37 0.028
n = 25, p = 5, Residual SE = 0.34, R-Squared = 0.76
The R session information (including the OS info, R version and all packages used):
sessionInfo()
R version 3.1.0 (2014-04-10)
Platform: x86_64-apple-darwin13.1.0 (64-bit)
locale:
[1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
attached base packages:
[1] graphics grDevices utils datasets methods stats base
other attached packages:
[1] faraway_1.0.6 knitr_1.5 ggplot2_0.9.3.1
loaded via a namespace (and not attached):
[1] colorspace_1.2-4 dichromat_2.0-0 digest_0.6.4
[4] evaluate_0.5.3 formatR_0.10 grid_3.1.0
[7] gtable_0.1.2 labeling_0.2 MASS_7.3-31
[10] munsell_0.4.2 plyr_1.8.1 proto_0.3-10
[13] RColorBrewer_1.0-5 Rcpp_0.11.1 reshape2_1.2.2
[16] scales_0.2.3 stringr_0.6.2 tools_3.1.0
Sys.time()
[1] "2014-06-16 14:02:25 BST"