This report was automatically generated with the R package knitr (version 1.5).

library(faraway)
data(eco, package = "faraway")
plot(income ~ usborn, data = eco, xlab = "Proportion US born", ylab = "Mean Annual Income")

plot of chunk unnamed-chunk-1

lmod <- lm(income ~ usborn, eco)
sumary(lmod)
            Estimate Std. Error t value Pr(>|t|)
(Intercept)    68642       8739    7.85  3.2e-10
usborn        -46019       9279   -4.96  8.9e-06

n = 51, p = 2, Residual SE = 3489.54, R-Squared = 0.33
plot(income ~ usborn, data = eco, xlab = "Proportion US born", ylab = "Mean Annual Income", 
    xlim = c(0, 1), ylim = c(15000, 70000), xaxs = "i")
abline(coef(lmod))

plot of chunk unnamed-chunk-1

data(chredlin, package = "faraway")
head(chredlin)
      race fire theft  age involact income side
60626 10.0  6.2    29 60.4      0.0 11.744    n
60640 22.2  9.5    44 76.5      0.1  9.323    n
60613 19.6 10.5    36 73.5      1.2  9.948    n
60657 17.3  7.7    37 66.9      0.5 10.656    n
60614 24.5  8.6    53 81.4      0.7  9.730    n
60610 54.0 34.1    68 52.6      0.3  8.231    n
summary(chredlin)
      race            fire           theft            age      
 Min.   : 1.00   Min.   : 2.00   Min.   :  3.0   Min.   : 2.0  
 1st Qu.: 3.75   1st Qu.: 5.65   1st Qu.: 22.0   1st Qu.:48.6  
 Median :24.50   Median :10.40   Median : 29.0   Median :65.0  
 Mean   :34.99   Mean   :12.28   Mean   : 32.4   Mean   :60.3  
 3rd Qu.:57.65   3rd Qu.:16.05   3rd Qu.: 38.0   3rd Qu.:77.3  
 Max.   :99.70   Max.   :39.70   Max.   :147.0   Max.   :90.1  
    involact         income      side  
 Min.   :0.000   Min.   : 5.58   n:25  
 1st Qu.:0.000   1st Qu.: 8.45   s:22  
 Median :0.400   Median :10.69         
 Mean   :0.615   Mean   :10.70         
 3rd Qu.:0.900   3rd Qu.:11.99         
 Max.   :2.200   Max.   :21.48         
require(ggplot2)
ggplot(chredlin, aes(race, involact)) + geom_point() + stat_smooth(method = "lm")

plot of chunk unnamed-chunk-1

ggplot(chredlin, aes(fire, involact)) + geom_point() + stat_smooth(method = "lm")

plot of chunk unnamed-chunk-1

ggplot(chredlin, aes(theft, involact)) + geom_point() + stat_smooth(method = "lm")

plot of chunk unnamed-chunk-1

ggplot(chredlin, aes(age, involact)) + geom_point() + stat_smooth(method = "lm")

plot of chunk unnamed-chunk-1

ggplot(chredlin, aes(income, involact)) + geom_point() + stat_smooth(method = "lm")

plot of chunk unnamed-chunk-1

ggplot(chredlin, aes(side, involact)) + geom_point(position = position_jitter(width = 0.2, 
    height = 0))

plot of chunk unnamed-chunk-1

sumary(lm(involact ~ race, chredlin))
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  0.12922    0.09661    1.34     0.19
race         0.01388    0.00203    6.84  1.8e-08

n = 47, p = 2, Residual SE = 0.45, R-Squared = 0.51
ggplot(chredlin, aes(race, fire)) + geom_point() + stat_smooth(method = "lm")

plot of chunk unnamed-chunk-1

ggplot(chredlin, aes(race, theft)) + geom_point() + stat_smooth(method = "lm")

plot of chunk unnamed-chunk-1

lmod <- lm(involact ~ race + fire + theft + age + log(income), chredlin)
sumary(lmod)
            Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.18554    1.10025   -1.08  0.28755
race         0.00950    0.00249    3.82  0.00045
fire         0.03986    0.00877    4.55  4.8e-05
theft       -0.01029    0.00282   -3.65  0.00073
age          0.00834    0.00274    3.04  0.00413
log(income)  0.34576    0.40012    0.86  0.39254

n = 47, p = 6, Residual SE = 0.33, R-Squared = 0.75
plot(lmod, 1:2)

plot of chunk unnamed-chunk-1plot of chunk unnamed-chunk-1

termplot(lmod, partial.resid = TRUE, terms = 1:2)

plot of chunk unnamed-chunk-1plot of chunk unnamed-chunk-1

listcombo <- unlist(sapply(0:4, function(x) combn(4, x, simplify = FALSE)), 
    recursive = FALSE)
predterms <- lapply(listcombo, function(x) paste(c("race", c("fire", "theft", 
    "age", "log(income)")[x]), collapse = "+"))
coefm <- matrix(NA, 16, 2)
for (i in 1:16) {
    lmi <- lm(as.formula(paste("involact ~ ", predterms[[i]])), data = chredlin)
    coefm[i, ] <- summary(lmi)$coef[2, c(1, 4)]
}
rownames(coefm) <- predterms
colnames(coefm) <- c("beta", "pvalue")
round(coefm, 4)
                                  beta pvalue
race                            0.0139 0.0000
race+fire                       0.0089 0.0002
race+theft                      0.0141 0.0000
race+age                        0.0123 0.0000
race+log(income)                0.0082 0.0087
race+fire+theft                 0.0082 0.0002
race+fire+age                   0.0089 0.0001
race+fire+log(income)           0.0070 0.0160
race+theft+age                  0.0128 0.0000
race+theft+log(income)          0.0084 0.0083
race+age+log(income)            0.0099 0.0017
race+fire+theft+age             0.0081 0.0001
race+fire+theft+log(income)     0.0073 0.0078
race+fire+age+log(income)       0.0085 0.0041
race+theft+age+log(income)      0.0106 0.0010
race+fire+theft+age+log(income) 0.0095 0.0004
diags <- data.frame(lm.influence(lmod)$coef)
ggplot(diags, aes(row.names(diags), race)) + geom_linerange(aes(ymax = 0, ymin = race)) + 
    theme(axis.text.x = element_text(angle = 90)) + xlab("ZIP") + geom_hline(yint = 0)
Error: object 'diags' not found
ggplot(diags, aes(x = fire, y = theft)) + geom_text(label = row.names(diags))

plot of chunk unnamed-chunk-1

plot(lmod, 5)

plot of chunk unnamed-chunk-1

chredlin[c("60607", "60610"), ]
      race fire theft  age involact income side
60607 50.2 39.7   147 83.0      0.9  7.459    n
60610 54.0 34.1    68 52.6      0.3  8.231    n
match(c("60607", "60610"), row.names(chredlin))
[1] 24  6
lmode <- lm(involact ~ race + fire + theft + age + log(income), chredlin, subset = -c(6, 
    24))
sumary(lmode)
            Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.57674    1.08005   -0.53    0.596
race         0.00705    0.00270    2.62    0.013
fire         0.04965    0.00857    5.79    1e-06
theft       -0.00643    0.00435   -1.48    0.147
age          0.00517    0.00289    1.79    0.082
log(income)  0.11570    0.40111    0.29    0.775

n = 45, p = 6, Residual SE = 0.30, R-Squared = 0.8
modalt <- lm(involact ~ race + fire + log(income), chredlin, subset = -c(6, 
    24))
sumary(modalt)
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  0.75326    0.83588    0.90    0.373
race         0.00421    0.00228    1.85    0.072
fire         0.05102    0.00845    6.04  3.8e-07
log(income) -0.36238    0.31916   -1.14    0.263

n = 45, p = 4, Residual SE = 0.31, R-Squared = 0.79
lmod <- lm(involact ~ race + fire + theft + age, subset = (side == "s"), chredlin)
sumary(lmod)
            Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.23441    0.23774   -0.99    0.338
race         0.00595    0.00328    1.81    0.087
fire         0.04839    0.01689    2.87    0.011
theft       -0.00664    0.00844   -0.79    0.442
age          0.00501    0.00505    0.99    0.335

n = 22, p = 5, Residual SE = 0.35, R-Squared = 0.74
lmod <- lm(involact ~ race + fire + theft + age, subset = (side == "n"), chredlin)
sumary(lmod)
            Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.31857    0.22702   -1.40    0.176
race         0.01256    0.00448    2.81    0.011
fire         0.02313    0.01398    1.65    0.114
theft       -0.00758    0.00366   -2.07    0.052
age          0.00820    0.00346    2.37    0.028

n = 25, p = 5, Residual SE = 0.34, R-Squared = 0.76

The R session information (including the OS info, R version and all packages used):

sessionInfo()
R version 3.1.0 (2014-04-10)
Platform: x86_64-apple-darwin13.1.0 (64-bit)

locale:
[1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8

attached base packages:
[1] graphics  grDevices utils     datasets  methods   stats     base     

other attached packages:
[1] faraway_1.0.6   knitr_1.5       ggplot2_0.9.3.1

loaded via a namespace (and not attached):
 [1] colorspace_1.2-4   dichromat_2.0-0    digest_0.6.4      
 [4] evaluate_0.5.3     formatR_0.10       grid_3.1.0        
 [7] gtable_0.1.2       labeling_0.2       MASS_7.3-31       
[10] munsell_0.4.2      plyr_1.8.1         proto_0.3-10      
[13] RColorBrewer_1.0-5 Rcpp_0.11.1        reshape2_1.2.2    
[16] scales_0.2.3       stringr_0.6.2      tools_3.1.0       
Sys.time()
[1] "2014-06-16 14:02:25 BST"