This report was automatically generated with the R package knitr (version 1.5).
library(faraway)
data(coagulation, package = "faraway")
coagulation
coag diet
1 62 A
2 60 A
3 63 A
4 59 A
5 63 B
6 67 B
7 71 B
8 64 B
9 65 B
10 66 B
11 68 C
12 66 C
13 71 C
14 67 C
15 68 C
16 68 C
17 56 D
18 62 D
19 60 D
20 61 D
21 63 D
22 64 D
23 63 D
24 59 D
plot(coag ~ diet, coagulation, ylab = "coagulation time")
stripchart(coag ~ diet, coagulation, vertical = TRUE, method = "stack", xlab = "diet",
ylab = "coagulation time")
lmod <- lm(coag ~ diet, coagulation)
sumary(lmod)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.10e+01 1.18e+00 51.55 < 2e-16
dietB 5.00e+00 1.53e+00 3.27 0.00380
dietC 7.00e+00 1.53e+00 4.58 0.00018
dietD 2.99e-15 1.45e+00 0.00 1.00000
n = 24, p = 4, Residual SE = 2.37, R-Squared = 0.67
round(coef(lmod), 1)
(Intercept) dietB dietC dietD
61 5 7 0
model.matrix(lmod)
(Intercept) dietB dietC dietD
1 1 0 0 0
2 1 0 0 0
3 1 0 0 0
4 1 0 0 0
5 1 1 0 0
6 1 1 0 0
7 1 1 0 0
8 1 1 0 0
9 1 1 0 0
10 1 1 0 0
11 1 0 1 0
12 1 0 1 0
13 1 0 1 0
14 1 0 1 0
15 1 0 1 0
16 1 0 1 0
17 1 0 0 1
18 1 0 0 1
19 1 0 0 1
20 1 0 0 1
21 1 0 0 1
22 1 0 0 1
23 1 0 0 1
24 1 0 0 1
attr(,"assign")
[1] 0 1 1 1
attr(,"contrasts")
attr(,"contrasts")$diet
[1] "contr.treatment"
anova(lmod)
Analysis of Variance Table
Response: coag
Df Sum Sq Mean Sq F value Pr(>F)
diet 3 228 76.0 13.6 4.7e-05
Residuals 20 112 5.6
lmodi <- lm(coag ~ diet - 1, coagulation)
sumary(lmodi)
Estimate Std. Error t value Pr(>|t|)
dietA 61.000 1.183 51.5 <2e-16
dietB 66.000 0.966 68.3 <2e-16
dietC 68.000 0.966 70.4 <2e-16
dietD 61.000 0.837 72.9 <2e-16
n = 24, p = 4, Residual SE = 2.37, R-Squared = 1
lmnull <- lm(coag ~ 1, coagulation)
anova(lmnull, lmodi)
Analysis of Variance Table
Model 1: coag ~ 1
Model 2: coag ~ diet - 1
Res.Df RSS Df Sum of Sq F Pr(>F)
1 23 340
2 20 112 3 228 13.6 4.7e-05
options(contrasts = c("contr.sum", "contr.poly"))
lmods <- lm(coag ~ diet, coagulation)
sumary(lmods)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 64.000 0.498 128.54 < 2e-16
diet1 -3.000 0.974 -3.08 0.00589
diet2 2.000 0.845 2.37 0.02819
diet3 4.000 0.845 4.73 0.00013
n = 24, p = 4, Residual SE = 2.37, R-Squared = 0.67
qqnorm(residuals(lmod))
qqline(residuals(lmod))
plot(jitter(fitted(lmod)), residuals(lmod), xlab = "Fitted", ylab = "Residuals")
abline(h = 0)
med <- with(coagulation, tapply(coag, diet, median))
ar <- with(coagulation, abs(coag - med[diet]))
anova(lm(ar ~ diet, coagulation))
Analysis of Variance Table
Response: ar
Df Sum Sq Mean Sq F value Pr(>F)
diet 3 4.3 1.44 0.65 0.59
Residuals 20 44.5 2.23
bartlett.test(coag ~ diet, coagulation)
Bartlett test of homogeneity of variances
data: coag by diet
Bartlett's K-squared = 1.668, df = 3, p-value = 0.6441
5 + c(-1, 1) * qt(0.975, 24 - 4) * 1.53
[1] 1.808 8.192
se <- summary(lmod)$sigma
5 + c(-1, 1) * qtukey(0.95, 4, 24 - 4)/sqrt(2) * se * sqrt(1/4 + 1/6)
[1] 0.7246 9.2754
(tci <- TukeyHSD(aov(coag ~ diet, coagulation)))
Tukey multiple comparisons of means
95% family-wise confidence level
Fit: aov(formula = coag ~ diet, data = coagulation)
$diet
diff lwr upr p adj
B-A 5 0.7246 9.275 0.0183
C-A 7 2.7246 11.275 0.0010
D-A 0 -4.0560 4.056 1.0000
C-B 2 -1.8241 5.824 0.4766
D-B -5 -8.5771 -1.423 0.0044
D-C -7 -10.5771 -3.423 0.0001
plot(tci)
data(jsp, package = "faraway")
jsp$mathcent <- jsp$math - mean(jsp$math)
require(ggplot2)
ggplot(aes(x = school, y = mathcent), data = jsp) + geom_boxplot() + theme(axis.text.x = element_text(angle = 90)) +
ylab("Centered Math Scores")
lmod <- lm(mathcent ~ school - 1, jsp)
sumary(lmod)
Estimate Std. Error t value Pr(>|t|)
school1 -3.3685 0.7686 -4.38 1.2e-05
school2 0.6714 1.2287 0.55 0.58481
school3 2.2964 1.0641 2.16 0.03099
school4 -2.6619 0.8688 -3.06 0.00220
school5 1.8320 0.8092 2.26 0.02364
school6 0.2381 0.9517 0.25 0.80249
school7 1.7227 1.1805 1.46 0.14459
school8 0.5909 0.7904 0.75 0.45472
school9 2.2458 0.9144 2.46 0.01410
school10 -1.2453 1.5048 -0.83 0.40802
school11 -0.4619 1.2461 -0.37 0.71089
school12 0.2082 0.8401 0.25 0.80429
school13 -1.6474 0.8875 -1.86 0.06351
school14 -3.2898 1.1243 -2.93 0.00346
school15 -0.4921 1.0127 -0.49 0.62702
school16 -3.7374 1.0127 -3.69 0.00023
school17 1.0881 1.6485 0.66 0.50927
school18 -0.8619 0.9941 -0.87 0.38597
school19 1.6108 1.1114 1.45 0.14734
school20 -0.2095 1.1376 -0.18 0.85386
school21 -3.6185 0.7686 -4.71 2.6e-06
school22 -1.7271 1.0870 -1.59 0.11217
school23 -0.8171 0.9680 -0.84 0.39868
school24 2.6238 0.9288 2.82 0.00476
school25 1.4232 1.0753 1.32 0.18578
school26 0.4919 0.8347 0.59 0.55570
school27 -2.3058 0.8629 -2.67 0.00757
school28 -5.8286 1.0641 -5.48 4.6e-08
school29 0.2090 0.9363 0.22 0.82334
school30 0.2392 0.7728 0.31 0.75698
school31 3.8241 0.7127 5.37 8.6e-08
school32 -0.0268 0.8570 -0.03 0.97506
school33 1.0633 0.6441 1.65 0.09889
school34 1.9359 0.7686 2.52 0.01183
school35 1.9041 1.0127 1.88 0.06016
school36 2.3605 0.7815 3.02 0.00254
school37 1.7618 0.9598 1.84 0.06651
school38 -1.6106 1.1805 -1.36 0.17255
school39 -1.4842 1.0990 -1.35 0.17696
school40 -4.9855 1.2643 -3.94 8.2e-05
school41 0.8809 1.2461 0.71 0.47966
school42 -0.3261 0.6441 -0.51 0.61275
school44 0.5881 1.8431 0.32 0.74969
school45 -4.6392 1.1114 -4.17 3.1e-05
school46 3.0636 1.0323 2.97 0.00302
school47 2.3969 0.7300 3.28 0.00104
school48 0.2119 0.5136 0.41 0.68003
school49 2.7964 0.8688 3.22 0.00130
school50 -2.6520 0.7336 -3.62 0.00030
n = 3236, p = 49, Residual SE = 7.37, R-Squared = 0.08
anova(lm(mathcent ~ school, jsp))
Analysis of Variance Table
Response: mathcent
Df Sum Sq Mean Sq F value Pr(>F)
school 48 15484 323 5.94 <2e-16
Residuals 3187 173212 54
pvals <- summary(lmod)$coef[, 4]
padj <- p.adjust(pvals, method = "bonferroni")
coef(lmod)[padj < 0.05]
school1 school16 school21 school28 school31 school40 school45 school50
-3.368 -3.737 -3.618 -5.829 3.824 -4.985 -4.639 -2.652
names(which(sort(pvals) < (1:49) * 0.05/49))
[1] "school28" "school31" "school21" "school1" "school45" "school40"
[7] "school16" "school50" "school47" "school49" "school4" "school36"
[13] "school46" "school14" "school24" "school27" "school34" "school9"
padj <- p.adjust(pvals, method = "fdr")
coef(lmod)[padj < 0.05]
school1 school4 school9 school14 school16 school21 school24 school27
-3.368 -2.662 2.246 -3.290 -3.737 -3.618 2.624 -2.306
school28 school31 school34 school36 school40 school45 school46 school47
-5.829 3.824 1.936 2.361 -4.985 -4.639 3.064 2.397
school49 school50
2.796 -2.652
The R session information (including the OS info, R version and all packages used):
sessionInfo()
R version 3.1.0 (2014-04-10)
Platform: x86_64-apple-darwin13.1.0 (64-bit)
locale:
[1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
attached base packages:
[1] graphics grDevices utils datasets methods stats base
other attached packages:
[1] faraway_1.0.6 knitr_1.5 ggplot2_0.9.3.1
loaded via a namespace (and not attached):
[1] colorspace_1.2-4 dichromat_2.0-0 digest_0.6.4
[4] evaluate_0.5.3 formatR_0.10 grid_3.1.0
[7] gtable_0.1.2 labeling_0.2 MASS_7.3-31
[10] munsell_0.4.2 plyr_1.8.1 proto_0.3-10
[13] RColorBrewer_1.0-5 Rcpp_0.11.1 reshape2_1.2.2
[16] scales_0.2.3 stringr_0.6.2 tools_3.1.0
Sys.time()
[1] "2014-06-16 14:02:31 BST"