This report was automatically generated with the R package knitr (version 1.5).
# Attach the faraway package, load its pima data set into the workspace,
# and print the first rows as a quick look at the columns.
library(faraway)
data(pima, package = "faraway")
head(pima)
pregnant glucose diastolic triceps insulin bmi diabetes age test
1 6 148 72 35 0 33.6 0.627 50 1
2 1 85 66 29 0 26.6 0.351 31 0
3 8 183 64 0 0 23.3 0.672 32 1
4 1 89 66 23 94 28.1 0.167 21 0
5 0 137 40 35 168 43.1 2.288 33 1
6 5 116 74 0 0 25.6 0.201 30 0
summary(pima)
pregnant glucose diastolic triceps
Min. : 0.00 Min. : 0 Min. : 0.0 Min. : 0.0
1st Qu.: 1.00 1st Qu.: 99 1st Qu.: 62.0 1st Qu.: 0.0
Median : 3.00 Median :117 Median : 72.0 Median :23.0
Mean : 3.85 Mean :121 Mean : 69.1 Mean :20.5
3rd Qu.: 6.00 3rd Qu.:140 3rd Qu.: 80.0 3rd Qu.:32.0
Max. :17.00 Max. :199 Max. :122.0 Max. :99.0
insulin bmi diabetes age
Min. : 0.0 Min. : 0.0 Min. :0.078 Min. :21.0
1st Qu.: 0.0 1st Qu.:27.3 1st Qu.:0.244 1st Qu.:24.0
Median : 30.5 Median :32.0 Median :0.372 Median :29.0
Mean : 79.8 Mean :32.0 Mean :0.472 Mean :33.2
3rd Qu.:127.2 3rd Qu.:36.6 3rd Qu.:0.626 3rd Qu.:41.0
Max. :846.0 Max. :67.1 Max. :2.420 Max. :81.0
test
Min. :0.000
1st Qu.:0.000
Median :0.000
Mean :0.349
3rd Qu.:1.000
Max. :1.000
# Print diastolic in increasing order; the run of zeros at the low end of the
# output is what motivates the recoding to NA further down.
sort(pima$diastolic)
[1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[18] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[35] 0 24 30 30 38 40 44 44 44 44 46 46 48 48 48 48 48
[52] 50 50 50 50 50 50 50 50 50 50 50 50 50 52 52 52 52
[69] 52 52 52 52 52 52 52 54 54 54 54 54 54 54 54 54 54
[86] 54 55 55 56 56 56 56 56 56 56 56 56 56 56 56 58 58
[103] 58 58 58 58 58 58 58 58 58 58 58 58 58 58 58 58 58
[120] 58 58 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60
[137] 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60
[154] 60 60 60 60 60 61 62 62 62 62 62 62 62 62 62 62 62
[171] 62 62 62 62 62 62 62 62 62 62 62 62 62 62 62 62 62
[188] 62 62 62 62 62 62 64 64 64 64 64 64 64 64 64 64 64
[205] 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64
[222] 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 65 65
[239] 65 65 65 65 65 66 66 66 66 66 66 66 66 66 66 66 66
[256] 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66
[273] 66 68 68 68 68 68 68 68 68 68 68 68 68 68 68 68 68
[290] 68 68 68 68 68 68 68 68 68 68 68 68 68 68 68 68 68
[307] 68 68 68 68 68 68 68 68 68 68 68 68 70 70 70 70 70
[324] 70 70 70 70 70 70 70 70 70 70 70 70 70 70 70 70 70
[341] 70 70 70 70 70 70 70 70 70 70 70 70 70 70 70 70 70
[358] 70 70 70 70 70 70 70 70 70 70 70 70 70 70 70 70 70
[375] 70 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72
[392] 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72 72
[409] 72 72 72 72 72 72 72 72 72 72 72 74 74 74 74 74 74
[426] 74 74 74 74 74 74 74 74 74 74 74 74 74 74 74 74 74
[443] 74 74 74 74 74 74 74 74 74 74 74 74 74 74 74 74 74
[460] 74 74 74 74 74 74 74 74 74 74 74 74 75 75 75 75 75
[477] 75 75 75 76 76 76 76 76 76 76 76 76 76 76 76 76 76
[494] 76 76 76 76 76 76 76 76 76 76 76 76 76 76 76 76 76
[511] 76 76 76 76 76 76 76 76 78 78 78 78 78 78 78 78 78
[528] 78 78 78 78 78 78 78 78 78 78 78 78 78 78 78 78 78
[545] 78 78 78 78 78 78 78 78 78 78 78 78 78 78 78 78 78
[562] 78 78 80 80 80 80 80 80 80 80 80 80 80 80 80 80 80
[579] 80 80 80 80 80 80 80 80 80 80 80 80 80 80 80 80 80
[596] 80 80 80 80 80 80 80 80 82 82 82 82 82 82 82 82 82
[613] 82 82 82 82 82 82 82 82 82 82 82 82 82 82 82 82 82
[630] 82 82 82 82 84 84 84 84 84 84 84 84 84 84 84 84 84
[647] 84 84 84 84 84 84 84 84 84 84 85 85 85 85 85 85 86
[664] 86 86 86 86 86 86 86 86 86 86 86 86 86 86 86 86 86
[681] 86 86 86 88 88 88 88 88 88 88 88 88 88 88 88 88 88
[698] 88 88 88 88 88 88 88 88 88 88 88 90 90 90 90 90 90
[715] 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 92
[732] 92 92 92 92 92 92 92 94 94 94 94 94 94 95 96 96 96
[749] 96 98 98 98 100 100 100 102 104 104 106 106 106 108 108 110 110
[766] 110 114 122
# Treat a recorded value of zero as missing in each of these five columns.
# `is.na(x) <- cond` sets x[cond] to NA and leaves every other entry alone.
is.na(pima$diastolic) <- pima$diastolic == 0
is.na(pima$glucose) <- pima$glucose == 0
is.na(pima$triceps) <- pima$triceps == 0
is.na(pima$insulin) <- pima$insulin == 0
is.na(pima$bmi) <- pima$bmi == 0
# Store test as a factor rather than a number, then count the rows per level.
pima$test <- factor(pima$test)
summary(pima$test)
0 1
500 268
# Rename the factor levels in order (0 -> "negative", 1 -> "positive"),
# then re-run the summary to see the effect of the NA recoding and relabelling.
levels(pima$test) <- c("negative", "positive")
summary(pima)
pregnant glucose diastolic triceps
Min. : 0.00 Min. : 44 Min. : 24.0 Min. : 7.0
1st Qu.: 1.00 1st Qu.: 99 1st Qu.: 64.0 1st Qu.:22.0
Median : 3.00 Median :117 Median : 72.0 Median :29.0
Mean : 3.85 Mean :122 Mean : 72.4 Mean :29.1
3rd Qu.: 6.00 3rd Qu.:141 3rd Qu.: 80.0 3rd Qu.:36.0
Max. :17.00 Max. :199 Max. :122.0 Max. :99.0
NA's :5 NA's :35 NA's :227
insulin bmi diabetes age
Min. : 14.0 Min. :18.2 Min. :0.078 Min. :21.0
1st Qu.: 76.2 1st Qu.:27.5 1st Qu.:0.244 1st Qu.:24.0
Median :125.0 Median :32.3 Median :0.372 Median :29.0
Mean :155.5 Mean :32.5 Mean :0.472 Mean :33.2
3rd Qu.:190.0 3rd Qu.:36.6 3rd Qu.:0.626 3rd Qu.:41.0
Max. :846.0 Max. :67.1 Max. :2.420 Max. :81.0
NA's :374 NA's :11
test
negative:500
positive:268
# Base-graphics views of the data.
# Histogram of diastolic with an axis label and no title.
hist(pima$diastolic, xlab = "Diastolic", main = "")
# Kernel density estimate; na.rm = TRUE drops the NA values before estimation.
plot(density(pima$diastolic, na.rm = TRUE), main = "")
# Sorted values plotted against their rank.
plot(sort(pima$diastolic), ylab = "Sorted Diastolic")
# diabetes against diastolic (numeric predictor: scatterplot).
plot(diabetes ~ diastolic, pima)
# diabetes against test (factor predictor: side-by-side boxplots).
plot(diabetes ~ test, pima)
# Attach ggplot2 for the plots that follow. library() stops with an error if
# the package is missing; require() would only warn and return FALSE, leaving
# the failure to surface later at the first ggplot() call.
library(ggplot2)
ggplot(pima, aes(x = diastolic)) + geom_histogram()
stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
ggplot(pima, aes(x = diastolic)) + geom_density()
Warning: Removed 35 rows containing non-finite values (stat_density).
ggplot(pima, aes(x = diastolic, y = diabetes)) + geom_point()
Warning: Removed 35 rows containing missing values (geom_point).
# Scatterplot of diabetes against diastolic with point shape mapped to test;
# the theme() call moves the legend above the plot and lays it out horizontally.
ggplot(pima, aes(x = diastolic, y = diabetes, shape = test)) + geom_point() +
theme(legend.position = "top", legend.direction = "horizontal")
Warning: Removed 35 rows containing missing values (geom_point).
# Same scatterplot with smaller points, split into one panel per level of test.
ggplot(pima, aes(x = diastolic, y = diabetes)) + geom_point(size = 1) + facet_grid(~test)
Warning: Removed 19 rows containing missing values (geom_point).
Warning: Removed 16 rows containing missing values (geom_point).
data(manilius, package = "faraway")
head(manilius)
arc sinang cosang group
1 13.17 0.8836 -0.4682 1
2 13.13 0.9996 -0.0282 1
3 13.20 0.9899 0.1421 1
4 14.25 0.2221 0.9750 3
5 14.70 0.0006 1.0000 3
6 13.02 0.9308 -0.3654 1
# Sum the first three columns (arc, sinang, cosang) within each value of group.
# The outer parentheses make the assignment print its result.
(moon3 <- aggregate(manilius[, 1:3], list(manilius$group), sum))
Group.1 arc sinang cosang
1 1 118.1 8.499 -0.7932
2 2 140.3 -6.140 1.7443
3 3 127.5 2.978 7.9649
# Solve the 3x3 linear system built from the grouped sums: the coefficient
# columns are a constant 9, the summed sinang and the summed cosang, and the
# right-hand side is the summed arc.
# NOTE(review): the 9 is presumably the number of observations in each group
# (the summed intercept column) -- confirm against table(manilius$group).
solve(cbind(9, moon3$sinang, moon3$cosang), moon3$arc)
[1] 14.5446 -1.4898 0.1341
# Fit the same three-parameter model by least squares on the ungrouped rows
# and print the coefficients for comparison with the solve() result above.
lmod <- lm(arc ~ sinang + cosang, manilius)
coef(lmod)
(Intercept) sinang cosang
14.56162 -1.50458 0.09137
# Load GaltonFamilies from the HistData package, plot child height against
# mid-parent height, and fit the simple linear regression.
# Note: this reuses the name lmod, replacing the manilius fit.
data(GaltonFamilies, package = "HistData")
plot(childHeight ~ midparentHeight, GaltonFamilies)
lmod <- lm(childHeight ~ midparentHeight, GaltonFamilies)
coef(lmod)
(Intercept) midparentHeight
22.6362 0.6374
abline(lmod)
# Slope computed by hand as correlation * sd(y) / sd(x); it reproduces the
# midparentHeight coefficient reported by coef(lmod).
(beta <- with(GaltonFamilies, cor(midparentHeight, childHeight) * sd(childHeight)/sd(midparentHeight)))
[1] 0.6374
(alpha <- with(GaltonFamilies, mean(childHeight) - beta * mean(midparentHeight)))
[1] 22.64
# Slope with the correlation factor left out: the ratio of the two standard
# deviations alone, i.e. the slope that a correlation of exactly 1 would give.
(beta1 <- with(GaltonFamilies, sd(childHeight)/sd(midparentHeight)))
[1] 1.986
(alpha1 <- with(GaltonFamilies, mean(childHeight) - beta1 * mean(midparentHeight)))
[1] -70.69
# Add the line with intercept alpha1 and slope beta1 to the current plot,
# drawn dashed (lty = 2) to distinguish it from the fitted regression line.
abline(alpha1, beta1, lty = 2)
The R session information (including the OS info, R version and all packages used):
sessionInfo()
R version 3.1.0 (2014-04-10)
Platform: x86_64-apple-darwin13.1.0 (64-bit)
locale:
[1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
attached base packages:
[1] graphics grDevices utils datasets methods stats base
other attached packages:
[1] faraway_1.0.6 knitr_1.5 ggplot2_0.9.3.1
loaded via a namespace (and not attached):
[1] colorspace_1.2-4 dichromat_2.0-0 digest_0.6.4
[4] evaluate_0.5.3 formatR_0.10 grid_3.1.0
[7] gtable_0.1.2 labeling_0.2 MASS_7.3-31
[10] munsell_0.4.2 plyr_1.8.1 proto_0.3-10
[13] RColorBrewer_1.0-5 Rcpp_0.11.1 reshape2_1.2.2
[16] scales_0.2.3 stringr_0.6.2 tools_3.1.0
Sys.time()
[1] "2014-06-16 14:01:03 BST"