### Creation of model formulae and subset testing and training sets ###

Formula for classification of all types:

```{r}
# Response `type` against the right-hand side held in `predform`.
allform <- as.formula(paste("type ~", predform))
```

Transients vs. non-variables:

```{r}
# Two-level response: "nv" is kept, every other type is relabelled "tr".
trains$vtype <- factor(ifelse(trains$type == "nv", "nv", "tr"))
tests$vtype <- factor(ifelse(tests$type == "nv", "nv", "tr"))
vtform <- as.formula(paste("vtype ~", predform))
```

Transients only:

```{r}
# Copy of `type` with the "nv" cases set to NA, so that "nv" is not among
# the levels of the resulting factor.
trains$ttype <- trains$type
trains$ttype[trains$ttype == "nv"] <- NA
trains$ttype <- factor(trains$ttype)
tests$ttype <- tests$type
tests$ttype[tests$ttype == "nv"] <- NA
tests$ttype <- factor(tests$ttype)
trform <- as.formula(paste("ttype ~", predform))
```

```{r}
# Classification rates: one row per task, one column per classifier.
# Cells are filled in by position after each model is evaluated.
cmat <- matrix(NA, nrow = 4, ncol = 5)
dimnames(cmat) <- list(
  c("All", "TranNoTran", "Tranonly", "Hierarch"),
  c("LDA", "RPart", "SVM", "NN", "Forest")
)
```

## All types

### LDA

Linear Discriminant analysis using the default options. We produce the cross-classification between predicted and observed class. Note that the default priors are the proportions found in the training set.

```{r}
ldamod <- lda(allform, data = trains)
pv <- predict(ldamod, tests)
# Predicted class (rows) against observed type (columns).
cm <- xtabs(~ pv$class + tests$type)
```

```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
# `classrate` is not assigned in this file's visible chunks; presumably the
# child document dispres.Rmd derives it from `cm` -- TODO confirm.
cmat[1, 1] <- classrate
```

### Recursive Partitioning

```{r}
roz <- rpart(allform, data = trains)
rpart.plot(roz, type = 1, extra = 1)
pv <- predict(roz, newdata = tests, type = "class")
cm <- xtabs(~ pv + tests$type)
```

```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
cmat[1, 2] <- classrate
```

### Support Vector Machines

Use the default choice of settings from the *kernlab* R package for this:

```{r}
svmod <- ksvm(allform, data = trains)
pv <- predict(svmod, tests)
cm <- xtabs(~ pv + tests$type)
```

```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
cmat[1, 3] <- classrate
```

### Neural Net

Use the multinom() function from the *nnet* R package. Might work better with some scaling.

```{r}
# multinom() fit; the model object reuses the name `svmod`.
svmod <- multinom(
  allform,
  data = trains, trace = FALSE, maxit = 1000, decay = 5e-4
)
pv <- predict(svmod, tests)
# Predicted class (rows) against observed type (columns).
cm <- xtabs(~ pv + tests$type)
```

```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
# `classrate` is not assigned in this file's visible chunks; presumably the
# child document dispres.Rmd derives it from `cm` -- TODO confirm.
cmat[1, 4] <- classrate
```

### Random Forest

Use the *randomForest* package with the default settings:

```{r}
# The forest uses its own right-hand side, `tpredform`, not `predform`.
tallform <- as.formula(paste("type ~", tpredform))
fmod <- randomForest(tallform, data = trains)
pv <- predict(fmod, newdata = tests)
cm <- xtabs(~ pv + tests$type)
```

```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
cmat[1, 5] <- classrate
# Keep a copy of this confusion matrix under a separate name.
confmat <- cm
```

## Transients vs. non-variables

### LDA

Linear Discriminant analysis using the default options. We produce the cross-classification between predicted and observed class. Note that the default priors are the proportions found in the training set.

```{r}
ldamod <- lda(vtform, data = trains)
pv <- predict(ldamod, tests)
cm <- xtabs(~ pv$class + tests$vtype)
```

```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
cmat[2, 1] <- classrate
```

### Recursive Partitioning

```{r}
roz <- rpart(vtform, data = trains)
rpart.plot(roz, type = 1, extra = 1)
pv <- predict(roz, newdata = tests, type = "class")
cm <- xtabs(~ pv + tests$vtype)
```

```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
cmat[2, 2] <- classrate
```

### Support Vector Machines

Use the default choice of setting from the *kernlab* R package for this:

```{r}
svmod <- ksvm(vtform, data = trains)
pv <- predict(svmod, tests)
cm <- xtabs(~ pv + tests$vtype)
```

```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
cmat[2, 3] <- classrate
```

### Neural Net

Use the multinom() function from the *nnet* R package. Might work better with some scaling.

```{r}
# multinom() fit; the model object reuses the name `svmod`.
svmod <- multinom(vtform, data = trains, trace = FALSE, maxit = 1000, decay = 5e-4)
pv <- predict(svmod, tests)
# Predicted class (rows) against observed two-level type (columns).
cm <- xtabs(~ pv + tests$vtype)
```

```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
# `classrate` is not assigned in this file's visible chunks; presumably the
# child document dispres.Rmd derives it from `cm` -- TODO confirm.
cmat[2, 4] <- classrate
```

### Random Forest

Use the *randomForest* package with the default settings:

```{r}
# The forest uses its own right-hand side, `tpredform`, not `predform`.
tallform <- as.formula(paste("vtype ~", tpredform))
fmod <- randomForest(tallform, data = trains)
pv <- predict(fmod, newdata = tests)
cm <- xtabs(~ pv + tests$vtype)
```

```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
cmat[2, 5] <- classrate
```

## Transients only

### LDA

Linear Discriminant analysis using the default options. We produce the cross-classification between predicted and observed class. Note that the default priors are the proportions found in the training set.

```{r}
ldamod <- lda(trform, data = trains)
pv <- predict(ldamod, tests)
cm <- xtabs(~ pv$class + tests$ttype)
```

```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
cmat[3, 1] <- classrate
```

### Recursive Partitioning

```{r}
roz <- rpart(trform, data = trains)
rpart.plot(roz, type = 1, extra = 1)
pv <- predict(roz, newdata = tests, type = "class")
cm <- xtabs(~ pv + tests$ttype)
```

```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
cmat[3, 2] <- classrate
```

### Support Vector Machines

Use the default choice of settings from the *kernlab* R package for this:

```{r}
svmod <- ksvm(trform, data = trains)
pv <- predict(svmod, tests)
cm <- xtabs(~ pv + tests$ttype)
```

```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
cmat[3, 3] <- classrate
```

### Neural Net

Use the multinom() function from the *nnet* R package. Might work better with some scaling.

```{r}
# multinom() fit; the model object reuses the name `svmod`.
svmod <- multinom(trform, data=trains, trace=FALSE, maxit=1000, decay=5e-4)
pv <- predict(svmod, tests)
# Predicted class (rows) against observed transient type (columns).
cm <- xtabs( ~ pv + tests$ttype)
```

```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
# `classrate` is not assigned in this file's visible chunks; presumably the
# child document dispres.Rmd derives it from `cm` -- TODO confirm.
cmat[3,4] <- classrate
```

### Random Forest

Use the *randomForest* package with the default settings:

```{r}
# The forest uses its own right-hand side, `tpredform`, not `predform`.
tallform <- as.formula(paste("ttype ~",tpredform))
# na.omit() drops every row containing an NA, which includes the "nv" rows
# whose ttype was set to NA; it is applied to both the training and test sets.
fmod <- randomForest(tallform, data=na.omit(trains))
pv <- predict(fmod, newdata=na.omit(tests))
cm <- xtabs( ~ pv + na.omit(tests)$ttype)
```

```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
cmat[3,5] <- classrate
```

## Hierarchical Classification

First we classify into transient and non-variable. The cases which are classified as transient are then classified into type of transient. The transient classification here is different from the one above in the data used. Above, all the data are known to be transients whereas here some cases from the non-variable set will have been classified as transient at the first stage.

### LDA

```{r}
# Stage 1: transient vs. non-variable on the full training and test sets.
ldamod <- lda(vtform, data = trains)
pv <- predict(ldamod, tests)
utests <- subset(tests, pv$class != "nv")
pvt <- predict(ldamod, trains)
utrains <- subset(trains, pvt$class != "nv")
# Stage 2: transient type, fitted and predicted only on the cases that
# stage 1 did not call "nv".
ldamod <- lda(trform, data = utrains)
predc <- as.character(pv$class)
predc[predc != "nv"] <- as.character(predict(ldamod, utests)$class)
# Fix the levels so the table keeps a row for every type, even one that is
# never predicted (as the other hierarchical sections do).
predc <- factor(predc, levels = sort(levels(trains$type)))
cm <- xtabs(~ predc + as.character(tests$type))
```

```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
# `classrate` is not assigned in this file's visible chunks; presumably the
# child document dispres.Rmd derives it from `cm` -- TODO confirm.
cmat[4, 1] <- classrate
```

### RPART

```{r}
# Stage 1: transient vs. non-variable.
roz <- rpart(vtform, data = trains)
pv <- predict(roz, tests, type = "class")
utests <- subset(tests, pv != "nv")
pvt <- predict(roz, trains, type = "class")
utrains <- subset(trains, pvt != "nv")
# Stage 2: transient type on the cases stage 1 did not call "nv".
roz <- rpart(trform, data = utrains)
predc <- as.character(pv)
predc[predc != "nv"] <- as.character(predict(roz, utests, type = "class"))
predc <- factor(predc, levels = sort(levels(trains$type)))
cm <- xtabs(~ predc + as.character(tests$type))
```

```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
cmat[4, 2] <- classrate
```

### SVM

```{r}
# Stage 1: transient vs. non-variable.
svmod <- ksvm(vtform, data = trains)
pv <- predict(svmod, tests)
utests <- subset(tests, pv != "nv")
pvt <- predict(svmod, trains)
utrains <- subset(trains, pvt != "nv")
# Stage 2: transient type on the cases stage 1 did not call "nv".
svmod <- ksvm(trform, data = utrains)
predc <- as.character(pv)
predc[predc != "nv"] <- as.character(predict(svmod, utests))
predc <- factor(predc, levels = sort(levels(trains$type)))
cm <- xtabs(~ predc + as.character(tests$type))
```

```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
cmat[4, 3] <- classrate
```

### NNET

```{r}
# Stage 1: transient vs. non-variable.
svmod <- multinom(vtform, data = trains, trace = FALSE, maxit = 1000, decay = 5e-4)
pv <- predict(svmod, tests)
utests <- subset(tests, pv != "nv")
pvt <- predict(svmod, trains)
utrains <- subset(trains, pvt != "nv")
# Stage 2 is fitted on `utrains`, matching the LDA, RPART and SVM sections;
# fitting on `trains` would leave `utrains` unused and simply repeat the
# "Transients only" model.
svmod <- multinom(trform, data = utrains, trace = FALSE, maxit = 1000, decay = 5e-4)
predc <- as.character(pv)
predc[predc != "nv"] <- as.character(predict(svmod, utests))
predc <- factor(predc, levels = sort(levels(trains$type)))
cm <- xtabs(~ predc + as.character(tests$type))
```

```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
cmat[4, 4] <- classrate
```

### Random Forest

```{r}
# Stage 1: transient vs. non-variable, using the forest's own right-hand
# side `tpredform`.
tallform <- as.formula(paste("vtype ~", tpredform))
svmod <- randomForest(tallform, data = trains)
pv <- predict(svmod, tests)
utests <- subset(tests, pv != "nv")
pvt <- predict(svmod, trains)
utrains <- subset(trains, pvt != "nv")
# Stage 2 is fitted on `utrains`, matching the other hierarchical sections.
# na.omit() drops rows containing an NA, which includes those whose ttype is
# NA; droplevels() on the response guards against a transient type with no
# rows left, because randomForest() rejects empty response classes.
tallform <- as.formula(paste("ttype ~", tpredform))
svmod <- randomForest(
  tallform,
  data = transform(na.omit(utrains), ttype = droplevels(ttype))
)
predc <- as.character(pv)
predc[predc != "nv"] <- as.character(predict(svmod, utests))
predc <- factor(predc, levels = sort(levels(trains$type)))
cm <- xtabs(~ predc + as.character(tests$type))
```

```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
cmat[4, 5] <- classrate
```

## Summary of results

Summary of percentage classification rates across tests:

```{r results='asis'}
# Rates are stored as proportions in `cmat`; scale to percentages for display.
print(xtable(100 * cmat, digits = 2), type = "html")
```