### Creation of model formulae and subset testing and training sets ###

Formula for classification of all types:


```{r}
# Formula for the all-types task: `type` is the response and the
# right-hand side is taken from the predictor string `predform`.
allform <- as.formula(
  paste("type ~", predform)
)
```

Transients vs. non-variables:

```{r}
# Two-level response: "nv" (non-variable) versus "tr" (every other type),
# built the same way for the training and the test set.
trains$vtype <- factor(ifelse(trains$type == "nv", "nv", "tr"))
tests$vtype <- factor(ifelse(tests$type == "nv", "nv", "tr"))

# Formula for the transient vs. non-variable task.
vtform <- as.formula(paste("vtype ~", predform))
```

Transients only:

```{r}
# Transient-type response: a copy of `type` in which the non-variable
# ("nv") cases are set to NA; re-running factor() then drops the unused
# "nv" level.
trains$ttype <- factor(replace(trains$type, trains$type == "nv", NA))
tests$ttype <- factor(replace(tests$type, tests$type == "nv", NA))

# Formula for the transients-only task.
trform <- as.formula(paste("ttype ~", predform))
```

```{r}
# Table of classification rates: one row per task, one column per
# classifier. Cells stay NA until the matching fit has been scored.
cmat <- matrix(
  NA,
  nrow = 4,
  ncol = 5,
  dimnames = list(
    c("All", "TranNoTran", "Tranonly", "Heirarch"),
    c("LDA", "RPart", "SVM", "NN", "Forest")
  )
)
```

## All types

### LDA

Linear Discriminant analysis using the default options.

We produce the cross-classification between predicted and observed class. Note that the default priors are the proportions found in the training set.

```{r}
# Fit LDA for all types on the training set.
ldamod <- lda(allform, data = trains)

# Predict the test set and cross-classify predicted against observed type.
pv <- predict(ldamod, newdata = tests)
cm <- xtabs(~ pv$class + tests$type)
```

```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
# Store the rate for the all-types LDA fit in the summary table.
cmat["All", "LDA"] <- classrate
```

### Recursive Partitioning

```{r}
# Grow a classification tree for all types and draw it.
roz <- rpart(allform, data = trains)
rpart.plot(roz, type = 1, extra = 1)

# Predicted class labels for the test set, tabulated against observed type.
pv <- predict(roz, newdata = tests, type = "class")
cm <- xtabs(~ pv + tests$type)
```

```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
# Store the rate for the all-types tree in the summary table.
cmat["All", "RPart"] <- classrate
```


### Support Vector Machines

Use the default settings from the *kernlab* R package:

```{r}
# Fit a support vector machine for all types with ksvm() defaults.
svmod <- ksvm(allform, data = trains)

# Predict the test set and cross-classify predicted against observed type.
pv <- predict(svmod, newdata = tests)
cm <- xtabs(~ pv + tests$type)
```
```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```
```{r echo=FALSE}
# Store the rate for the all-types SVM in the summary table.
cmat["All", "SVM"] <- classrate
```


### Neural Net

Use the `multinom()` function from the *nnet* R package. This might work
better if the predictors were scaled first.

```{r}
# Fit multinom() for all types. The fitted object reuses the name `svmod`.
svmod <- multinom(
  allform,
  data = trains,
  trace = FALSE,
  maxit = 1000,
  decay = 5e-4
)

# Predict the test set and cross-classify predicted against observed type.
pv <- predict(svmod, newdata = tests)
cm <- xtabs(~ pv + tests$type)
```
```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```
```{r echo=FALSE}
# Store the rate for the all-types multinom fit in the summary table.
cmat["All", "NN"] <- classrate
```


### Random Forest

Use the *randomForest* package with the default settings:

```{r}
# The forest uses its own predictor string, `tpredform`.
tallform <- as.formula(paste("type ~", tpredform))

# Fit a random forest for all types with the package defaults.
fmod <- randomForest(tallform, data = trains)

# Predict the test set and cross-classify predicted against observed type.
pv <- predict(fmod, newdata = tests)
cm <- xtabs(~ pv + tests$type)
```
```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
# Keep a copy of the all-types random forest cross-classification.
confmat <- cm
# Store the rate for the all-types random forest in the summary table.
cmat["All", "Forest"] <- classrate
```


## Transients vs. non-variables

### LDA

Linear Discriminant analysis using the default options.

We produce the cross-classification between predicted and observed class. Note that the default priors are the proportions found in the training set.

```{r}
# Fit LDA for transient vs. non-variable on the training set.
ldamod <- lda(vtform, data = trains)

# Predict the test set and cross-classify predicted against observed vtype.
pv <- predict(ldamod, newdata = tests)
cm <- xtabs(~ pv$class + tests$vtype)
```
```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```
```{r echo=FALSE}
# Store the rate for the transient vs. non-variable LDA fit.
cmat["TranNoTran", "LDA"] <- classrate
```


### Recursive Partitioning

```{r}
# Grow a classification tree for transient vs. non-variable and draw it.
roz <- rpart(vtform, data = trains)
rpart.plot(roz, type = 1, extra = 1)

# Predicted class labels for the test set, tabulated against observed vtype.
pv <- predict(roz, newdata = tests, type = "class")
cm <- xtabs(~ pv + tests$vtype)
```

```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```
```{r echo=FALSE}
# Store the rate for the transient vs. non-variable tree.
cmat["TranNoTran", "RPart"] <- classrate
```


### Support Vector Machines

Use the default settings from the *kernlab* R package:

```{r}
# Fit a support vector machine for transient vs. non-variable.
svmod <- ksvm(vtform, data = trains)

# Predict the test set and cross-classify predicted against observed vtype.
pv <- predict(svmod, newdata = tests)
cm <- xtabs(~ pv + tests$vtype)
```
```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
# Store the rate for the transient vs. non-variable SVM.
cmat["TranNoTran", "SVM"] <- classrate
```

### Neural Net

Use the `multinom()` function from the *nnet* R package. This might work
better if the predictors were scaled first.

```{r}
# Fit multinom() for transient vs. non-variable. The fitted object reuses
# the name `svmod`.
svmod <- multinom(
  vtform,
  data = trains,
  trace = FALSE,
  maxit = 1000,
  decay = 5e-4
)

# Predict the test set and cross-classify predicted against observed vtype.
pv <- predict(svmod, newdata = tests)
cm <- xtabs(~ pv + tests$vtype)
```
```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```
```{r echo=FALSE}
# Store the rate for the transient vs. non-variable multinom fit.
cmat["TranNoTran", "NN"] <- classrate
```


### Random Forest

Use the *randomForest* package with the default settings:

```{r}
# The forest uses its own predictor string, `tpredform`.
tallform <- as.formula(paste("vtype ~", tpredform))

# Fit a random forest for transient vs. non-variable.
fmod <- randomForest(tallform, data = trains)

# Predict the test set and cross-classify predicted against observed vtype.
pv <- predict(fmod, newdata = tests)
cm <- xtabs(~ pv + tests$vtype)
```
```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
# Store the rate for the transient vs. non-variable random forest.
cmat["TranNoTran", "Forest"] <- classrate
```


## Transients only

### LDA

Linear Discriminant analysis using the default options.

We produce the cross-classification between predicted and observed class. Note that the default priors are the proportions found in the training set.

```{r}
# Fit LDA for the type of transient on the training set.
ldamod <- lda(trform, data = trains)

# Predict the test set and cross-classify predicted against observed ttype.
pv <- predict(ldamod, newdata = tests)
cm <- xtabs(~ pv$class + tests$ttype)
```
```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```
```{r echo=FALSE}
# Store the rate for the transients-only LDA fit.
cmat["Tranonly", "LDA"] <- classrate
```


### Recursive Partitioning

```{r}
# Grow a classification tree for the type of transient and draw it.
roz <- rpart(trform, data = trains)
rpart.plot(roz, type = 1, extra = 1)

# Predicted class labels for the test set, tabulated against observed ttype.
pv <- predict(roz, newdata = tests, type = "class")
cm <- xtabs(~ pv + tests$ttype)
```

```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```
```{r echo=FALSE}
# Store the rate for the transients-only tree.
cmat["Tranonly", "RPart"] <- classrate
```


### Support Vector Machines

Use the default settings from the *kernlab* R package:

```{r}
# Fit a support vector machine for the type of transient.
svmod <- ksvm(trform, data = trains)

# Predict the test set and cross-classify predicted against observed ttype.
pv <- predict(svmod, newdata = tests)
cm <- xtabs(~ pv + tests$ttype)
```
```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```
```{r echo=FALSE}
# Store the rate for the transients-only SVM.
cmat["Tranonly", "SVM"] <- classrate
```

### Neural Net

Use the `multinom()` function from the *nnet* R package. This might work
better if the predictors were scaled first.

```{r}
# Fit multinom() for the type of transient. The fitted object reuses the
# name `svmod`.
svmod <- multinom(
  trform,
  data = trains,
  trace = FALSE,
  maxit = 1000,
  decay = 5e-4
)

# Predict the test set and cross-classify predicted against observed ttype.
pv <- predict(svmod, newdata = tests)
cm <- xtabs(~ pv + tests$ttype)
```
```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```
```{r echo=FALSE}
# Store the rate for the transients-only multinom fit.
cmat["Tranonly", "NN"] <- classrate
```


### Random Forest

Use the *randomForest* package with the default settings:

```{r}
# The forest uses its own predictor string, `tpredform`.
tallform <- as.formula(paste("ttype ~", tpredform))

# na.omit() drops every row containing an NA. That includes all
# non-variable cases, because their ttype was set to NA.
fmod <- randomForest(tallform, data = na.omit(trains))

# Predict the test rows that survive na.omit() and cross-classify
# predicted against observed ttype for those same rows.
pv <- predict(fmod, newdata = na.omit(tests))
cm <- xtabs(~ pv + na.omit(tests)$ttype)
```
```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
# Store the rate for the transients-only random forest.
cmat["Tranonly", "Forest"] <- classrate
```


## Hierarchical Classification

First we classify into transient and non-variable. The cases which are
classified as transient are then classified into type of
transient. The transient classification here is different from the one
above in the data used. Above, all the data are known to be transients
whereas here some cases from the non-variable set will have been
classified as transient at the first stage.

### LDA

```{r}
# Stage 1: separate transients from non-variables using all training data,
# then predict both the test set and the training set.
ldamod <- lda(vtform, data = trains)
pv <- predict(ldamod, newdata = tests)
pvt <- predict(ldamod, newdata = trains)

# Keep only the cases that stage 1 did not label as non-variable.
utests <- subset(tests, pv$class != "nv")
utrains <- subset(trains, pvt$class != "nv")

# Stage 2: classify the type of transient, trained on the stage-1 subset.
ldamod <- lda(trform, data = utrains)

# Start from the stage-1 labels and overwrite every non-"nv" label with
# the stage-2 prediction for the corresponding row of utests.
predc <- as.character(pv$class)
predc[predc != "nv"] <- as.character(
  predict(ldamod, newdata = utests)$class
)

# Use the full, sorted set of type levels, as the other hierarchical fits
# do, so that a class which is never predicted still gets a row in the
# cross-classification.
predc <- factor(predc, levels = sort(levels(trains$type)))
cm <- xtabs(~ predc + as.character(tests$type))
```
```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```
```{r echo=FALSE}
# Store the rate for the hierarchical LDA fit.
cmat["Heirarch", "LDA"] <- classrate
```


### RPART

```{r}
# Stage 1: separate transients from non-variables using all training data,
# then predict both the test set and the training set.
roz <- rpart(vtform, data = trains)
pv <- predict(roz, newdata = tests, type = "class")
pvt <- predict(roz, newdata = trains, type = "class")

# Keep only the cases that stage 1 did not label as non-variable.
utests <- subset(tests, pv != "nv")
utrains <- subset(trains, pvt != "nv")

# Stage 2: classify the type of transient, trained on the stage-1 subset.
roz <- rpart(trform, data = utrains)

# Start from the stage-1 labels and overwrite every non-"nv" label with
# the stage-2 prediction for the corresponding row of utests.
predc <- as.character(pv)
predc[predc != "nv"] <- as.character(
  predict(roz, newdata = utests, type = "class")
)

# Use the full, sorted set of type levels before cross-classifying.
predc <- factor(predc, levels = sort(levels(trains$type)))
cm <- xtabs(~ predc + as.character(tests$type))
```
```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
# Store the rate for the hierarchical tree fit.
cmat["Heirarch", "RPart"] <- classrate
```


### SVM


```{r}
# Stage 1: separate transients from non-variables using all training data,
# then predict both the test set and the training set.
svmod <- ksvm(vtform, data = trains)
pv <- predict(svmod, newdata = tests)
pvt <- predict(svmod, newdata = trains)

# Keep only the cases that stage 1 did not label as non-variable.
utests <- subset(tests, pv != "nv")
utrains <- subset(trains, pvt != "nv")

# Stage 2: classify the type of transient, trained on the stage-1 subset.
svmod <- ksvm(trform, data = utrains)

# Start from the stage-1 labels and overwrite every non-"nv" label with
# the stage-2 prediction for the corresponding row of utests.
predc <- as.character(pv)
predc[predc != "nv"] <- as.character(predict(svmod, newdata = utests))

# Use the full, sorted set of type levels before cross-classifying.
predc <- factor(predc, levels = sort(levels(trains$type)))
cm <- xtabs(~ predc + as.character(tests$type))
```
```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
# Store the rate for the hierarchical SVM fit.
cmat["Heirarch", "SVM"] <- classrate
```

### NNET


```{r}
# Stage 1: separate transients from non-variables using all training data,
# then predict both the test set and the training set. The fitted object
# reuses the name `svmod`.
svmod <- multinom(
  vtform,
  data = trains,
  trace = FALSE,
  maxit = 1000,
  decay = 5e-4
)
pv <- predict(svmod, newdata = tests)
pvt <- predict(svmod, newdata = trains)

# Keep only the cases that stage 1 did not label as non-variable.
utests <- subset(tests, pv != "nv")
utrains <- subset(trains, pvt != "nv")

# Stage 2: classify the type of transient. Train on `utrains` (the
# stage-1 subset), as the LDA, RPART and SVM hierarchical fits do; the
# previous code trained on the full `trains` and left `utrains` unused.
svmod <- multinom(
  trform,
  data = utrains,
  trace = FALSE,
  maxit = 1000,
  decay = 5e-4
)

# Start from the stage-1 labels and overwrite every non-"nv" label with
# the stage-2 prediction for the corresponding row of utests.
predc <- as.character(pv)
predc[predc != "nv"] <- as.character(predict(svmod, newdata = utests))

# Use the full, sorted set of type levels before cross-classifying.
predc <- factor(predc, levels = sort(levels(trains$type)))
cm <- xtabs(~ predc + as.character(tests$type))
```
```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
# Store the rate for the hierarchical multinom fit.
cmat["Heirarch", "NN"] <- classrate
```

### Random Forest


```{r}
# Stage 1: separate transients from non-variables using all training data,
# then predict both the test set and the training set. The fitted object
# reuses the name `svmod`.
tallform <- as.formula(paste("vtype ~", tpredform))
svmod <- randomForest(tallform, data = trains)
pv <- predict(svmod, newdata = tests)
pvt <- predict(svmod, newdata = trains)

# Keep only the cases that stage 1 did not label as non-variable.
utests <- subset(tests, pv != "nv")
utrains <- subset(trains, pvt != "nv")

# Stage 2: classify the type of transient. Train on `utrains` (the
# stage-1 subset), as the LDA, RPART and SVM hierarchical fits do; the
# previous code trained on the full `trains` and left `utrains` unused.
# na.omit() drops every row containing an NA, including non-variable
# cases that stage 1 let through, because their ttype is NA.
tallform <- as.formula(paste("ttype ~", tpredform))
svmod <- randomForest(tallform, data = na.omit(utrains))

# Start from the stage-1 labels and overwrite every non-"nv" label with
# the stage-2 prediction for the corresponding row of utests.
predc <- as.character(pv)
predc[predc != "nv"] <- as.character(predict(svmod, newdata = utests))

# Use the full, sorted set of type levels before cross-classifying.
predc <- factor(predc, levels = sort(levels(trains$type)))
cm <- xtabs(~ predc + as.character(tests$type))
```
```{r child="dispres.Rmd",eval=TRUE,quiet=TRUE}
```

```{r echo=FALSE}
# Store the rate for the hierarchical random forest fit.
cmat["Heirarch", "Forest"] <- classrate
```

## Summary of results

Summary of percentage classification rates across tests:

```{r results='asis'}
# Scale the stored rates by 100 and emit the table as HTML.
print(
  xtable(100 * cmat, digits = 2),
  type = "html"
)
```