The choice of the smoothing parameter in nonparametric regression is critical to the form of the estimated curve and any inference that follows. Many methods are available that will generate a single choice for this parameter. Here we argue that the considerable uncertainty in this choice should be explicitly represented.
The construction of standard simultaneous confidence bands in nonparametric regression often requires difficult mathematical arguments. We question their practical utility, presenting several deficiencies. We propose a new kind of confidence band that reflects the uncertainty regarding the smoothness of the estimate.
We use the following:
Download these if you want to reproduce the following:
Read in AGN data
agn <- read.table("agn.dat",header=TRUE)
Compute default SCBs using lowess:
datarange=2764
agn$mjd <- agn$mjd - 53464
library(ggplot2,quietly=TRUE)
p <- ggplot(agn ,aes(x=mjd, y=mag))
p <- p+xlab("Day")+ ylab("Magnitude") + scale_y_reverse()+xlim(0,datarange)
p <- p+geom_point(size=0.75)
slow <- p + geom_smooth()
slow
Now use smoothing splines via mgcv:
sspl <- p + geom_smooth(method="gam", formula = y ~ s(x, bs = "cs"))
sspl
Set up implausible curves:
library(mgcv)
amod <- gam(mag ~ s(mjd, bs="cs"), data=agn)
ap <- plot(amod)[[1]]
bdf <- data.frame(Day=ap$x, Magnitude=ap$fit+mean(agn$mag))
bdf$ucb <- bdf$Magnitude - 2 * ap$se
bdf$lcb <- bdf$Magnitude + 2 * ap$se
bdf$zig <- bdf$Magnitude + (((1:nrow(bdf)) %% 2)*2-1) * ap$se
bdf$jl <- bdf$ucb+0.1
Set up the plots:
p <- ggplot(bdf,aes(x=Day, y=Magnitude))
p <- p+ scale_y_reverse()+xlim(0,datarange)
p <- p+geom_line(aes(y=ucb),color="grey")
p <- p+geom_line(aes(y=lcb),color="grey")
pzig <- p+geom_line(aes(y=zig))
pjl <- p+geom_line(aes(y=jl))
First plot of the second Figure in the paper:
pzig
Second plot of the second Figure in the paper:
pjl
Load in R functions. Set the seed for reproducibility (may turn out differently on other OS).
source("gpr.R")
set.seed(123)
Set up the base plot:
p <- ggplot(agn, aes(mjd, mag)) + geom_point() + xlab("Day")+ylab("Magnitude")+scale_y_reverse()+xlim(0,datarange)
xj <- agn$mjd
yj <- agn$mag
Initialize the full Bayes fit process:
stanmod <- gpinit()
Run the chains:
medianresponse <- median(yj)
priorfun <- function(x) rep(medianresponse,length=length(x))
bgp <- gpfit(xj, yj, stanmod, priory=priorfun)
Plot the posterior mean at the median values of posteriors on the tuning parameters:
parext <- extract(bgp, pars=c("eta_sq", "rho_sq", "sigma_sq"))
xgrid <- creategrid(xj)
gpbm <- gpreg(xj, yj, grid=xgrid, invwid=2*median(parext$rho_sq), noisevar=median(parext$sigma_sq), sigvar=median(parext$eta_sq), priormean=priorfun(xgrid))
ldf <- data.frame(x=gpbm$grid, y=gpbm$predict)
p+geom_line(data=ldf, aes(x,y))
Plot the 95% smoothness bands:
gpbm <- gpreg(xj, yj, grid=xgrid, invwid=2*quantile(parext$rho_sq,0.025), noisevar=median(parext$sigma_sq), sigvar=median(parext$eta_sq), priormean=priorfun(xgrid))
ldf1 <- data.frame(x=gpbm$grid, y=gpbm$predict)
gpbm <- gpreg(xj, yj, grid=xgrid, invwid=2*quantile(parext$rho_sq,0.975), noisevar=median(parext$sigma_sq), sigvar=median(parext$eta_sq), priormean=priorfun(xgrid))
ldf2 <- data.frame(x=gpbm$grid, y=gpbm$predict)
p+geom_line(data=ldf1, aes(x,y),color='red')+geom_line(data=ldf2, aes(x,y),color='blue')
Set up and plot the data:
fossil <- read.table("fossil.dat",header=TRUE)
fossil$sr <- (fossil$sr-0.7)*100
p <- ggplot(fossil, aes(age, sr)) + geom_point() + xlab("Age")+ylab("Strontium Ratio")
xj <- fossil$age
yj <- fossil$sr
Run the chains:
medianresponse <- median(yj)
priorfun <- function(x) rep(medianresponse,length=length(x))
bgp <- gpfit(xj, yj, stanmod, priory=priorfun)
Plot the posterior mean at the median values of posteriors on the tuning parameters:
parext <- extract(bgp, pars=c("eta_sq", "rho_sq", "sigma_sq"))
xgrid <- creategrid(xj)
gpbm <- gpreg(xj, yj, grid=xgrid, invwid=2*median(parext$rho_sq), noisevar=median(parext$sigma_sq), sigvar=median(parext$eta_sq), priormean=priorfun(xgrid))
ldf <- data.frame(x=gpbm$grid, y=gpbm$predict)
p+geom_line(data=ldf, aes(x,y))
Plot the 95% smoothness bands:
gpbm <- gpreg(xj, yj, grid=xgrid, invwid=2*quantile(parext$rho_sq,0.025), noisevar=median(parext$sigma_sq), sigvar=median(parext$eta_sq), priormean=priorfun(xgrid))
ldf1 <- data.frame(x=gpbm$grid, y=gpbm$predict)
gpbm <- gpreg(xj, yj, grid=xgrid, invwid=2*quantile(parext$rho_sq,0.975), noisevar=median(parext$sigma_sq), sigvar=median(parext$eta_sq), priormean=priorfun(xgrid))
ldf2 <- data.frame(x=gpbm$grid, y=gpbm$predict)
p+geom_line(data=ldf1, aes(x,y),color='red')+geom_line(data=ldf2, aes(x,y),color='blue')