[1] "Created: Wed Jun 25 09:34:12 2014"
Load in the new version of the original data. Load code from previous work.
load("oldtranrev.rda")
source("funcs.R")
GPR <- TRUE
Set the kernel type to squared exponential and set the inverse width:
kerntype <- "exponential"
wvec <- 2e-4
Use the same class labels as previously.
levels(lcdbi$type) <- c("agn","blazar","cv","flare","sn")
Investigate the range of observation. Compute the date of the first observation:
fd <- sapply(lcdb, function(x) min(x$mjd))
plot(fd, ylab="MJD")
head(sort(fd))
[1] 63.44 63.44 63.44 63.44 63.44 63.44
MJDs have been offset by 53249 so the actual start date is 53249+63 which is 53255.
Now consider the range of observation:
robv <- sapply(lcdb, function(x) max(x$mjd) - min(fd))
max(robv)
[1] 3500
plot(robv, ylab="Range MJD")
abline(h=max(robv))
Upper bound on range of 3501 days looks good for this data.
Create the measurement grouping variable
for(i in 1:length(lcdb)){
lc <- lcdb[[i]]
lc$measgrp <- factor(c(0,cumsum(ifelse(diff(lc$mjd) < 1, 0, 1))))
lcdb[[i]] <- lc
}
For each light curve, we compute - proportion of observations with magnitudes greater than 20.5, 21 and 21.5 - the maximum magnitude
dlmat <- matrix(NA,length(lcdb),4)
for(i in 1:length(lcdb)){
lc <- lcdb[[i]]
nobs <- nrow(lc)
dl1 <- sum(lc$mag > 20.5)/nobs
dl2 <- sum(lc$mag > 21)/nobs
dl3 <- sum(lc$mag > 21.5)/nobs
dl4 <- max(lc$mag)
dlmat[i,] <- c(dl1, dl2, dl3, dl4)
}
colnames(dlmat) <- c("d20.5","d21","d21.5","min")
dlmat <- data.frame(dlmat)
summary(dlmat)
d20.5 d21 d21.5 min
Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :14.6
1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:20.1
Median :0.0714 Median :0.0057 Median :0.00000 Median :21.0
Mean :0.1393 Mean :0.0600 Mean :0.00256 Mean :20.6
3rd Qu.:0.2345 3rd Qu.:0.0947 3rd Qu.:0.00000 3rd Qu.:21.3
Max. :0.8095 Max. :0.6000 Max. :0.20000 Max. :24.2
A detection limit of 20.5 as used on the original data seems unreasonable here given the significant proportion of observations that go below this limit.
Now ready to compute the measures (using a detection limit of 21.5)
firstdate <- 63
daterange <- 3501
detection.limit <- 21.5
uids <- lcdbi$id
nuids <- length(uids)
statmat <- NULL
types <- NULL
ids <- NULL
for(i in 1:nuids){
olc <- lcdb[[i]]
type <- as.character(lcdbi$type[i])
id <- as.character(lcdbi$id[i])
aa <- lcstats(olc,GPR,kerntype,wvec,daterange=daterange,firstdate=firstdate,detection.limit=detection.limit)
if(is.na(aa[1])) next
statmat <- rbind(statmat,aa)
types <- c(types,type)
ids <- c(ids,id)
}
colnames(statmat) <- c("meds", "iqr","shov", "maxdiff", "dscore","wander", "moveloc","nobs", "totvar", "quadvar", "famp", "fslope","trend",
"outl", "skewres", "std", "shapwilk", "lsd", "gscore", "mdev", "gtvar", "rm.amplitude", "rm.mean",
"rm.stddev", "rm.beyond1std", "rm.fpr20", "rm.fpr35", "rm.fpr50",
"rm.fpr65", "rm.fpr80", "rm.lintrend", "rm.maxslope", "rm.mad",
"rm.medbuf", "rm.pairslope", "rm.peramp", "rm.pdfp", "rm.skew",
"rm.kurtosis", "rm.std", "rm.rcorbor")
cmdb <- data.frame(statmat,type=types,id=ids,row.names=ids)
cmdb$lsd[is.na(cmdb$lsd) | is.infinite(cmdb$lsd)] <- min(cmdb$lsd[!is.infinite(cmdb$lsd)],na.rm=TRUE)-1
cmdb$fslope[is.infinite(cmdb$fslope)] <- 1
cmdb$wander <- NULL
cmdb$moveloc <- NULL
summary(cmdb)
meds iqr shov maxdiff
Min. :13.5 Min. :0.040 Min. :0.0564 Min. :0.287
1st Qu.:18.4 1st Qu.:0.598 1st Qu.:0.3267 1st Qu.:1.556
Median :19.2 Median :0.825 Median :0.4503 Median :2.149
Mean :19.0 Mean :1.010 Mean :0.4643 Mean :2.488
3rd Qu.:19.9 3rd Qu.:1.232 3rd Qu.:0.5649 3rd Qu.:3.187
Max. :21.1 Max. :5.454 Max. :1.5393 Max. :7.074
dscore nobs totvar quadvar
Min. :0.0000 Min. : 5 Min. :0.0013 Min. :0.0000
1st Qu.:0.0598 1st Qu.: 20 1st Qu.:0.0262 1st Qu.:0.0023
Median :0.0915 Median : 61 Median :0.0389 Median :0.0046
Mean :0.0932 Mean :127 Mean :0.0597 Mean :0.0171
3rd Qu.:0.1208 3rd Qu.:233 3rd Qu.:0.0706 3rd Qu.:0.0142
Max. :0.2960 Max. :550 Max. :0.8649 Max. :1.7567
famp fslope trend outl
Min. : 0.19 Min. :0.015 Min. :-7.27e-04 Min. : 1.10
1st Qu.: 1.74 1st Qu.:0.177 1st Qu.:-6.42e-05 1st Qu.: 2.21
Median : 2.55 Median :0.288 Median : 1.06e-05 Median : 2.84
Mean : 3.28 Mean :0.391 Mean : 6.50e-06 Mean : 3.21
3rd Qu.: 4.09 3rd Qu.:0.502 3rd Qu.: 7.60e-05 3rd Qu.: 3.69
Max. :41.15 Max. :4.961 Max. : 7.39e-04 Max. :14.46
skewres std shapwilk lsd
Min. :-3.131 Min. :0.0339 Min. :0.387 Min. :-3.479
1st Qu.:-0.480 1st Qu.:0.2808 1st Qu.:0.902 1st Qu.:-1.738
Median :-0.127 Median :0.3870 Median :0.952 Median :-1.268
Mean :-0.014 Mean :0.4329 Mean :0.925 Mean :-1.392
3rd Qu.: 0.307 3rd Qu.:0.5112 3rd Qu.:0.976 3rd Qu.:-0.979
Max. : 8.590 Max. :1.8424 Max. :0.997 Max. :-0.239
gscore mdev gtvar rm.amplitude
Min. :0.0000 Min. :0.042 Min. : 0.06 Min. :0.16
1st Qu.:0.0738 1st Qu.:0.410 1st Qu.: 5.36 1st Qu.:1.22
Median :0.1541 Median :0.694 Median : 14.70 Median :1.52
Mean :0.1583 Mean :0.734 Mean : 22.05 Mean :1.68
3rd Qu.:0.2451 3rd Qu.:0.993 3rd Qu.: 30.32 3rd Qu.:2.09
Max. :0.3959 Max. :3.960 Max. :163.52 Max. :4.05
rm.mean rm.stddev rm.beyond1std rm.fpr20
Min. :5.00e-09 Min. :2.20e-09 Min. :0.0209 Min. :0.0008
1st Qu.:1.50e-08 1st Qu.:8.80e-09 1st Qu.:0.2041 1st Qu.:0.0730
Median :3.00e-08 Median :1.90e-08 Median :0.2902 Median :0.1301
Mean :7.80e-08 Mean :9.10e-08 Mean :0.2845 Mean :0.1527
3rd Qu.:6.90e-08 3rd Qu.:6.63e-08 3rd Qu.:0.3582 3rd Qu.:0.1894
Max. :4.67e-06 Max. :2.76e-06 Max. :0.6364 Max. :0.8947
rm.fpr35 rm.fpr50 rm.fpr65 rm.fpr80
Min. :0.0019 Min. :0.0031 Min. :0.0063 Min. :0.0217
1st Qu.:0.1570 1st Qu.:0.2669 1st Qu.:0.4354 1st Qu.:0.6912
Median :0.2522 Median :0.3982 Median :0.5776 Median :0.8035
Mean :0.2688 Mean :0.3983 Mean :0.5528 Mean :0.7519
3rd Qu.:0.3529 3rd Qu.:0.5245 3rd Qu.:0.7237 3rd Qu.:0.9000
Max. :0.9576 Max. :0.9719 Max. :0.9842 Max. :0.9956
rm.lintrend rm.maxslope rm.mad rm.medbuf
Min. :-0.09171 Min. : 1.2 Min. :0.0225 Min. :0.389
1st Qu.:-0.00015 1st Qu.: 14.8 1st Qu.:0.2774 1st Qu.:0.914
Median : 0.00003 Median : 26.2 Median :0.3894 Median :0.994
Mean : 0.00261 Mean : 28.7 Mean :0.4446 Mean :0.942
3rd Qu.: 0.00041 3rd Qu.: 38.9 3rd Qu.:0.5507 3rd Qu.:1.000
Max. : 0.20396 Max. :456.0 Max. :2.0051 Max. :1.000
rm.pairslope rm.peramp rm.pdfp rm.skew
Min. :0.143 Min. : 0.2 Min. : 0.14 Min. :-9.391
1st Qu.:0.467 1st Qu.: 1.1 1st Qu.: 1.34 1st Qu.:-0.855
Median :0.500 Median : 2.6 Median : 1.86 Median : 0.002
Mean :0.516 Mean : 14.3 Mean : 6.48 Mean :-0.300
3rd Qu.:0.567 3rd Qu.: 8.8 3rd Qu.: 3.78 3rd Qu.: 0.559
Max. :1.000 Max. :852.1 Max. :264.48 Max. : 2.920
rm.kurtosis rm.std rm.rcorbor type
Min. : 0.78 Min. :0.113 Min. :0.0000 agn :139
1st Qu.: 1.91 1st Qu.:0.557 1st Qu.:0.0000 blazar:124
Median : 2.68 Median :0.730 Median :0.0000 cv :458
Mean : 4.87 Mean :0.819 Mean :0.0423 flare : 64
3rd Qu.: 4.46 3rd Qu.:1.011 3rd Qu.:0.0438 sn :533
Max. :118.19 Max. :2.886 Max. :0.4815
id
1001081010294126339: 1
1001081010294129406: 1
1001081010444113640: 1
1001081090174129448: 1
1001081090204115993: 1
1001121150154120180: 1
(Other) :1312
mnames <- names(cmdb)
for(i in 1:(ncol(cmdb)-2)){
vname <- mnames[i]
tranf <- functran[[match(vname,names(functran))]]
y <- sapply(cmdb[,i], tranf)
ylab <- ifelse(tranf=="identity",mnames[i],paste0(tranf,"(",mnames[i],")"))
plot(cmdb[,i] ~ cmdb$type, ylab=ylab, xlab="Type")
}
cmd5 <- cmdb
save(cmd5,file="featrev5.rda")