[1] "Created: Tue Jun 24 12:28:37 2014"
Load in the new data. Load code from previous work.
load("lc100k.rda")
row.names(lcdbi) <- lcdbi$id
lcdbi$id <- NULL
lcdbi$type <- factor(lcdbi$type)
source("funcs.R")
GPR <- TRUE
Set the kernel type to squared exponential and set the inverse width:
kerntype <- "exponential"
wvec <- 2e-4
summary(lcdbi)
nobs type
Min. : 41 nv:50000
1st Qu.: 95 tr:50000
Median :153
Mean :153
3rd Qu.:207
Max. :360
We see no short light curves.
Investigate the range of observation. Compute the date of the first observation:
fd <- sapply(lcdb, function(x) min(x$mjd))
min(fd)
[1] 53464
We see that 53464 is indeed the start date.
robv <- sapply(lcdb, function(x) max(x$mjd) - min(x$mjd))
max(robv)
[1] 2221
Seems we have substantially shorter range of observation in this dataset than the 2764 used previously.
Sort the lightcurves to be monotonely increasing in date and create the measurement grouping variable:
for(i in 1:length(lcdb)){
lc <- lcdb[[i]]
ii <- order(lc$mjd)
lc <- lc[ii,]
lc$measgrp <- factor(c(0,cumsum(ifelse(diff(lc$mjd) < 1, 0, 1))))
lcdb[[i]] <- lc
}
For each light curve, we compute - proportion of observations with magnitudes greater than 20.5, 21 and 21.5 - the maximum magnitude
dlmat <- matrix(NA,length(lcdb),4)
for(i in 1:length(lcdb)){
lc <- lcdb[[i]]
nobs <- nrow(lc)
dl1 <- sum(lc$mag > 20.5)/nobs
dl2 <- sum(lc$mag > 21)/nobs
dl3 <- sum(lc$mag > 21.5)/nobs
dl4 <- max(lc$mag)
dlmat[i,] <- c(dl1, dl2, dl3, dl4)
}
colnames(dlmat) <- c("d20.5","d21","d21.5","min")
dlmat <- data.frame(dlmat)
summary(dlmat)
d20.5 d21 d21.5 min
Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :12.1
1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:17.8
Median :0.0000 Median :0.0000 Median :0.00000 Median :19.4
Mean :0.0402 Mean :0.0084 Mean :0.00002 Mean :19.1
3rd Qu.:0.0144 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:20.8
Max. :0.7073 Max. :0.4000 Max. :0.04878 Max. :23.7
Looks like 20.5 is a suitable detection limit here.
Now ready to compute the measure.
firstdate <- 53464
daterange <- 2222
detection.limit <- 20.5
lcdbi$id <- row.names(lcdbi)
uids <- lcdbi$id
nuids <- length(uids)
statmat <- NULL
types <- NULL
ids <- NULL
for(i in 1:nuids){
olc <- lcdb[[i]]
type <- as.character(lcdbi$type[i])
id <- as.character(lcdbi$id[i])
aa <- lcstats(olc,GPR,kerntype,wvec,daterange=daterange,firstdate=firstdate,detection.limit=detection.limit)
if(is.na(aa[1])) next
statmat <- rbind(statmat,aa)
types <- c(types,type)
ids <- c(ids,id)
}
colnames(statmat) <- c("meds", "iqr","shov", "maxdiff", "dscore","wander", "moveloc","nobs", "totvar", "quadvar", "famp", "fslope","trend",
"outl", "skewres", "std", "shapwilk", "lsd", "gscore", "mdev", "gtvar", "rm.amplitude", "rm.mean",
"rm.stddev", "rm.beyond1std", "rm.fpr20", "rm.fpr35", "rm.fpr50",
"rm.fpr65", "rm.fpr80", "rm.lintrend", "rm.maxslope", "rm.mad",
"rm.medbuf", "rm.pairslope", "rm.peramp", "rm.pdfp", "rm.skew",
"rm.kurtosis", "rm.std", "rm.rcorbor")
cmdb <- data.frame(statmat,type=types,id=ids,row.names=ids)
cmdb$lsd[is.na(cmdb$lsd) | is.infinite(cmdb$lsd)] <- min(cmdb$lsd[!is.infinite(cmdb$lsd)],na.rm=TRUE)-1
cmdb$fslope[is.infinite(cmdb$fslope)] <- 1
cmdb$wander <- NULL
cmdb$moveloc <- NULL
summary(cmdb)
meds iqr shov maxdiff
Min. :10.9 Min. :0.011 Min. :0.0079 Min. : 0.029
1st Qu.:17.2 1st Qu.:0.126 1st Qu.:0.1129 1st Qu.: 0.727
Median :18.4 Median :0.272 Median :0.2378 Median : 1.325
Mean :18.2 Mean :0.346 Mean :0.2900 Mean : 1.502
3rd Qu.:19.4 3rd Qu.:0.515 3rd Qu.:0.4404 3rd Qu.: 1.914
Max. :20.8 Max. :5.353 Max. :3.0197 Max. :14.938
dscore nobs totvar quadvar
Min. :0.0007 Min. : 41 Min. :0.00042 Min. :0.00000
1st Qu.:0.2026 1st Qu.: 95 1st Qu.:0.00735 1st Qu.:0.00010
Median :0.2402 Median :153 Median :0.01122 Median :0.00024
Mean :0.2412 Mean :153 Mean :0.01276 Mean :0.00047
3rd Qu.:0.2855 3rd Qu.:207 3rd Qu.:0.01603 3rd Qu.:0.00049
Max. :0.3919 Max. :360 Max. :0.16585 Max. :0.05561
famp fslope trend outl
Min. :0.025 Min. :0.0017 Min. :-1.31e-03 Min. : 1.65
1st Qu.:0.430 1st Qu.:0.0303 1st Qu.:-2.54e-05 1st Qu.: 3.05
Median :0.674 Median :0.0477 Median : 1.38e-05 Median : 3.88
Mean :0.785 Mean :0.0588 Mean : 1.26e-05 Mean : 4.51
3rd Qu.:0.981 3rd Qu.:0.0736 3rd Qu.: 5.53e-05 3rd Qu.: 5.33
Max. :9.461 Max. :0.8910 Max. : 1.12e-03 Max. :16.74
skewres std shapwilk lsd
Min. :-13.663 Min. :0.0077 Min. :0.142 Min. :-4.841
1st Qu.: -0.505 1st Qu.:0.1146 1st Qu.:0.891 1st Qu.:-2.168
Median : -0.056 Median :0.2307 Median :0.961 Median :-1.462
Mean : 0.051 Mean :0.2700 Mean :0.905 Mean :-1.616
3rd Qu.: 0.522 3rd Qu.:0.3944 3rd Qu.:0.982 3rd Qu.:-0.937
Max. : 14.940 Max. :2.4256 Max. :0.999 Max. : 1.014
gscore mdev gtvar rm.amplitude
Min. :0.000 Min. :0.018 Min. : 0.08 Min. :0.023
1st Qu.:0.290 1st Qu.:0.428 1st Qu.: 3.39 1st Qu.:0.533
Median :0.325 Median :0.742 Median : 7.34 Median :0.927
Mean :0.313 Mean :0.842 Mean : 9.51 Mean :1.019
3rd Qu.:0.344 3rd Qu.:1.061 3rd Qu.:14.26 3rd Qu.:1.312
Max. :0.397 Max. :9.954 Max. :91.82 Max. :8.028
rm.mean rm.stddev rm.beyond1std rm.fpr20
Min. :1.00e-08 Min. :0.00e+00 Min. :0.0039 Min. :0.0002
1st Qu.:2.00e-08 1st Qu.:0.00e+00 1st Qu.:0.1860 1st Qu.:0.1099
Median :5.00e-08 Median :0.00e+00 Median :0.2500 Median :0.1327
Mean :2.80e-07 Mean :3.00e-07 Mean :0.2349 Mean :0.1332
3rd Qu.:1.40e-07 3rd Qu.:0.00e+00 3rd Qu.:0.2935 3rd Qu.:0.1552
Max. :1.43e-04 Max. :1.13e-03 Max. :0.6117 Max. :0.8044
rm.fpr35 rm.fpr50 rm.fpr65 rm.fpr80
Min. :0.0003 Min. :0.0004 Min. :0.0006 Min. :0.0009
1st Qu.:0.2081 1st Qu.:0.3225 1st Qu.:0.4692 1st Qu.:0.6927
Median :0.2427 Median :0.3677 Median :0.5226 Median :0.7464
Mean :0.2399 Mean :0.3610 Mean :0.5109 Mean :0.7301
3rd Qu.:0.2749 3rd Qu.:0.4090 3rd Qu.:0.5706 3rd Qu.:0.7925
Max. :0.8568 Max. :0.8985 Max. :0.9342 Max. :0.9819
rm.lintrend rm.maxslope rm.mad rm.medbuf
Min. :-2.31e-03 Min. : 4 Min. :0.0054 Min. :0.491
1st Qu.:-1.90e-05 1st Qu.: 125 1st Qu.:0.0617 1st Qu.:1.000
Median : 2.06e-05 Median : 219 Median :0.1340 Median :1.000
Mean : 2.18e-05 Mean : 292 Mean :0.1708 Mean :0.996
3rd Qu.: 7.38e-05 3rd Qu.: 368 3rd Qu.:0.2562 3rd Qu.:1.000
Max. : 1.23e-03 Max. :3183 Max. :1.7287 Max. :1.000
rm.pairslope rm.peramp rm.pdfp rm.skew
Min. :0.233 Min. : 0 Min. : 0.0 Min. :-15.739
1st Qu.:0.467 1st Qu.: 1 1st Qu.: 0.3 1st Qu.: -0.809
Median :0.500 Median : 1 Median : 0.7 Median : 0.039
Mean :0.504 Mean : 30 Mean : 1.0 Mean : -0.158
3rd Qu.:0.533 3rd Qu.: 3 3rd Qu.: 1.3 3rd Qu.: 0.609
Max. :0.767 Max. :982923 Max. :789.1 Max. : 15.095
rm.kurtosis rm.std rm.rcorbor type
Min. : 1.13 Min. :0.0105 Min. :0.0000 nv:50000
1st Qu.: 3.39 1st Qu.:0.1427 1st Qu.:0.0000 tr:50000
Median : 5.04 Median :0.2712 Median :0.0000
Mean : 11.35 Mean :0.3191 Mean :0.0021
3rd Qu.: 10.49 3rd Qu.:0.4570 3rd Qu.:0.0000
Max. :267.98 Max. :2.7188 Max. :0.4035
id
1001004000494: 1
1001004000570: 1
1001004000817: 1
1001004001311: 1
1001004001396: 1
1001004002575: 1
(Other) :99994
for(i in 1:(ncol(cmdb)-2)){
plot(cmdb[,i] ~ cmdb$type, main=names(cmdb)[i])
}
cmdbig <- cmdb
save(cmdbig,file="featbig.rda")