From 61b90d5301ed2126b0737d803ab7fe5f411ed67c Mon Sep 17 00:00:00 2001 From: Jordan S Read Date: Wed, 9 Sep 2015 14:33:31 -0500 Subject: [PATCH 1/6] fixed naming --- R/window.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/window.R b/R/window.R index b76668b..807c319 100644 --- a/R/window.R +++ b/R/window.R @@ -33,7 +33,7 @@ auto.chunk.time <- function(data.in){ # finds natural breaks in time sequence of data data.in = as.data.frame(data.in[1:2]) - t.steps <- as.numeric(diff(data.in$DateTime)) + t.steps <- as.numeric(diff(data.in$times)) ###### re-write this!! MAD.norm <- MAD.values(t.steps) # deal with NAs? break.i <- MAD.norm > 2.5 @@ -48,10 +48,10 @@ auto.chunk.time <- function(data.in){ } } - block.df <- data.frame("windows"=block.int) + block.df <- data.frame("w"=block.int) windowed.data <- cbind(data.in,block.df) - windowed.data[['windows']][j+1]=blck.i + windowed.data[['w']][j+1]=blck.i return(windowed.data) } From 742bbe40b55878b6f5a8c1c2eb49a1c4d7892d2e Mon Sep 17 00:00:00 2001 From: Jordan S Read Date: Wed, 9 Sep 2015 14:33:48 -0500 Subject: [PATCH 2/6] adding dplyr and lazyeval --- DESCRIPTION | 4 +++- NAMESPACE | 3 +++ R/custom-functions.R | 6 +++++- R/expression-helpers.R | 26 ------------------------- tests/testthat/test-flag_logic.R | 4 ++-- tests/testthat/test-parse_expressions.R | 19 ------------------ 6 files changed, 13 insertions(+), 49 deletions(-) delete mode 100644 R/expression-helpers.R delete mode 100644 tests/testthat/test-parse_expressions.R diff --git a/DESCRIPTION b/DESCRIPTION index dece52b..508bbdd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -13,7 +13,9 @@ Copyright: This software is in the public domain because it contains materials official USGS copyright policy at http://www.usgs.gov/visual-id/credit_usgs.html#copyright Imports: - yaml + yaml, + dplyr, + lazyeval Suggests: testthat LazyLoad: yes diff --git a/NAMESPACE b/NAMESPACE index 4d8f6ba..a72b65c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,9 +13,12 @@ S3method(window,sensor) export(MAD) export(flag) export(flag.data.frame) +export(persist) export(read) export(read.default) export(sensor) +importFrom(dplyr,mutate_) +importFrom(lazyeval,as.lazy) importFrom(stats,window) importFrom(tools,file_ext) importFrom(yaml,yaml.load_file) diff --git a/R/custom-functions.R b/R/custom-functions.R index 7070e68..7946c5b 100644 --- a/R/custom-functions.R +++ b/R/custom-functions.R @@ -49,7 +49,11 @@ MAD <- function(x, w){ } - +#' @export +persist <- function(x){ + tmp <- rle(x) + rep(tmp$lengths,times = tmp$lengths) +} call.cv <- function(data.in){ CV <- 100*sd(data.in)/mean(data.in) diff --git a/R/expression-helpers.R b/R/expression-helpers.R deleted file mode 100644 index 2ec4f1d..0000000 --- a/R/expression-helpers.R +++ /dev/null @@ -1,26 +0,0 @@ - -expr_fun <- function(expr){ - expr <- gsub("\\s","",expr) - return(strsplit(expr,split = '[()]')[[1]][1]) -} - -expr_var <- function(expr){ - expr <- gsub("\\s","",expr) - if (grepl(pattern = '[(]',expr)){ - return(split_vars(strsplit(expr,split = '[()]')[[1]][2])) - } else { - return(strsplit(expr,split = '[><=]')[[1]][1]) - } -} -split_vars <- function(x){ - strsplit(x, split = '[,]')[[1]] -} - -match.sqc.fun <- function(expr){ - fun = getAnywhere(expr_fun(expr)) - if (length(fun$objs) == 0) - NULL - else - fun = fun$objs[[1]] - return(fun) -} diff --git a/tests/testthat/test-flag_logic.R b/tests/testthat/test-flag_logic.R index 79d34b9..2ee97e2 100644 --- a/tests/testthat/test-flag_logic.R +++ b/tests/testthat/test-flag_logic.R @@ -11,7 +11,7 @@ test_that("is.na(x)", { values = c(1,3,2,3,4,5,5,5,4,3,5,NA,5,NA) sensor <- sensor(data.frame("DateTime"=dates,"sensor.obs"=values)) test_that("persistent", { - expect_equal(sum(calc_flags(sensor, 'n > 3', which.flagged=FALSE)), 0) - expect_equal(length(calc_flags(sensor, 'n > 2')), 3) + expect_equal(sum(calc_flags(sensor, 'persist(x) > 3', which.flagged=FALSE)), 0) + expect_equal(length(calc_flags(sensor, 'persist(x) > 2')), 3) expect_equal(length(calc_flags(sensor, 'is.na(x)')), 2) }) diff --git a/tests/testthat/test-parse_expressions.R b/tests/testthat/test-parse_expressions.R deleted file mode 100644 index d586fda..0000000 --- a/tests/testthat/test-parse_expressions.R +++ /dev/null @@ -1,19 +0,0 @@ -context("expression parsing") - -test_that("parse simple variable", { - - expr_var = sensorQC:::expr_var - - expect_equal(expr_var('MAD(x)'), 'x') - expect_equal(expr_var('x == 9'), 'x') - expect_equal(expr_var('y == 9'), 'y') -}) - -test_that("parse multiple variables", { - - expr_var = sensorQC:::expr_var - - expect_equal(expr_var('MAD(x)'), 'x') - expect_equal(expr_var('MAD(x,y)'), c('x','y')) - -}) From 28aaea8e01d5304495502fbf959be50cba423236 Mon Sep 17 00:00:00 2001 From: Jordan S Read Date: Wed, 9 Sep 2015 14:48:48 -0500 Subject: [PATCH 3/6] updates to syntax --- README.Rmd | 4 +-- README.md | 72 +++++++++++++++++++++++++++--------------------------- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/README.Rmd b/README.Rmd index a37dcd4..bc43c8c 100644 --- a/README.Rmd +++ b/README.Rmd @@ -38,11 +38,11 @@ High-frequency aquatic sensor QAQC procedures. `sensorQC` imports data, and runs library(sensorQC) file <- system.file('extdata', 'test_data.txt', package = 'sensorQC') sensor <- read(file, format="wide_burst", date.format="%m/%d/%Y %H:%M") -flag(sensor, 'x == 999999', 'n > 3', 'is.na(x)') +flag(sensor, 'x == 999999', 'persist(x) > 3', 'is.na(x)') ``` Use the `MAD` (median absolute deviation) test, and add `w` to the function call to specify "windows" (note, sensor must be windowed w/ `window()` prior to using `w`) ```{r} sensor = window(sensor, 'auto') -flag(sensor, 'x == 999999', 'n > 3', 'MAD(x,w) > 3', 'MAD(x) > 3') +flag(sensor, 'x == 999999', 'persist(x) > 3', 'MAD(x,w) > 3', 'MAD(x) > 3') ``` diff --git a/README.md b/README.md index de2c626..5a58d17 100644 --- a/README.md +++ b/README.md @@ -36,57 +36,57 @@ sensor <- read(file, format="wide_burst", date.format="%m/%d/%Y %H:%M") ## number of observations:5100 ``` r -flag(sensor, 'x == 999999', 'n > 3', 'is.na(x)') +flag(sensor, 'x == 999999', 'persist(x) > 3', 'is.na(x)') ``` ## object of class "sensor" - ## DateTime sensor.obs - ## 1 2013-11-01 00:00:00 48.86 - ## 2 2013-11-01 00:00:01 49.04 - ## 3 2013-11-01 00:00:02 49.50 - ## 4 2013-11-01 00:00:03 48.91 - ## 5 2013-11-01 00:00:04 48.90 - ## 6 2013-11-01 00:00:05 48.96 - ## 7 2013-11-01 00:00:06 48.48 - ## 8 2013-11-01 00:00:07 48.97 - ## 9 2013-11-01 00:00:08 48.97 - ## 10 2013-11-01 00:00:09 48.99 - ## 11 2013-11-01 00:00:10 48.35 - ## 12 2013-11-01 00:00:11 48.51 - ## 13 2013-11-01 00:00:12 49.25 - ## 14 2013-11-01 00:00:13 48.82 - ## 15 2013-11-01 00:00:14 49.22 + ## times x + ## 1 2013-11-01 00:00:00 48.86 + ## 2 2013-11-01 00:00:01 49.04 + ## 3 2013-11-01 00:00:02 49.50 + ## 4 2013-11-01 00:00:03 48.91 + ## 5 2013-11-01 00:00:04 48.90 + ## 6 2013-11-01 00:00:05 48.96 + ## 7 2013-11-01 00:00:06 48.48 + ## 8 2013-11-01 00:00:07 48.97 + ## 9 2013-11-01 00:00:08 48.97 + ## 10 2013-11-01 00:00:09 48.99 + ## 11 2013-11-01 00:00:10 48.35 + ## 12 2013-11-01 00:00:11 48.51 + ## 13 2013-11-01 00:00:12 49.25 + ## 14 2013-11-01 00:00:13 48.82 + ## 15 2013-11-01 00:00:14 49.22 ## ... ## x == 999999 (15 flags) - ## n > 3 (4 flags) + ## persist(x) > 3 (4 flags) ## is.na(x) (0 flags) Use the `MAD` (median absolute deviation) test, and add `w` to the function call to specify "windows" (note, sensor must be windowed w/ `window()` prior to using `w`) ``` r sensor = window(sensor, 'auto') -flag(sensor, 'x == 999999', 'n > 3', 'MAD(x,w) > 3', 'MAD(x) > 3') +flag(sensor, 'x == 999999', 'persist(x) > 3', 'MAD(x,w) > 3', 'MAD(x) > 3') ``` ## object of class "sensor" - ## DateTime sensor.obs - ## 1 2013-11-01 00:00:00 48.86 - ## 2 2013-11-01 00:00:01 49.04 - ## 3 2013-11-01 00:00:02 49.50 - ## 4 2013-11-01 00:00:03 48.91 - ## 5 2013-11-01 00:00:04 48.90 - ## 6 2013-11-01 00:00:05 48.96 - ## 7 2013-11-01 00:00:06 48.48 - ## 8 2013-11-01 00:00:07 48.97 - ## 9 2013-11-01 00:00:08 48.97 - ## 10 2013-11-01 00:00:09 48.99 - ## 11 2013-11-01 00:00:10 48.35 - ## 12 2013-11-01 00:00:11 48.51 - ## 13 2013-11-01 00:00:12 49.25 - ## 14 2013-11-01 00:00:13 48.82 - ## 15 2013-11-01 00:00:14 49.22 + ## times x + ## 1 2013-11-01 00:00:00 48.86 + ## 2 2013-11-01 00:00:01 49.04 + ## 3 2013-11-01 00:00:02 49.50 + ## 4 2013-11-01 00:00:03 48.91 + ## 5 2013-11-01 00:00:04 48.90 + ## 6 2013-11-01 00:00:05 48.96 + ## 7 2013-11-01 00:00:06 48.48 + ## 8 2013-11-01 00:00:07 48.97 + ## 9 2013-11-01 00:00:08 48.97 + ## 10 2013-11-01 00:00:09 48.99 + ## 11 2013-11-01 00:00:10 48.35 + ## 12 2013-11-01 00:00:11 48.51 + ## 13 2013-11-01 00:00:12 49.25 + ## 14 2013-11-01 00:00:13 48.82 + ## 15 2013-11-01 00:00:14 49.22 ## ... ## x == 999999 (15 flags) - ## n > 3 (4 flags) + ## persist(x) > 3 (4 flags) ## MAD(x,w) > 3 (129 flags) ## MAD(x) > 3 (91 flags) From 6ff8dda6c0305379e3ead2641c8ef194bc77e77b Mon Sep 17 00:00:00 2001 From: Jordan S Read Date: Wed, 9 Sep 2015 14:49:02 -0500 Subject: [PATCH 4/6] dplyr --- NAMESPACE | 2 ++ R/custom-functions.R | 14 +++----------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index a72b65c..1b9e256 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,6 +17,8 @@ export(persist) export(read) export(read.default) export(sensor) +importFrom(dplyr,"%>%") +importFrom(dplyr,group_by_) importFrom(dplyr,mutate_) importFrom(lazyeval,as.lazy) importFrom(stats,window) diff --git a/R/custom-functions.R b/R/custom-functions.R index 7946c5b..5fbb97c 100644 --- a/R/custom-functions.R +++ b/R/custom-functions.R @@ -17,17 +17,8 @@ MAD.values <- function(vals, b = 1.4826){ MAD.windowed <- function(vals, windows){ stopifnot(length(vals) == length(windows)) - # what is the underlying distribution? (important for assigning "b") - - MAD.out <- vector(length=length(vals)) - un.win <- unique(windows) - - for (i in 1:length(un.win)){ - win.i <- un.win[i] - val.i <- windows == win.i - MAD.out[val.i] = MAD.values(vals[val.i]) - } - return(MAD.out) + mad <- group_by_(data.frame(x=vals,w=windows), 'w') %>% mutate_(mad='sensorQC:::MAD.values(x)') %>% .$mad + return(mad) } #'@title median absolute deviation outlier test #'@name MAD @@ -37,6 +28,7 @@ MAD.windowed <- function(vals, windows){ #'@param windows vector of equal length to x specifying windows #'@return a vector of MAD normalized values relative to an undefined rejection criteria (usually 2.5 or 3). #'@keywords MAD +#'@importFrom dplyr group_by_ mutate_ %>% #'@author #'Jordan S. Read #'@export From ffd744d626c881c30a815eaf12ca932c58b458a5 Mon Sep 17 00:00:00 2001 From: Jordan S Read Date: Wed, 9 Sep 2015 14:49:19 -0500 Subject: [PATCH 5/6] simplifying w/ dplyr --- R/flag_functions.R | 46 +++++++--------------------------------------- 1 file changed, 7 insertions(+), 39 deletions(-) diff --git a/R/flag_functions.R b/R/flag_functions.R index 0583b45..a156f9a 100644 --- a/R/flag_functions.R +++ b/R/flag_functions.R @@ -2,50 +2,18 @@ calc_flags <- function(x, ...){ UseMethod('calc_flags') } +#' @importFrom dplyr mutate_ +#' @importFrom lazyeval as.lazy #' @export calc_flags.sensor <- function(sensor, expr, which.flagged=TRUE){ - flags <- sqc(expr=expr, vals=values(sensor), windows=windows(sensor)) + + flags <- mutate_(sensor$sensor[-1], flags = lazyeval::as.lazy(expr, globalenv()))$flags + if (!inherits(flags, 'logical')) + stop(expr,' failed to generate booleans') + #check for class of flags if (which.flagged) return(which(flags)) else return(flags) } - - -sqc <- function(expr, vals, windows, ...){ - - expr = tryCatch({ - parse(text = expr) - }, error = function(e) { - stop(paste0('error evaluation expression ',expr)) - }) - - vals = set.args(expr, vals, windows) - - flags <- eval(expr, envir=vals) - - return(flags & is.finite(flags) & !is.na(flags)) -} - -set.args <- function(expr, vals, windows){ - val.call <- function(x){ - do.call(paste0('to.',x), list(vals=vals, windows=windows)) - } - arg.names = expr_var(expr) - args = sapply(arg.names, val.call) - return(setNames(args, arg.names)) -} - -to.n <- function(vals, ...){ - tmp <- rle(vals) - list('n'=rep(tmp$lengths,times = tmp$lengths)) -} - -to.x <- function(vals, ...){ - list('x'=vals) -} - -to.w <- function(..., windows){ - list('w'=windows) -} From 1462961f6148afd7d9a851f1d8ce300585c7a9de Mon Sep 17 00:00:00 2001 From: Jordan S Read Date: Wed, 9 Sep 2015 14:49:28 -0500 Subject: [PATCH 6/6] names --- R/read.R | 2 +- R/sensor-class.R | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/R/read.R b/R/read.R index b2de123..2aefb14 100644 --- a/R/read.R +++ b/R/read.R @@ -93,7 +93,7 @@ read.wide_burst <- function(file,date.format){ } date.vec <- head(date.vec,cnt-1) sens.vec <- head(sens.vec,cnt-1) - data.out <- data.frame('DateTime'=date.vec, 'sensor.obs'=sens.vec) + data.out <- data.frame('DateTime'=date.vec, 'x'=sens.vec) # should we also return metadata? return(data.out) diff --git a/R/sensor-class.R b/R/sensor-class.R index 7e335e4..350a550 100644 --- a/R/sensor-class.R +++ b/R/sensor-class.R @@ -5,6 +5,7 @@ sensor <- function(x, flag.defs, ...){ #' @export sensor.data.frame <- function(x, flag.defs = NULL, ...){ + names(x) <- c('times','x','w')[seq_len(length(names(x)))] sensor = list(sensor=x) flags = define_flags(flag.defs,...) if (!is.null(flags))