diff --git a/.gitignore b/.gitignore index 1db65d8..315892e 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,12 @@ .R* *.*~ .Rproj.user + +# Local package copy for autotest +.local/* + +# Tmp directory +tmp/* + +# Ignore plots created by testing functionality +tests/testthat/*.pdf \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 7a262d3..04e9bc6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -12,10 +12,10 @@ LazyLoad: yes Depends: plyr, reshape2, + ggplot2, isotopia Suggests: testthat, - ggplot2, gridExtra, xlsx Roxygen: list(wrap = FALSE) @@ -28,6 +28,8 @@ Collate: 'IrmsContinuousFlowDataClass.R' 'IrmsDualInletDataClass.R' 'IsodatFileClass.R' + 'IsodatDualInletFileClass.R' + 'IsodatClumpedCO2FileClass.R' 'IsodatHydrogenContinuousFlowFileClass.R' 'export.R' 'isoread.R' diff --git a/Makefile b/Makefile index 7a8f9f4..a4fc167 100644 --- a/Makefile +++ b/Makefile @@ -27,3 +27,13 @@ check: build cd ..;\ R CMD check $(PKGNAME)_$(PKGVERS).tar.gz --as-cran +local-install: + rm -rf .local + mkdir .local + R CMD INSTALL --library=.local . 
+ +autotest: local-install + + R -q -e "library(isoread, lib.loc = '.local')" \ + -e "library(testthat)" \ + -e "auto_test_package(pkg='.')" diff --git a/NAMESPACE b/NAMESPACE index 350f6c6..3347aae 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,4 +1,4 @@ -# Generated by roxygen2 (4.0.0): do not edit by hand +# Generated by roxygen2 (4.0.2): do not edit by hand export(export_data) export(isoread) @@ -9,4 +9,6 @@ export(reload) export(summarize_all) exportClasses(BinaryFile) exportClasses(IrmsContinuousFlowData) +exportClasses(IsodatClumpedCO2File) +exportClasses(IsodatDualInletFile) exportClasses(IsodatHydrogenContinuousFlowFile) diff --git a/R/BinaryFileClass.R b/R/BinaryFileClass.R index 9613a1d..807abcc 100644 --- a/R/BinaryFileClass.R +++ b/R/BinaryFileClass.R @@ -83,6 +83,8 @@ BinaryFile <- setRefClass( return(read) }, + # IMPLEMENT skip_after_each for easy recurring motif recognition + # current problem with that is that it loops through by type (=col) parse_array = function(types, n, id = NA, skip_first = 0) { "repeatedly read the same set of information into a data frame @@ -124,40 +126,48 @@ BinaryFile <- setRefClass( pos <<- as.integer(pos + nbyte) }, - find_key = function(pattern, occurence = 1) { - "find a key by a regexp pattern" + find_key = function(pattern, occurence = NULL, fixed = FALSE, byte_min = 0, byte_max = length(rawdata)) { + "finds all keys matching 'pattern' or a specific occurence of it (use -1 for last occurence) + #' @param fixed whether to find the key(s) by regexp match or fixed string (default = regexp match) + #' @param byte_min only look for keys that start after this position + #' @param byte_max only look for keys that start before this position + #' @return the lines of the keys data frame with all the information about the found key(s)" if (nrow(keys) == 0) stop("no keys available, make sure load() was called") - if (nrow(match <- keys[grep(pattern, keys$value),]) == 0) - stop("pattern '", pattern, "' was not found") + sub_keys <- 
subset(keys, byteStart > byte_min & byteStart < byte_max) + if (nrow(sub_keys) == 0) + stop("no keys in this byte interval: ", byte_min, " - ", byte_max) - if (occurence == -1) occurence <- nrow(match) + if (length(idx <- grep(pattern, sub_keys$value, fixed = fixed)) == 0) + stop("key '", pattern, "' was not found") - if (occurence > nrow(match)) - stop("pattern '", pattern, "' was found but only has ", nrow(match), " occurences ", - "(trying to select occurence #", occurence, ")") + if (!is.null(occurence)) { + if (occurence == -1) occurence <- length(idx) - return(match[occurence, "value"]) + if (occurence > length(idx)) + stop("key '", pattern, "' was found but only has ", length(idx), " occurences ", + "(trying to select occurence #", occurence, ")") + } else { + occurence <- seq_along(idx) # return ALL found occurences + } + + return(sub_keys[idx[occurence], , drop = FALSE]) }, - move_to_key = function(key, occurence = 1) { - "moves position to the end of a specific occurence of a key (use -1 for last occurence)" - - if (nrow(keys) == 0) - stop("no keys available, make sure load() was called") - - if (nrow(match <- subset(keys, value==key)) == 0) - stop("key '", key, "' was not found") - - if (occurence == -1) occurence <- nrow(match) - - if (occurence > nrow(match)) - stop("key '", key, "' was found but only has ", nrow(match), " occurences ", - "(trying to select occurence #", occurence, ")") - - pos <<- as.integer(match[occurence, "byteEnd"]) + 1L + move_to_key = function(key, occurence = 1, fixed = TRUE) { + "moves position to the end of a specific key or occurence of a key + #' @param key either a string or a data.frame line with key value and byteEnd (the way it is returned by find_key) + #' @param occurence if key is a string, which occurence to move to? 
(use -1 for last occurence) + #' @param fixed whether to find the key (if a string) by regexp match or fixed string (default = fixed string)" + + if (is(key, "character")) key <- find_key(key, occurence, fixed) + else if (is(key, "list")) key <- as.data.frame(key) + + if (!is(key, "data.frame") || nrow(key) != 1 || !("byteEnd" %in% names(key))) + stop("not a valid key entry, can't move there: ", key) + pos <<- as.integer(key["byteEnd"]) + 1L }, read_file = function(){ @@ -278,7 +288,10 @@ BinaryFile <- setRefClass( rbind( data.frame(Property = c("File location", "Date"), Value = c(file.path(filepath, filename), format(creation_date))), - data.frame(Property = names(data), Value = vapply(data, as.character, FUN.VALUE = character(1), USE.NAMES = FALSE)) + data.frame(Property = names(data), + Value = vapply(data, + function(i) if (is(i, "data.frame")) "data frame (not shown)" else as.character(i[1]), + FUN.VALUE = character(1), USE.NAMES = FALSE)) ) }, diff --git a/R/IrmsContinuousFlowDataClass.R b/R/IrmsContinuousFlowDataClass.R index 68520fa..f626da3 100644 --- a/R/IrmsContinuousFlowDataClass.R +++ b/R/IrmsContinuousFlowDataClass.R @@ -56,13 +56,15 @@ IrmsContinousFlowData <- setRefClass( # DATA CHECKS ============================ - check_data = function(...) { - "check the data consistency, calls \\code{check_crom_data} and \\code{check_peak_table}" - callSuper(...) + # FIXME: refactor such that check_chrom_data is renamed directly to check_mass_data + check_mass_data = function(...) { check_chrom_data(...) - check_peak_table(...) }, + # FIXME: refactor such that check_peak_table is renamed directly to check_data_table + check_data_table = function(...) { + check_peak_table(...) 
+ }, check_chrom_data = function(masses = names(.self$plotOptions$masses), ratios = names(.self$plotOptions$ratios), ..., warn = TRUE) { @@ -139,6 +141,11 @@ IrmsContinousFlowData <- setRefClass( # DATA RETRIEVAL ============== + # FIXME: refactor such that get_peak_table is renamed directly to get_data_table + get_data_table = function(...){ + get_peak_table(...) + }, + get_peak_table = function(type = c("ref", "data", "both")) { "retrieve the peak table" diff --git a/R/IrmsDataClass.R b/R/IrmsDataClass.R index 3a630ba..492aff0 100644 --- a/R/IrmsDataClass.R +++ b/R/IrmsDataClass.R @@ -5,7 +5,10 @@ IrmsData <- setRefClass( "IrmsData", fields = list ( - plotOptions = 'list' + plotOptions = 'list', # stores options for plotting the data + massData = 'data.frame', # stores raw data for all measured masses (e.g. voltages) + dataTable = 'data.frame', # stores processed data table (=data summary) + dataTableColumns = 'data.frame' # the columns of the data table ), methods = list( #' constructor @@ -16,7 +19,19 @@ IrmsData <- setRefClass( init_irms_data = function() { "initialize irms data container" - plotOptions <<- list() + + # template for plot options + plotOptions <<- list( + masses = list() # example entry: mass46 = list(label = "Mass 46", color="black") + ) + + # template for dataTableColumn definitions + # data - name of the column header in the original data + # column - name of the column stored in the data table + # units - units the data are in + # type - which mode it is (character, numeric, logical, Ratio, Abundance, Delta, etc.) + # show - whether to show this column in standard data table outputs + dataTableColumns <<- data.frame(data = character(), column = character(), units = character(), type = character(), show = logical(), stringsAsFactors = FALSE) }, #' @example setSettings(a=5, b='test', ...) 
@@ -35,11 +50,43 @@ IrmsData <- setRefClass( plotOptions <<- modifyList(plotOptions, options) }, + # DATA CHECKS ============================ + #' check internal consistency of data check_data = function(...) { + check_mass_data(...) + check_data_table(...) + }, + + check_mass_data = function(...) { + "checks the consistency of the raw mass data" + }, + + check_data_table = function(...) { + "checks the consistency of the table data" }, + # DATA RETRIEVAL ============== + + #' get data for masses + #' @param masses which masses to retrieve, all defined ones by default + #' @param melt whether to melt the data frame + get_mass_data = function(masses = names(.self$plotOptions$masses), melt = FALSE, ...) { + "get the mass trace data for specific masses, can be provided in \\code{melt = TRUE} long format + for easy use in ggplot style plotting" + stop("not implemented for this class") + }, + + get_data_table = function(...) { + "retrieve the data table" + stop("not implemented for this class") + }, + + # PLOTTING =================== + + #' plot data plot = function(...) { + "plot data with standard plot functions (fast) to standard output" stop("not implemented for this class") }, diff --git a/R/IrmsDualInletDataClass.R b/R/IrmsDualInletDataClass.R index eb6c67e..f23198a 100644 --- a/R/IrmsDualInletDataClass.R +++ b/R/IrmsDualInletDataClass.R @@ -9,5 +9,161 @@ IrmsDualInletData <- setRefClass( "IrmsDualInletData", contains = "IrmsData", fields = list (), - methods = list() + methods = list( + + #' initialize irms data container + init_irms_data = function(){ + callSuper() + + # default plot options + set_plot_options( + labels = list(xmasses = "Cycle", ymasses = "Signal [mV]") # default mass data plot labels + ) + + # if overwriting default in derived classes, make sure to define the cycle column in dataTableColumn definitions! 
+ # (but it can be set to show = FALSE if desired) + # dataTableColumns <<- data.frame(data = "cycle", column = "cycle", units = "", type = "integer", show = TRUE, stringsAsFactors = FALSE) + }, + + # DATA CHECKS ============================ + + check_mass_data = function(...) { + # check if the masses defined in plotOptions actually exist + if (ncol(massData) == 0) + stop("No raw data is loaded. Make sure to run load() to load all data from the file.") + + missing <- setdiff( + names(.self$plotOptions$masses), + grep("mass\\d+", names(massData), value = T)) + + if ( length(missing) > 0 ) + stop("Not all masses appear to be recorded in this file, missing: ", paste(missing, collapse = ", ")) + + return(TRUE) + }, + + check_data_table = function(...) { + # checks the consistency of the data table and converts data types if necessary + # by default, checks all columns defined in dataTableColumns + + if (ncol(dataTable) == 0) + stop("No data table is loaded. Make sure to run load() to load all data from the file.") + + # check for existence of all columns + if (length(missing <- setdiff(dataTableColumns$column, names(dataTable))) > 0) { + # for the missing columns, try to find and convert the original data column names to the dataTable names (easier to access) + ptc_indices <- which(dataTableColumns$column %in% missing) # indices of missing columns in dataTableColumns + if (length(missing <- setdiff(dataTableColumns$data[ptc_indices], names(dataTable))) > 0) + stop("Some data columns ('", paste(missing, collapse = ", ") ,"') do not exist in the loaded dataTable.") + + # change original column names to new name + pt_cols <- sapply(dataTableColumns$data[ptc_indices], function(i) which(names(dataTable) == i), simplify = TRUE) + names(dataTable)[pt_cols] <<- dataTableColumns$column[ptc_indices] + } + + # bring data table columns into right order + dataTable <<- dataTable[dataTableColumns$column] + + # check for proper class and convert if necessary + if (any(types <- 
(sapply(dataTable, class, simplify=T) != dataTableColumns$type))) { + ptc_indices <- which(types) # indices of the columns to convert + + for (i in ptc_indices) { + value <- convert_data( + value = dataTable[[dataTableColumns$column[i]]], + data_type = dataTableColumns$type[i]) + dataTable[[dataTableColumns$column[i]]] <<- value + } + } + }, + + convert_data = function(value, data_type) { + "function converts data table entries to their appropriate data types - overwrite in derived classes for more specialized behaviour" + suppressWarnings( + try(switch( + data_type, + "integer" = as.integer(value), + "character" = as.character(value), + "numeric" = as.numeric(value), + "logical" = as.logical(value), + stop("data type not supported: ", data_type)), + TRUE)) + }, + + # DATA RETRIEVAL =========== + + get_mass_data = function(masses = names(.self$plotOptions$masses), melt = FALSE) { + check_mass_data() + + if (length(missing <- setdiff(masses, names(massData))) > 0) + stop("Some masses ('", paste(missing, collapse = ", ") ,"') do not exist in the loaded massData.") + + if (!melt) # wide format + return(massData[c("analysis", "cycle", masses)]) + else # long format + return(melt(massData[c("analysis", "cycle", masses)], + .(analysis, cycle), variable.name = "mass", value.name = "intensity")) + }, + + #' by default, returns all data table columns that are enabled with show = TRUE + #' @param summarize whether to show whole data table or just the summary + get_data_table = function(select = default_select(), summarize = FALSE) { + + default_select <- function() { + dataTableColumns$column[dataTableColumns$show] + } + + check_data_table() + + if (length(missing <- setdiff(select, names(dataTable))) > 0) + stop("Some data ('", paste(missing, collapse = ", ") ,"') do not exist in the loaded dataTable.") + + if (!summarize) + return(dataTable[select]) + + # summarize data table + select <- select[select != "cycle"] # exclude cycle form the summary (since it gets summarized) 
+ summary <- ddply(melt(dataTable[select], id.vars = NULL, variable.name = "Variable"), .(Variable), + plyr:::summarize, + Mean = mean(value), + `Std. Devi.` = sd(value), + `Std. Error.` = `Std. Devi.`/sqrt(length(value))) + return(summary) + }, + + # PLOTTING =================== + + plot = function(masses = names(.self$plotOptions$masses), ...) { + stop("not implemented yet") + }, + + #' ggplot data + make_ggplot = function(masses = names(.self$plotOptions$masses), ...) { + library(ggplot2) + + plot.df <- get_mass_data(masses = masses, melt = T) # pass masses FIXME + plot.df <- merge(plot.df, data.frame( + mass = names(plotOptions$masses), + mass_label = sapply(plotOptions$masses, function(x) x$label)), by="mass") + x_breaks <- seq(min(plot.df$cycle), max(plot.df$cycle), by=1) + + p <- ggplot(plot.df, + aes(cycle, intensity, shape = analysis, linetype = analysis, fill = mass)) + + geom_line(colour = "black") + + geom_point(colour = "black") + + scale_x_continuous(breaks = x_breaks) + + scale_shape_manual("Type", values = c(21, 22)) + + scale_linetype_manual("Type", values = c(1, 2)) + + scale_fill_manual("Mass", breaks = names(plotOptions$masses), + labels = vapply(plotOptions$masses, function(x) x$label, FUN.VALUE=character(1)), + values = vapply(plotOptions$masses, function(x) x$color, FUN.VALUE=character(1)), + guide = "none") + + theme_bw() + theme(legend.position = "bottom") + + facet_wrap(~mass_label, scales = "free") + + labs(y = plotOptions$labels$ymasses) + + return(p) + } + + ) ) \ No newline at end of file diff --git a/R/IsodatClumpedCO2FileClass.R b/R/IsodatClumpedCO2FileClass.R new file mode 100644 index 0000000..177a20a --- /dev/null +++ b/R/IsodatClumpedCO2FileClass.R @@ -0,0 +1,35 @@ +#' @include IsodatDualInletFileClass.R +NULL + +#' Clumped dual inlet data class +#' +#' +#' @name IsodatClumpedCO2File +#' @exportClass IsodatClumpedCO2File +#' @seealso \link{IrmsDualInletData}, \link{BinaryFile}, \link{IsodatFile}, \link{IsodatDualInletFile} 
+IsodatClumpedCO2File <- setRefClass( + "IsodatClumpedCO2File", + contains = c("IsodatDualInletFile"), + fields = list (), + methods = list( + + #' initialize irms data container + init_irms_data = function(){ + callSuper() + + # specifically define the data table columns of CO2 dual inlet files + dataTableColumns <<- + data.frame( + data = c("cycle", "d 45CO2/44CO2 ", "d 46CO2/44CO2 ", + "d 13C/12C ", "d 18O/16O ", "d 17O/16O ", + "AT% 13C/12C ", "AT% 18O/16O "), + column = c("cycle", "d 45CO2/44CO2", "d 46CO2/44CO2", + "d13C", "d18O", "d17O", + "at% 13C", "at% 18O"), + units = c("", rep("permil", 5), "", ""), + type = c("integer", rep("numeric", 7)), + show = TRUE, stringsAsFactors = FALSE) + } + + ) +) \ No newline at end of file diff --git a/R/IsodatDualInletFileClass.R b/R/IsodatDualInletFileClass.R new file mode 100644 index 0000000..8c55475 --- /dev/null +++ b/R/IsodatDualInletFileClass.R @@ -0,0 +1,155 @@ +#' @include IsodatFileClass.R +#' @include IrmsDualInletDataClass.R +NULL + +#' Clumped dual inlet data class +#' +#' +#' @name IsodatDualInletFile +#' @exportClass IsodatDualInletFile +#' @seealso \link{BinaryFile}, \link{IsodatFile}, \link{IrmsDualInletData}, \link{IrmsData} +IsodatDualInletFile <- setRefClass( + "IsodatDualInletFile", + contains = c("IsodatFile", "IrmsDualInletData"), + fields = list (), + methods = list( + #' initialize + initialize = function(...) { + callSuper(...) + init_irms_data() + }, + + #' initialize irms data container + init_irms_data = function(){ + callSuper() + # overwrite in derived classes and set data table definitions properly! + # see IrmsDualInletDataClass for details on requirements and functionality + }, + + # READ DATA ========================= + + #' expand process function specifically for dual inlet type data + process = function(...) 
{ + callSuper() + + # find recorded masses + masses <- find_key("Mass \\d+", + byte_min = find_key("CTraceInfo", occ = 1, fix = T)$byteEnd, + byte_max = find_key("CPlotRange", occ = 1, fix = T)$byteStart)$value + + if (length(masses) == 0) + stop("Error: no keys named 'Mass ..' found. Cannot identify recorded mass traces in this file.") + + # unless mass plot options are already manually defined (in init_irms_data), define them automatically here and assign colors + mass_names <- sub("Mass (\\d+)", "mass\\1", masses) + if (length(plotOptions$masses) == 0) { + # color blind friendly pallete (9 colors) + palette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#D55E00", "#0072B2", "#CC79A7", "#999999", "#F0E442") + if (length(masses) > length(palette)) + stop("Currently only supporting up to ", length(palette), " automatically assigned different colors for masses but ", + "this file is recording data for ", length(masses), " different masses. Plesae define the plotOptions manually.") + + set_plot_options( + masses = setNames( + sapply(seq_along(masses), function(i) list(list(label = masses[i], color = palette[i]))), + mass_names) + ) + } + + # extract raw voltage data from the cycles + raw_data_keys <- find_key("^(Standard|Sample) \\w+$", + byte_min = find_key("CDualInletRawData", occ = 1, fix = T)$byteEnd, + byte_max = find_key("CTwoDoublesArrayData", occ = 1, fix = T)$byteStart) + + if (nrow(raw_data_keys) == 0) + stop("could not find raw data in this file") + + # extract cycle information + raw_data_keys <- mutate(raw_data_keys, + analysis = sub("^(Standard|Sample) (\\w+)$", "\\1", value), + cycle.0idx = sub("^(Standard|Sample) (\\w+)$", "\\2", value), # 0 based index, adjust in next line + cycle = ifelse(cycle.0idx == "Pre", 0, suppressWarnings(as.integer(cycle.0idx)) + 1L)) + n_cycles <- max(raw_data_keys$cycle) + + # read in all masses and cycles + massData <<- do.call(data.frame, + args = c(list(stringsAsFactors = FALSE, analysis = character(), cycle = 
integer()), + lapply(plotOptions$masses, function(i) numeric()))) + + for (i in 1:nrow(raw_data_keys)) { + move_to_key(raw_data_keys[i, ]) + has_intensity_block <- nrow(subset(keys, value == "CIntensityData" & byteStart > raw_data_keys[i, "byteStart"] & byteEnd < raw_data_keys[i, "byteEnd"] + 64)) > 0 + massData[i, ] <<- c(list(raw_data_keys[i, "analysis"], raw_data_keys[i, "cycle"]), + as.list(parse("double", length = length(mass_names), skip_first = if (has_intensity_block) 82 else 64))) + } + + # evaluated data / data table + # NOTE: this could (should ?) be calculated from the raw voltage data directly + eval_data_keys <- find_key("^(d |AT).+$", + byte_min = find_key("CDualInletEvaluatedData", occ = 1, fix = T)$byteEnd, + byte_max = find_key("Sequence Line Information", occ = 1, fix = T)$byteStart) + if (nrow(eval_data_keys) == 0) + stop("could not find evaluated data in this file") + + eval_data <- list(cycle = 1:n_cycles) + for (i in 1:nrow(eval_data_keys)) { + move_to_key(eval_data_keys[i,]) + gap_to_data <- switch( + substr(eval_data_keys[i, "value"], 1, 2), + `d ` = 54, `AT` = 50) + # these are evaluated data points for ALL cycles + eval_data[[eval_data_keys[i,"value"]]] <- parse("double", length = 2 * n_cycles, skip_first = gap_to_data)[c(FALSE, TRUE)] + } + dataTable <<- data.frame(eval_data, check.names = F) + + # unless dataTableColumns are already manually defined, define them here + if (nrow(dataTableColumns) == 0) { + dataTableColumns <<- + data.frame(data = names(dataTable), column = names(dataTable), + units = "", type = "numeric", show = TRUE, stringsAsFactors = FALSE) + } + + # grid infos + rawtable <- rawdata[subset(keys, value=="CMeasurmentInfos")$byteEnd:subset(keys, value=="CMeasurmentErrors")$byteStart] + dividers <- c(grepRaw("\xff\xfe\xff", rawtable, all=TRUE), length(rawtable)) + if (length(dividers) == 0) + stop("this file does not seem to have the expected hex code sequence FF FE FF as dividers in the grid info") + + for (i in 
2:length(dividers)) { + # read ASCII data for each block + raw_ascii <- grepRaw("([\u0020-\u007e][^\u0020-\u007e])+", rawtable[(dividers[i-1]+4):dividers[i]], all=T, value = T) + x <- if (length(raw_ascii) > 0) rawToChar(raw_ascii[[1]][c(TRUE, FALSE)]) else "" + if (x == "CUserInfo") data[[paste0("Info_", sub("^(\\w+).*$", "\\1", value))]] <<- value # store value with first word as ID + else value <- x # keep value + } + + # sequence line information + rawtable <- rawdata[subset(keys, value=="Sequence Line Information")$byteEnd:subset(keys, value=="Visualisation Informations")$byteStart] + if (length(rawtable) < 10) + stop("this file does not seem to have a data block for the sequence line information") + + dividers <- grepRaw("\xff\xfe\xff", rawtable, all=TRUE) + if (length(dividers) == 0) + stop("this file does not seem to have the expected hex code sequence FF FE FF as dividers in the sequence line information") + + for (i in 2:length(dividers)) { + # read ASCII data for each block + raw_ascii <- grepRaw("([\u0020-\u007e][^\u0020-\u007e])+", rawtable[(dividers[i-1]+4):dividers[i]], all=T, value = T) + x <- if (length(raw_ascii) > 0) rawToChar(raw_ascii[[1]][c(TRUE, FALSE)]) else "" + if (i %% 2 == 1) data[[x]] <<- value # store key / value pair in data list + else value <- x # keep value for key (which comes AFTER its value) + } + + }, + + #' custom show function to display roughly what data we've got going + show = function() { + cat("\nShowing summary of", class(.self), "\n") + callSuper() + cat("\n\nMass data:\n") + print(get_mass_data()) + cat("\n\nData table:\n") + print(get_data_table(summarize = TRUE)) + } + ) +) \ No newline at end of file diff --git a/R/IsodatHydrogenContinuousFlowFileClass.R b/R/IsodatHydrogenContinuousFlowFileClass.R index 5753728..e7c75fb 100644 --- a/R/IsodatHydrogenContinuousFlowFileClass.R +++ b/R/IsodatHydrogenContinuousFlowFileClass.R @@ -115,10 +115,10 @@ IsodatHydrogenContinuousFlowFile <- setRefClass( # other information 
move_to_key("H3 Factor") parse("double", id = "H3factor", skip_first = 8) - data$GCprogram <<- find_key(".gcm$") - data$MSprogram <<- find_key(".met$") - data$Filename <<- find_key(".cf$") - data$ASprogram <<- find_key("Internal") + data$GCprogram <<- find_key(".gcm$")$value + data$MSprogram <<- find_key(".met$")$value + data$Filename <<- find_key(".cf$")$value + data$ASprogram <<- find_key("Internal")$value # reorganize data, move to IrmsDataClass structure if (readChromData) { diff --git a/R/isoread.R b/R/isoread.R index 88d2dd9..9add310 100644 --- a/R/isoread.R +++ b/R/isoread.R @@ -7,6 +7,7 @@ #' @docType package #' @title isoread package #' @author Sebastian Kopf +#' @author Max Lloyd NULL #' @include IsodatHydrogenContinuousFlowFileClass.R @@ -26,6 +27,8 @@ NULL #' @param ... parameters passed to the \code{load} and \code{process} functions of the IsodatFile objects #' @return List of file \code{type} specific objects. #' \itemize{ +#' \item{'DUAL'}{ = instance(s) of a basic \code{\link{IsodatDualInletFile}} which implements \code{\link{IrmsDualInletData}}}. +#' \item{'CO2_CLUMPED'}{ = instance(s) of the more specialized \code{\link{IsodatClumpedCO2File}} which extends \code{\link{IsodatDualInletFile}}}. #' \item{'H_CSIA'}{ = instance(s) of \code{\link{IsodatHydrogenContinuousFlowFile}} which implements \code{\link{IrmsContinuousFlowData}}}. #' } #' If file names start with a number, @@ -36,6 +39,8 @@ isoread <- function(files, type, load_chroms = T, ...) 
{ typeClass <- switch( type, H_CSIA = 'IsodatHydrogenContinuousFlowFile', + DUAL = 'IsodatDualInletFile', + CO2_CLUMPED = 'IsodatClumpedCO2File', stop("not a currently supported file type: '", type, "'")) files <- as.list(files) diff --git a/R/zzz.R b/R/zzz.R index df00b05..e941bc1 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,9 +1,8 @@ #FIXME remove again (just a helper during development) +# automatically create a global instance of a test file on attachment of the package .onAttach <- function(libname, pkgname) { - #isoread("/Users/sk/Dropbox/VM Windows/6520__F8-5_5uL_isodat2.cf", readChromData = TRUE, type = "H_CSIA") ->> i + #i <<- suppressMessages(isoread(system.file("extdata", "dual_inlet_clumped_carbonate.did", package="isoread"), type = "CO2_CLUMPED")) + #i$load() } -# for auto-testing, just start R in separate console inside the package and -# run the following code: -# library(testthat) -# auto_test_package(pkg=".") \ No newline at end of file +# for auto-testing, just run 'make autotest' on a terminal in the isoread folder diff --git a/README.md b/README.md index b9cf77b..bc8b36e 100644 --- a/README.md +++ b/README.md @@ -5,30 +5,58 @@ R interface to IRMS (isotope ratio mass spectrometry) file formats typically use This package allows the reading and processing of stable isotope data directly from the data files and thus provides a tool for reproducible data reduction. This package is definitely still a work-in-progress, however the master branch will always be a functional version (get the 'dev' branch for the active development version) and I'll make an effort to keep it backwards compatible as it evolves. 
-The underlying object structure of the package is designed to allow expansion towards a number of different types of data (the uml diagram contains a rough visual sketch of the class hierarchy) but currently, the only supported format (the only one I have test data from and had time to implement) are files containing compound specific hydrogen isotope data but expansions will come over time. +Currently, **isoread** supports reading files containing compound specific hydrogen isotope data, as well as clumped carbonate dual inlet data. The underlying object structure of the package is designed to allow easy expansion towards a number of different types of data and both supported file types are dynamically implemented and should be easily expandable to other continuous flow and dual inlet isotope data files, so expansions will hopefully come over time. -##Installation +## How to use the isoread package + +### Installation Hadley Wickham's **devtools** package provides a super convenient way of installing ```R``` packages directly from GitHub. 
To install **devtools**, run the following from the R command line: ```coffee -install.packages('devtools', depen=T) # development tools +install.packages('devtools', depen=T) ``` - -Then simply install the latest version of **isoread** directly from GitHub by running the following code (if it is the first time you install the **isoread** package, all missing dependencies will be automatically installed as well -> **ggplot2, plyr, reshape2, stringr** as well as their respective dependencies, which might take a minute, except for the **isotopia** package which is not on CRAN yet - see code below): +Then simply install the latest version of **isoread** directly from GitHub by running the following code (if it is the first time you install the **isoread** package, all missing dependencies will be automatically installed as well, along with their respective dependencies, which might take a minute, except for the **isotopia** package which is not on CRAN yet and requires manual installation - see code below): ```coffee library(devtools) -install_github('isotopia', 'sebkopf') # not on CRAN yet -install_github('isoread', 'sebkopf') +install_github('sebkopf/isotopia') # not on CRAN yet +install_github('sebkopf/isoread') ``` -##Examples +### Examples + +The following examples can be run with the test data provided by the **isoread** package and illustrate the direct reading of isotope data from the binary data files. Please use the help files in R for details on functions and parameters (e.g. via ```?isoread``` - note: the object methods' help files are not supported by ```Roxygen``` yet but this is [currently being implemented](http://lists.r-forge.r-project.org/pipermail/roxygen-devel/2014-January/000456.html) so will come soon!). + +#### Continuous flow -The following example can be run with the test data provided by the **isoread** package and illustrates the direct reading of a compound-specific hydrogen isotope dataset from the binary data file. 
A summary of the retrieved data can be printed out via ```$show()``` and both ```$plot()``` and ```$make_ggplot()``` commands for the data set are already fully implemented and provide an easy quick way for visualization (of course you can access all the raw data in the object as well via ```$get_mass_data()``` and ```$get_ratio_data()``` and process it as needed). Please use the help files in R for details on functions and paramters (e.g. via ```?isoread``` - note: the object methods' help files are not supported by ```Roxygen``` yet but this is [currently being implemented](http://lists.r-forge.r-project.org/pipermail/roxygen-devel/2014-January/000456.html) so will come soon!). +The following example illustrates the direct reading of a compound-specific hydrogen isotope dataset from the binary data file. A summary of the retrieved data can be printed out via ```$show()```, and both ```$plot()``` (fast plotting of the chromatographic data) and ```$make_ggplot()``` (ggplot that is slower but easy to manipulate) are available for visualization. Of course, you can access all the raw data in the object as well via ```$get_mass_data()``` and ```$get_ratio_data()``` and process it as needed. ```coffee library(isoread) -obj <- isoread(system.file("extdata", "6520__F8-5_5uL_isodat2.cf", package="isoread"), type = c("H_CSIA")) +obj <- isoread( + system.file("extdata", "6520__F8-5_5uL_isodat2.cf", package="isoread"), + type = c("H_CSIA")) obj$show() obj$plot() obj$make_ggplot() ``` + +For a more detailed introduction, **check out the [continuous flow intro](inst/doc/continuous_flow_intro.Rmd)** and the resulting [HTML output](https://rawgit.com/sebkopf/isoread/master/inst/doc/continuous_flow_intro.html)! + +#### Dual Inlet + +Thanks to a push from [Max Lloyd](https://github.com/maxmansaxman), **isoread** now has basic support for dual inlet isotope data and specifically supports reading clumped CO2 runs. 
The following example illustrates the direct reading of a clumped CO2 dual inlet dataset from the binary data file, and prints out a summary of the retrieved data via ```$show()``` and ```$make_ggplot()```. + +```coffee +library(isoread) +obj <- isoread( + system.file("extdata", "dual_inlet_clumped_carbonate.did", package="isoread"), + type = "CO2_CLUMPED") +obj$show() +obj$make_ggplot() +``` + +For a more detailed introduction, **check out the [dual inlet intro](inst/doc/dual_inlet_intro.Rmd)** and the resulting [HTML output](https://rawgit.com/sebkopf/isoread/master/inst/doc/dual_inlet_intro.html)! + +## Development + +If you have use cases for **isoread** that are not currently supported, please make use of the [Issue Tracker](https://github.com/sebkopf/isoread/issues) to collect feature ideas, expansion requests, and of course bug reports. If you are interested in helping with development, that's fantastic! Please fork the repository and branch off from the [dev branch](https://github.com/sebkopf/isoread/tree/dev) since it contains the most up-to-date development version of **isoread**. Make sure to write [```testthat``` tests](http://r-pkgs.had.co.nz/tests.html) for your work (stored in the tests/testthat directory). All tests can be run automatically and continuously during development to make it easier to spot any code problems on the go. The easiest way to run them is by running ```make autotest``` in the **isoread** directory from command line (it will test everything automatically in a completely separate R session). 
diff --git a/inst/doc/continuous_flow_intro.Rmd b/inst/doc/continuous_flow_intro.Rmd new file mode 100644 index 0000000..25ed929 --- /dev/null +++ b/inst/doc/continuous_flow_intro.Rmd @@ -0,0 +1,98 @@ +--- +title: "Continuous Flow Intro" +output: html_document +--- + +## Installation (not run) + +```{r, eval=FALSE} +library(devtools) +install_github('isoread', 'sebkopf') +``` + +## Reading a file + +Here, we read a simple isodat file that is provided as an example in the module. **isoread** takes all the information directly from the binary, which makes it easy to record each step of what is happening with the data. + +```{r load} +library(isoread) +file <- isoread( + system.file("extdata", "6520__F8-5_5uL_isodat2.cf", package="isoread"), + type = "H_CSIA") +``` + +## Chromatographic data + +The *file* variable now contains an isoread object with all the information from the binary file and we can take a look at the chromatographic data in the object, here we look at the first 10 lines (using the k-table or kable command from the knitr package for table output): + +```{r show-data, results='asis'} +library(knitr) +kable(head(file$get_mass_data(), n = 10)) +``` + +### Plot Chromatogram + +For convenience, **isoread** also implements several plotting functions based on standard ```plot``` as well as the ```ggplot``` module so we can have a look at the whole chromatograms: + +```{r ggplot, fig.width=10, fig.height=7} +file$make_ggplot() +``` + +Notice that **isoread** plots all masses and ratios by default and labels the peaks with their peak numbers (reference peaks are marked with *). 
The plotting functions are of course a lot more flexible and we can use isoread functionality to plot just a specific time window of the mass trace chromatogram, and switch the time units to minutes instead of seconds as illustrated below: + +```{r} +file$plot_masses(tlim = c(12.3, 12.6), tunits = "min") +``` + +## File information + +Since **isoread** has access to the original raw binary data file, it can extract other parameters stored with the data, shown here with the example of the H3factor that was registered as the most current one during the analysis: + +```{r, results='asis'} +kable(file$get_info("H3factor")) +``` + +## Peak table + +The table of peaks detected by isodat during the analysis or added by the user later on is also directly accessible. The complete set of 29 columns is available through **isoread**, here a small subset of key components: + +```{r, results='asis'} +kable( +subset(file$get_peak_table(), select = c("Peak Nr.", "Status", + "Ref. Peak", "Component", "Rt", "Start", "End", "Ampl. 2", + "d 2H/1H"))) +``` + +Currently, none of the Components in this peak table are identified, but we can generate a mapping file that identifies which component comes out approximately at which retention time. A simple mapping table, which identifies peaks by retention time, could look like this (here only for 2 components): + +```{r, results='asis'} +map <- data.frame(Rt = c(940, 1135), Component = c("C16:0 FAME", "C18:0 FAME"), stringsAsFactors=F) +kable(map) +``` + +Typically, one would maintain this information for example in an Excel file and load it directly from there. The map can then be applied to the peak table by **isoread**, which makes the identified peaks accessible by name: + +```{r, results='asis'} +file$map_peaks(map) +kable( +file$get_peak_by_name(c("C16:0 FAME", "C18:0 FAME"), + select = c("Peak Nr.", "Component", "Rt", "Start", "End", "Ampl. 
2", "d 2H/1H"))) +``` + +Lastly, the delta value reported in column *d 2H/1H* is automatically loaded as a delta value object using [**isotopia**](http://sebkopf.github.io/isotopia/) and can be used accordingly with all the functionality from **isotopia**. For a simple example, conversion to a fractional abundance (and switch to percent notation): + +```{r} +library(isotopia) +d <- file$get_peak_by_name(c("C16:0 FAME", "C18:0 FAME"), select = "d 2H/1H") +print(d) +print(switch_notation(to_abundance(d), "percent")) +``` + +## Extensions + +Having this information available of course opens various possibilities for the implementation of useful features that are specific to the data. For example, an overview of how consistent the reference peaks in a run were is helpful for determining if one of them might be offset by an overlapping analyte or contaminant. This is implement in **isoread** by the plot_refs() functionality: + +```{r} +file$plot_refs() +``` + diff --git a/inst/doc/continuous_flow_intro.html b/inst/doc/continuous_flow_intro.html new file mode 100644 index 0000000..d0e43db --- /dev/null +++ b/inst/doc/continuous_flow_intro.html @@ -0,0 +1,553 @@ + + + + + + + + + + + + + +Continuous Flow Intro + + + + + + + + + + + + + + + + + + + + +
+ + + + + +
+

Installation (not run)

+
library(devtools)
+install_github('isoread', 'sebkopf', ref = "dev")
+
+
+

Reading a file

+

Here, we read a simple isodat file that is provided as an example in the module. isoread takes all the information directly from the binary, which makes it easy to record each step of what is happening with the data.

+
library(isoread)
+
## Loading required package: plyr
+## Loading required package: reshape2
+## Loading required package: ggplot2
+## Loading required package: isotopia
+
file <- isoread(
+  system.file("extdata", "6520__F8-5_5uL_isodat2.cf", package="isoread"), 
+  type = "H_CSIA")
+
## Reading file /Library/Frameworks/R.framework/Versions/3.1/Resources/library/isoread/extdata/6520__F8-5_5uL_isodat2.cf
+
+
+

Chromatographic data

+

The file variable now contains an isoread object with all the information from the binary file and we can take a look at the chromatographic data in the object, here we look at the first 10 lines (using the k-table or kable command from the knitr package for table output):

+
library(knitr)
+kable(head(file$get_mass_data(), n = 10))
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
timemass2mass3time.stime.minmass2.offsetmass3.offset
0.209194.660.020.2090.0035394.660.02
0.418194.560.050.4180.0070394.560.05
0.627194.560.210.6270.0104394.560.21
0.836194.659.920.8360.0139394.659.92
1.045194.659.721.0450.0174394.659.72
1.254194.559.701.2540.0209394.559.70
1.463194.559.591.4630.0244394.559.59
1.672194.559.721.6720.0279394.559.72
1.881194.559.741.8810.0314394.559.74
2.090194.559.742.0900.0348394.559.74
+
+

Plot Chromatogram

+

For convenience, isoread also implements several plotting functions based on standard plot as well as the ggplot module so we can have a look at the whole chromatograms:

+
file$make_ggplot()
+

plot of chunk ggplot

+

Notice that isoread plots all masses and ratios by default and labels the peaks with their peak numbers (reference peaks are marked with *). The plotting functions are of course a lot more flexible and we can use isoread functionality to plot just a specific time window of the mass trace chromatogram, and switch the time units to minutes instead of seconds as illustrated below:

+
file$plot_masses(tlim = c(12.3, 12.6), tunits = "min")
+

plot of chunk unnamed-chunk-2

+
+
+
+

File information

+

Since isoread has access to the original raw binary data file, it can extract other parameters stored with the data, here shown with the example of the H3factor registered as the most current during the analysis:

+
kable(file$get_info("H3factor"))
+ + + + + + + + + + + + + + + +
PropertyValue
11H3factor2.79431047797221
+
+
+

Peak table

+

The table of peaks detected by isodat during the analysis or added by the user later on is also directly accessible. The complete set of 29 columns is available through isoread; shown here is a small subset of key components:

+
kable(
+subset(file$get_peak_table(), select = c("Peak Nr.", "Status", 
+    "Ref. Peak", "Component", "Rt", "Start", "End", "Ampl. 2", 
+    "d 2H/1H")))
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Peak Nr.StatusRef. PeakComponentRtStartEndAmpl. 2d 2H/1H
1AutoFALSE-286.3283.4293.03978-160.9
2AutoFALSE-321.2318.3327.93979-160.4
3AutoFALSE-612.0606.3634.94993-154.2
4AutoTRUE-671.5666.1699.34906-151.9
5AutoFALSE-747.8740.7768.15227-218.1
6AutoFALSE-809.5801.5829.35044-210.4
7AutoTRUE-860.7855.4889.54129-151.9
8AutoFALSE-936.5927.8961.64534-155.5
9AutoFALSE-1002.2993.41023.14354-198.1
10AutoTRUE-1055.01049.01086.04070-151.9
11AutoFALSE-1135.91126.71154.34377-189.5
12AutoFALSE-1201.31191.91223.14384-207.9
13AutoTRUE-1249.41244.21283.14160-151.9
14AutoFALSE-1333.61324.41356.24316-168.1
15AutoFALSE-1395.31386.31416.83706-193.3
16AutoTRUE-1459.41453.61490.24183-151.9
17AutoFALSE-1608.71600.91636.54303-154.3
18AutoFALSE-1739.71736.41746.23974-160.1
19AutoFALSE-1779.41776.71786.13972-160.6
+

Currently, none of the Components in this peak table are identified, but we can generate a mapping file that identifies which component comes out approximately at which retention time. A simple mapping table, which identifies peaks by retention time, could look like this (here only for 2 components):

+
map <- data.frame(Rt = c(940, 1135), Component = c("C16:0 FAME", "C18:0 FAME"), stringsAsFactors=F)
+kable(map)
+ + + + + + + + + + + + + + + + + +
RtComponent
940C16:0 FAME
1135C18:0 FAME
+

Typically, one would maintain this information in, for example, an Excel file and load it directly from there. The map can then be applied to the peak table by isoread, which makes the identified peaks accessible by name:

+
file$map_peaks(map)
+kable(
+file$get_peak_by_name(c("C16:0 FAME", "C18:0 FAME"), 
+       select = c("Peak Nr.", "Component", "Rt", "Start", "End", "Ampl. 2", "d 2H/1H")))
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Peak Nr.ComponentRtStartEndAmpl. 2d 2H/1H
88C16:0 FAME936.5927.8961.64534-155.5
1111C18:0 FAME1135.91126.71154.34377-189.5
+

Lastly, the delta value reported in column d 2H/1H is automatically loaded as a delta value object using isotopia and can be used accordingly with all the functionality from isotopia. For a simple example, conversion to a fractional abundance (and switch to percent notation):

+
library(isotopia)
+d <- file$get_peak_by_name(c("C16:0 FAME", "C18:0 FAME"), select = "d 2H/1H")
+print(d) 
+
## An isotope value object of type 'Delta value': d2H [permil] vs. VSMOW
+## [1] -155.5 -189.5
+
print(switch_notation(to_abundance(d), "percent"))
+
## An isotope value object of type 'Abundance value': F 2H [%]
+## [1] 0.01315 0.01262
+
+
+

Extensions

+

Having this information available of course opens various possibilities for the implementation of useful features that are specific to the data. For example, an overview of how consistent the reference peaks in a run were is helpful for determining if one of them might be offset by an overlapping analyte or contaminant. This is implemented in isoread by the plot_refs() functionality:

+
file$plot_refs()
+

plot of chunk unnamed-chunk-8

+
+ + +
+ + + + + + + + diff --git a/inst/doc/dual_inlet_intro.Rmd b/inst/doc/dual_inlet_intro.Rmd new file mode 100644 index 0000000..aaf5fee --- /dev/null +++ b/inst/doc/dual_inlet_intro.Rmd @@ -0,0 +1,62 @@ +--- +title: "Dual Inlet Intro" +output: html_document +--- + +## Installation (not run) + +```{r, eval=FALSE} +library(devtools) +install_github('isoread', 'sebkopf') +``` + +## Load file + +```{r load} +library(isoread) +file <- isoread(system.file("extdata", "dual_inlet_clumped_carbonate.did", package="isoread"), type = "CO2_CLUMPED") +``` + +## Raw data + +```{r raw-data, results='asis', fig.width=10, fig.height=7} +library(knitr) +kable(file$get_mass_data()) +file$make_ggplot() +``` + +#### Or only a few masses + +```{r subset} +file$make_ggplot(masses = c("mass44", "mass47")) +``` + +## Processed data + +#### Whole table + +The entire table of processed data. + +```{r processed-data-all, results='asis'} +kable(file$get_data_table()) +``` + +#### Summary + +```{r processed-data-sum, results='asis'} +kable(file$get_data_table(summarize = TRUE)) +``` + +#### Or only a few of these + +```{r processed-subset, results='asis'} +kable(file$get_data_table(select = c("d13C", "d18O"), sum = T)) +``` + +## Other info + +And whatever other information was pulled out of the file. + +```{r file-info, results='asis'} +kable(file$get_info()) +``` diff --git a/inst/doc/dual_inlet_intro.html b/inst/doc/dual_inlet_intro.html new file mode 100644 index 0000000..9c59fcf --- /dev/null +++ b/inst/doc/dual_inlet_intro.html @@ -0,0 +1,535 @@ + + + + + + + + + + + + + +Dual Inlet Intro + + + + + + + + + + + + + + + + + + + + +
+ + + + + +
+

Installation (not run)

+
library(devtools)
+install_github('isoread', 'sebkopf')
+
+
+

Load file

+
library(isoread)
+
## Loading required package: plyr
+## Loading required package: reshape2
+## Loading required package: ggplot2
+## Loading required package: isotopia
+
file <- isoread(system.file("extdata", "dual_inlet_clumped_carbonate.did", package="isoread"), type = "CO2_CLUMPED")
+
## Reading file /Library/Frameworks/R.framework/Versions/3.1/Resources/library/isoread/extdata/dual_inlet_clumped_carbonate.did
+
+
+

Raw data

+
library(knitr)
+kable(file$get_mass_data())
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
analysiscyclemass44mass45mass46mass47mass48mass49
Standard1159411899621954251229.80-181.0
Standard2159341898721944251129.82-180.8
Standard3159171896621920250829.78-180.5
Standard4159021894921899250629.76-180.2
Standard5158961894221891250629.76-180.3
Standard6158851892921876250429.73-180.2
Standard7158761891821864250229.70-180.0
Sample1159551912322238255931.09-181.2
Sample2159461911122225255831.09-181.0
Sample3159261908822197255431.07-180.7
Sample4159111907022176255231.02-180.5
Sample5159101906822175255231.03-180.3
Sample6159131907222178255231.04-180.5
Sample7159141907322181255331.06-180.4
Standard0159461900221961251329.79-181.3
+
file$make_ggplot()
+

plot of chunk raw-data

+
+

Or only a few masses

+
file$make_ggplot(masses = c("mass44", "mass47"))
+

plot of chunk subset

+
+
+
+

Processed data

+
+

Whole table

+

The entire table of processed data.

+
kable(file$get_data_table())
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
cycled 45CO2/44CO2d 46CO2/44CO2d13Cd18Od17Oat% 13Cat% 18O
13.32937.332.19337.376.2021.1080.2076
23.32137.312.18537.356.1931.1080.2076
33.32637.322.19137.356.1961.1080.2076
43.32037.322.18437.356.1961.1080.2076
53.31937.332.18237.376.2021.1080.2076
63.31837.322.18237.366.1981.1080.2076
73.32237.322.18637.356.1961.1080.2076
+
+
+

Summary

+
kable(file$get_data_table(summarize = TRUE))
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
VariableMeanStd. Devi.Std. Error.
d 45CO2/44CO23.32200.00400.0015
d 46CO2/44CO237.32120.00700.0026
d13C2.18610.00430.0016
d18O37.35700.00700.0026
d17O6.19760.00350.0013
at% 13C1.10800.00000.0000
at% 18O0.20760.00000.0000
+
+
+

Or only a few of these

+
kable(file$get_data_table(select = c("d13C", "d18O"), sum = T))
+ + + + + + + + + + + + + + + + + + + + + + + +
VariableMeanStd. Devi.Std. Error.
d13C2.1860.00430.0016
d18O37.3570.00700.0026
+
+
+
+

Other info

+

And whatever other information was pulled out of the file.

+
kable(file$get_info())
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PropertyValue
File location/Library/Frameworks/R.framework/Versions/3.1/Resources/library/isoread/extdata/dual_inlet_clumped_carbonate.did
Date2014-11-16 15:23:29
Info_PeakPeak Center found at [61032]
Info_BackgroundBackground: 8.87 mV,11.31 mV,12.98 mV,6.40 mV,1.90 mV,5.88 mV (old253)
Info_PressAdjustPressAdjust: L: 15972.5 R: 15971.6 ( Manual Adjustment )
Line158
Peak Center1
Pressadjust1
Background1
Identifier 1CIT Carrara
Identifier 213
Analysis49077
Comment
Preparation
Post Script
MethodCO2_multiply_16V.met
+
+ + +
+ + + + + + + + diff --git a/inst/doc/isoread_0.1.pdf b/inst/doc/isoread_0.1.pdf new file mode 100644 index 0000000..79a8d78 Binary files /dev/null and b/inst/doc/isoread_0.1.pdf differ diff --git a/inst/extdata/dual_inlet_clumped_carbonate.did b/inst/extdata/dual_inlet_clumped_carbonate.did new file mode 100644 index 0000000..6d00f39 Binary files /dev/null and b/inst/extdata/dual_inlet_clumped_carbonate.did differ diff --git a/man/BinaryFile.Rd b/man/BinaryFile.Rd index b066715..d247123 100644 --- a/man/BinaryFile.Rd +++ b/man/BinaryFile.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.0.0): do not edit by hand +% Generated by roxygen2 (4.0.2): do not edit by hand \docType{class} \name{BinaryFile} \alias{BinaryFile} @@ -33,7 +33,12 @@ removed during cleanup unless clean_keys = FALSE)} \item{\code{cleanup(clean_raw = TRUE, clean_keys = TRUE, ...)}}{clean up the object by removing the raw data and keys (and other large but only transiently important information) from memory} -\item{\code{find_key(pattern, occurence = 1)}}{find a key by a regexp pattern} +\item{\code{find_key(pattern, occurence = NULL, fixed = FALSE, byte_min = 0, + byte_max = length(rawdata))}}{finds all keys matching 'key' or a specific occurence of it (use -1 for last occurence) +#' @param fixed whether to find the key(s) by regexp match or fixed string (default = pattern) +#' @param byte_min only look for keys that start after this position +#' @param byte_max only look for keys that start before this position +#' @return the lines of the keys data frame with all the information about the found key(s)} \item{\code{find_keys(asciiL = 10, unicodeL = 5)}}{finds all unicode and ascii strings and stores them for navigation around the file} @@ -43,7 +48,10 @@ removed during cleanup unless clean_keys = FALSE)} \item{\code{load(...)}}{load the data from the file and generate key lookup} -\item{\code{move_to_key(key, occurence = 1)}}{moves position to the end of a specific occurence of a key (use -1 for 
last occurence)} +\item{\code{move_to_key(key, occurence = 1, fixed = TRUE)}}{moves position to the end of a specific key or occurence of a key +#' @param key either a string or a data.frame line with key value and byteEnd (the way it is returned by find_key) +#' @param occurence if key is a string, which occurence to move to? (use -1 for last occurence) +#' @param fixed whether to find the key (if a string) by regexp match or fixed string (default = fixed string)} \item{\code{parse(type, length = 1, id = NA, skip_first = 0)}}{parse binary data at current position in the data stream advances pointer by the size of the read data diff --git a/man/IrmsContinuousFlowData.Rd b/man/IrmsContinuousFlowData.Rd index 80970b8..2766f45 100644 --- a/man/IrmsContinuousFlowData.Rd +++ b/man/IrmsContinuousFlowData.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.0.0): do not edit by hand +% Generated by roxygen2 (4.0.2): do not edit by hand \docType{class} \name{IrmsContinuousFlowData} \alias{IrmsContinousFlowData} @@ -32,7 +32,9 @@ all masses and ratios} \item{\code{export_data(file, ...)}}{export the data stored in this object to file} -\item{\code{get_mass_data(masses = names(.self$plotOptions$masses), melt = FALSE)}}{get the mass trace data for specific masses, can be provided in \code{melt = TRUE} format +\item{\code{get_data_table(...)}}{retrieve the data table} + +\item{\code{get_mass_data(masses = names(.self$plotOptions$masses), melt = FALSE, ...)}}{get the mass trace data for specific masses, can be provided in \code{melt = TRUE} long format for easy use in ggplot style plotting} \item{\code{get_peak(peak_nr, select = names(peakTable))}}{retrieve information for a peak in the peak table (identified by peak_nr), can specify which columns to retrieve @@ -66,6 +68,8 @@ to existing columns, otherwise they are disregarded with a warning. 
Note: make sure to have the data.frame that is passed in set with \code{stringsAsFactors = F} (usually the desired setting for the mapping)} +\item{\code{plot(...)}}{plot data with standard plot functions (fast) to standard output} + \item{\code{plot_data(y, ylab = "", title = "data peaks")}}{plot the data of the actual sample peaks, see \code{plot_peak_table} for details on syntax} \item{\code{plot_masses(tlim = NULL, ylim = NULL, diff --git a/man/IrmsData.Rd b/man/IrmsData.Rd index 6c9ccfd..efde5a4 100644 --- a/man/IrmsData.Rd +++ b/man/IrmsData.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.0.0): do not edit by hand +% Generated by roxygen2 (4.0.2): do not edit by hand \docType{class} \name{IrmsData} \alias{IrmsData} @@ -16,10 +16,17 @@ IrmsData reference class \describe{ \item{\code{export_data(file, ...)}}{export the data stored in this object to file} +\item{\code{get_data_table(...)}}{retrieve the data table} + +\item{\code{get_mass_data(masses = names(.self$plotOptions$masses), melt = FALSE, ...)}}{get the mass trace data for specific masses, can be provided in \code{melt = TRUE} long format +for easy use in ggplot style plotting} + \item{\code{init_irms_data()}}{initialize irms data container} \item{\code{make_ggplot(...)}}{generate a ggplot object for the data in this IrmsData object} +\item{\code{plot(...)}}{plot data with standard plot functions (fast) to standard output} + \item{\code{set_plot_options(...)}}{set plot options} \item{\code{summarize(file, ....)}}{summarize the data stored in this object and save it to file} diff --git a/man/IrmsDualInletData.Rd b/man/IrmsDualInletData.Rd index c738d87..2be5d4e 100644 --- a/man/IrmsDualInletData.Rd +++ b/man/IrmsDualInletData.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.0.0): do not edit by hand +% Generated by roxygen2 (4.0.2): do not edit by hand \docType{class} \name{IrmsDualInletData} \alias{IrmsDualInletData} @@ -6,6 +6,22 @@ \description{ IrmsDualInletData reference class } +\section{Methods}{ + 
+\describe{ +\item{\code{convert_data(value, data_type)}}{function converts data table entries to their appropriate data types - overwrite in derived classes for more specialized behaviour} + +\item{\code{get_data_table(...)}}{retrieve the data table} + +\item{\code{get_mass_data(masses = names(.self$plotOptions$masses), melt = FALSE, ...)}}{get the mass trace data for specific masses, can be provided in \code{melt = TRUE} long format +for easy use in ggplot style plotting} + +\item{\code{init_irms_data()}}{initialize irms data container} + +\item{\code{make_ggplot(...)}}{generate a ggplot object for the data in this IrmsData object} + +\item{\code{plot(...)}}{plot data with standard plot functions (fast) to standard output} +}} \note{ not implemented yet for any actual data reading } diff --git a/man/IsodatClumpedCO2File.Rd b/man/IsodatClumpedCO2File.Rd new file mode 100644 index 0000000..5ebbad8 --- /dev/null +++ b/man/IsodatClumpedCO2File.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2 (4.0.2): do not edit by hand +\docType{class} +\name{IsodatClumpedCO2File} +\alias{IsodatClumpedCO2File} +\title{Clumped dual inlet data class} +\description{ +Clumped dual inlet data class +} +\seealso{ +\link{IrmsDualInletData}, \link{BinaryFile}, \link{IsodatFile}, \link{IsodatDualInletFile} +} + diff --git a/man/IsodatDualInletFile.Rd b/man/IsodatDualInletFile.Rd new file mode 100644 index 0000000..6cf8a16 --- /dev/null +++ b/man/IsodatDualInletFile.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2 (4.0.2): do not edit by hand +\docType{class} +\name{IsodatDualInletFile} +\alias{IsodatDualInletFile} +\title{Clumped dual inlet data class} +\description{ +Clumped dual inlet data class +} +\section{Methods}{ + +\describe{ +\item{\code{initialize(file, ...)}}{initialize BinaryFile object, requires a file path} + +\item{\code{process(...)}}{process the raw data to fill the data list} +}} +\seealso{ +\link{BinaryFile}, \link{IsodatFile}, \link{IrmsDualInletData}, \link{IrmsData} +} + diff 
--git a/man/IsodatFile.Rd b/man/IsodatFile.Rd index 2180fb9..4dc437a 100644 --- a/man/IsodatFile.Rd +++ b/man/IsodatFile.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.0.0): do not edit by hand +% Generated by roxygen2 (4.0.2): do not edit by hand \docType{class} \name{IsodatFile} \alias{IsodatFile} diff --git a/man/IsodatHydrogenContinuousFlowFile.Rd b/man/IsodatHydrogenContinuousFlowFile.Rd index 8c315a1..e39e86a 100644 --- a/man/IsodatHydrogenContinuousFlowFile.Rd +++ b/man/IsodatHydrogenContinuousFlowFile.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.0.0): do not edit by hand +% Generated by roxygen2 (4.0.2): do not edit by hand \docType{class} \name{IsodatHydrogenContinuousFlowFile} \alias{IsodatHydrogenContinuousFlowFile} diff --git a/man/export_data.Rd b/man/export_data.Rd index 4c23c3b..455df51 100644 --- a/man/export_data.Rd +++ b/man/export_data.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.0.0): do not edit by hand +% Generated by roxygen2 (4.0.2): do not edit by hand \name{export_data} \alias{export_data} \title{Convenience function to export data from multiple diff --git a/man/isoread-package.Rd b/man/isoread-package.Rd index 940af45..63e94aa 100644 --- a/man/isoread-package.Rd +++ b/man/isoread-package.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.0.0): do not edit by hand +% Generated by roxygen2 (4.0.2): do not edit by hand \docType{package} \name{isoread-package} \alias{isoread-package} @@ -12,5 +12,7 @@ See \code{\link{isoread}} for details on how to use. } \author{ Sebastian Kopf + +Max Lloyd } diff --git a/man/isoread.Rd b/man/isoread.Rd index 2c0064d..c7ae40f 100644 --- a/man/isoread.Rd +++ b/man/isoread.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.0.0): do not edit by hand +% Generated by roxygen2 (4.0.2): do not edit by hand \name{isoread} \alias{isoread} \title{Read isotope data files} @@ -6,8 +6,6 @@ isoread(files, type, load_chroms = T, ...) 
} \arguments{ -\item{file}{path to the file(s) to read} - \item{type}{type of the files to be read \itemize{ \item{'H_CSIA'}{ = compound specific IRMS data for hydrogen isotopes} @@ -16,10 +14,14 @@ isoread(files, type, load_chroms = T, ...) \item{load_chroms}{whether to keep the chromatograms in the objects (otherwise only peak tables are kept)} \item{...}{parameters passed to the \code{load} and \code{process} functions of the IsodatFile objects} + +\item{file}{path to the file(s) to read} } \value{ List of file \code{type} specific objects. \itemize{ + \item{'DUAL'}{ = instance(s) of a basic \code{\link{IsodatDualInletFile}} which implements \code{\link{IrmsDualInletData}}}. + \item{'CO2_CLUMPED'}{ = instance(s) of the more specialized \code{\link{IsodatClumpedCO2File}} which extends \code{\link{IsodatDualInletFile}}}. \item{'H_CSIA'}{ = instance(s) of \code{\link{IsodatHydrogenContinuousFlowFile}} which implements \code{\link{IrmsContinuousFlowData}}}. } If file names start with a number, diff --git a/man/isoread_folder.Rd b/man/isoread_folder.Rd index 985701d..c5b21df 100644 --- a/man/isoread_folder.Rd +++ b/man/isoread_folder.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.0.0): do not edit by hand +% Generated by roxygen2 (4.0.2): do not edit by hand \name{isoread_folder} \alias{isoread_folder} \title{Reads all isodat files in a folder.} diff --git a/man/map_binary_data_type.Rd b/man/map_binary_data_type.Rd index 16179cb..d0b9477 100644 --- a/man/map_binary_data_type.Rd +++ b/man/map_binary_data_type.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.0.0): do not edit by hand +% Generated by roxygen2 (4.0.2): do not edit by hand \name{map_binary_data_type} \alias{map_binary_data_type} \title{Binary data type mapping} diff --git a/man/map_peaks.Rd b/man/map_peaks.Rd index 7d8a2f5..5e501ee 100644 --- a/man/map_peaks.Rd +++ b/man/map_peaks.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.0.0): do not edit by hand +% Generated by roxygen2 (4.0.2): do not edit by hand 
\name{map_peaks} \alias{map_peaks} \title{Map peak table} diff --git a/man/parse_binary_data.Rd b/man/parse_binary_data.Rd index a485620..d2f7c94 100644 --- a/man/parse_binary_data.Rd +++ b/man/parse_binary_data.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.0.0): do not edit by hand +% Generated by roxygen2 (4.0.2): do not edit by hand \name{parse_binary_data} \alias{parse_binary_data} \title{Wrapper for parsing binary data.} diff --git a/man/quickview.Rd b/man/quickview.Rd index aa9d7c4..1cd9d99 100644 --- a/man/quickview.Rd +++ b/man/quickview.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.0.0): do not edit by hand +% Generated by roxygen2 (4.0.2): do not edit by hand \name{quickview} \alias{quickview} \title{File quickview} diff --git a/man/reload.Rd b/man/reload.Rd index 7dbfd05..12cbb19 100644 --- a/man/reload.Rd +++ b/man/reload.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.0.0): do not edit by hand +% Generated by roxygen2 (4.0.2): do not edit by hand \name{reload} \alias{reload} \title{Reload an isodat file object.} diff --git a/man/summarize_all.Rd b/man/summarize_all.Rd index ffe31a0..40ce621 100644 --- a/man/summarize_all.Rd +++ b/man/summarize_all.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.0.0): do not edit by hand +% Generated by roxygen2 (4.0.2): do not edit by hand \name{summarize_all} \alias{summarize_all} \title{Summarize a collection of IrmsData objects} diff --git a/structure.uml b/structure.uml index 5962ddf..d2a4ab0 100644 --- a/structure.uml +++ b/structure.uml @@ -58,7 +58,7 @@ CodeGeneration - + ArgoUML (using Netbeans XMI Writer version 1.0) 0.34(6) revised on $Date: 2010-01-11 22:20:14 +0100 (Mon, 11 Jan 2010) $ @@ -169,12 +169,39 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -209,7 +272,7 @@ @@ -235,22 +298,13 @@ name = 'return' isSpecification = 'false' kind = 'return'/> - - - - - - - - - + - @@ -259,13 +313,13 @@ - - + - @@ -274,58 +328,13 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - + - 
@@ -334,6 +343,24 @@ + + + + + + + + + + + + - - - @@ -506,15 +530,51 @@ - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -622,7 +682,7 @@ > BinaryFile filepath : String filename : String rawdata : Binary data : data.frame load() process() + get_info() + show() IrmsData plotOptions : list + massData : data.frame + dataTable : data.frame + dataTableColumns : data.frame check_data() + >init_irms_data() plot() + >check_data() ggplot() + >check_mass_data() summarize() + >check_data_table() export_data() - - - - - - - - IrmsContinuousFlowData - - - - - get_mass_data() + - - - - - - - get_data_table() + - plot() + chromData : data.frame - make_ggplot() + peakTable : data.frame - summarize() + peakTableColumns : data.frame + >export_data() - + + + + + - IrmsContinuousFlowData + - - check_chrom_data() - check_peak_table() - + + + + + + get_peak_table() - + + + + + + + get_mass_data() - + get_ratio_data() - get_peak_by_rt() - plot_peak_table() - plot_refs() - map_peaks() - identify_peaks() - set_ref_peaks() - - - + + IrmsDualInletData - - + + IsodatFile + y="400" /> + y="193" /> IsodatHydrogenContinuousFlowFile + angle="-90.0" + offset="27" /> - - + + - - + + + - IsodatDualInletFile + + + + + + + + - - - - - - - - + + + + + - - - - - - + - - + + + sourcePortFig="Fig12.0" + destPortFig="Fig5.0" + sourceFigNode="Fig12" + destFigNode="Fig5" + + + + + + + + + + sourcePortFig="Fig12.0" + destPortFig="Fig7.0" + sourceFigNode="Fig12" + destFigNode="Fig7" + + + + + + + + + + + + + + + currently not -implemented -yet + justification="Center" + >IsodatClumpedCO2File + + + + + + + + + + + + + + + + + + + + - - sourcePortFig="Fig12.0" - destPortFig="Fig5.0" - sourceFigNode="Fig12" - destFigNode="Fig5" + sourcePortFig="Fig15.0" + destPortFig="Fig12.0" + sourceFigNode="Fig15" + destFigNode="Fig12" + - - - + + diff --git a/tests/testthat/test-01-binary.R b/tests/testthat/test-01-binary.R index 0bc2123..001261b 100644 --- a/tests/testthat/test-01-binary.R 
+++ b/tests/testthat/test-01-binary.R @@ -58,8 +58,8 @@ test_that("Testing BinaryFile Class", { expect_that(test$move_to_key("H3 Factor") > 1, is_true()) test$skip(8) expect_that(test$parse("double", id = "H3") > 2, is_true()) # rough check for a reasonable H3 factor - expect_true(length(test$find_key(".gcm$")) > 0, label = "Looking for gc program file name") - expect_true(length(test$find_key(".met$")) > 0, label = "Looking for MS method") - expect_true(length(test$find_key(".cf$")) > 0) - expect_true(length(test$find_key("Internal")) > 0) + expect_true(length(test$find_key(".gcm$")$value) > 0, label = "Looking for gc program file name") + expect_true(length(test$find_key(".met$")$value) > 0, label = "Looking for MS method") + expect_true(length(test$find_key(".cf$")$value) > 0) + expect_true(length(test$find_key("Internal")$value) > 0) }) \ No newline at end of file diff --git a/tests/testthat/test-02-isoread.R b/tests/testthat/test-02-isoread.R index 7aa3980..c06cb27 100644 --- a/tests/testthat/test-02-isoread.R +++ b/tests/testthat/test-02-isoread.R @@ -20,11 +20,13 @@ test_that("Testing Isodat Hydrogen Continous Flow File Class (H_CSIA)", { test$cleanup(clean_chrom_data = FALSE, clean_keys = TRUE) expect_equal(test$keys, data.frame()) - # plotting test (indirect just by checking if it works) + # plotting test (indirect just by checking if it works) - NOTE: plotting into a temp file so it does not generate Rplot.pdf when run from cmd line expect_true({ test$set_plot_options(masses = list(mass3 = list(color = "red"))) + pdf(file = (tfile <- tempfile())) test$plot(tlim = c(10, 15), tunits = "min") - TRUE + dev.off() + file.exists(tfile) }) expect_that(test$make_ggplot(masses = NULL), is_a("ggplot")) expect_that(test$make_ggplot(ratios = NULL), is_a("ggplot")) @@ -77,7 +79,7 @@ test_that("Testing Isodat Hydrogen Continous Flow File Class (H_CSIA)", { expect_equal(test$get_peak_by_name(c("test1", "test2"))$Formula, c("C2O", "H25")) # data plotting - 
expect_that(test$plot_data(), is_a("ggplot")) + expect_that(test$plot_data(), is_a("ggplot")) # summarizing expect_message(test$summarize(file = tempfile()), "Summary saved") @@ -98,7 +100,9 @@ test_that("Testing isoread whole folder read", { # make sure they can all be printed expect_true({ + pdf(file = (tfile <- tempfile())) sapply(isofiles, function(i) i$plot()) - TRUE + dev.off() + file.exists(tfile) }) }) diff --git a/tests/testthat/test-04-dualInlet.R b/tests/testthat/test-04-dualInlet.R new file mode 100644 index 0000000..7656c38 --- /dev/null +++ b/tests/testthat/test-04-dualInlet.R @@ -0,0 +1,79 @@ +context("Dual Inlet") + +test_that("Testing general dual inlet binary file",{ + # load test file + expect_that(test <- suppressMessages(isoread(system.file("extdata", "dual_inlet_clumped_carbonate.did", package="isoread"), type = "DUAL")), is_a("IsodatDualInletFile")) + + # check key number + expect_that({test$load(); nrow(test$keys)}, equals(1842)) + + # check presence of certain keys + test_keys <- c("CTraceInfo", "CPlotRange", "CDualInletRawData", "DualInlet RawData Standard Block", + "DualInlet RawData Sample Block", "CTwoDoublesArrayData", "CDualInletEvaluatedData", + "CMeasurmentInfos", "CMeasurmentErrors", "Sequence Line Information", "Visualisation Informations") + sapply(test_keys, function(key) expect_true(test$move_to_key(key) > 1)) + + # check that masses are detected properly and data is returned properly + test$set_plot_options(masses = list(mass42 = list(label = "Mass 42"))) + expect_error(test$check_mass_data(), "Not all masses appear to be recorded in this file") + test$set_plot_options(masses = list(mass42 = NULL)) + expect_that({test$process(); test$get_mass_data()}, equals(structure(list( + analysis = c("Standard", "Standard", "Standard", "Standard", "Standard", "Standard", "Standard", "Sample", "Sample", "Sample", "Sample", "Sample", "Sample", "Sample", "Standard"), + cycle = c(1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 0), + mass44 = 
c(15941.1363271265, 15933.5415417438, 15916.5753680181, 15901.6242158319, 15896.0021519182, 15884.6349693404, 15875.8903911327, 15955.1761050668, 15945.6432073637, 15925.9044562907, 15911.0955861597, 15909.8304295061, 15912.5607067829, 15914.033518328, 15946.4199904763), + mass45 = c(18995.6641449228, 18986.6426935265, 18966.3386596933, 18948.599548316, 18941.9738190935, 18928.5070148353, 18918.1242702615, 19122.7104838963, 19111.1587191928, 19087.5745071665, 19069.7008046558, 19068.2356840326, 19071.5658532181, 19073.465510929, 19001.9437826595), + mass46 = c(21954.1903700836, 21943.5768978526, 21919.9657675996, 21899.1733602254, 21891.4939747018, 21876.1851946469, 21863.9566288155, 22237.9853884335, 22224.6454843867, 22197.0543020471, 22176.1811399929, 22174.6160373849, 22178.4807256429, 22180.5188199481, 21960.6199807974), + mass47 = c(2512.34539994474, 2511.28020726069, 2508.49706375876, 2506.10421681761, 2505.60046143274, 2503.55467620561, 2502.44431399069, 2559.24464173059, 2557.85735348105, 2554.38782454663, 2552.02286606039, 2551.66523819173, 2552.28051194425, 2552.79964917294, 2513.27984695638), + mass48 = c(29.8004766868714, 29.8181826767711, 29.7807423862016, 29.7630362554129, 29.7581767374068, 29.7292305805958, 29.695128630388, 31.0943601057772, 31.0915312685677, 31.0684586128672, 31.0194843214432, 31.0281917024862, 31.0420487673658, 31.0584470903803, 29.7874191651753), + mass49 = c(-180.975239499152, -180.774840579306, -180.458274708744, -180.231083464175, -180.309544685111, -180.201251727574, -180.034033824346, -181.245969751486, -180.997565144103, -180.700227016011, -180.465344900286, -180.273383292619, -180.511179915295, -180.373364705306, -181.27120940821)), + .Names = c("analysis", "cycle", "mass44", "mass45", "mass46", "mass47", "mass48", "mass49"), row.names = c(NA, 15L), class = "data.frame"))) + + expect_error(test$get_mass_data(masses = c("mass2")), "Some masses .* do not exist in the loaded massData") + + # evaluated data + 
expect_equal(test$get_data_table(summarize = FALSE), structure(list( + cycle = c(1, 2, 3, 4, 5, 6, 7), + `d 45CO2/44CO2 ` = c(3.32877703714618, 3.32095526378628, 3.32633836566676, 3.32003038117312, 3.31874078883043, 3.31775484493191, 3.32175175548288), + `d 46CO2/44CO2 ` = c(37.3306472298882, 37.3117987381053, 37.3175143140962, 37.3172067628513, 37.3297678783726, 37.3228487105193, 37.3183680061899), + `d 13C/12C ` = c(2.19299170711751, 2.18529477038931, 2.19085700819077, 2.18410570112626, 2.18226436164115, 2.18146010548259, 2.18590870180324), + `d 18O/16O ` = c(37.3664562584672, 37.3476045904049, 37.353313758336, 37.3530212841975, 37.3656004334371, 37.358675484273, 37.3541796890982), + `d 17O/16O ` = c(6.20232096749262, 6.19288569364174, 6.19574314518756, 6.19559676140047, 6.20189262822857, 6.1984266942936, 6.19617654484061), + `AT% 13C/12C ` = c(1.10805634956821, 1.10804793388553, 1.10805401552957, 1.10804663377992, 1.10804462049446, 1.10804374113609, 1.10804860514639), + `AT% 18O/16O ` = c(0.207580927072588, 0.207577162613362, 0.207578302667939, 0.207578244264249, 0.207580756174273, 0.207579373342358, 0.20757847558426)), + .Names = c("cycle", "d 45CO2/44CO2 ", "d 46CO2/44CO2 ", "d 13C/12C ", "d 18O/16O ", "d 17O/16O ", "AT% 13C/12C ", "AT% 18O/16O "), row.names = c(NA, -7L), class = "data.frame")) + + expect_error(test$get_data_table(select = c("non column")), "Some data .* do not exist in the loaded dataTable") + + expect_equal(test$get_data_table(select = c("d 13C/12C ", "d 18O/16O "), summarize = TRUE), + structure(list(Variable = structure(1:2, .Label = c("d 13C/12C ", "d 18O/16O "), class = "factor"), + Mean = c(2.18612605082155, 37.3569787854591), + `Std. Devi.` = c(0.00430160752664701, 0.00697422407563108), + `Std. Error.` = c(0.00162585482190166, 0.00263600892739416)), + .Names = c("Variable", "Mean", "Std. Devi.", "Std. 
Error."), + row.names = c(NA, -2L), class = "data.frame")) + + # other information + expect_equal(test$data$`Identifier 1`, "CIT Carrara") + expect_equal(test$data$Analysis, "49077") + expect_equal(test$data$Info_Background, "Background: 8.87 mV,11.31 mV,12.98 mV,6.40 mV,1.90 mV,5.88 mV (old253)") + + # data retrieval + expect_equal(names(test$get_mass_data(masses = c("mass46", "mass49"))), c("analysis", "cycle", "mass46", "mass49")) + expect_equal(names(test$get_data_table(select = c("cycle", "d 45CO2/44CO2 ", "AT% 18O/16O "), summarize = FALSE)), + c("cycle", "d 45CO2/44CO2 ", "AT% 18O/16O ")) + + # plotting + expect_error(test$plot(), "not implemented yet") + expect_that(test$make_ggplot(masses = c("mass44", "mass45")), is_a("ggplot")) +}) + + +test_that("Testing specialized instance of dual inlet file: isodat clumped CO2", { + + expect_that(test <- suppressMessages(isoread(system.file("extdata", "dual_inlet_clumped_carbonate.did", package="isoread"), type = "CO2_CLUMPED")), is_a("IsodatFile")) + + expect_equal( + as.character(test$get_data_table(select = c("d13C", "d18O", "d17O", "at% 13C", "at% 18O"), summarize = TRUE)$Variable), + c("d13C", "d18O", "d17O", "at% 13C", "at% 18O")) + +}) \ No newline at end of file