Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #109 from isoverse/dev
Browse files Browse the repository at this point in the history
upgrade master to version 1.1.4
sebkopf authored Mar 1, 2020
2 parents 9d08da8 + ba10ffb commit 7cc2076
Showing 94 changed files with 400 additions and 280 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: isoreader
Title: Read IRMS data files
Description: R interface to IRMS (isotope ratio mass spectrometry) file formats typically used in stable isotope geochemistry.
Version: 1.1.3
Version: 1.1.4
Authors@R: person("Sebastian", "Kopf", email = "sebastian.kopf@colorado.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-2044-0201"))
URL: https://github.com/isoverse/isoreader
BugReports: https://github.com/isoverse/isoreader/issues
120 changes: 72 additions & 48 deletions R/isoread_cf.R
Original file line number Diff line number Diff line change
@@ -39,10 +39,11 @@ iso_read_cf <- function(ds, options = list()) {
extract_isodat_continuous_flow_vendor_data_table, ds,
cap_at_fun = function(bin) {
C_blocks <- filter(bin$C_blocks, block == "CRawData", start >= bin$pos)
if (nrow(C_blocks) > 0)
if (nrow(C_blocks) > 0) {
cap_at_next_C_block(bin, "CRawData")
else
} else {
cap_at_next_C_block(bin, "CErrorGridStorage")
}
})
}

@@ -54,59 +55,82 @@ extract_cf_raw_voltage_data <- function(ds) {
# move to beginning of intensity information (the larger block coming
ds$binary <- ds$binary %>%
set_binary_file_error_prefix("cannot identify measured masses") %>%
move_to_C_block_range("CRawDataScanStorage", "CClockScanPart")
# can have data in multiple positions (e.g. if peak jumping) throughout the rest of the binary
move_to_C_block("CRawDataScanStorage", reset_cap = TRUE)

# get gas name (only used for error reporting)
ds$binary <- ds$binary %>%
move_to_next_pattern(re_block("etx"), re_or(re_text("/"), re_text("-"), re_text(".")), re_block("fef-0"), re_block("fef-x"),
re_text("Trace Data "), max_gap = 0) %>%
capture_data("gas", "text", re_null(4), re_block("stx"))
gas_config <- ds$binary$data$gas

# data start
data_start_re <- re_combine(
re_block("stx"), re_block("fef-0"), re_block("stx"),
re_direct(".{4}", size = 4, label = ".{4}"))
ds$binary <- ds$binary %>% move_to_next_C_block("CBinary") %>% move_to_next_pattern(data_start_re, max_gap = 0)
data_start <- ds$binary$pos
# get trace positions
gas_positions <- ds$binary %>%
find_next_patterns(re_block("fef-0"), re_block("fef-x"), re_text("Trace Data "), re_block("text"), re_null(4), re_block("stx"))

# find all masses at end of data
data_end_re <- re_combine(
re_direct(".{2}", size = 2, label = ".{2}"), re_block("stx"),
re_block("fef-0"), re_block("stx"), re_null(4))
mass_re <- re_combine(re_block("fef-x"), re_text("Mass "))
mass_positions <- ds$binary %>% move_to_next_pattern(data_end_re) %>% find_next_patterns(mass_re)
# raw_data
raw_data <- tibble::tibble()

masses <- c()
for (pos in mass_positions) {
# a bit tricky to capture but this should do the trick reliably
raw_mass <-
ds$binary %>% move_to_pos(pos + mass_re$size) %>%
capture_data("mass", "raw", re_block("fef-x"), ignore_trailing_zeros = FALSE) %>%
{ .$data$mass }
text_mass <- parse_raw_data(grepRaw("^([0-9]\\x00)+", raw_mass, value = TRUE), type = "text")
masses <- c(masses, text_mass)
# loop through gas positions
for (gas_pos in gas_positions) {
ds$binary <- ds$binary %>%
move_to_pos(gas_pos) %>%
skip_pos(30) %>%
capture_data("gas", "text", re_null(4), re_block("stx"))

gas_config <- ds$binary$data$gas

# data start
data_start_re <- re_combine(
re_block("stx"), re_block("fef-0"), re_block("stx"),
re_direct(".{4}", size = 4, label = ".{4}"))
ds$binary <- ds$binary %>% move_to_next_pattern(data_start_re)
data_start <- ds$binary$pos

# find all masses at end of data
data_end_re <- re_combine(
re_direct(".{2}", size = 2, label = ".{2}"), re_block("stx"),
re_block("fef-0"), re_block("stx"), re_null(4))
ds$binary <- ds$binary %>% move_to_next_pattern(data_end_re)
data_end <- ds$binary$pos - data_end_re$size

mass_re <- re_combine(re_block("fef-x"), re_text("Mass "))
mass_positions <- ds$binary %>%
cap_at_next_pattern(re_text("MS/Clock")) %>%
find_next_patterns(mass_re)

masses <- c()
for (pos in mass_positions) {
# a bit tricky to capture but this should do the trick reliably
raw_mass <-
ds$binary %>% move_to_pos(pos + mass_re$size) %>%
capture_data("mass", "raw", re_block("fef-x"), ignore_trailing_zeros = FALSE) %>%
{ .$data$mass }
text_mass <- parse_raw_data(grepRaw("^([0-9]\\x00)+", raw_mass, value = TRUE), type = "text")
masses <- c(masses, text_mass)
}

if (is.null(masses)) stop("could not identify measured ions for gas '", gas_config, "'", call. = FALSE)
masses_columns <- str_c("v", masses, ".mV")

# read in data
n_data_points <- (data_end - data_start)/(4L + length(masses) * 8L)
if (n_data_points %% 1 > 0)
stop("number of data points for ", gas_config, " is not an integer (", n_data_points, ")", call. = FALSE)

ds$binary<- ds$binary %>%
move_to_pos(data_start) %>%
capture_n_data("voltages", c("float", rep("double", length(masses))), n_data_points)
voltages <- bind_rows(ds$binary$data$voltages %>% dplyr::as_tibble() %>% setNames(c("time.s", masses_columns)))

# check for data
if (nrow(voltages) == 0)
stop("could not find raw voltage data for gas ", gas_config, call. = FALSE)

# raw data
raw_data <- dplyr::bind_rows(raw_data, voltages)

}

if (is.null(masses)) stop("could not identify measured ions for gas '", gas_config, "'", call. = FALSE)
masses_columns <- str_c("v", masses, ".mV")

# read in data
ds$binary<- ds$binary %>%
move_to_pos(data_start) %>%
capture_data("voltages", c("float", rep("double", length(masses))), data_end_re)
voltages <- bind_rows(ds$binary$data$voltages %>% dplyr::as_tibble() %>% setNames(c("time.s", masses_columns)))

# check for data
if (nrow(voltages) == 0)
stop("could not find raw voltage data for gas ", gas_config, call. = FALSE)

# add time point column
tp <- time.s <- NULL # global vars
ds$raw_data <-
voltages %>% arrange(time.s) %>%
mutate(tp = 1:n()) %>%
select(tp, time.s, everything())
raw_data %>% arrange(.data$time.s) %>%
mutate(tp = 1:n()) %>%
select(.data$tp, .data$time.s, everything())

return(ds)
}
93 changes: 64 additions & 29 deletions R/isoread_isodat.R
Original file line number Diff line number Diff line change
@@ -30,7 +30,8 @@ extract_isodat_resistors <- function(ds) {
}

# find resistors
R_pre_re <- re_combine(re_or(re_text("/"), re_text("-"), size = 2), re_block("fef-0"), re_block("fef-0"), re_null(4), re_block("x-000"))
R_pre_re <- re_combine(re_or(re_text("/"), re_text("-"), re_text(","), re_text("."), size = 2),
re_block("fef-0"), re_block("fef-0"), re_null(4), re_block("x-000"))
R_post_re <- re_combine(re_block("x-000"))

positions <- find_next_patterns(ds$binary, R_pre_re, re_direct(".{20}", label = ".{20}"), R_post_re)
@@ -90,7 +91,7 @@ extract_isodat_reference_values <- function(ds, cap_at_fun = NULL) {
}

# instrument reference name reg exps
instrument_pre1 <- re_combine(re_block("etx"), re_or(re_text("/"), re_text(","), re_text("-")), re_block("fef-0"), re_block("fef-x")) ###
instrument_pre1 <- re_combine(re_block("etx"), re_or(re_text("/"), re_text(","), re_text("-"), re_text(".")), re_block("fef-0"), re_block("fef-x"))
instrument_pre2 <- re_combine(re_null(4), re_block("stx"), re_block("nl"), re_text("Instrument"))
instrument_post2 <- re_combine(re_null(4), re_direct("[^\\x00]{2}", label = "[^00]{2}"), re_block("etx"))

@@ -118,9 +119,15 @@ extract_isodat_reference_values <- function(ds, cap_at_fun = NULL) {
}

# run refs capture
start_pos <- find_next_patterns(
ds$binary, re_combine(instrument_pre1, re_block("text"), instrument_pre2))
if (length(start_pos) == 0) {
stop("could not find reference names at position ", ds$binary$pos,
", no match for search ",
instrument_pre1$label, "<NAME>", instrument_pre2$label, call. = FALSE)
}
refs <- tibble(
start_pos = find_next_patterns(
ds$binary, re_combine(instrument_pre1, re_block("text"), instrument_pre2)),
start_pos = start_pos,
data = map(start_pos, capture_ref_names)
) %>% unnest(data)

@@ -227,20 +234,34 @@ extract_isodat_sequence_line_info <- function(ds) {
move_to_next_pattern(re_text("Sequence Line Information"))

seq_line_info <- list()
# note: fef-x block seems to be used in .dxf, nl in .did
re_end_of_info <- re_combine(
re_null(4),
re_or(re_combine(re_direct("..", label = ".."),
re_block("etx")), re_block("C-block")))
while(!is.null(find_next_pattern(ds$binary, re_end_of_info))) {
ds$binary <- ds$binary %>%
move_to_next_pattern(re_text("/"), re_block("fef-x")) %>%
capture_data("value", "text", re_or(re_block("fef-x"), re_block("nl")), re_block("text"), re_null(4),
data_bytes_max = 500, move_past_dots = FALSE) %>%
move_to_next_pattern(re_or(re_block("fef-x"), re_block("nl"))) %>%
capture_data("info", "text", re_end_of_info,
data_bytes_max = 500, move_past_dots = FALSE) %>%
move_to_next_pattern(re_null(4))
re_or(re_combine(re_not_null(2), re_block("etx")),
re_block("C-block")))
caps <- find_next_patterns(ds$binary, re_end_of_info)
if (length(caps) == 0) stop("could not any data", call. = FALSE)
positions <- c(ds$binary$pos, head(caps, -1))
# note: fef-x block seems to be used in .dxf, nl in .did
re_val_var_break <- re_or(re_block("fef-x"), re_block("nl"))
re_val_var_break$size <- 4

# loop through all
for (i in 1:length(positions)) {
# capture value
ds$binary <- ds$binary %>%
move_to_pos(positions[i], reset_cap = TRUE) %>%
cap_at_pos(caps[i]) %>%
move_to_next_pattern(re_or(re_text("/"), re_text(".")), re_block("fef-x")) %>%
capture_data("value", "text", re_val_var_break, data_bytes_max = 500, move_past_dots = TRUE)

# capture info name
info_length <- (ds$binary$max_pos - ds$binary$pos)/2
if (info_length %% 1 > 0)
stop("length of sequence info for value '", ds$binary$data$value, "' is not an integer (", info_length, ")", call. = FALSE)
ds$binary <- ds$binary %>%
capture_n_data("info", "text", (ds$binary$max_pos - ds$binary$pos)/2)

# store info
if (!is.null(ds$binary$data$info))
ds$file_info[[ds$binary$data$info]] <- ds$binary$data$value
}
@@ -445,7 +466,9 @@ extract_isodat_continuous_flow_vendor_data_table <- function(ds, cap_at_fun = NU
# NOTE: the retention time is only ALMOST repeated each time, if there are significant
# chromatographic shifts (as is alwayst he case for H2), these will in fact NOT quite be
# identical. Isodat seems to report only the major ion (first ion here) so we are doing the same
rts_df <- bind_rows(rts)
rts_df <- dplyr::bind_rows(rts) %>%
# filter out false matches
dplyr::filter(mass > 0)
if (nrow(rts_df) == 0) return(ds) # no vendor data table entries found

# retention times
@@ -471,14 +494,16 @@ extract_isodat_continuous_flow_vendor_data_table <- function(ds, cap_at_fun = NU

### rest of data table
extracted_dt <- extract_isodat_main_vendor_data_table(ds, C_block = "CGCPeakList", cap_at_fun = cap_at_fun)
if (nrow(extracted_dt$cell_values) == 0L) {
stop("could not find any vendor table data", call. = FALSE)
}

# store vendor data table
.check. <- NULL # global var
data_table <- full_join(peaks, mutate(extracted_dt$cell_values, .check. = TRUE), by = "Nr.")
if (any(is.na(data_table$Start) | any(is.na(data_table$.check.)))) {
data_table <- full_join(peaks, mutate(extracted_dt$cell_values, .check = TRUE), by = "Nr.")
if (any(is.na(data_table$Start) | any(is.na(data_table$.check)))) {
ds <- register_warning(ds, details = "vendor data table has unexpected empty cells, process vendor table with care")
}
ds$vendor_data_table <- select(data_table, -.check.)
ds$vendor_data_table <- select(data_table, -.data$.check)

# safe information on the column units
attr(ds$vendor_data_table, "units") <-
@@ -501,9 +526,6 @@ extract_isodat_continuous_flow_vendor_data_table <- function(ds, cap_at_fun = NU
extract_isodat_main_vendor_data_table <- function(ds, C_block, cap_at_fun = NULL, col_include = "*",
skip_row_check = function(column, value) FALSE) {

# global vars
column <- continue_pos <- NULL

# main data table
ds$binary <- ds$binary %>%
set_binary_file_error_prefix("cannot process vendor data table") %>%
@@ -517,7 +539,7 @@ extract_isodat_main_vendor_data_table <- function(ds, C_block, cap_at_fun = NULL

# find columns and row data for the whole data table
pre_column_re <- re_combine(
re_or(re_text("/"), re_text("-"), size = 2),
re_or(re_text("/"), re_text("-"), re_text(","), re_text("."), size = 2),
re_block("fef-0"), re_block("fef-0"), re_null(4), re_block("x-000"), re_block("fef-x"))
positions <- find_next_patterns(ds$binary, pre_column_re)

@@ -531,7 +553,11 @@ extract_isodat_main_vendor_data_table <- function(ds, C_block, cap_at_fun = NULL
# get column name
ds$binary <- ds$binary %>%
move_to_pos(pos + pre_column_re$size) %>%
move_to_next_pattern(re_block("fef-x")) %>% # skip ID column since it is not unique in peak jumping files
# skip ID column since it is not unique in peak jumping files
move_to_next_pattern(re_block("fef-x"))

# capture column
ds$binary <- ds$binary %>%
capture_data("column", "raw", re_block("fef-x"), move_past_dots = TRUE, ignore_trailing_zeros = FALSE)

# check for columns starting with delta symbol, replace with d instead of delta symbol
@@ -552,12 +578,21 @@ extract_isodat_main_vendor_data_table <- function(ds, C_block, cap_at_fun = NULL
# check whether still in row skip
if (skip_row) next # skip

# check if have a proper units next
if (is.null(find_next_pattern(ds$binary, re_block("text"), re_block("fef-x"), max_gap = 0))) {
# this is something else, not a proper units block
next # skip
}

# get column formatting
ds$binary <- ds$binary %>%
capture_data("format", "text", re_block("fef-x"), move_past_dots = TRUE) # retrieve format (!not always the same)
# retrieve format (!not always the same)
capture_data("format", "text", re_block("fef-x"), move_past_dots = TRUE)

# skip data columns without formatting infromation right away
if(ds$binary$data$format %in% c("", " ")) next # skip
# skip data columns without propre formatting infromation right away
if(ds$binary$data$format %in% c("", " ") || nchar(ds$binary$data$format) > 4) {
next # skip
}

# store information about new column if not already stored
if (!col %in% names(columns)) {
Loading

0 comments on commit 7cc2076

Please sign in to comment.