Skip to content

Commit

Permalink
Merge pull request #156 from isoverse/dev
Browse files Browse the repository at this point in the history
update to version 1.3.0
  • Loading branch information
sebkopf authored Feb 16, 2021
2 parents 223dfd0 + ba613dd commit 00d401c
Show file tree
Hide file tree
Showing 11 changed files with 94 additions and 75 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: isoreader
Title: Read Stable Isotope Data Files
Description: Interface to the raw data file formats commonly encountered in scientific disciplines that make use of stable isotopes.
Version: 1.2.8
Version: 1.3.0
Authors@R:
c(person(
given = "Sebastian", family = "Kopf",
Expand Down Expand Up @@ -41,15 +41,15 @@ Imports:
lubridate (>= 1.7.9.2),
readr (>= 1.4.0),
progress (>= 1.2.2),
xml2 (>= 1.3.1),
rhdf5 (>= 2.0.0),
UNF (>= 2.0.6)
Suggests:
devtools,
testthat,
feather (>= 0.3.5),
readxl (>= 1.3.1),
openxlsx (>= 4.1.5),
xml2 (>= 1.3.1),
rhdf5 (>= 2.0.0),
knitr,
rmarkdown,
covr
Expand Down
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ all: docu check
docu:
Rscript -e "devtools::document(roclets=c('rd', 'collate', 'namespace'))"

# test package functionality without all example files (= as if on CRAN)
check:
R -q -e "devtools::check(env_vars = c())"

# test package functionality without all example files
auto_test:
R -q -e "rm(list = ls()); options("isoreader.run_file_tests" = FALSE); testthat::auto_test_package()"
R -q -e "rm(list = ls()); Sys.setenv(NOT_CRAN = \"false\"); testthat::auto_test_package()"

# test all example files
# test with all example files (= as if not on CRAN)
auto_test_all:
R -q -e "rm(list = ls()); testthat::auto_test_package()"
10 changes: 0 additions & 10 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -224,10 +224,6 @@ importFrom(readr,parse_logical)
importFrom(readr,parse_number)
importFrom(readr,problems)
importFrom(readr,stop_for_problems)
importFrom(rhdf5,H5close)
importFrom(rhdf5,h5ls)
importFrom(rhdf5,h5read)
importFrom(rhdf5,h5readAttributes)
importFrom(rlang,"!!!")
importFrom(rlang,"!!")
importFrom(rlang,":=")
Expand Down Expand Up @@ -295,9 +291,3 @@ importFrom(vctrs,vec_ptype2.double)
importFrom(vctrs,vec_ptype2.integer)
importFrom(vctrs,vec_ptype_abbr)
importFrom(vctrs,vec_ptype_full)
importFrom(xml2,as_list)
importFrom(xml2,read_xml)
importFrom(xml2,xml_child)
importFrom(xml2,xml_children)
importFrom(xml2,xml_find_all)
importFrom(xml2,xml_text)
26 changes: 22 additions & 4 deletions R/isoread_flow_iarc.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,24 @@ iso_read_flow_iarc <- function(ds, options = list()) {
if(!iso_is_file(ds) || !is(ds, "continuous_flow"))
stop("data structure must be a 'continuous_flow' iso_file", call. = FALSE)

# check for availability of xml2 (optional dependency needed to parse the
# xml files inside the .iarc archive)
if (!requireNamespace("xml2", quietly = TRUE)) {
  # stop() signals an error and never returns, so nothing may follow it here
  stop(
    "'xml2' package is required to read .iarc, please run: install.packages('xml2')",
    call. = FALSE
  )
}

# check for availability of rhdf5 (optional dependency needed to read the
# HDF5 data files; it is installed via BiocManager, see the message below)
if (!requireNamespace("rhdf5", quietly = TRUE)) {
  stop(
    "'rhdf5' package is required to read .iarc, please run: install.packages('BiocManager'); BiocManager::install('rhdf5')",
    call. = FALSE
  )
}

# unzipping iarc archive ====
folder_name <- ds$file_info$file_path %>% basename() %>% { str_replace(., fixed(get_file_ext(.)), "") }
folder_path <- file.path(tempdir(), folder_name)
Expand Down Expand Up @@ -170,18 +188,18 @@ process_iarc_sample_data <- function(iso_file, task, gas_configs, folder_path) {
# will also add H3 factor if part of the gas configuration
# @param iso_file
read_irms_data_file <- function(iso_file, filepath, gas_config, run_time.s, data_units = "nA", data_scaling = 1e-9) {
if (!"DataSet" %in% h5ls(filepath)$name)
if (!"DataSet" %in% rhdf5::h5ls(filepath)$name)
stop("expected DataSet attribute not present in HDF5 data file", call. = FALSE)

# attributes (NOTE: not sure what to do with the $Tuning information (usually not filled))
dataset_attributes <- h5readAttributes(filepath, "DataSet")
dataset_attributes <- rhdf5::h5readAttributes(filepath, "DataSet")
if (!dataset_attributes$Species %in% names(gas_config$species))
stop("gas configuration for species ", dataset_attributes$Species, " not specified", call. = FALSE)
config <- gas_config$species[[dataset_attributes$Species]]

# read irms data and determine which beams are used
irms_data <- h5read(filepath, "DataSet") %>% dplyr::as_tibble()
H5close() # garbage collect
irms_data <- rhdf5::h5read(filepath, "DataSet") %>% dplyr::as_tibble()
rhdf5::H5close() # garbage collect

if (!"Scan" %in% names(irms_data))
stop("Scan column missing from data file ", basename(filepath), call. = FALSE)
Expand Down
2 changes: 0 additions & 2 deletions R/package.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@
#' @importFrom stringr str_c str_detect str_to_title str_replace str_replace_all str_replace_na str_match str_match_all str_interp str_subset str_extract fixed
#' @importFrom methods is
#' @importFrom utils unzip head tail modifyList packageVersion
#' @importFrom xml2 xml_find_all xml_child xml_text read_xml xml_children as_list
#' @importFrom rhdf5 h5ls h5read h5readAttributes H5close
#' @importFrom UNF unf
NULL

Expand Down
42 changes: 21 additions & 21 deletions R/utils_xml_files.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
map_xml_children <- function(nodes, select = NULL) {
nodes %>%
map_df(function(node) {
as_list(node) %>%
xml2::as_list(node) %>%
# if select is specific, only take the children specific
{ if(is.null(select)) . else .[select[select %in% names(.)]] } %>%
# map all as text ignoring everything that does not have exactly 1 value
Expand All @@ -17,22 +17,22 @@ map_xml_children <- function(nodes, select = NULL) {
# retrieve Identifier/Value pairs from 'container' type children of current node
xml_fetch_container_value <- function(xml, ids, container = "PersistedPropertyBagProperty") {
sapply(ids, function(id) {
xml %>% xml_find_all(str_c(".//", container, "[Identifier[.='", id, "']]")) %>%
xml_child("Value") %>% xml_text() %>% list()
xml %>% xml2::xml_find_all(str_c(".//", container, "[Identifier[.='", id, "']]")) %>%
xml2::xml_child("Value") %>% xml2::xml_text() %>% list()
})
}

# XML iarc xml file processing ========

# process iarc info xml file
process_iarc_info_xml <- function(filepath) {
info_xml <- read_xml(filepath, encoding = "UTF-8")
info_version <- info_xml %>% xml_child("Version") %>% xml_text()
info_xml <- xml2::read_xml(filepath, encoding = "UTF-8")
info_version <- info_xml %>% xml2::xml_child("Version") %>% xml2::xml_text()

# retrieve processing lists information
processing_lists <-
info_xml %>% xml_child("ProcessingLists") %>%
xml_children() %>%
info_xml %>% xml2::xml_child("ProcessingLists") %>%
xml2::xml_children() %>%
map_xml_children()

# version safety check
Expand Down Expand Up @@ -69,12 +69,12 @@ process_iarc_methods_xml <- function(filepaths) {
method_params <-
filepaths %>%
lapply(function(methods_file) {
method_xml <- read_xml(methods_file, encoding = "UTF-8")
method_xml <- xml2::read_xml(methods_file, encoding = "UTF-8")
# id
method_id <- method_xml %>% xml_child("Id") %>% xml_text()
method_id <- method_xml %>% xml2::xml_child("Id") %>% xml2::xml_text()
# method parameters
method_xml %>%
xml_find_all(".//SerialisedFlowParameter") %>%
xml2::xml_find_all(".//SerialisedFlowParameter") %>%
map_xml_children() %>%
mutate(MethodId = method_id,
MethodFile = basename(methods_file))
Expand All @@ -101,20 +101,20 @@ process_iarc_tasks_xml <- function(filepaths, method_parameters) {

process_iarc_task_xml <- function(task_file) {
# read file
task_xml <- read_xml(task_file, encoding = "UTF-8")
task_xml <- xml2::read_xml(task_file, encoding = "UTF-8")

# retrieve general task info
task_info <-
c("GlobalIdentifier", "Name", "Id",
"AcquisitionStartDate", "AcquisitionEndDate", # not sure these are useful
"CompletionState", "MethodId", "ProcessingListTypeIdentifier") %>%
sapply(function(child) task_xml %>% xml_child(child) %>% xml_text() %>% list())
sapply(function(child) task_xml %>% xml2::xml_child(child) %>% xml2::xml_text() %>% list())

# retrieve task values based on methods information (if there is any)
if (nrow(method_parameters) > 0) {
task_values <-
task_xml %>%
xml_find_all(".//SerialisableTaskValue") %>%
xml2::xml_find_all(".//SerialisableTaskValue") %>%
map_xml_children() %>%
# link with parameters defined in methods
mutate(
Expand All @@ -131,7 +131,7 @@ process_iarc_tasks_xml <- function(filepaths, method_parameters) {
# retrieve task data (where the real information is recorded)
task_data <-
task_xml %>%
xml_find_all(".//SerialisableDataSet") %>%
xml2::xml_find_all(".//SerialisableDataSet") %>%
map_xml_children(
select = c("Id", "AcquireDataStatus", "AcquireStartDate", "AcquireEndDate", "TypeIdentifier")) %>%
mutate(
Expand Down Expand Up @@ -188,8 +188,8 @@ process_iarc_processing_xml <- function(processing_list_id, filepath) {
Label <- NumeratorBeamChannel <- numerator_mass <- DenominatorBeamChannel <- denominator_mass <- NULL

# read file
xml <- read_xml(filepath, encoding = "UTF-8")
global_id <- xml %>% xml_child("DefinitionUniqueIdentifier") %>% xml_text()
xml <- xml2::read_xml(filepath, encoding = "UTF-8")
global_id <- xml %>% xml2::xml_child("DefinitionUniqueIdentifier") %>% xml2::xml_text()

# safety check
if (global_id != processing_list_id) {
Expand All @@ -201,16 +201,16 @@ process_iarc_processing_xml <- function(processing_list_id, filepath) {
# find the species
xml_find_species <- function(node) {
# potentially useful(?): DetectionBeamChannel
node %>% xml_child("SerialisedPropertyBagProperties") %>%
node %>% xml2::xml_child("SerialisedPropertyBagProperties") %>%
xml_fetch_container_value("Species") %>% { .$Species }
}

# find the channel masses from the beam ratio definitions
xml_find_channel_masses <- function(node) {
# find the beam ratio definitions
ratio_defs <-
node %>% xml_child("SerialisedChildPropertyBags") %>%
xml_find_all(".//SerialisablePropertyBag[Identifier[.='{42D28191-A6E9-4B7B-8C3D-0F0037624F7D}']]") %>%
node %>% xml2::xml_child("SerialisedChildPropertyBags") %>%
xml2::xml_find_all(".//SerialisablePropertyBag[Identifier[.='{42D28191-A6E9-4B7B-8C3D-0F0037624F7D}']]") %>%
map(xml_fetch_container_value, c("NumeratorBeamChannel", "DenominatorBeamChannel", "Label")) %>%
bind_rows()
if (nrow(ratio_defs) == 0) return (tibble(channel = character(), mass = character()))
Expand All @@ -237,7 +237,7 @@ process_iarc_processing_xml <- function(processing_list_id, filepath) {
# find the H3 factor
xml_find_H3_factor <- function(node) {
H3_factor <-
node %>% xml_child("SerialisedPropertyBagProperties") %>%
node %>% xml2::xml_child("SerialisedPropertyBagProperties") %>%
xml_fetch_container_value(c("ApplyH3CorrectionFactor", "H3CorrectionFactor"))
if (!is.na(H3_factor$ApplyH3CorrectionFactor) && H3_factor$ApplyH3CorrectionFactor == "True")
return(as.numeric(H3_factor$H3CorrectionFactor))
Expand All @@ -246,7 +246,7 @@ process_iarc_processing_xml <- function(processing_list_id, filepath) {

# process channel configurations
species_config <- xml %>%
xml_find_all("//SerialisablePropertyBag[Identifier[.='10DC1602-5ED4-4D62-BAB0-2693E3FBC3AF']]") %>%
xml2::xml_find_all("//SerialisablePropertyBag[Identifier[.='10DC1602-5ED4-4D62-BAB0-2693E3FBC3AF']]") %>%
sapply(function(node) {
species <- xml_find_species(node)
if (is.null(species) || is.na(species)) # no species definition found
Expand Down
12 changes: 10 additions & 2 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,23 @@ You can install the latest release of isoreader from [CRAN](https://cran.r-proje
install.packages("isoreader")
```

To install the current development version directly from GitHub, please use the devtools package:
Some isoreader features including Excel and feather export depend on optional packages that are not required for the core functionality of isoreader. To use this functionality, please install the following packages manually if not already installed (isoreader will raise an informative error if they are needed but missing):

```{r, optional-installation, eval = FALSE}
# optional extensions
install.packages(c("feather", "openxlsx", "xml2", "BiocManager"))
BiocManager::install("rhdf5")
```

To install the current development version of isoreader directly from GitHub, please use the devtools package:

```{r gh-installation, eval = FALSE}
# installs the development tools package if not yet installed
if(!requireNamespace("devtools", quietly = TRUE)) install.packages("devtools")
devtools::install_github("isoverse/isoreader")
```

Troubleshooting note: depending on your workspace and operating system, you may have to re-start your R session or manually install some dependencies. For example, the [Bioconductor](https://www.bioconductor.org/) package manager may need manual installation: `install.packages("BiocManager")`; also, the `digest` package sometimes causes trouble - re-install with `remove.packages("digest"); install.packages("digest")`.
Troubleshooting note: depending on your workspace and operating system, you may have to re-start your R session or manually install some dependencies. For example, the `digest` package sometimes causes trouble - re-install with `remove.packages("digest"); install.packages("digest")`.

## Show me some code

Expand Down
26 changes: 18 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,20 @@ You can install the latest release of isoreader from
install.packages("isoreader")
```

To install the current development version directly from GitHub, please
use the devtools package:
Some isoreader features including Excel and feather export depend on
optional packages that are not required for the core functionality of
isoreader. To use this functionality, please install the following
packages manually if not already installed (isoreader will raise an
informative error if they are needed but missing):

``` r
# optional extensions
install.packages(c("feather", "openxlsx", "xml2", "BiocManager"))
BiocManager::install("rhdf5")
```

To install the current development version of isoreader directly from
GitHub, please use the devtools package:

``` r
# installs the development tools package if not yet installed
Expand All @@ -64,11 +76,9 @@ devtools::install_github("isoverse/isoreader")

Troubleshooting note: depending on your workspace and operating system,
you may have to re-start your R session or manually install some
dependencies. For example, the
[Bioconductor](https://www.bioconductor.org/) package manager may need
manual installation: `install.packages("BiocManager")`; also, the
`digest` package sometimes causes trouble - re-install with
`remove.packages("digest"); install.packages("digest")`.
dependencies. For example, the `digest` package sometimes causes
trouble - re-install with
`remove.packages("digest"); install.packages("digest")`.

## Show me some code

Expand All @@ -89,7 +99,7 @@ iso_files <- iso_read_scan(data_folder)
#> Info: reading file 'full_scan_example.scn' with '.scn' reader...
#> Info: reading file 'peak_shape_scan_example.scn' with '.scn' reader...
#> Info: reading file 'time_scan_example.scn' with '.scn' reader...
#> Info: finished reading 4 files in 0.94 secs
#> Info: finished reading 4 files in 1.00 secs

iso_files
#> Data from 4 scan iso files:
Expand Down
18 changes: 10 additions & 8 deletions tests/testthat/test-continuous-flow.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,11 @@ test_that("test that parameter checks are performed", {
})


test_that("test that dxf files can be read", {
test_that("test that continous flow files can be read", {

# skip on CRAN to reduce checktime to below 10 minutes
skip_on_cran()

# check if tests are enabled
run_file_tests <- getOption("isoreader.run_file_tests")
if (!is.null(run_file_tests) && identical(run_file_tests, FALSE)) {
skip("Currently not testing all continuous flow data files.")
}

# test specific files
iso_turn_reader_caching_off()

Expand Down Expand Up @@ -61,14 +55,22 @@ test_that("test that dxf files can be read", {
expect_equal(nrow(iso_get_vendor_data_table(cf)), 6)
expect_equal(ncol(iso_get_vendor_data_table(cf)), 61)

# skip if optional dependencies are not installed
skip_if_not_installed("xml2")
skip_if_not_installed("rhdf5")
expect_true(file.exists(file <- iso_get_reader_example("continuous_flow_example.iarc")))
expect_is(iarc <- iso_read_continuous_flow(file), "iso_file_list")
expect_equal(nrow(problems(iarc)), 0)

})

test_that("test that additional continous flow files can be read", {

# additional test files (skip on CRAN because test files are not included due to tarball size limits) =====
skip_on_cran()
test_folder <- file.path("test_data") # test_folder <- file.path("tests", "testthat", "test_data") # direct

iso_turn_reader_caching_off()

# testing wrapper
check_continuous_flow_test_file <- function(file) {
file_path <- get_isoreader_test_file(file, local_folder = test_folder)
Expand Down
Loading

0 comments on commit 00d401c

Please sign in to comment.