diff --git a/DESCRIPTION b/DESCRIPTION index 97f49a9..9364df0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ -Date: 2024-03-04 +Date: 2024-03-16 Package: CHNOSZ -Version: 2.1.0-4 +Version: 2.1.0-5 Title: Thermodynamic Calculations and Diagrams for Geochemistry Authors@R: c( person("Jeffrey", "Dick", , "j3ffdick@gmail.com", role = c("aut", "cre"), diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd index cd57fc7..f5842c9 100644 --- a/inst/NEWS.Rd +++ b/inst/NEWS.Rd @@ -15,11 +15,11 @@ \newcommand{\Cp}{\ifelse{latex}{\eqn{C_P}}{\ifelse{html}{\out{CP}}{Cp}}} \newcommand{\DG0}{\ifelse{latex}{\eqn{{\Delta}G^{\circ}}}{\ifelse{html}{\out{ΔG°}}{ΔG°}}} -\section{Changes in CHNOSZ version 2.1.0-3 (2024-03-02)}{ +\section{Changes in CHNOSZ version 2.1.0-5 (2024-03-16)}{ \itemize{ - \item Move \code{read.fasta()}, \code{count.aa()}, and \code{aasum()} to canprot package. + \item Move \code{read.fasta()}, \code{count.aa()}, and \code{aasum()} to canprot package with different names. \item Remove \code{seq2aa()}. diff --git a/inst/TODO b/inst/TODO index 4204368..ea18109 100644 --- a/inst/TODO +++ b/inst/TODO @@ -57,7 +57,3 @@ don't have to compile anything to install CHNOSZ updates). - Re-enter data from PS01 and AP01 (original units in J not cal) - subcrt(): Don't output properties in HKF region 4 - -[20220503] - -- Don't use palply in read.fasta() diff --git a/inst/tinytest/test-add.protein.R b/inst/tinytest/test-add.protein.R index d8cd69c..e99da99 100644 --- a/inst/tinytest/test-add.protein.R +++ b/inst/tinytest/test-add.protein.R @@ -12,7 +12,7 @@ ip2 <- add.protein(aa) expect_equal(ip1, ip2, info = info) info <- "Errors and messages occur in some circumstances" -expect_error(add.protein(canprot::count.aa("AAA")), "does not have the same columns as thermo\\(\\)\\$protein", info = info) +expect_error(add.protein(canprot::count_aa("AAA")), "does not have the same columns as thermo\\(\\)\\$protein", info = info) expect_message(add.protein(pinfo(pinfo("CYC_BOVIN"))), "replaced 1 existing protein\\(s\\)", info = info) info <- "group additivity for proteins gives expected values" @@ -32,12 +32,12 @@ expect_equal(Cp, lprop$Cp, tolerance = 1e-5, info = info) expect_equal(V, lprop$V, tolerance = 1e-4, info = info) expect_equal(formula, lprop$formula, info = info) -info <- "read.fasta() identifies sequences correctly and gives amino acid compositions in the correct format" +info <- "read_fasta() identifies sequences correctly and gives amino acid compositions in the correct format" ffile <- system.file("extdata/protein/rubisco.fasta", package = "CHNOSZ") -aa <- canprot::read.fasta(ffile) -expect_equal(aa[1, ], canprot::read.fasta(ffile, 1), info = info) +aa <- canprot::read_fasta(ffile) +expect_equal(aa[1, ], canprot::read_fasta(ffile, 1), info = info) # Use unlist here so that different row names are not compared -aa8 <- canprot::read.fasta(ffile, 1:8) +aa8 <- canprot::read_fasta(ffile, 1:8) expect_equal(unlist(aa[1:8, ]), unlist(aa8), info = info) expect_message(ip1 <- add.protein(aa8), "added 8 new protein\\(s\\)", info = info) expect_message(ip2 <- add.protein(aa8), "replaced 8 existing protein\\(s\\)", info = info) diff --git a/man/add.protein.Rd b/man/add.protein.Rd index f1bb11f..5940cce 100644 --- a/man/add.protein.Rd +++ b/man/add.protein.Rd @@ -45,7 +45,7 @@ protein.formula(iprotein) } \seealso{ -\code{\link[canprot]{read.fasta}} for reading amino acid compositions from FASTA files. +\code{\link[canprot]{read_fasta}} for reading amino acid compositions from FASTA files. \code{\link{pinfo}} for protein-level functions (length, chemical formulas, reaction coefficients of basis species). } diff --git a/man/rank.affinity.Rd b/man/rank.affinity.Rd index aecea80..1bdb731 100644 --- a/man/rank.affinity.Rd +++ b/man/rank.affinity.Rd @@ -41,7 +41,7 @@ Because balancing on a basis species (i.e., dividing by its reaction coefficient datfile <- system.file("extdata/cpetc/rubisco.csv", package = "CHNOSZ") fastafile <- system.file("extdata/protein/rubisco.fasta", package = "CHNOSZ") dat <- read.csv(datfile) -aa <- canprot::read.fasta(fastafile) +aa <- canprot::read_fasta(fastafile) groups <- sapply(c("A", "B", "E"), "==", dat$domain, simplify = FALSE) names(groups) <- c("Archaea", "Bacteria", "Eukaryota") ip <- add.protein(aa, as.residue = TRUE) diff --git a/vignettes/anintro.Rmd b/vignettes/anintro.Rmd index 2aa1471..d077670 100644 --- a/vignettes/anintro.Rmd +++ b/vignettes/anintro.Rmd @@ -1242,7 +1242,7 @@ Calculations based on the formulas, such as the average oxidation state of carbo Let's compare the `r zc` of Rubisco with optimal growth temperature of organisms, as shown in Figure 6a of @Dic14. First we read a CSV file with the IDs of the proteins and the optimal growth temperatures (*T*opt); the midpoint of the range of *T*opt is used for plotting. -Then we use `canprot::read.fasta()` to read a FASTA file holding the amino acid sequences of the proteins; the function returns a data frame with the amino acid counts. +Then we use `canprot::read_fasta()` to read a FASTA file holding the amino acid sequences of the proteins; the function returns a data frame with the amino acid counts. To put the proteins in the right order, the IDs in the CSV file are matched to the names of the proteins in the FASTA file. Then, we calculate `r zc` from the formulas of the proteins. Next, point symbols are assigned according to domain (Archaea, Bacteria, Eukaryota); numbers inside the symbols show the ordering of *T*opt in three temperature ranges (0--35 °C, 37.5--60 °C, and 65--100 °C). @@ -1255,7 +1255,7 @@ file.copy("rubisco.svg", fig_path(".svg")) # datfile <- system.file("extdata/cpetc/rubisco.csv", package = "CHNOSZ") # fastafile <- system.file("extdata/protein/rubisco.fasta", package = "CHNOSZ") # dat <- read.csv(datfile) -# aa <- canprot::read.fasta(fastafile) +# aa <- canprot::read_fasta(fastafile) # Topt <- (dat$T1 + dat$T2) / 2 # idat <- match(dat$ID, substr(aa$protein, 4, 9)) # aa <- aa[idat, ] @@ -1290,7 +1290,7 @@ file.copy("rubisco.svg", fig_path(".svg")) datfile <- system.file("extdata/cpetc/rubisco.csv", package = "CHNOSZ") fastafile <- system.file("extdata/protein/rubisco.fasta", package = "CHNOSZ") dat <- read.csv(datfile) -aa <- canprot::read.fasta(fastafile) +aa <- canprot::read_fasta(fastafile) Topt <- (dat$T1 + dat$T2) / 2 idat <- match(dat$ID, substr(aa$protein, 4, 9)) aa <- aa[idat, ] @@ -1339,7 +1339,7 @@ lapply(c("CHNOS", "QEC"), function(thisbasis) { By projecting the compositions of proteins into the `QEC` set of basis species, *n*`r o2` emerges as a strong indicator of oxidation state, while *n*`r h2o` is a relatively uncorrelated (i.e. independent) variable. These independent variables make it easier to distinguish the effects of oxidation and hydration state in proteomic datasets [@DYT20]. -- The [canprot](https://github.com/jedick/canprot) package has functions to calculate chemical metrics (*Z*C, *n*`r o2`, and *n*`r h2o`) directly from amino acid compositions of proteins, and to read amino acid compositions from FASTA files (`canprot::read.fasta()`). +- The [canprot](https://github.com/jedick/canprot) package has functions to calculate chemical metrics (*Z*C, *n*`r o2`, and *n*`r h2o`) directly from amino acid compositions of proteins, and to read amino acid compositions from FASTA files (`canprot::read_fasta()`). ## Normalization to residues diff --git a/vignettes/mklinks.sh b/vignettes/mklinks.sh index 2660e11..dc2f254 100755 --- a/vignettes/mklinks.sh +++ b/vignettes/mklinks.sh @@ -10,7 +10,6 @@ sed -i 's/?mosaic<\/code>/?buffer<\/a><\/code>/g' anintro.html sed -i 's/?solubility<\/code>/?solubility<\/a><\/code>/g' anintro.html sed -i 's/?ionize.aa<\/code>/?ionize.aa<\/a><\/code>/g' anintro.html -sed -i 's/?count.aa<\/code>/?count.aa<\/a><\/code>/g' anintro.html sed -i 's/?thermo<\/code>/?thermo<\/a><\/code>/g' anintro.html sed -i 's/?hkf<\/code>/?hkf<\/a><\/code>/g' anintro.html sed -i 's/?cgl<\/code>/?cgl<\/a><\/code>/g' anintro.html