From 0e8bf38ad314e555200bcd13ce3a0e33a79f5ed9 Mon Sep 17 00:00:00 2001 From: Rob Patro Date: Wed, 7 Jul 2021 16:01:41 -0400 Subject: [PATCH 1/6] [ci skip] start readme clenup --- README.md | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index db442a098..bfd468c1d 100644 --- a/README.md +++ b/README.md @@ -1,31 +1,13 @@ salmon logo -[![Build Status](https://travis-ci.org/COMBINE-lab/salmon.svg?branch=master)](https://travis-ci.org/COMBINE-lab/salmon) [![Documentation Status](https://readthedocs.org/projects/salmon/badge/?version=latest)](http://salmon.readthedocs.org/en/latest) [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat-square)](http://bioconda.github.io/recipes/salmon/README.html) -**Try out alevin (salmon's single-cell processing module)! Get started with the [tutorial](https://combine-lab.github.io/alevin-tutorial/#blog)** - -**Try out the new [alevin-fry](https://alevin-fry.readthedocs.io/en/latest/) framework for single-cell analysis!** +**Try out the new [alevin-fry](https://alevin-fry.readthedocs.io/en/latest/) framework for single-cell analysis; tutorials can be found [here](https://combine-lab.github.io/alevin-fry-tutorials/)!** **Help guide the development of Salmon, [take our survey](https://docs.google.com/forms/d/e/1FAIpQLSeWhBNE_fA_0uVHvbAlAulDmfmowv7rAYla879DZpqCARyRTQ/viewform)** -### Pre-computed decoy transcriptomes - -tl;dr: fast is good but fast and accurate is better ! -Although the precomputed decoys (<=v.14.2) are still compatible with the latest major release (v1.0.0). We recommend updating your index using the full genome, as it can give significantly higher accuracy. 
For more information, please check our extensive benchmarking comparing different alignment methods and their performance on RNA-seq quantification in the latest revised preprint [manuscript](https://www.biorxiv.org/content/10.1101/657874v2). -Please use the [tutorial](https://combine-lab.github.io/alevin-tutorial/2019/selective-alignment/) for a step-by-step guide on how to efficiently index the reference transcriptome and genome for accurate gentrome based RNA-seq quantification. - -Specifically, there are 3 possible ways in which the salmon index can be created: - -* cDNA-only index : salmon_index - https://combine-lab.github.io/salmon/getting_started/. This method will result in the smallest index and require the least resources to build, but will be the most prone to possible spurious alignments. - -* SA mashmap index: salmon_partial_sa_index - (regions of genome that have high sequence similarity to the transcriptome) - Details can be found in [this README](https://github.com/COMBINE-lab/SalmonTools/blob/master/README.md) and using [this script](https://raw.githubusercontent.com/COMBINE-lab/SalmonTools/master/scripts/generateDecoyTranscriptome.sh). While running mashmap can require considerable resources, the resulting decoy files are fairly small. This will result in an index bigger than the cDNA-only index, but still mucch smaller than the full genome index below. It will confer many, though not all, of the benefits of using the entire genome as a decoy sequence. - -* SAF genome index: salmon_sa_index - (the full genome is used as decoy) - The tutorial for creating such an index can be found [here](https://combine-lab.github.io/alevin-tutorial/2019/selective-alignment/). This will result in the largest index, but likely does the best job in avoiding spurious alignments to annotated transcripts. 
- -**Facing problems with Indexing ?, Check if anyone else already had this problem in the issues section or fill the index generation [request form](https://forms.gle/3baJc5SYrkSWb1z48)** What is Salmon? =============== @@ -48,9 +30,25 @@ The documentation for Salmon is available on [ReadTheDocs](http://readthedocs.or Salmon is, and will continue to be, [freely and actively supported on a best-effort basis](https://oceangenomics.com/about/#open). If you need industrial-grade technical support, please consider the options at [oceangenomics.com/contact](http://oceangenomics.com/contact). +### Pre-computed decoy transcriptomes + +tl;dr: fast is good but fast and accurate is better! +Although the precomputed decoys (<=v.14.2) are still compatible with the latest major release (v1.5.1). We recommend updating your index using the full genome, as it can give significantly higher accuracy. For more information, please check our extensive benchmarking comparing different alignment methods and their performance on RNA-seq quantification in the latest revised preprint [manuscript](https://www.biorxiv.org/content/10.1101/657874v2). +Please use the [tutorial](https://combine-lab.github.io/alevin-tutorial/2019/selective-alignment/) for a step-by-step guide on how to efficiently index the reference transcriptome and genome for accurate gentrome based RNA-seq quantification. + +Specifically, there are 3 possible ways in which the salmon index can be created: + +* cDNA-only index : salmon_index - https://combine-lab.github.io/salmon/getting_started/. This method will result in the smallest index and require the least resources to build, but will be the most prone to possible spurious alignments. 
+ +* SA mashmap index: salmon_partial_sa_index - (regions of genome that have high sequence similarity to the transcriptome) - Details can be found in [this README](https://github.com/COMBINE-lab/SalmonTools/blob/master/README.md) and using [this script](https://raw.githubusercontent.com/COMBINE-lab/SalmonTools/master/scripts/generateDecoyTranscriptome.sh). While running mashmap can require considerable resources, the resulting decoy files are fairly small. This will result in an index bigger than the cDNA-only index, but still mucch smaller than the full genome index below. It will confer many, though not all, of the benefits of using the entire genome as a decoy sequence. + +* SAF genome index: salmon_sa_index - (the full genome is used as decoy) - The tutorial for creating such an index can be found [here](https://combine-lab.github.io/alevin-tutorial/2019/selective-alignment/). This will result in the largest index, but likely does the best job in avoiding spurious alignments to annotated transcripts. + +**Facing problems with Indexing ?, Check if anyone else already had this problem in the issues section or fill the index generation [request form](https://forms.gle/3baJc5SYrkSWb1z48)** + Chat live about Salmon ====================== -You can chat with the Salmon developers and other users via Gitter! +You can chat with the Salmon developers and other users via Gitter (**Note**: Gitter is much less frequently monitored than GitHub, so if you have an important problem or question, please consider opening an issue here on GitHub)! 
[![Join the chat at https://gitter.im/COMBINE-lab/salmon](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/COMBINE-lab/salmon?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) From 70839813c76cd61804e5767f4e9b8d82f635c3f4 Mon Sep 17 00:00:00 2001 From: Rob Patro Date: Wed, 7 Jul 2021 17:13:20 -0400 Subject: [PATCH 2/6] [ci skip] Update Readme --- README.md | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index bfd468c1d..3e96c29df 100644 --- a/README.md +++ b/README.md @@ -30,13 +30,11 @@ The documentation for Salmon is available on [ReadTheDocs](http://readthedocs.or Salmon is, and will continue to be, [freely and actively supported on a best-effort basis](https://oceangenomics.com/about/#open). If you need industrial-grade technical support, please consider the options at [oceangenomics.com/contact](http://oceangenomics.com/contact). -### Pre-computed decoy transcriptomes +Decoy sequences in transcriptomes +================================= tl;dr: fast is good but fast and accurate is better! -Although the precomputed decoys (<=v.14.2) are still compatible with the latest major release (v1.5.1). We recommend updating your index using the full genome, as it can give significantly higher accuracy. For more information, please check our extensive benchmarking comparing different alignment methods and their performance on RNA-seq quantification in the latest revised preprint [manuscript](https://www.biorxiv.org/content/10.1101/657874v2). -Please use the [tutorial](https://combine-lab.github.io/alevin-tutorial/2019/selective-alignment/) for a step-by-step guide on how to efficiently index the reference transcriptome and genome for accurate gentrome based RNA-seq quantification. 
- -Specifically, there are 3 possible ways in which the salmon index can be created: +[Alignment and mapping methodology influence transcript abundance estimation](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-020-02151-8), and [accounting for fragments of unexpected origin can improve transcript quantification](https://www.biorxiv.org/content/10.1101/2021.01.17.426996v1). To this end, salmon provides the ability to index both the transcriptome and decoy sequence that can be considered during mapping and quantification. The decoy sequence accounts for reads that might otherwise be (spuriously) attributed to some annotated transcript. This [tutorial](https://combine-lab.github.io/alevin-tutorial/2019/selective-alignment/) provides a step-by-step guide on how to efficiently index the reference transcriptome and genome to produce a decoy-aware index. Specifically, there are 3 possible ways in which the salmon index can be created: * cDNA-only index : salmon_index - https://combine-lab.github.io/salmon/getting_started/. This method will result in the smallest index and require the least resources to build, but will be the most prone to possible spurious alignments. @@ -44,7 +42,10 @@ Specifically, there are 3 possible ways in which the salmon index can be created * SAF genome index: salmon_sa_index - (the full genome is used as decoy) - The tutorial for creating such an index can be found [here](https://combine-lab.github.io/alevin-tutorial/2019/selective-alignment/). This will result in the largest index, but likely does the best job in avoiding spurious alignments to annotated transcripts. 
-**Facing problems with Indexing ?, Check if anyone else already had this problem in the issues section or fill the index generation [request form](https://forms.gle/3baJc5SYrkSWb1z48)** +**Facing problems with Indexing?** Check if anyone else has already had this problem in the issues section, or fill out the index generation [request form](https://forms.gle/3baJc5SYrkSWb1z48) + +### **NOTE**: +If you are generating an index to be used for single-cell or single-nucleus quantification with [alevin-fry](https://github.com/COMBINE-lab/alevin-fry), then we recommend you consider building a spliced+intron (_splici_) reference. This serves much of the purpose of a decoy-aware index when quantifying with alevin-fry, while also providing the capability to attribute splicing status to mapped fragments. More details about the _splici_ reference and the Unspliced/Spliced/Ambiguous quantification mode it enables can be found [here](https://combine-lab.github.io/alevin-fry-tutorials/2021/improving-txome-specificity/). Chat live about Salmon ====================== From 4f6ca144120ef5aa625ce18dbacd2d4604741fe2 Mon Sep 17 00:00:00 2001 From: Avi Srivastava Date: Wed, 7 Jul 2021 20:47:27 -0400 Subject: [PATCH 3/6] changing 724 to 737 --- doc/source/alevin.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/alevin.rst b/doc/source/alevin.rst index bd3663229..49fc32d34 100644 --- a/doc/source/alevin.rst +++ b/doc/source/alevin.rst @@ -82,9 +82,9 @@ values here can speed up the run substantially. In our testing we found that usu This is an optional argument, where user can explicitly specify the whitelist CB to use for cell detection and CB sequence correction. If not given, alevin generates its own set of putative CBs. -.. note:: Not 10x 724k whitelist +.. note:: Not 10x 737k whitelist - This flag does not use the biologically known whitelist provided by 10x, instead it's per experiment level whitelist file e.g. 
the file generated by cellranger with the name `barcodes.tsv`. + This flag does not use the technologically defined whitelisted cellular barcodes provided by 10x; instead, it's a per-experiment list of subsampled cellular barcodes that need to be quantified for consistency with other tools. For example, an input would be a file generated by cellranger with the name `barcodes.tsv` (uncompressed). """""""""""" ``--noQuant`` From 7ffe35679100dc095a9931ba715884509f3465c9 Mon Sep 17 00:00:00 2001 From: Rob Patro Date: Mon, 19 Jul 2021 23:46:33 -0400 Subject: [PATCH 4/6] Add reasonable cmd_info and meta_info to RAD mode This addresses #688 by adding reasonably fully featured cmd_info.json and meta_info.json files when alevin is run in RAD mode. Some fields are not present in meta_info.json, but important ones like the sequence hash are. --- include/GZipWriter.hpp | 3 ++ scripts/fetchPufferfish.sh | 4 +- src/Alevin.cpp | 16 ++++++-- src/GZipWriter.cpp | 83 ++++++++++++++++++++++++++++++++++++++ src/SalmonAlevin.cpp | 10 +++-- 5 files changed, 108 insertions(+), 8 deletions(-) diff --git a/include/GZipWriter.hpp b/include/GZipWriter.hpp index 8ea847aca..3bc59202a 100644 --- a/include/GZipWriter.hpp +++ b/include/GZipWriter.hpp @@ -41,6 +41,9 @@ class GZipWriter { template bool writeMeta(const SalmonOpts& opts, const ExpT& experiment, const MappingStatistics& mstats); + template + bool writeMetaFryMode(const SalmonOpts& opts, const SCExpT& experiment, const MappingStatistics& mstats); + template bool writeMetaAlevin(const AlevinOpts& opts, boost::filesystem::path aux_dir); diff --git a/scripts/fetchPufferfish.sh b/scripts/fetchPufferfish.sh index 3f17ebeea..b4173500f 100755 --- a/scripts/fetchPufferfish.sh +++ b/scripts/fetchPufferfish.sh @@ -22,8 +22,8 @@ if [ -d ${INSTALL_DIR}/src/pufferfish ] ; then rm -fr ${INSTALL_DIR}/src/pufferfish fi -SVER=salmon-v1.5.1 -#SVER=develop +#SVER=salmon-v1.5.1 +SVER=develop #SVER=sketch-mode 
EXPECTED_SHA256=468e0c23a32d81524f7acadc8326efb155628970c15fd6cb843d26a61478bfde diff --git a/src/Alevin.cpp b/src/Alevin.cpp index d7215d61d..3701c7937 100644 --- a/src/Alevin.cpp +++ b/src/Alevin.cpp @@ -890,14 +890,24 @@ void initiatePipeline(AlevinOpts& aopt, if (aopt.just_align) { // if we are just aligning + + // write out the cmd_info.json to make sure we have that + boost::filesystem::path outputDirectory = vm["output"].as(); + bool isWriteOk = aut::writeCmdInfo(outputDirectory / "cmd_info.json", orderedOptions); + if(!isWriteOk){ + fmt::print(stderr, "Writing cmd_info.json in output directory failed.\nExiting now."); + exit(1); + } + + // do the actual mapping auto rc = alevin_sc_align(aopt, sopt, orderedOptions); + if (rc == 0) { aopt.jointLog->info("sc-align successful."); - std::exit(0); } else { aopt.jointLog->error("sc-align exited with return code {}", rc); - std::exit(rc); } + std::exit(rc); } /* @@ -946,7 +956,7 @@ void initiatePipeline(AlevinOpts& aopt, // Write out information about the command / run bool isWriteOk = aut::writeCmdInfo(cmdInfoPath / "cmd_info.json", orderedOptions); if(!isWriteOk){ - fmt::print(stderr, "writing in output directory failed\n Exiting Now"); + fmt::print(stderr, "Writing cmd_info.json in output directory failed.\nExiting now."); exit(1); } } diff --git a/src/GZipWriter.cpp b/src/GZipWriter.cpp index f3ce91d0b..1719d8188 100644 --- a/src/GZipWriter.cpp +++ b/src/GZipWriter.cpp @@ -891,6 +891,84 @@ bool GZipWriter::writeMeta(const SalmonOpts& opts, const ExpT& experiment, const return true; } +/** + * Write the ``main'' metadata to file when executing in alevin-fry mode. 
Currently this + * writes a stripped down version of meta_info.json: + * -- A json file with information about the run + */ +template +bool GZipWriter::writeMetaFryMode(const SalmonOpts& opts, const ExpT& experiment, const MappingStatistics& mstats) { + + namespace bfs = boost::filesystem; + using salmon::utils::DuplicateTargetStatus; + + bfs::path auxDir = path_ / opts.auxDir; + bool auxSuccess = boost::filesystem::create_directories(auxDir); + auto numBootstraps = 0; + auto numSamples = 0; + + bfs::path info = auxDir / "meta_info.json"; + { + std::ofstream os(info.string()); + cereal::JSONOutputArchive oa(os); + + std::string sampType = "none"; + auto& transcripts = experiment.transcripts(); + oa(cereal::make_nvp("salmon_version", std::string(salmon::version))); + oa(cereal::make_nvp("samp_type", sampType)); + + std::string optType = "rad_mode"; + oa(cereal::make_nvp("opt_type", optType)); + + std::vector errors; + oa(cereal::make_nvp("quant_errors", errors)); + + auto libStrings = getLibTypeStrings(experiment); + oa(cereal::make_nvp("num_libraries", libStrings.size())); + oa(cereal::make_nvp("library_types", libStrings)); + + auto has_dups = experiment.index_retains_duplicates(); + switch(has_dups) { + case DuplicateTargetStatus::RETAINED_DUPLICATES: + oa(cereal::make_nvp("keep_duplicates", true)); + break; + case DuplicateTargetStatus::REMOVED_DUPLICATES: + oa(cereal::make_nvp("keep_duplicates", false)); + break; + case DuplicateTargetStatus::UNKNOWN: + default: + break; + } + + auto numValidTargets = transcripts.size(); + auto numDecoys = experiment.getNumDecoys(); + oa(cereal::make_nvp("num_valid_targets", numValidTargets)); + oa(cereal::make_nvp("num_decoy_targets", numDecoys)); + + oa(cereal::make_nvp("length_classes", experiment.getLengthQuantiles())); + oa(cereal::make_nvp("index_seq_hash", experiment.getIndexSeqHash256())); + oa(cereal::make_nvp("index_name_hash", experiment.getIndexNameHash256())); + oa(cereal::make_nvp("index_seq_hash512", 
experiment.getIndexSeqHash512())); + oa(cereal::make_nvp("index_name_hash512", experiment.getIndexNameHash512())); + oa(cereal::make_nvp("index_decoy_seq_hash", experiment.getIndexDecoySeqHash256())); + oa(cereal::make_nvp("index_decoy_name_hash", experiment.getIndexDecoyNameHash256())); + oa(cereal::make_nvp("num_bootstraps", numSamples)); + oa(cereal::make_nvp("num_processed", experiment.numObservedFragments())); + oa(cereal::make_nvp("num_mapped", experiment.numMappedFragments())); + //oa(cereal::make_nvp("num_decoy_fragments", mstats.numDecoyFragments.load())); + //oa(cereal::make_nvp("num_dovetail_fragments", mstats.numDovetails.load())); + oa(cereal::make_nvp("num_fragments_filtered_vm", mstats.numFragmentsFiltered.load())); + oa(cereal::make_nvp("num_alignments_below_threshold_for_mapped_fragments_vm", + mstats.numMappingsFiltered.load())); + //oa(cereal::make_nvp("percent_mapped", + // experiment.effectiveMappingRate() * 100.0)); + oa(cereal::make_nvp("call", std::string("quant"))); + oa(cereal::make_nvp("start_time", opts.runStartTime)); + oa(cereal::make_nvp("end_time", opts.runStopTime)); + } + return true; +} + bool GZipWriter::writeAbundances( std::vector& alphas, std::vector& transcripts) { @@ -1730,6 +1808,11 @@ GZipWriter::writeMeta(const SalmonOpts& opts, const SCExpT& experiment, const MappingStatistics& mstats); +template bool +GZipWriter::writeMetaFryMode(const SalmonOpts& opts, + const SCExpT& experiment, + const MappingStatistics& mstats); + template bool GZipWriter::writeMeta>(const SalmonOpts& opts, const BulkAlignLibT& experiment, const MappingStatistics& mstats); diff --git a/src/SalmonAlevin.cpp b/src/SalmonAlevin.cpp index bd15d8c90..535876f10 100644 --- a/src/SalmonAlevin.cpp +++ b/src/SalmonAlevin.cpp @@ -2668,8 +2668,8 @@ int alevin_sc_align(AlevinOpts& aopt, using std::string; namespace bfs = boost::filesystem; namespace po = boost::program_options; - try{ - //auto fileLog = sopt.fileLog; + + try { auto jointLog = aopt.jointLog; auto 
indexDirectory = sopt.indexDirectory; auto outputDirectory = sopt.outputDirectory; @@ -2749,6 +2749,11 @@ int alevin_sc_align(AlevinOpts& aopt, do_sc_align(experiment, sopt, mstats, sopt.numThreads, aopt); + + // write meta-information about the run + GZipWriter gzw(outputDirectory, jointLog); + sopt.runStopTime = salmon::utils::getCurrentTimeAsString(); + gzw.writeMetaFryMode(sopt, experiment, mstats); } catch (po::error& e) { std::cerr << "Exception : [" << e.what() << "]. Exiting.\n"; std::exit(1); @@ -2762,7 +2767,6 @@ int alevin_sc_align(AlevinOpts& aopt, << " alevin --help\nExiting.\n"; std::exit(1); } - return 0; } From 5c91e22e9d6729b3167bfa196035e2d3c003e261 Mon Sep 17 00:00:00 2001 From: Rob Patro Date: Thu, 22 Jul 2021 21:45:28 -0400 Subject: [PATCH 5/6] bump version prepare for 1.5.2 --- current_version.txt | 2 +- doc/source/conf.py | 2 +- docker/Dockerfile | 2 +- docker/build_test.sh | 2 +- include/SalmonConfig.hpp | 8 +-- include/concurrentqueue.h | 133 ++++++++++++++++++++++++++----------- include/cuckoohash_map.hh | 4 +- scripts/fetchPufferfish.sh | 6 +- 8 files changed, 108 insertions(+), 51 deletions(-) diff --git a/current_version.txt b/current_version.txt index 233e1d268..4a74b730d 100644 --- a/current_version.txt +++ b/current_version.txt @@ -1,3 +1,3 @@ VERSION_MAJOR 1 VERSION_MINOR 5 -VERSION_PATCH 1 +VERSION_PATCH 2 diff --git a/doc/source/conf.py b/doc/source/conf.py index 270df9eeb..362952326 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -57,7 +57,7 @@ # The short X.Y version. version = '1.5' # The full version, including alpha/beta/rc tags. -release = '1.5.1' +release = '1.5.2' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/docker/Dockerfile b/docker/Dockerfile index a4f2d2de0..11d62350c 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -6,7 +6,7 @@ MAINTAINER salmon.maintainer@gmail.com ENV PACKAGES git gcc make g++ libboost-all-dev liblzma-dev libbz2-dev \ ca-certificates zlib1g-dev libcurl4-openssl-dev curl unzip autoconf apt-transport-https ca-certificates gnupg software-properties-common wget -ENV SALMON_VERSION 1.5.1 +ENV SALMON_VERSION 1.5.2 # salmon binary will be installed in /home/salmon/bin/salmon diff --git a/docker/build_test.sh b/docker/build_test.sh index 1eac40c90..1690b2c68 100644 --- a/docker/build_test.sh +++ b/docker/build_test.sh @@ -1,3 +1,3 @@ #! /bin/bash -SALMON_VERSION=1.5.1 +SALMON_VERSION=1.5.2 docker build --no-cache -t combinelab/salmon:${SALMON_VERSION} -t combinelab/salmon:latest . diff --git a/include/SalmonConfig.hpp b/include/SalmonConfig.hpp index 85160af21..695983a01 100644 --- a/include/SalmonConfig.hpp +++ b/include/SalmonConfig.hpp @@ -9,13 +9,13 @@ the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - Sailfish is distributed in the hope that it will be useful, + Salmon is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with Sailfish. If not, see . + along with Salmon. If not, see .
(::GetCurrentThreadId()); } } } -#elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE) +#elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE) || defined(MOODYCAMEL_NO_THREAD_LOCAL) namespace moodycamel { namespace details { static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8, "std::thread::id is expected to be either 4 or 8 bytes"); @@ -225,18 +233,43 @@ namespace moodycamel { namespace details { #endif #endif +namespace moodycamel { namespace details { #ifndef MOODYCAMEL_ALIGNAS -// VS2013 doesn't support alignas or alignof +// VS2013 doesn't support alignas or alignof, and align() requires a constant literal #if defined(_MSC_VER) && _MSC_VER <= 1800 #define MOODYCAMEL_ALIGNAS(alignment) __declspec(align(alignment)) #define MOODYCAMEL_ALIGNOF(obj) __alignof(obj) +#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) typename details::Vs2013Aligned::value, T>::type + template struct Vs2013Aligned { }; // default, unsupported alignment + template struct Vs2013Aligned<1, T> { typedef __declspec(align(1)) T type; }; + template struct Vs2013Aligned<2, T> { typedef __declspec(align(2)) T type; }; + template struct Vs2013Aligned<4, T> { typedef __declspec(align(4)) T type; }; + template struct Vs2013Aligned<8, T> { typedef __declspec(align(8)) T type; }; + template struct Vs2013Aligned<16, T> { typedef __declspec(align(16)) T type; }; + template struct Vs2013Aligned<32, T> { typedef __declspec(align(32)) T type; }; + template struct Vs2013Aligned<64, T> { typedef __declspec(align(64)) T type; }; + template struct Vs2013Aligned<128, T> { typedef __declspec(align(128)) T type; }; + template struct Vs2013Aligned<256, T> { typedef __declspec(align(256)) T type; }; #else + template struct identity { typedef T type; }; #define MOODYCAMEL_ALIGNAS(alignment) alignas(alignment) #define MOODYCAMEL_ALIGNOF(obj) alignof(obj) +#define 
MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) alignas(alignof(obj)) typename details::identity::type #endif #endif +} } +// TSAN can false report races in lock-free code. To enable TSAN to be used from projects that use this one, +// we can apply per-function compile-time suppression. +// See https://clang.llvm.org/docs/ThreadSanitizer.html#has-feature-thread-sanitizer +#define MOODYCAMEL_NO_TSAN +#if defined(__has_feature) + #if __has_feature(thread_sanitizer) + #undef MOODYCAMEL_NO_TSAN + #define MOODYCAMEL_NO_TSAN __attribute__((no_sanitize("thread"))) + #endif // TSAN +#endif // TSAN // Compiler-specific likely/unlikely hints namespace moodycamel { namespace details { @@ -339,6 +372,12 @@ struct ConcurrentQueueDefaultTraits // that this limit is enforced at the block level (for performance reasons), i.e. // it's rounded up to the nearest block size. static const size_t MAX_SUBQUEUE_SIZE = details::const_numeric_max::value; + + // The number of times to spin before sleeping when waiting on a semaphore. + // Recommended values are on the order of 1000-10000 unless the number of + // consumer threads exceeds the number of idle cores (in which case try 0-100). + // Only affects instances of the BlockingConcurrentQueue. 
+ static const int MAX_SEMA_SPINS = 10000; #ifndef MCDBGQ_USE_RELACY @@ -1608,7 +1647,7 @@ class ConcurrentQueue private: static_assert(std::alignment_of::value <= sizeof(T), "The queue does not support types with an alignment greater than their size at this time"); - MOODYCAMEL_ALIGNAS(MOODYCAMEL_ALIGNOF(T)) char elements[sizeof(T) * BLOCK_SIZE]; + MOODYCAMEL_ALIGNED_TYPE_LIKE(char[sizeof(T) * BLOCK_SIZE], T) elements; public: Block* next; std::atomic elementsCompletelyDequeued; @@ -1649,7 +1688,7 @@ class ConcurrentQueue { } - virtual ~ProducerBase() { }; + virtual ~ProducerBase() { } template inline bool dequeue(U& element) @@ -1858,7 +1897,7 @@ class ConcurrentQueue ++pr_blockIndexSlotsUsed; } - if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new ((T*)nullptr) T(std::forward(element)))) { + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { // The constructor may throw. We want the element not to appear in the queue in // that case (without corrupting the queue): MOODYCAMEL_TRY { @@ -1884,7 +1923,7 @@ class ConcurrentQueue blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release); pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); - if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new ((T*)nullptr) T(std::forward(element)))) { + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { this->tailIndex.store(newTailIndex, std::memory_order_release); return true; } @@ -1998,7 +2037,7 @@ class ConcurrentQueue } template - bool enqueue_bulk(It itemFirst, size_t count) + bool MOODYCAMEL_NO_TSAN enqueue_bulk(It itemFirst, size_t count) { // First, we need to make sure we have enough room to enqueue all of the elements; // this means pre-allocating blocks and putting them in the block index (but only if @@ -2100,7 +2139,7 @@ class ConcurrentQueue block = block->next; } - if (MOODYCAMEL_NOEXCEPT_CTOR(T, 
decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))) { + MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); } } @@ -2115,11 +2154,11 @@ class ConcurrentQueue this->tailBlock = firstAllocatedBlock; } while (true) { - auto stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + index_t stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); if (details::circular_less_than(newTailIndex, stopIndex)) { stopIndex = newTailIndex; } - if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))) { + MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { while (currentTailIndex != stopIndex) { new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); } @@ -2134,7 +2173,7 @@ class ConcurrentQueue // may only define a (noexcept) move constructor, and so calls to the // cctor will not compile, even if they are in an if branch that will never // be executed - new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<(bool)!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); + new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); ++currentTailIndex; ++itemFirst; } @@ -2181,8 +2220,9 @@ class ConcurrentQueue this->tailBlock = this->tailBlock->next; } - if (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst))) && firstAllocatedBlock != nullptr) { - 
blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { + if (firstAllocatedBlock != nullptr) + blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); } this->tailIndex.store(newTailIndex, std::memory_order_release); @@ -2199,7 +2239,7 @@ class ConcurrentQueue desiredCount = desiredCount < max ? desiredCount : max; std::atomic_thread_fence(std::memory_order_acquire); - auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);; + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed); tail = this->tailIndex.load(std::memory_order_acquire); auto actualCount = static_cast(tail - (myDequeueCount - overcommit)); @@ -2226,7 +2266,7 @@ class ConcurrentQueue auto index = firstIndex; do { auto firstIndexInBlock = index; - auto endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + index_t endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? 
firstIndex + static_cast(actualCount) : endIndex; auto block = localBlockIndex->entries[indexIndex].block; if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) { @@ -2460,8 +2500,8 @@ class ConcurrentQueue newBlock->owner = this; #endif newBlock->ConcurrentQueue::Block::template reset_empty(); - - if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new ((T*)nullptr) T(std::forward(element)))) { + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { // May throw, try to insert now before we publish the fact that we have this new block MOODYCAMEL_TRY { new ((*newBlock)[currentTailIndex]) T(std::forward(element)); @@ -2479,7 +2519,7 @@ class ConcurrentQueue this->tailBlock = newBlock; - if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new ((T*)nullptr) T(std::forward(element)))) { + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { this->tailIndex.store(newTailIndex, std::memory_order_release); return true; } @@ -2563,6 +2603,10 @@ class ConcurrentQueue return false; } +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4706) // assignment within conditional expression +#endif template bool enqueue_bulk(It itemFirst, size_t count) { @@ -2598,6 +2642,7 @@ class ConcurrentQueue auto head = this->headIndex.load(std::memory_order_relaxed); assert(!details::circular_less_than(currentTailIndex, head)); bool full = !details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); + if (full || !(indexInserted = insert_block_index_entry(idxEntry, currentTailIndex)) || (newBlock = this->parent->ConcurrentQueue::template requisition_block()) == nullptr) { // Index allocation or block allocation failed; revert any other allocations // and index insertions done so far for 
this operation @@ -2648,11 +2693,11 @@ class ConcurrentQueue this->tailBlock = firstAllocatedBlock; } while (true) { - auto stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + index_t stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); if (details::circular_less_than(newTailIndex, stopIndex)) { stopIndex = newTailIndex; } - if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))) { + MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { while (currentTailIndex != stopIndex) { new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); } @@ -2660,7 +2705,7 @@ class ConcurrentQueue else { MOODYCAMEL_TRY { while (currentTailIndex != stopIndex) { - new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<(bool)!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); + new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); ++currentTailIndex; ++itemFirst; } @@ -2712,6 +2757,9 @@ class ConcurrentQueue this->tailIndex.store(newTailIndex, std::memory_order_release); return true; } +#ifdef _MSC_VER +#pragma warning(pop) +#endif template size_t dequeue_bulk(It& itemFirst, size_t max) @@ -2743,7 +2791,7 @@ class ConcurrentQueue auto indexIndex = get_block_index_index_for_index(index, localBlockIndex); do { auto blockStartIndex = index; - auto endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + index_t endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? 
firstIndex + static_cast(actualCount) : endIndex; auto entry = localBlockIndex->index[indexIndex]; @@ -2841,7 +2889,7 @@ class ConcurrentQueue if (localBlockIndex == nullptr) { return false; // this can happen if new_block_index failed in the constructor } - auto newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); + size_t newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); idxEntry = localBlockIndex->index[newTail]; if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE || idxEntry->value.load(std::memory_order_relaxed) == nullptr) { @@ -3411,7 +3459,7 @@ class ConcurrentQueue } auto newHash = new (raw) ImplicitProducerHash; - newHash->capacity = newCapacity; + newHash->capacity = static_cast(newCapacity); newHash->entries = reinterpret_cast(details::align_for(raw + sizeof(ImplicitProducerHash))); for (size_t i = 0; i != newCapacity; ++i) { new (newHash->entries + i) ImplicitProducerKVP; @@ -3525,23 +3573,26 @@ class ConcurrentQueue template static inline void* aligned_malloc(size_t size) { - if (std::alignment_of::value <= std::alignment_of::value) + MOODYCAMEL_CONSTEXPR_IF (std::alignment_of::value <= std::alignment_of::value) return (Traits::malloc)(size); - size_t alignment = std::alignment_of::value; - void* raw = (Traits::malloc)(size + alignment - 1 + sizeof(void*)); - if (!raw) - return nullptr; - char* ptr = details::align_for(reinterpret_cast(raw) + sizeof(void*)); - *(reinterpret_cast(ptr) - 1) = raw; - return ptr; + else { + size_t alignment = std::alignment_of::value; + void* raw = (Traits::malloc)(size + alignment - 1 + sizeof(void*)); + if (!raw) + return nullptr; + char* ptr = details::align_for(reinterpret_cast(raw) + sizeof(void*)); + *(reinterpret_cast(ptr) - 1) = raw; + return ptr; + } } template static inline void aligned_free(void* ptr) { - if (std::alignment_of::value <= std::alignment_of::value) + 
MOODYCAMEL_CONSTEXPR_IF (std::alignment_of::value <= std::alignment_of::value) return (Traits::free)(ptr); - (Traits::free)(ptr ? *(reinterpret_cast(ptr) - 1) : nullptr); + else + (Traits::free)(ptr ? *(reinterpret_cast(ptr) - 1) : nullptr); } template @@ -3647,7 +3698,7 @@ ConsumerToken::ConsumerToken(ConcurrentQueue& queue) : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) { initialOffset = queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release); - lastKnownGlobalOffset = -1; + lastKnownGlobalOffset = static_cast(-1); } template @@ -3655,7 +3706,7 @@ ConsumerToken::ConsumerToken(BlockingConcurrentQueue& queue) : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) { initialOffset = reinterpret_cast*>(&queue)->nextExplicitConsumerId.fetch_add(1, std::memory_order_release); - lastKnownGlobalOffset = -1; + lastKnownGlobalOffset = static_cast(-1); } template @@ -3682,6 +3733,10 @@ inline void swap(typename ConcurrentQueue::ImplicitProducerKVP& a, ty } -#if defined(__GNUC__) +#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17) +#pragma warning(pop) +#endif + +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) #pragma GCC diagnostic pop #endif diff --git a/include/cuckoohash_map.hh b/include/cuckoohash_map.hh index 88f1f4334..a1824a61d 100644 --- a/include/cuckoohash_map.hh +++ b/include/cuckoohash_map.hh @@ -705,7 +705,9 @@ private: // true if the key is small and simple, which means using partial keys for // lookup would probably slow us down static constexpr bool is_simple() { - return std::is_pod::value && sizeof(key_type) <= 8; + return std::is_standard_layout::value && + std::is_trivial::value && + sizeof(key_type) <= 8; } // Whether or not the data is nothrow-move-constructible. 
diff --git a/scripts/fetchPufferfish.sh b/scripts/fetchPufferfish.sh index b4173500f..b800fc133 100755 --- a/scripts/fetchPufferfish.sh +++ b/scripts/fetchPufferfish.sh @@ -22,11 +22,11 @@ if [ -d ${INSTALL_DIR}/src/pufferfish ] ; then rm -fr ${INSTALL_DIR}/src/pufferfish fi -#SVER=salmon-v1.5.1 -SVER=develop +SVER=salmon-v1.5.2 +#SVER=develop #SVER=sketch-mode -EXPECTED_SHA256=468e0c23a32d81524f7acadc8326efb155628970c15fd6cb843d26a61478bfde +EXPECTED_SHA256=86c7ff465d40b8184dca7f6afee693ad1db63be5bf63242161ea39d3507d6d25 mkdir -p ${EXTERNAL_DIR} curl -k -L https://github.com/COMBINE-lab/pufferfish/archive/${SVER}.zip -o ${EXTERNAL_DIR}/pufferfish.zip From 96ea980f8b671dbaf4f65116f2d5cc957b77e35c Mon Sep 17 00:00:00 2001 From: Rob Patro Date: Thu, 22 Jul 2021 23:08:05 -0400 Subject: [PATCH 6/6] trigger develop build --- src/Alevin.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Alevin.cpp b/src/Alevin.cpp index 3701c7937..9debe68fa 100644 --- a/src/Alevin.cpp +++ b/src/Alevin.cpp @@ -1020,7 +1020,6 @@ salmon-based processing of single-cell RNA-seq data. green[3] = '0' + static_cast(fmt::GREEN); red[3] = '0' + static_cast(fmt::RED); - bool noTgMap {false}; bool dropseq = vm["dropseq"].as(); bool indrop = vm["indrop"].as();