From f832e4d82fbf6a68caf634f9b1b16cac616fcd7e Mon Sep 17 00:00:00 2001 From: hxj5 Date: Thu, 23 Feb 2023 13:50:12 +0800 Subject: [PATCH 1/2] minor update --- doc/manual.rst | 12 ++++++------ src/cellsnp.c | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/manual.rst b/doc/manual.rst index fa8e785..8b7536b 100644 --- a/doc/manual.rst +++ b/doc/manual.rst @@ -168,7 +168,7 @@ version you are using): missing REFs in the input VCF for Mode 1. --chrom STR The chromosomes to use, comma separated [1 to 22] --cellTAG STR Tag for cell barcodes, turn off with None [CB] - --UMItag STR Tag for UMI: UR, Auto, None. For Auto mode, use UR if barcodes is inputted, + --UMItag STR Tag for UMI: UR, Auto, None. For Auto mode, use UR if barcodes are inputted, otherwise use None. None mode means no UMI but read counts [Auto] --minCOUNT INT Minimum aggragated count [20] --minMAF FLOAT Minimum minor allele frequency [0.00] @@ -190,15 +190,15 @@ version you are using): Some Details: -**-b, --barcodeFile FILE** A plain file listing all effective cell barcode, e.g., the ``barcodes.tsv`` file in the CellRanger directory, ``outs/filtered_gene_bc_matrices/``. +``-b, --barcodeFile FILE`` A plain file listing all effective cell barcode, e.g., the ``barcodes.tsv`` file in the CellRanger directory, ``outs/filtered_gene_bc_matrices/``. -**-f, --refseq FILE** Faidx indexed reference sequence file. If set, the real (genomic) ref extracted from this file would be used for Mode 2 or for the missing REFs in the input VCF for Mode 1. Without this option, cellsnp-lite mode 2 would take the allele with the highest count as REF and the second highest as ALT, with little input information about the actual (genomic) reference. This is different from mode 1, which uses the REF and ALT alleles specified in the input VCF. +``-f, --refseq FILE`` Faidx indexed reference sequence file. 
If set, the real (genomic) ref extracted from this file would be used for Mode 2 or for the missing REFs in the input VCF for Mode 1. Without this option, cellsnp-lite mode 2 would take the allele with the highest count as REF and the second highest as ALT, with little input information about the actual (genomic) reference. This is different from mode 1, which uses the REF and ALT alleles specified in the input VCF. -**--chrom STR** The chromosomes to use, comma separated. For mode2, by default it runs on chr1 to 22 on human. For mouse, you need to specify it to 1,2,...,19 (replace the ellipsis). +``--chrom STR`` The chromosomes to use, comma separated. For Mode 2, by default it runs on chromosomes 1 to 22 (human). For mouse, you need to set it to 1,2,...,19 (replace the ellipsis with the intermediate chromosome numbers). -**--UMItag STR** Tag for UMI: UR, Auto, None. For Auto mode, use UR if barcodes is inputted, otherwise use None. None mode means no UMI but read counts. **For data without UMI, such as bulk RNA-seq, scDNA-seq, scATAC-seq, SMART-seq etc, please set --UMItag None**. Otherwise, all pileup counts will be zero. +``--UMItag STR`` Tag for UMI: UB, Auto, None. For Auto mode, use UB if barcodes are inputted, otherwise use None. None mode means no UMI but read counts. **For data without UMI, such as bulk RNA-seq, scDNA-seq, scATAC-seq, SMART-seq etc.**, please set ``--UMItag None``. Otherwise, all pileup counts will be zero. -**--minMAF FLOAT** Minimum minor allele frequency. The parameter minMAF is minimum minor allele frequency, which is the minimum between the allele frequencies of REF and ALT for a given SNP site. Here, both allele frequencies are derived from aggregated read counts from all cells (i.e., total_REF_read / total_reads, or total_ALT_read / total_reads). This parameter can be used for SNP filtering. +``--minMAF FLOAT`` Minimum minor allele frequency. 
The parameter minMAF is minimum minor allele frequency, which is the minimum between the allele frequencies of REF and ALT for a given SNP site. Here, both allele frequencies are derived from aggregated read counts from all cells (i.e., total_REF_read / total_reads, or total_ALT_read / total_reads). This parameter can be used for SNP filtering. Notes diff --git a/src/cellsnp.c b/src/cellsnp.c index a6f30f7..108ea0e 100644 --- a/src/cellsnp.c +++ b/src/cellsnp.c @@ -153,7 +153,7 @@ static void print_usage(FILE *fp) { fprintf(fp, " missing REFs in the input VCF for Mode 1.\n"); fprintf(fp, " --chrom STR The chromosomes to use, comma separated [1 to %d]\n", CSP_NCHROM); fprintf(fp, " --cellTAG STR Tag for cell barcodes, turn off with None [%s]\n", CSP_CELL_TAG); - fprintf(fp, " --UMItag STR Tag for UMI: UB, Auto, None. For Auto mode, use UB if barcodes is inputted,\n"); + fprintf(fp, " --UMItag STR Tag for UMI: UB, Auto, None. For Auto mode, use UB if barcodes are inputted,\n"); fprintf(fp, " otherwise use None. 
None mode means no UMI but read counts [%s]\n", CSP_UMI_TAG); fprintf(fp, " --minCOUNT INT Minimum aggragated count [%d]\n", CSP_MIN_COUNT); fprintf(fp, " --minMAF FLOAT Minimum minor allele frequency [%.2f]\n", CSP_MIN_MAF); From a5b512626a7b8b5ee8f1ea70fe1b2e23c5e6bb05 Mon Sep 17 00:00:00 2001 From: hxj5 Date: Thu, 23 Feb 2023 15:04:40 +0800 Subject: [PATCH 2/2] update to v1.2.3 --- configure.ac | 2 +- doc/conf.py | 2 +- doc/manual.rst | 11 ++++++++--- doc/release.rst | 11 +++++++++++ src/config.h | 2 +- 5 files changed, 22 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index cf8a034..43989c4 100644 --- a/configure.ac +++ b/configure.ac @@ -7,7 +7,7 @@ dnl - github:htslib https://github.com/samtools/htslib/blob/550c6b1b98414ef60eeb dnl - github:samtools https://github.com/samtools/samtools/blob/develop/configure.ac AC_PREREQ([2.63]) -AC_INIT([cellsnp-lite], [1.2.2], [hxj5@hku.hk]) +AC_INIT([cellsnp-lite], [1.2.3], [hxj5@hku.hk]) dnl Use subdir-objects option so that the source files in subdir `src` dnl could be compiled successfully diff --git a/doc/conf.py b/doc/conf.py index 0e3f3bc..2d182d0 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -22,7 +22,7 @@ author = 'Xianjie Huang' # The full version, including alpha/beta/rc tags -release = 'v1.2.2' +release = 'v1.2.3' # -- General configuration --------------------------------------------------- diff --git a/doc/manual.rst b/doc/manual.rst index 8b7536b..967d115 100644 --- a/doc/manual.rst +++ b/doc/manual.rst @@ -138,7 +138,7 @@ version you are using): .. code-block:: html - Version: 1.2.2 (htslib 1.11-79-g53d7277) + Version: 1.2.3 (htslib 1.11-79-g53d7277) Usage: cellsnp-lite [options] Options: @@ -168,7 +168,7 @@ version you are using): missing REFs in the input VCF for Mode 1. --chrom STR The chromosomes to use, comma separated [1 to 22] --cellTAG STR Tag for cell barcodes, turn off with None [CB] - --UMItag STR Tag for UMI: UR, Auto, None. 
For Auto mode, use UR if barcodes are inputted, + --UMItag STR Tag for UMI: UB, Auto, None. For Auto mode, use UB if barcodes are inputted, otherwise use None. None mode means no UMI but read counts [Auto] --minCOUNT INT Minimum aggragated count [20] --minMAF FLOAT Minimum minor allele frequency [0.00] @@ -180,7 +180,10 @@ version you are using): (when use UMI) or UNMAP,SECONDARY,QCFAIL,DUP (otherwise)] --minLEN INT Minimum mapped length for read filtering [30] --minMAPQ INT Minimum MAPQ for read filtering [20] - --maxDEPTH INT Maximum depth for one site of one file; 0 means highest possible value [0] + --maxPILEUP INT Maximum pileup for one site of one file (including those filtered reads), + avoids excessive memory usage; 0 means highest possible value [0] + --maxDEPTH INT Maximum depth for one site of one file (excluding those filtered reads), + avoids excessive memory usage; 0 means highest possible value [0] --countORPHAN If use, do not skip anomalous read pairs. Note that the "--maxFLAG" option is now deprecated, please use "--inclFLAG" or "--exclFLAG" @@ -204,6 +207,8 @@ Some Details: Notes ----- +Since v1.2.3, ``UB``, instead of ``UR``, is used as default UMI tag when barcodes are given. + The ``Too many open files`` issue has been fixed (since v1.2.0). The issue is commonly caused by exceeding the `RLIMIT_NOFILE`_ resource limit (ie. the max number of files allowed to be opened by system for single process), which is typically 1024. Specifically, in the diff --git a/doc/release.rst b/doc/release.rst index 1cc36c6..b7ae2c1 100644 --- a/doc/release.rst +++ b/doc/release.rst @@ -2,6 +2,17 @@ History ======= +Release v1.2.3 (23/02/2023) +=========================== +* use UB instead of UR as default UMI tag when barcodes are given. +* add --maxPILEUP and update --maxDEPTH. +* fix the segmentation fault of getopt_long when given unrecognized + cmdline parameters. +* allow more cmdline options in lower case. +* print CMD, VERSION and global settings. 
+* improve logging output. +* update manual to address reported issues (up to Feb 23, 2023). + Release v1.2.2 (03/11/2021) =========================== * add -f/--refseq so that the real (genomic) ref could be extracted from diff --git a/src/config.h b/src/config.h index f926935..78fe4c9 100644 --- a/src/config.h +++ b/src/config.h @@ -10,7 +10,7 @@ #define DEVELOP 0 #define CSP_NAME "cellsnp-lite" -#define CSP_VERSION "1.2.2" +#define CSP_VERSION "1.2.3" #define CSP_AUTHOR "hxj5" #define JF_ZIP_TYPE 2 // 1, gzip; 2, bgzip.