diff --git a/README.md b/README.md index 2c055d7..14da4b1 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ For a lot of virus genera it is difficult to design pan-specific primers. varVAM We, in collaboration with specialists for the respective viruses, have already designed and wet-lab evaluated primer schemes for various viral pathogens. All the input data and varVAMP outputs are freely available [here](https://github.com/jonas-fuchs/ViralPrimerSchemes). -If you design primers for a particular pathogen, we will be happy to include them in this repo and make it freely available. Just contribute via an issue or pull request! +Moreover, varVAMP primers are now available at [primerschemes](https://labs.primalscheme.com/). varVAMP now reports primer bed files in ARTICv3 format. Feel free to contribute newly designed schemes via this [Github repository of the QuickLab](https://github.com/quick-lab/primerschemes). Use [primal-page](https://github.com/ChrisgKent/primal-page) developed by [Chris Kent](https://github.com/ChrisgKent) to generate data for compatible pull-requests. # Citing varVAMP diff --git a/docs/installation.md b/docs/installation.md index c2f2dc1..b7fbf37 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -21,7 +21,8 @@ pip install setuptools varvamp ### CONDA: ```shell -conda install -c bioconda varvamp +conda create -n varvamp varvamp +conda activate varvamp ``` ### DOCKER: diff --git a/docs/usage.md b/docs/usage.md index 50c6fe0..e459a4a 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -43,6 +43,7 @@ optional arguments: -a , --n-ambig max number of ambiguous characters in a primer -db None, --database None location of the BLAST db -th 1, --threads 1 number of threads + --name varVAMP name of the scheme -ol 1000, --opt-length 1000 optimal length of the amplicons -ml 1500, --max-length 1500 max length of the amplicons -n inf, --report-n inf report the top n best hits @@ -58,6 +59,7 @@ optional arguments: -a , --n-ambig max number of ambiguous characters in a primer -db None, --database None location of the BLAST db -th 1, --threads 1 number of threads + --name varVAMP name of the scheme -ol 1000, --opt-length 1000 optimal length of the amplicons -ml 1500, --max-length 1500 max length of the amplicons -o 100, --overlap 100 min overlap of the amplicons @@ -73,6 +75,7 @@ optional arguments: -a , --n-ambig max number of ambiguous characters in a primer -db None, --database None location of the BLAST db -th 1, --threads 1 number of threads + --name varVAMP name of the scheme -pa , --pn-ambig max number of ambiguous characters in a probe -n 50, --test-n 50 test the top n qPCR amplicons for secondary structures at the minimal primer temperature -d -3, --deltaG -3 minimum free energy (kcal/mol/K) cutoff at the lowest primer melting temp diff --git a/requirements.txt b/requirements.txt index 98dc7a4..cb7df6a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ -biopython>=1.79 -matplotlib>=3.5.1 -primer3-py>=1.1.0 -pandas>=1.4.4 -numpy>=1.23.3 -seqfold>=0.7.15 +biopython~=1.79 +matplotlib~=3.5.1 +primer3-py~=1.1.0 +pandas~=1.4.4 +numpy~=1.23.3 +seqfold~=0.7.15 diff --git a/varvamp/__init__.py b/varvamp/__init__.py index 686b47b..32b71d6 100644 --- a/varvamp/__init__.py +++ b/varvamp/__init__.py @@ -1,3 +1,3 @@ """Tool to design amplicons for highly variable virusgenomes""" _program = "varvamp" -__version__ = "1.2.0" +__version__ = "1.2.1" diff --git a/varvamp/command.py b/varvamp/command.py index bd9d173..d6cde7a 100644 --- a/varvamp/command.py +++ b/varvamp/command.py @@ -89,6 +89,13 @@ def get_args(sysargs): type=int, default=1 ) + par.add_argument( + "--name", + help="name of the scheme", + metavar="varVAMP", + type=str, + default="varVAMP" + ) for par in (SINGLE_parser, TILED_parser): par.add_argument( "-ol", @@ -261,10 +268,10 @@ def shared_workflow(args, log_file): ambiguous_consensus, alignment_cleaned ) - for type, primer_candidates in [("+", left_primer_candidates), ("-", right_primer_candidates)]: + for primer_type, primer_candidates in [("+", left_primer_candidates), ("-", right_primer_candidates)]: if not primer_candidates: logging.raise_error( - f"no {type} primers found.\n", + f"no {primer_type} primers found.\n", log_file, exit=True ) @@ -330,7 +337,7 @@ def single_and_tiled_shared_workflow(args, left_primer_candidates, right_primer_ return all_primers, amplicons -def single_workflow(args, amplicons, all_primers, log_file): +def single_workflow(args, amplicons, log_file): """ workflow part specific for single mode """ @@ -477,13 +484,13 @@ def qpcr_workflow(args, data_dir, alignment_cleaned, ambiguous_consensus, majori return probe_regions, final_schemes -def main(sysargs=sys.argv[1:]): +def main(): """ main varvamp workflow """ # start varVAMP - args = get_args(sysargs) + args = get_args(sys.argv[1:]) if not args.verbose: sys.stdout = open(os.devnull, 'w') start_time = datetime.datetime.now() @@ -496,10 +503,10 @@ def main(sysargs=sys.argv[1:]): alignment_cleaned, majority_consensus, ambiguous_consensus, primer_regions, left_primer_candidates, right_primer_candidates = shared_workflow(args, log_file) # write files that are shared in all modes - reporting.write_regions_to_bed(primer_regions, data_dir) + reporting.write_regions_to_bed(primer_regions, args.name, data_dir) reporting.write_alignment(data_dir, alignment_cleaned) - reporting.write_fasta(data_dir, "majority_consensus", majority_consensus) - reporting.write_fasta(results_dir, "ambiguous_consensus", ambiguous_consensus) + reporting.write_fasta(data_dir, f"majority_consensus", f"{args.name}_consensus",majority_consensus) + reporting.write_fasta(results_dir, f"ambiguous_consensus", f"{args.name}_consensus", ambiguous_consensus) # Functions called from here on return lists of amplicons that are refined step-wise into final schemes. # These lists that are passed between functions and later used for reporting consist of dictionary elemnts, @@ -526,7 +533,6 @@ def main(sysargs=sys.argv[1:]): amplicon_scheme = single_workflow( args, amplicons, - all_primers, log_file ) elif args.mode == "tiled": @@ -549,11 +555,12 @@ def main(sysargs=sys.argv[1:]): else: # make sure amplicons with no off-target products and with low penalties get the lowest numbers amplicon_scheme.sort(key=lambda x: (x.get("off_targets", False), x["penalty"])) - reporting.write_all_primers(data_dir, all_primers) + reporting.write_all_primers(data_dir, args.name, all_primers) reporting.write_scheme_to_files( results_dir, amplicon_scheme, ambiguous_consensus, + args.name, args.mode, log_file ) @@ -561,10 +568,11 @@ def main(sysargs=sys.argv[1:]): results_dir, alignment_cleaned, primer_regions, + args.name, all_primers=all_primers, amplicon_scheme=amplicon_scheme, ) - reporting.per_base_mismatch_plot(results_dir, amplicon_scheme, args.threshold) + reporting.per_base_mismatch_plot(results_dir, amplicon_scheme, args.threshold, args.name) # QPCR mode if args.mode == "qpcr": @@ -583,16 +591,17 @@ def main(sysargs=sys.argv[1:]): # make sure amplicons with no off-target products and with low penalties get the lowest numbers final_schemes.sort(key=lambda x: (x.get("off_targets", False), x["penalty"])) - reporting.write_regions_to_bed(probe_regions, data_dir, "probe") - reporting.write_qpcr_to_files(results_dir, final_schemes, ambiguous_consensus, log_file) + reporting.write_regions_to_bed(probe_regions, args.name, data_dir, "probe") + reporting.write_qpcr_to_files(results_dir, final_schemes, ambiguous_consensus, args.name, log_file) reporting.varvamp_plot( results_dir, alignment_cleaned, primer_regions, + args.name, probe_regions=probe_regions, amplicon_scheme=final_schemes ) - reporting.per_base_mismatch_plot(results_dir, final_schemes, args.threshold, mode="QPCR") + reporting.per_base_mismatch_plot(results_dir, final_schemes, args.threshold, args.name, mode="QPCR") # varVAMP finished logging.varvamp_progress(log_file, progress=1, start_time=start_time) diff --git a/varvamp/scripts/logging.py b/varvamp/scripts/logging.py index bdfbe03..100f0cd 100644 --- a/varvamp/scripts/logging.py +++ b/varvamp/scripts/logging.py @@ -15,12 +15,12 @@ from varvamp import __version__ -def create_dir_structure(dir): +def create_dir_structure(dir_path): """ create output folders and log file """ cwd = os.getcwd() - results_dir = os.path.join(cwd, dir) + results_dir = os.path.join(cwd, dir_path) data_dir = os.path.join(results_dir, "data/") # create folders if not os.path.exists(results_dir): @@ -567,7 +567,7 @@ def goodbye_message(): "Thank you. Come again.", ">Placeholder for your advertisement<", "Make primers great again!", - "Ciao cacao!" + "Ciao cacao!", "And now lets pray to the PCR gods.", "**bibobibobop** task finished", "Thank you for traveling with varVAMP.", @@ -581,5 +581,14 @@ def goodbye_message(): "Barba non facit philosophum.", "Task failed successfully.", "Never gonna give you up, never gonna let you down.", + "Have you tried turning it off and on again?", + "Look, I am your primer scheme.", + "Quod erat demonstrandum.", + "Miau?", + "This is an automated message informing you that you are awsome.", + "Why was the negative-sense virus angry at the positive-sense virus?\nBecause he was left stranded!", + "If you see this message twice, you are an experienced user.", + "No one expects the spanish inquisition!", + "Primer design you must." ] print(f"\n{random.choice(messages)}") diff --git a/varvamp/scripts/reporting.py b/varvamp/scripts/reporting.py index 0f7398a..235fbb2 100644 --- a/varvamp/scripts/reporting.py +++ b/varvamp/scripts/reporting.py @@ -19,11 +19,11 @@ from varvamp.scripts import logging -def write_fasta(path, seq_id, seq): +def write_fasta(path, file_name, seq_id, seq): """ write fasta files """ - name = f"{seq_id}.fasta" + name = f"{file_name}.fasta" out = os.path.join(path, name) with open(out, 'w') as o: print(f">{seq_id}\n{seq}", file=o) @@ -40,7 +40,7 @@ def write_alignment(path, alignment): print(f">{seq[0]}\n{seq[1]}", file=o) -def write_regions_to_bed(primer_regions, path, mode=None): +def write_regions_to_bed(primer_regions, scheme_name, path, mode=None): """ write primer regions as bed file """ @@ -53,7 +53,7 @@ def write_regions_to_bed(primer_regions, path, mode=None): with open(outfile, 'w') as o: for counter, region in enumerate(primer_regions): print( - "ambiguous_consensus", + f"{scheme_name}_consensus", region[0], region[1], "REGION_"+str(counter), @@ -62,24 +62,30 @@ def write_regions_to_bed(primer_regions, path, mode=None): ) -def write_primers_to_bed(outfile, primer_name, primer_properties, direction): +def write_primers_to_bed(outfile, scheme_name, primer_name, primer_properties, numeric_value, direction, sequence=None): """ write primers as bed file """ with open(outfile, 'a') as o: - print( - "ambiguous_consensus", + # write header for primer bed + if os.path.getsize(outfile) == 0 and sequence is not None: + print("#chrom\tchromStart\tchromEnd\tprimer-name\tpool\tstrand\tprimer-sequence", file=o) + data = [f"{scheme_name}_consensus", primer_properties[1], # start primer_properties[2], # stop primer_name, - round(primer_properties[3], 1), # penalty - direction, + numeric_value, # can be pool or score + direction] + if sequence is not None: + data.append(sequence) + print( + *data, sep="\t", file=o ) -def write_all_primers(path, all_primers): +def write_all_primers(path, scheme_name, all_primers): """ write all primers that varVAMP designed as bed file """ @@ -87,7 +93,7 @@ def write_all_primers(path, all_primers): for direction in all_primers: for primer in all_primers[direction]: - write_primers_to_bed(outfile, primer, all_primers[direction][primer], direction) + write_primers_to_bed(outfile, scheme_name, primer, all_primers[direction][primer], round(all_primers[direction][primer][3], 2), direction) def get_permutations(seq): @@ -120,7 +126,7 @@ def calc_mean_stats(permutations): return round(gc/len(permutations), 1), round(temp/len(permutations), 1) -def write_qpcr_to_files(path, final_schemes, ambiguous_consensus, log_file): +def write_qpcr_to_files(path, final_schemes, ambiguous_consensus, scheme_name, log_file): """ write all relevant bed files and tsv file for the qPCR design """ @@ -141,10 +147,10 @@ def write_qpcr_to_files(path, final_schemes, ambiguous_consensus, log_file): file=tsv ) for n, amp in enumerate(final_schemes): - amp_name = f"QPCR_SCHEME_{n}" + amp_name = f"{scheme_name}_{n}" # write bed amplicon file print( - "ambiguous_consensus", + f"{scheme_name}_consensus", amp["LEFT"][1], amp["RIGHT"][2], amp_name, @@ -188,6 +194,7 @@ def write_qpcr_to_files(path, final_schemes, ambiguous_consensus, log_file): permutations = get_permutations(seq) gc, temp = calc_mean_stats(permutations) + primer_name = f"{amp_name}_{oligo_type}" print( amp_name, @@ -208,15 +215,18 @@ def write_qpcr_to_files(path, final_schemes, ambiguous_consensus, log_file): # write primer bed file write_primers_to_bed( primer_bed_file, - f"{amp_name}_{oligo_type}", + scheme_name, + primer_name, amp[oligo_type], - direction + round(amp[oligo_type][3], 2), + direction, + seq.upper() ) # write fasta - print(f">{amp_name}_{oligo_type}\n{seq.upper()}", file=fasta) + print(f">{primer_name}\n{seq.upper()}", file=fasta) -def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, mode, log_file): +def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, scheme_name, mode, log_file): """ write all relevant bed files and a tsv file with all primer stats """ @@ -229,7 +239,7 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, mode, log_ with open(tsv_file, "w") as tsv, open(amplicon_bed_file, "w") as bed, open(tabular_file, "w") as tabular: # write header for primer tsv print( - "amlicon_name\tamplicon_length\tprimer_name\talternate_primer_name\tpool\tstart\tstop\tseq\tsize\tgc_best\ttemp_best\tmean_gc\tmean_temp\tpenalty\toff_target_amplicons", + "amlicon_name\tamplicon_length\tprimer_name\tprimer_name_all_primers\tpool\tstart\tstop\tseq\tsize\tgc_best\ttemp_best\tmean_gc\tmean_temp\tpenalty\toff_target_amplicons", file=tsv ) amplicon_bed_records = [] @@ -240,12 +250,12 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, mode, log_ if mode == "single": primer_fasta_file = os.path.join(path, "primers.fasta") else: - primer_fasta_file = os.path.join(path, f"primers_pool_{pool}.fasta") + primer_fasta_file = os.path.join(path, f"primers_pool_{pool+1}.fasta") with open(primer_fasta_file, "w") as primer_fasta: for counter, amp in enumerate(amplicon_scheme[pool::len(pools)]): # give a new amplicon name amplicon_index = counter*len(pools) + pool - new_name = f"AMPLICON_{amplicon_index}" + amp_name = f"{scheme_name}_{amplicon_index}" # get left and right primers and their names amp_length = amp["RIGHT"][2] - amp["LEFT"][1] if "off_targets" in amp: @@ -258,14 +268,14 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, mode, log_ amplicon_has_off_target = "n.d." # write amplicon bed if mode == "tiled": - bed_score = pool + bed_score = pool+1 elif mode == "single": bed_score = round(amp["LEFT"][3] + amp["RIGHT"][3], 1) amplicon_bed_records.append( ( amp["LEFT"][1], amp["RIGHT"][2], - new_name, + amp_name, bed_score ) ) @@ -273,7 +283,7 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, mode, log_ ( # will need amplicon_index for sorting amplicon_index, - (f"{new_name}_LEFT", f"{new_name}_RIGHT") + (f"{amp_name}_LEFT", f"{amp_name}_RIGHT") ) ) # write primer tsv and primer bed @@ -281,9 +291,9 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, mode, log_ seq = ambiguous_consensus[primer[1]:primer[2]] if direction == "-": seq = primers.rev_complement(seq) - primer_name = f"{new_name}_RIGHT" + primer_name = f"{amp_name}_RIGHT" else: - primer_name = f"{new_name}_LEFT" + primer_name = f"{amp_name}_LEFT" # write primers to fasta pool file print(f">{primer_name}\n{seq.upper()}", file=primer_fasta) # calc primer parameters for all permutations @@ -291,11 +301,11 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, mode, log_ gc, temp = calc_mean_stats(permutations) # write tsv file print( - new_name, + amp_name, amp_length, primer_name, primer[-1], - pool, + pool+1, primer[1] + 1, primer[2], seq.upper(), @@ -313,13 +323,13 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, mode, log_ ( # will need amplicon_index for sorting amplicon_index, - (primer_name, primer, direction) + (primer_name, primer, pool+1, direction, seq.upper()) ) ) # write amplicon bed with amplicons sorted by start position for record in sorted(amplicon_bed_records, key=lambda x: x[0]): print( - "ambiguous_consensus", + f"{scheme_name}_consensus", *record, ".", sep="\t", @@ -336,6 +346,7 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, mode, log_ for record in sorted(primer_bed_records): write_primers_to_bed( primer_bed_file, + scheme_name, *record[1] ) @@ -352,7 +363,7 @@ def write_dimers(path, primer_dimers): ) for pool, primer1, primer2 in primer_dimers: print( - pool, + pool+1, primer1[1], primer2[1], round(primers.calc_dimer(primer1[2][0], primer2[2][0]).tm, 1), @@ -405,7 +416,7 @@ def alignment_entropy(alignment_cleaned): return entropy_df -def entropy_subplot(ax, alignment_cleaned): +def entropy_subplot(ax, alignment_cleaned, scheme_name): """ creates the entropy subplot """ @@ -417,7 +428,7 @@ def entropy_subplot(ax, alignment_cleaned): ax[0].set_ylim((0, 1)) ax[0].set_xlim(0, max(entropy_df["position"])) ax[0].set_ylabel("normalized Shannon's entropy") - ax[0].set_title("final amplicon design") + ax[0].set_title(f"{scheme_name} amplicon design") ax[0].spines['top'].set_visible(False) ax[0].spines['right'].set_visible(False) @@ -501,7 +512,7 @@ def qpcr_subplot(ax, amplicon_scheme): ax[1].hlines(0.75, probe[1], probe[2], linewidth=5, color="darkgrey", label="probe") -def varvamp_plot(path, alignment_cleaned, primer_regions, all_primers=None, amplicon_scheme=None, probe_regions=None): +def varvamp_plot(path, alignment_cleaned, primer_regions, scheme_name, all_primers=None, amplicon_scheme=None, probe_regions=None): """ creates overview plot for the amplicon design and per base coverage plots @@ -514,7 +525,7 @@ def varvamp_plot(path, alignment_cleaned, primer_regions, all_primers=None, ampl fig, ax = plt.subplots(2, 1, figsize=[22, 6], squeeze=True, sharex=True, gridspec_kw={'height_ratios': [4, 1]}) fig.subplots_adjust(hspace=0) # entropy plot - entropy_subplot(ax, alignment_cleaned) + entropy_subplot(ax, alignment_cleaned, scheme_name) # primer regions plot region_subplot(ax, primer_regions) # probe region plot for probes @@ -540,43 +551,32 @@ def varvamp_plot(path, alignment_cleaned, primer_regions, all_primers=None, ampl plt.close() -def get_SINGLE_TILED_primers_for_plot(amplicon_scheme): +def get_primers_for_plot(amplicon_scheme, scheme_name, mode): """ get the primers for per base pair plot (single, tiled) """ amplicon_primers = [] - for counter, amp in enumerate(amplicon_scheme): - for type in ["LEFT", "RIGHT"]: - primer_name = f"AMPLICON_{counter}_{type}" - amplicon_primers.append((primer_name, amp[type])) - - return amplicon_primers - - -def get_QPCR_primers_for_plot(amplicon_schemes): - """ - get the primers for per base pair plot (qpcr) - """ - amplicon_primers = [] + if mode == "SINGLE/TILED": + oligo_types = ["LEFT", "RIGHT"] + else: + oligo_types = ["PROBE", "LEFT", "RIGHT"] - for counter, amp in enumerate(amplicon_schemes): - for type in ["PROBE", "LEFT", "RIGHT"]: - primer_name = f"QPCR_SCHEME_{counter}_{type}" - amplicon_primers.append((primer_name, amp[type])) + for counter, amp in enumerate(amplicon_scheme): + for oligo_type in oligo_types: + primer_name = f"{scheme_name}_{counter}_{oligo_type}" + amplicon_primers.append((primer_name, amp[oligo_type])) return amplicon_primers -def per_base_mismatch_plot(path, amplicon_scheme, threshold, mode="SINGLE/TILED"): +def per_base_mismatch_plot(path, amplicon_scheme, threshold, scheme_name, mode="SINGLE/TILED"): """ per base pair mismatch multiplot """ out = os.path.join(path, "per_base_mismatches.pdf") - if mode == "SINGLE/TILED": - amplicon_primers = get_SINGLE_TILED_primers_for_plot(amplicon_scheme) - elif mode == "QPCR": - amplicon_primers = get_QPCR_primers_for_plot(amplicon_scheme) + + amplicon_primers = get_primers_for_plot(amplicon_scheme, scheme_name, mode) # ini multi pdf with PdfPages(out) as pdf: # always print 4 primers to one page