diff --git a/CHANGELOG.md b/CHANGELOG.md index 8cbc52f0..fe53a1a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,8 @@ * `nanoplot`: Plotting tool for long read sequencing data and alignments (PR #95). +* `sgdemux`: demultiplexing sequencing data generated on Singular Genomics' sequencing instruments (PR #). + ## BUG FIXES * `falco`: Fix a typo in the `--reverse_complement` argument (PR #157). diff --git a/src/sgdemux/config.vsh.yaml b/src/sgdemux/config.vsh.yaml new file mode 100644 index 00000000..bb21a7a0 --- /dev/null +++ b/src/sgdemux/config.vsh.yaml @@ -0,0 +1,212 @@ +name: sgdemux +description: | + Demultiplex sequence data generated on Singular Genomics' sequencing instruments. +keywords: ["demultiplex", "fastq"] +links: + repository: https://github.com/Singular-Genomics/singular-demux +license: Proprietary +requirements: + commands: [sgdemux] +authors: + - __merge__: /src/_authors/dries_schaumont.yaml + roles: [ author, maintainer ] + +argument_groups: + - name: Input + arguments: + - name: "--fastqs" + alternatives: [-f] + type: file + description: Path to the input FASTQs, or path prefix if not a file + required: true + multiple: true + example: sample1_r1.fq;sample1_r2.fq;sample2_r1.fq;sample2_r2.fq + - name: --sample_metadata + alternatives: ["-s"] + type: file + description: Path to the sample metadata CSV file including sample names and barcode sequences + required: true + + - name: Output + arguments: + - name: "--sample_fastq" + direction: "output" + type: file + description: The directory containing demultiplexed sample FASTQ files. + required: true + example: "output/" + - name: "--metrics" + direction: "output" + type: file + required: false + description: | + Demultiplexing summary statistics: + - control_reads_omitted: The number of reads that were omitted for being control reads. + - failing_reads_omitted: The number of reads that were omitted for having failed QC. 
+ - total_templates: The total number of template reads that were output. + example: metrics.tsv + - name: "--most_frequent_unmatched" + direction: output + type: file + required: false + description: | + It contains the (approximate) counts of the most prevalent observed barcode sequences + that did not match to one of the expected barcodes. Can only be created when 'most_unmatched_to_output' + is not set to 0. + example: most_frequent_unmatched.tsv + - name: "--sample_barcode_hop_metrics" + direction: output + type: file + required: false + description: | + File containing the frequently observed barcodes that are unexpected + combinations of expected barcodes in a dual-indexed run. + example: sample_barcode_hop_metrics.tsv + - name: --per_project_metrics + type: file + required: false + direction: output + description: | + Aggregates the metrics by project (aggregates the metrics across samples with the same project) and + has the same columns as `--per_sample_metrics`. In this case, sample_ID will contain the project name (or None if no project is given). + The barcode will contain all Ns. The undetermined sample will not be aggregated with any other sample. + example: per_project_metrics.tsv + - name: --per_sample_metrics + direction: output + type: file + required: false + description: | + Tab-separated file containing statistics per sample. + example: per_sample_metrics.tsv + - name: Arguments + arguments: + - name: --read_structures + alternatives: ["-r"] + type: string + description: Read structures, one per input FASTQ. 
Do not provide when using a path prefix for FASTQs + required: false + multiple: true + - name: --allowed_mismatches + alternatives: ["-m"] + type: integer + description: Number of allowed mismatches between the observed barcode and the expected barcode + example: 1 + - name: --min_delta + alternatives: ["-d"] + type: integer + description: The minimum allowed difference between an observed barcode and the second closest expected barcode + example: 2 + - name: --free_ns + alternatives: ["-F"] + type: integer + description: Number of N's to allow in a barcode without counting against the allowed_mismatches + example: 1 + - name: --max_no_calls + alternatives: ["-N"] + type: integer + description: | + Max no-calls (N's) in a barcode before it is considered unmatchable. + A barcode with total N's greater than 'max_no_call' will be considered unmatchable. + required: false + - name: --quality_mask_threshold + type: integer + multiple: true + alternatives: [-M] + description: | + Mask template bases with quality scores less than specified value(s). + Sample barcode/index and UMI bases are never masked. If provided either a single value, + or one value per FASTQ must be provided. + required: false + - name: --filter_control_reads + alternatives: [-C] + type: boolean_true + description: Filter out control reads + - name: "--filter_failing_quality" + alternatives: [-Q] + type: boolean_true + description: | + Filter reads failing quality filter + - name: "--output_types" + alternatives: [-T] + multiple: true + type: string + description: | + The types of output FASTQs to write. + For each read structure, all segment types listed will be output to a FASTQ file. 
+ + These may be any of the following: + - `T` - Template bases + - `B` - Sample barcode bases + - `M` - Molecular barcode bases + - `S` - Skip bases + choices: ["T", "B", "S", "M"] + example: T + - name: --undetermined_sample_name + alternatives: ["-u"] + type: string + example: Undetermined + description: | + The sample name for undetermined reads (reads that do not match an expected barcode) + - name: --most_unmatched_to_output + alternatives: ["-U"] + type: integer + description: | + Output the most frequent "unmatched" barcodes up to this number. + If set to 0 unmatched barcodes will not be collected, improving overall performance. + example: 1000 + - name: "--override_matcher" + type: string + description: | + If the sample barcodes are > 12 bp long, a cached hamming distance matcher is used. + If the barcodes are less than or equal to 12 bp long, all possible matches are precomputed. + This option allows for overriding that heuristic. + choices: [cached-hamming-distance, pre-compute] + - name: --skip_read_name_check + type: boolean_true + description: | + If this is true, then all the read names across FASTQs will not be enforced to be the same. + This may be useful when the read names are known to be the same and performance matters. + Regardless, the first read name in each FASTQ will always be checked. + - name: "--sample_barcode_in_fastq_header" + type: boolean_true + description: | + If this is true, then the sample barcode is expected to be in the FASTQ read header. + For dual indexed data, the barcodes must be `+` (plus) delimited. Additionally, if true, + then neither index FASTQ files nor sample barcode segments in the read structure may be specified. + - name: "--metric_prefix" + type: string + description: | + Prepend this prefix to all output metric file names + - name: "--lane" + type: integer + multiple: true + alternatives: ["-l"] + description: | + Select a subset of lanes to demultiplex. 
Will cause only samples and input FASTQs with + the given `Lane`(s) to be demultiplexed. Samples without a lane will be ignored, and + FASTQs without lane information will be ignored + +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + - path: test_data + +engines: +- type: docker + image: continuumio/miniconda3:latest + setup: + - type: apt + packages: + - procps + - type: docker + run: | + conda install -c conda-forge -c bioconda sgdemux && \ + echo "sgdemux: $(sgdemux --version | cut -d' ' -f2)" > /var/software_versions.txt + +runners: + - type: executable + - type: nextflow diff --git a/src/sgdemux/help.txt b/src/sgdemux/help.txt new file mode 100644 index 00000000..4782eb02 --- /dev/null +++ b/src/sgdemux/help.txt @@ -0,0 +1,166 @@ +███████╗██╗███╗ ██╗ ██████╗ ██╗ ██╗██╗ █████╗ ██████╗ +██╔════╝██║████╗ ██║██╔════╝ ██║ ██║██║ ██╔══██╗██╔══██╗ +███████╗██║██╔██╗ ██║██║ ███╗██║ ██║██║ ███████║██████╔╝ +╚════██║██║██║╚██╗██║██║ ██║██║ ██║██║ ██╔══██║██╔══██╗ +███████║██║██║ ╚████║╚██████╔╝╚██████╔╝███████╗██║ ██║██║ ██║ +╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ ╚═════╝ ╚══════╝╚═╝ ╚═╝╚═╝ ╚═╝ + + ██████╗ ███████╗███╗ ██╗ ██████╗ ███╗ ███╗██╗ ██████╗███████╗ +██╔════╝ ██╔════╝████╗ ██║██╔═══██╗████╗ ████║██║██╔════╝██╔════╝ +██║ ███╗█████╗ ██╔██╗ ██║██║ ██║██╔████╔██║██║██║ ███████╗ +██║ ██║██╔══╝ ██║╚██╗██║██║ ██║██║╚██╔╝██║██║██║ ╚════██║ +╚██████╔╝███████╗██║ ╚████║╚██████╔╝██║ ╚═╝ ██║██║╚██████╗███████║ + ╚═════╝ ╚══════╝╚═╝ ╚═══╝ ╚═════╝ ╚═╝ ╚═╝╚═╝ ╚═════╝╚══════╝ + +Performs sample demultiplexing on block-compressed (BGZF) FASTQs. + +Input FASTQs must be block compressed (e.g. with `bgzip`). A single bgzipped FASTQ file +should be provided per instrument read. One read structure should be provided per input FASTQ. + +Per-sample files with suffixes like _R1.fastq.gz will be written to the output directory specified with --output. 
+ +The sample metadata file may be a Sample Sheet or a simple two-column CSV file with headers. +The Sample Sheet may haave a `[Demux]` section for command line options, and must have a `[Data]` +section for sample information. The `Sample_ID` column must contain a unique, non-empty identifier +for each sample. Both `Index1_Sequence` and `Index2_Sequence` must be present with values for +indexed runs. For non-indexed runs, a single sample must be given with an empty value for the +`Index1_Sequence` and `Index2_Sequence` columns. For the simple two-column CSV, the +`Sample_Barcode` column must contain the unique set of sample barcode bases for the sample(s). + +Example invocation: + +sgdemux \ + --fastqs R1.fq.gz R2.fq.gz I1.fq.gz \ + --read-structures +T +T 8B \ + --sample-metadata samples.csv \ + --output demuxed-fastqs/ + +For complete documentation see: https://github.com/Singular-Genomics/singular-demux +For support please contact: care@singulargenomics.com + +USAGE: + sgdemux [OPTIONS] --sample-metadata --output-dir + +OPTIONS: + -f, --fastqs ... + Path to the input FASTQs, or path prefix if not a file + + -s, --sample-metadata + Path to the sample metadata + + -r, --read-structures ... + Read structures, one per input FASTQ. Do not provide when using a path prefix for FASTQs + + -o, --output-dir + The directory to write outputs to. + + This tool will overwrite existing files. + + -m, --allowed-mismatches + Number of allowed mismatches between the observed barcode and the expected barcode + + [default: 1] + + -d, --min-delta + The minimum allowed difference between an observed barcode and the second closest expected barcode + + [default: 2] + + -F, --free-ns + Number of N's to allow in a barcode without counting against the allowed_mismatches + + [default: 1] + + -N, --max-no-calls + Max no-calls (N's) in a barcode before it is considered unmatchable. + + A barcode with total N's greater than `max_no_call` will be considered unmatchable. 
+ + [default: None] + + -M, --quality-mask-threshold ... + Mask template bases with quality scores less than specified value(s). + + Sample barcode/index and UMI bases are never masked. If provided either a single value, or one value per FASTQ must be provided. + + -C, --filter-control-reads + Filter out control reads + + -Q, --filter-failing-quality + Filter reads failing quality filter + + -T, --output-types + The types of output FASTQs to write. + + These may be any of the following: + - `T` - Template bases + - `B` - Sample barcode bases + - `M` - Molecular barcode bases + - `S` - Skip bases + + For each read structure, all segment types listed by `--output-types` will be output to a + FASTQ file. + + [default: T] + + -u, --undetermined-sample-name + The sample name for undetermined reads (reads that do not match an expected barcode) + + [default: Undetermined] + + -U, --most-unmatched-to-output + Output the most frequent "unmatched" barcodes up to this number. + + If set to 0 unmatched barcodes will not be collected, improving overall performance. + + [default: 1000] + + -t, --demux-threads + Number of threads for demultiplexing. + + The number of threads to use for the process of determining which input reads should be assigned to which sample. + + [default: 4] + + --compressor-threads + Number of threads for compression the output reads. + + The number of threads to use for compressing reads that are queued for writing. + + [default: 12] + + --writer-threads + Number of threads for writing compressed reads to output. + + The number of threads to have writing reads to their individual output files. + + [default: 5] + + --override-matcher + Override the matcher heuristic. + + If the sample barcodes are > 12 bp long, a cached hamming distance matcher is used. If the barcodes are less than or equal to 12 bp long, all possible matches are precomputed. + + This option allows for overriding that heuristic. 
+ + [default: None] + + [possible values: cached-hamming-distance, pre-compute] + + --skip-read-name-check + If this is true, then all the read names across FASTQs will not be enforced to be the same. This may be useful when the read names are known to be the same and performance matters. Regardless, the first read name in each FASTQ will always be checked + + --sample-barcode-in-fastq-header + If this is true, then the sample barcode is expected to be in the FASTQ read header. For dual indexed data, the barcodes must be `+` (plus) delimited. Additionally, if true, then neither index FASTQ files nor sample barcode segments in the read structure may be specified + + --metric-prefix + Prepend this prefix to all output metric file names + + -l, --lane ... + Select a subset of lanes to demultiplex. Will cause only samples and input FASTQs with the given `Lane`(s) to be demultiplexed. Samples without a lane will be ignored, and FASTQs without lane information will be ignored + + -h, --help + Print help information + + -V, --version + Print version information \ No newline at end of file diff --git a/src/sgdemux/script.sh b/src/sgdemux/script.sh new file mode 100644 index 00000000..7fd5091f --- /dev/null +++ b/src/sgdemux/script.sh @@ -0,0 +1,91 @@ +#!/bin/bash + +set -eo pipefail + +## VIASH START +## VIASH END + +unset_if_false=( + par_filter_control_reads + par_filter_failing_quality + par_skip_read_name_check + par_sample_barcode_in_fastq_header +) + +for par in ${unset_if_false[@]}; do + test_val="${!par}" + [[ "$test_val" == "false" ]] && unset $par +done + +# Create arrays for inputs that contain multiple arguments +IFS=";" read -ra fastqs <<< "$par_fastqs" +IFS=";" read -ra read_structures <<< "$par_read_structures" +IFS=";" read -ra lane <<< "$par_lane" +IFS=";" read -ra quality_mask_threashold <<< "$par_quality_mask_threshold" +IFS=";" read -ra output_types <<< "$par_output_types" + +echo "> Creating temporary directory" +# create temporary directory and clean 
up on exit +TMPDIR=$(mktemp -d "$meta_temp_dir/$meta_name-XXXXXX") +function clean_up { + [[ -d "$TMPDIR" ]] && rm -rf "$TMPDIR" +} +trap clean_up EXIT +echo "> Temporary directory '$TMPDIR' created" + +if [ "${par_most_unmatched_to_output:-1000}" -eq 0 ] && [ -n "$par_most_frequent_unmatched" ]; then # when unset, fall back to sgdemux's documented default (1000) so the integer test never sees an empty string + echo "Requested to output 'most_frequent_unmatched' file, but 'most_unmatched_to_output' is set to 0." + exit 1 +fi + +args=( + --fastqs ${fastqs[@]} + --sample-metadata "$par_sample_metadata" + --output-dir "$TMPDIR" + ${par_allowed_mismatches:+--allowed-mismatches $par_allowed_mismatches} + ${par_min_delta:+--min-delta $par_min_delta} + ${par_free_ns:+--free-ns $par_free_ns} + ${par_max_no_calls:+--max-no-calls $par_max_no_calls} + ${quality_mask_threashold:+--quality-mask-threshold "${quality_mask_threashold[*]}" } + ${par_output_types:+--output-types "${par_output_types//;/}"} # --output-types takes ONE value; segment types are concatenated (e.g. 'T;B' becomes 'TB') + ${par_undetermined_sample_name:+--undetermined-sample-name ${par_undetermined_sample_name}} + ${par_most_unmatched_to_output:+--most-unmatched-to-output ${par_most_unmatched_to_output}} # the sgdemux flag has no 'par-' prefix + ${par_override_matcher:+--override-matcher $par_override_matcher} + ${par_metric_prefix:+--metric-prefix $par_metric_prefix} + ${lane:+--lane "${lane[*]}"} + ${read_structures:+--read-structures ${read_structures[*]}} + ${par_filter_control_reads:+--filter-control-reads} + ${par_filter_failing_quality:+--filter-failing-quality} + ${par_skip_read_name_check:+--skip-read-name-check} + ${par_sample_barcode_in_fastq_header:+--sample-barcode-in-fastq-header} +) + +echo "> Running sgdemux with arguments: ${args[@]}" +sgdemux ${args[@]} +echo "> Done running sgdemux" + +echo "> Copying FASTQ files to $par_sample_fastq" +mkdir -p "$par_sample_fastq" && find "$TMPDIR" -type f -name "*.fastq.gz" -exec mv '{}' "$par_sample_fastq" \; # create the target dir first, otherwise every mv would rename onto the same path + +declare -A output_files=(["metrics.tsv"]="par_metrics" + ["most_frequent_unmatched.tsv"]="par_most_frequent_unmatched" + ["sample_barcode_hop_metrics.tsv"]="par_sample_barcode_hop_metrics" + 
["per_project_metrics.tsv"]="par_per_project_metrics" + ["per_sample_metrics.tsv"]="par_per_sample_metrics" + ) + +for output_file_name in "${!output_files[@]}"; do + output_arg_variable_name=${output_files[$output_file_name]} + destination="${!output_arg_variable_name}" + if [ -n "$destination" ]; then + output_file="$TMPDIR/${par_metric_prefix:-}$output_file_name" # resolve the source path before logging it; NOTE(review): assumes sgdemux prepends --metric-prefix directly to the file name - confirm + echo "> Copying $output_file file to $destination" + if [ ! -f "$output_file" ]; then + echo "Expected a '$output_file_name' to have been created! Exiting..." + exit 1 + fi + cp "$output_file" "$destination" + fi +done + +echo "> Finished!" \ No newline at end of file diff --git a/src/sgdemux/test.sh b/src/sgdemux/test.sh new file mode 100644 index 00000000..548b4ec8 --- /dev/null +++ b/src/sgdemux/test.sh @@ -0,0 +1,66 @@ +#!/bin/bash + +set -eou pipefail + +# Helper functions +assert_file_exists() { + [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; } +} +assert_file_not_empty() { + [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; } +} +assert_file_contains() { + grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; } +} + + +# create temporary directory and clean up on exit +TMPDIR=$(mktemp -d "$meta_temp_dir/$meta_name-XXXXXX") +function clean_up { + [[ -d "$TMPDIR" ]] && rm -rf "$TMPDIR" +} +trap clean_up EXIT + +output_test1="$TMPDIR/output1" +mkdir "$output_test1" +sample_dir_test_1="$output_test1/fastq" +mkdir "$sample_dir_test_1" + +"$meta_executable" \ + --fastqs "$meta_resources_dir/test_data/fastq" \ + --sample_metadata "$meta_resources_dir/test_data/samplesheet.csv" \ + --sample_fastq "$sample_dir_test_1" \ + --metrics "$output_test1/metrics.tsv" \ + --most_frequent_unmatched "$output_test1/most_frequent_unmatched.tsv" \ + --sample_barcode_hop_metrics "$output_test1/sample_barcode_hop_metrics.tsv" \ + --per_sample_metrics "$output_test1/per_sample_metrics.tsv" \ + --per_project_metrics "$output_test1/per_project_metrics.tsv" + +# Check for correct number of 
output FASTQ files +readarray -d '' output_fastq < <(find "$sample_dir_test_1" -name "*.fastq.gz" -print0) +if (( ${#output_fastq[@]} != 196 )); then + echo "Wrong number of output fastq files found." + exit 1 +fi + +# Check if fastq files are not empty (array is quoted so paths containing whitespace stay intact) +for fastq in "${output_fastq[@]}"; do + assert_file_not_empty "$fastq" +done + +# Checking if requested output files exist +assert_file_exists "$output_test1/metrics.tsv" +assert_file_exists "$output_test1/most_frequent_unmatched.tsv" +assert_file_exists "$output_test1/sample_barcode_hop_metrics.tsv" +assert_file_exists "$output_test1/per_sample_metrics.tsv" +assert_file_exists "$output_test1/per_project_metrics.tsv" + +# Checking output file contents against the expected fixtures; each failure prints a message and exits non-zero +diff -q "$meta_resources_dir/test_data/expected/metrics.tsv" "$output_test1/metrics.tsv" || \ + (echo "Incorrect metrics.tsv output!" && exit 1) + +diff -q "$meta_resources_dir/test_data/expected/per_project_metrics.tsv" "$output_test1/per_project_metrics.tsv" || \ + (echo "Incorrect per_project_metrics.tsv output!" && exit 1) + +diff -q "$meta_resources_dir/test_data/expected/per_sample_metrics.tsv" "$output_test1/per_sample_metrics.tsv" || \ + (echo "Incorrect per_sample_metrics.tsv output!" 
&& exit 1) \ No newline at end of file diff --git a/src/sgdemux/test_data/.gitignore b/src/sgdemux/test_data/.gitignore new file mode 100644 index 00000000..5fc9cc14 --- /dev/null +++ b/src/sgdemux/test_data/.gitignore @@ -0,0 +1,2 @@ +unfiltered_fastq +unfiltered_fastq.tar \ No newline at end of file diff --git a/src/sgdemux/test_data/expected/metrics.tsv b/src/sgdemux/test_data/expected/metrics.tsv new file mode 100644 index 00000000..f8743410 --- /dev/null +++ b/src/sgdemux/test_data/expected/metrics.tsv @@ -0,0 +1,2 @@ +control_reads_omitted failing_reads_omitted total_templates +0 0 40000 diff --git a/src/sgdemux/test_data/expected/per_project_metrics.tsv b/src/sgdemux/test_data/expected/per_project_metrics.tsv new file mode 100644 index 00000000..c5956b3d --- /dev/null +++ b/src/sgdemux/test_data/expected/per_project_metrics.tsv @@ -0,0 +1,3 @@ +barcode_name library_name barcode templates perfect_matches one_mismatch_matches q20_bases q30_bases total_number_of_bases fraction_matches ratio_this_barcode_to_best_barcode frac_q20_bases frac_q30_bases mean_index_base_quality +None None NNNNNNNNNNNNNNNNNNNNNNNN 39120 38085 1035 11379704 10715554 11736000 0.978 1.0 0.9696407634628493 0.9130499318336741 34.25803723585549 +Undetermined Undetermined NNNNNNNNNNNNNNNNNNNNNNNN 880 0 0 248629 228186 264000 0.022 0.022494887525562373 0.9417765151515152 0.8643409090909091 32.029592803030305 diff --git a/src/sgdemux/test_data/expected/per_sample_metrics.tsv b/src/sgdemux/test_data/expected/per_sample_metrics.tsv new file mode 100644 index 00000000..55a3f930 --- /dev/null +++ b/src/sgdemux/test_data/expected/per_sample_metrics.tsv @@ -0,0 +1,99 @@ +barcode_name library_name barcode templates perfect_matches one_mismatch_matches q20_bases q30_bases total_number_of_bases fraction_matches ratio_this_barcode_to_best_barcode frac_q20_bases frac_q30_bases mean_index_base_quality +Index1 Index1 TAAGACCCTACTGGGACATATTGA 407 398 9 118274 111544 122100 0.010175 0.2840195394277739 
0.9686650286650287 0.9135462735462736 34.96007371007371 +Index2 Index2 CGAAGTACATCCTAGGACGTAACG 411 403 8 119540 112432 123300 0.010275 0.2868108862526169 0.9695052716950527 0.9118572587185726 33.54622871046229 +Index3 Index3 TAGCCTTCCAAAAGTATGGCAAGA 411 391 20 119407 112307 123300 0.010275 0.2868108862526169 0.968426601784266 0.9108434712084347 35.30869829683698 +Index4 Index4 GCCTTTCAAGTCTAGAGTCGTCGT 393 380 13 114900 108296 117900 0.009825 0.27424982554082344 0.9745547073791349 0.9185411365564037 33.596692111959285 +Index5 Index5 CAACGGTTCCGGACGTTTCGCTCG 333 326 7 96836 91049 99900 0.008325 0.23237962316817865 0.9693293293293294 0.9114014014014014 33.295670670670674 +Index6 Index6 GTTGCATGGCCCTAGGGAACGATG 358 348 10 104555 98438 107400 0.00895 0.24982554082344732 0.9735102420856611 0.9165549348230912 33.8824487895717 +Index7 Index7 ATCGTTGCTATCATGACTCCGCAT 405 398 7 117691 110628 121500 0.010125 0.2826238660153524 0.9686502057613169 0.9105185185185185 33.678909465020574 +Index8 Index8 CCTCGAATTCATGGTTGCTACCGG 349 339 10 101314 95441 104700 0.008725 0.2435450104675506 0.9676599808978033 0.9115663801337154 34.25059694364852 +Index9 Index9 TGAACGTCCGCCTCCTCGATTGAA 488 477 11 142525 134668 146400 0.0122 0.3405443126308444 0.9735314207650273 0.919863387978142 34.09366461748634 +Index10 Index10 CATCTAGCAAGCATGTAGCGTCTC 470 457 13 136840 129261 141000 0.01175 0.3279832519190509 0.9704964539007093 0.9167446808510639 34.048758865248224 +Index11 Index11 TATCGAGGCAACCATCATGCGTAC 443 429 14 127995 120114 132900 0.011075 0.3091416608513608 0.9630925507900677 0.9037923250564334 34.151523702031604 +Index12 Index12 GAGACGTAGCAAACCTTGACCGGG 305 301 4 88991 83801 91500 0.007625 0.21284019539427773 0.9725792349726776 0.9158579234972678 34.09685792349727 +Index13 Index13 ATCATGCGCCCGTTGACGAGATCT 433 428 5 126373 119226 129900 0.010825 0.30216329378925333 0.9728483448806774 0.9178290993071594 33.628752886836025 +Index14 Index14 AGGAGCTAGGGAGGGCTAATGTCA 423 413 10 123319 116323 
126900 0.010575 0.29518492672714586 0.9717809298660363 0.9166509062253743 35.20439322301024 +Index15 Index15 ATCGACCATGCTTTAGGAGCGAAC 509 495 14 148123 139913 152700 0.012725 0.35519888346127004 0.9700261951538965 0.9162606417812704 33.92788146692862 +Index16 Index16 TGCGAATCGACAGTACATCGAGTA 385 379 6 112208 106006 115500 0.009625 0.2686671318911375 0.9714978354978355 0.9178008658008658 34.0754329004329 +Index17 Index17 ATGTTCCCCTCTAGGCTTTGTCAT 408 398 10 118038 110515 122400 0.0102 0.2847173761339846 0.9643627450980392 0.9029003267973856 34.84089052287582 +Index18 Index18 TCGCTCATCTAGCGACGATATTTG 339 325 14 98862 93167 101700 0.008475 0.23656664340544312 0.972094395280236 0.9160963618485742 33.497173058013765 +Index19 Index19 CCTAAGGTAAACAAACTCCGTTGT 369 346 23 107000 100347 110700 0.009225 0.2575017445917655 0.966576332429991 0.9064769647696477 34.50621047877146 +Index20 Index20 GAATAGCGCTTAACGTACCAAGAC 373 361 12 109064 103043 111900 0.009325 0.2602930914166085 0.9746559428060768 0.9208489722966935 34.24474977658624 +Index21 Index21 CGATGTACATCCTCCGATGTCGGC 358 350 8 103582 97059 107400 0.00895 0.24982554082344732 0.9644506517690875 0.9037150837988827 33.41980912476723 +Index22 Index22 CAAGTCGAAACCAGGTTACCGCGT 353 339 14 102683 96579 105900 0.008825 0.2463363572923936 0.9696222851746931 0.9119830028328612 33.82872993389991 +Index23 Index23 GTAACGGATAGCATGCCGAAACGT 359 346 13 103868 97314 107700 0.008975 0.2505233775296581 0.9644196843082637 0.9035654596100279 33.227251624883934 +Index24 Index24 GAAGCTTGGTCAACATACGCGGGG 294 288 6 85741 80490 88200 0.00735 0.20516399162595952 0.9721201814058957 0.9125850340136055 33.92743764172336 +Index25 Index25 AACCCGTAACCAGGATCTAGGACG 377 352 25 109783 103195 113100 0.009425 0.2630844382414515 0.9706719717064545 0.9124226348364279 33.73596374889478 +Index26 Index26 AATGCTCCCCTATCGACTCTCCGT 402 394 8 117878 111620 120600 0.01005 0.28053035589672015 0.9774295190713101 0.9255389718076286 33.359038142620236 +Index27 Index27 
GTATGACGGATGTACGCTAGACAA 345 335 10 99083 92197 103500 0.008625 0.24075366364270762 0.9573236714975846 0.890792270531401 33.13345410628019 +Index28 Index28 GCAAAGCTTGGAATTTCGGGTAAG 354 344 10 102631 96324 106200 0.00885 0.24703419399860432 0.9663935969868174 0.9070056497175141 34.30426082862524 +Index29 Index29 TCTAACCGGCTACTAGCCAACGCC 332 320 12 95787 89529 99600 0.0083 0.2316817864619679 0.9617168674698795 0.8988855421686747 34.03752510040161 +Index30 Index30 ATTGGAGCCCGCTGATAGCCGGTT 327 323 4 94891 88955 98100 0.008175 0.22819260293091417 0.9672884811416922 0.9067787971457696 33.80338939857288 +Index31 Index31 TGTCCGATCTATGCATGGTTCCTA 376 366 10 108952 102197 112800 0.0094 0.26238660153524074 0.965886524822695 0.9060017730496454 33.57890070921986 +Index32 Index32 ACATCGCATGTTGACGGTAATGAG 391 381 10 114169 107781 117300 0.009775 0.27285415212840197 0.973307757885763 0.918849104859335 33.2770673486786 +Index33 Index33 ACTTCCGACAATCGAAGTCATCAA 455 437 18 131991 123910 136500 0.011375 0.31751570132588974 0.966967032967033 0.9077655677655677 34.097802197802196 +Index34 Index34 GCTCCTATGCCTTGGACTACAAAC 405 392 13 118042 111426 121500 0.010125 0.2826238660153524 0.9715390946502057 0.9170864197530865 34.9059670781893 +Index35 Index35 CATAACGCGAATATACGCGACATT 379 368 11 110301 104021 113700 0.009475 0.264480111653873 0.9701055408970977 0.914872471416007 33.94426121372032 +Index36 Index36 CGGACAATGATTTCCCTATGATAC 489 477 12 141195 132440 146700 0.012225 0.34124214933705516 0.9624744376278118 0.9027948193592366 34.75587934560327 +Index37 Index37 GCTACTTGGAAAGAGCTCTACGCA 342 330 12 99495 93814 102600 0.00855 0.23866015352407538 0.9697368421052631 0.9143664717348928 35.11464424951267 +Index38 Index38 TAAGCGAGTAGTCTAGAGGTTACC 443 436 7 128507 120437 132900 0.011075 0.3091416608513608 0.9669450714823176 0.9062227238525207 34.315744920993225 +Index39 Index39 TGCTTGACTCCGAACCCTTGTTCG 376 360 16 109593 103162 112800 0.0094 0.26238660153524074 0.9715691489361702 0.9145567375886525 
33.58577127659574 +Index40 Index40 GACATCGTCGGGGTCGTAAGGGGT 346 334 12 100588 94635 103800 0.00865 0.24145150034891835 0.9690558766859345 0.9117052023121387 33.464715799614645 +Index41 Index41 AGCTATGGGACGTATACCCGGCCC 369 361 8 106939 100204 110700 0.009225 0.2575017445917655 0.9660252935862692 0.9051851851851852 34.12782294489612 +Index42 Index42 ACGACTAGGCTCTGCTAAGCGAGC 445 428 17 129387 121303 133500 0.011125 0.31053733426378227 0.9691910112359551 0.9086367041198502 33.87059925093633 +Index43 Index43 TACCGTACGATACGTCCTAAAACT 410 399 11 119141 112060 123000 0.01025 0.28611304954640615 0.9686260162601626 0.9110569105691056 34.009959349593494 +Index44 Index44 TAGAAGGCGCGTATATCGGGTAAG 337 329 8 97825 91982 101100 0.008425 0.23517096999302164 0.9676063303659743 0.9098120672601385 33.83469337289812 +Index45 Index45 CGTCATCAAGGAGAGACGTTCTTA 353 345 8 102459 96200 105900 0.008825 0.2463363572923936 0.9675070821529745 0.9084041548630784 34.14199716713881 +Index46 Index46 CCGTAAGATAGAGCTTAACGATCA 393 385 8 114883 108408 117900 0.009825 0.27424982554082344 0.9744105173876166 0.9194910941475827 34.751166242578456 +Index47 Index47 TAGACTCGTTTCCGCTAGTACTAT 335 326 9 97703 92042 100500 0.008375 0.23377529658060014 0.9721691542288557 0.9158407960199005 33.91417910447761 +Index48 Index48 TATCGGCTTGGTACGGGTTATTAG 340 332 8 98879 93215 102000 0.0085 0.23726448011165388 0.9694019607843137 0.9138725490196078 33.5484068627451 +Index49 Index49 TCAAGAGCGGAGCGGACTTTTGTA 286 279 7 83104 78361 85800 0.00715 0.19958129797627355 0.9685780885780886 0.9132983682983683 34.25728438228438 +Index50 Index50 TTACCCGTAGAATCCATTGCTTCT 430 421 9 125125 117868 129000 0.01075 0.3000697836706211 0.9699612403100775 0.9137054263565891 34.980135658914726 +Index51 Index51 GCTCTCAATCGGGTCTCATGGCGG 362 356 6 105671 99926 108600 0.00905 0.2526168876482903 0.9730294659300184 0.9201289134438305 33.95683701657459 +Index52 Index52 GTCTACGTTTACTGACTTGGAGAA 424 414 10 123910 116846 127200 0.0106 0.2958827634333566 
0.9741352201257861 0.9186006289308176 35.046776729559745 +Index53 Index53 TCCGTATGAGACACCGTATCCGAT 388 377 11 113079 106509 116400 0.0097 0.27076064200976974 0.9714690721649485 0.9150257731958763 33.57796391752577 +Index54 Index54 CGCCAATACGTCCTATGGGACGGT 376 367 9 110118 104393 112800 0.0094 0.26238660153524074 0.9762234042553192 0.9254698581560283 34.23282358156028 +Index55 Index55 GATGGTCTAGCATAGTTCCCATTC 424 415 9 123407 116497 127200 0.0106 0.2958827634333566 0.9701808176100629 0.9158569182389937 34.653793238993714 +Index56 Index56 CTCGCTTAAGGCCTCCAAGACATC 378 367 11 109899 103233 113400 0.00945 0.26378227494766227 0.9691269841269842 0.9103439153439153 34.77436067019401 +Index57 Index57 GGCAACATGGGTATTACCGCGGTA 391 384 7 113735 107024 117300 0.009775 0.27285415212840197 0.9696078431372549 0.9123955669224212 34.57107843137255 +Index58 Index58 AGACTCTCATCACTAAGCTCCTAA 453 441 12 132414 124981 135900 0.011325 0.31612002791346827 0.9743487858719647 0.919654157468727 35.748804267844 +Index59 Index59 TGACAAGGTCAATGAACGTCCTTC 418 408 10 121688 114809 125400 0.01045 0.2916957431960921 0.9703987240829346 0.9155422647527911 34.78807814992025 +Index60 Index60 CGGTATGTCATCCCATGCAATCTA 380 373 7 110218 103476 114000 0.0095 0.26517794836008374 0.9668245614035088 0.9076842105263158 34.43344298245614 +Index61 Index61 GACTCATGAATGTAGCGTTCATTG 472 462 10 137703 130082 141600 0.0118 0.32937892533147245 0.9724788135593221 0.9186581920903955 34.63162076271186 +Index62 Index62 CGTAGACATTGAAGCATTCGAGCC 376 366 10 108875 101826 112800 0.0094 0.26238660153524074 0.9652039007092199 0.9027127659574468 34.237477836879435 +Index63 Index63 CATTCGCTCCCTAACCTCGAACAT 432 410 22 126027 119036 129600 0.0108 0.30146545708304257 0.9724305555555556 0.9184876543209877 33.62287808641975 +Index64 Index64 ACAATCGGGGACCAAATCGCGGAA 392 378 14 113976 107226 117600 0.0098 0.2735519888346127 0.9691836734693877 0.9117857142857143 34.69834183673469 +Index65 Index65 GGACTTAGAGCGGTCAAGAGGTTA 383 372 11 112077 
105968 114900 0.009575 0.267271458478716 0.9754308093994778 0.9222628372497824 35.06647084421236 +Index66 Index66 GACCGATTCTCGTTAAGGCCGGGA 413 404 9 120172 113111 123900 0.010325 0.2882065596650384 0.969911218724778 0.9129217110573042 34.24606537530266 +Index67 Index67 TGGAAACCCGAGTCGAAAGGGAAA 384 371 13 111892 105331 115200 0.0096 0.26796929518492674 0.9712847222222222 0.9143315972222222 34.3740234375 +Index68 Index68 GGCCTAATGGAAGGAGTCAAATAG 412 405 7 119626 112676 123600 0.0103 0.2875087229588276 0.9678478964401295 0.9116181229773462 35.35507686084142 +Index69 Index69 TTGTACGCGTACCCGTTCTATACA 375 364 11 108748 102143 112500 0.009375 0.26168876482903003 0.9666488888888889 0.9079377777777777 33.481 +Index70 Index70 ATGTCGAGTTGCTGCGAATGCGAA 368 363 5 106772 100222 110400 0.0092 0.2568039078855548 0.9671376811594203 0.9078079710144927 34.03238224637681 +Index71 Index71 CTTCGTACCTCCGCGATCATGACT 370 354 16 106512 99008 111000 0.00925 0.25819958129797627 0.9595675675675676 0.8919639639639639 33.420495495495494 +Index72 Index72 TTAGGTCCGAGACCGAAATCCAAC 376 370 6 109689 103402 112800 0.0094 0.26238660153524074 0.9724202127659575 0.9166843971631206 34.535904255319146 +Index73 Index73 CTAGCTCTTCGTTCGGAGTTTTAC 408 401 7 117925 110354 122400 0.0102 0.2847173761339846 0.9634395424836601 0.9015849673202614 34.30514705882353 +Index74 Index74 CTTGTCCAACTTCTATCCGTCCCG 484 473 11 140913 132578 145200 0.0121 0.3377529658060014 0.9704752066115703 0.9130716253443526 34.36923209366391 +Index75 Index75 CTTAGCGACCCATAACGCGTACCC 380 367 13 109975 103107 114000 0.0095 0.26517794836008374 0.9646929824561403 0.9044473684210527 33.75416666666667 +Index76 Index76 CGTAGGTTAACACTCGTACTTAGC 435 434 1 127211 120150 130500 0.010875 0.3035589672016748 0.9747969348659004 0.9206896551724137 34.482183908045975 +Index77 Index77 AGCATTCCATGTGACTCGAAATGA 407 396 11 117815 110721 122100 0.010175 0.2840195394277739 0.964905814905815 0.9068058968058968 34.646191646191646 +Index78 Index78 
TCGTTACCAACGTTAACCGGCCGA 429 417 12 124538 116764 128700 0.010725 0.29937194696441033 0.9676612276612276 0.9072571872571873 34.04127816627817 +Index79 Index79 TTGCTAGGACATTTCCTAGCAACC 413 401 12 119819 112447 123900 0.010325 0.2882065596650384 0.9670621468926553 0.9075625504439063 34.92070217917676 +Index80 Index80 CGAGACTTCTACGAATAGCGTCCC 342 336 6 99060 92765 102600 0.00855 0.23866015352407538 0.9654970760233919 0.9041423001949318 34.132797270955166 +Index81 Index81 GGTCTATGTTTGAATGACGGATGT 392 380 12 113778 106959 117600 0.0098 0.2735519888346127 0.9675 0.909515306122449 33.21226615646258 +Index82 Index82 GATGCCATAGTAAGTAGCTCGTCC 375 366 9 108822 102411 112500 0.009375 0.26168876482903003 0.9673066666666666 0.91032 34.473555555555556 +Index83 Index83 GTACGAGTTCCTTGGCCATTCTCC 420 404 16 121987 114521 126000 0.0105 0.2930914166085136 0.9681507936507937 0.9088968253968254 34.47152777777778 +Index84 Index84 TTCCATCGGTAGACGCCATAACCG 404 396 8 117727 110971 121200 0.0101 0.2819260293091417 0.9713448844884488 0.9156023102310231 34.18492161716171 +Index85 Index85 ACGCTATCATCTCGTTAACCCGCT 350 344 6 101640 95665 105000 0.00875 0.24424284717376135 0.968 0.9110952380952381 34.33047619047619 +Index86 Index86 GTCCAAGAGTTCTTGAGCCTTGCT 463 457 6 134610 126653 138900 0.011575 0.32309839497557574 0.9691144708423326 0.9118286537077034 35.270608351331894 +Index87 Index87 CGAATGGTAAGAACCCGTAACAAG 390 376 14 114157 107708 117000 0.00975 0.2721563154221912 0.9757008547008547 0.9205811965811965 34.15737179487179 +Index88 Index88 AGGTTTCGGTATAAAGGTCGCCCC 430 419 11 124474 116755 129000 0.01075 0.3000697836706211 0.9649147286821705 0.9050775193798449 33.74437984496124 +Index89 Index89 GGTTCAAGATACTACGAGCTCCTC 383 373 10 111039 104146 114900 0.009575 0.267271458478716 0.9663968668407311 0.9064055700609226 34.52708877284595 +Index90 Index90 GTTAAGCGGGCGGGCATAGAGCGA 400 395 5 116794 110269 120000 0.01 0.2791346824842987 0.9732833333333333 0.9189083333333333 35.131145833333335 +Index91 
Index91 AAGCTACGCGAAGGTAGCATTCGG 421 410 11 121916 114493 126300 0.010525 0.29378925331472433 0.9652889944576405 0.9065162311955661 34.24851543942993 +Index92 Index92 TTGAGCGTCCGACGACAAGTTCGA 406 398 8 117940 111084 121800 0.01015 0.28332170272156315 0.9683087027914614 0.9120197044334976 33.79176929392447 +Index93 Index93 CCTATGCAATTGGGCAATGTACTT 464 457 7 134945 126964 139200 0.0116 0.32379623168178645 0.9694324712643678 0.9120977011494252 35.02846623563219 +Index94 Index94 CTATTGCCTACGCCGGTAAACCCA 421 412 9 122369 115190 126300 0.010525 0.29378925331472433 0.9688756927949327 0.9120348376880444 35.04127078384798 +Index95 Index95 CCTCTTGAAGAGGCCCATTAGATT 383 374 9 111657 105311 114900 0.009575 0.267271458478716 0.9717754569190601 0.9165448215839861 35.01142297650131 +Index96 Index96 TGGGTTACGGGCCAAGGTTCTCTT 327 323 4 94702 88727 98100 0.008175 0.22819260293091417 0.965361875637105 0.9044546381243629 34.88200815494393 +PhiX PhiX AAGGTAGCTACAGACAACCTACCT 1433 1386 47 421623 405869 429900 0.035825 1.0 0.9807466852756455 0.9441009537101651 34.565422191207254 +Undetermined Undetermined NNNNNNNNNNNNNNNNNNNNNNNN 880 0 0 248629 228186 264000 0.022 0.6140963014654571 0.9417765151515152 0.8643409090909091 32.029592803030305 diff --git a/src/sgdemux/test_data/fastq/Undetermined_S0_L001_I1_001.fastq.gz b/src/sgdemux/test_data/fastq/Undetermined_S0_L001_I1_001.fastq.gz new file mode 100644 index 00000000..d3d7e8ab Binary files /dev/null and b/src/sgdemux/test_data/fastq/Undetermined_S0_L001_I1_001.fastq.gz differ diff --git a/src/sgdemux/test_data/fastq/Undetermined_S0_L001_I2_001.fastq.gz b/src/sgdemux/test_data/fastq/Undetermined_S0_L001_I2_001.fastq.gz new file mode 100644 index 00000000..6c4a0688 Binary files /dev/null and b/src/sgdemux/test_data/fastq/Undetermined_S0_L001_I2_001.fastq.gz differ diff --git a/src/sgdemux/test_data/fastq/Undetermined_S0_L001_R1_001.fastq.gz b/src/sgdemux/test_data/fastq/Undetermined_S0_L001_R1_001.fastq.gz new file mode 100644 index 
00000000..63ccec3f Binary files /dev/null and b/src/sgdemux/test_data/fastq/Undetermined_S0_L001_R1_001.fastq.gz differ diff --git a/src/sgdemux/test_data/fastq/Undetermined_S0_L001_R2_001.fastq.gz b/src/sgdemux/test_data/fastq/Undetermined_S0_L001_R2_001.fastq.gz new file mode 100644 index 00000000..b2641039 Binary files /dev/null and b/src/sgdemux/test_data/fastq/Undetermined_S0_L001_R2_001.fastq.gz differ diff --git a/src/sgdemux/test_data/fastq/Undetermined_S0_L002_I1_001.fastq.gz b/src/sgdemux/test_data/fastq/Undetermined_S0_L002_I1_001.fastq.gz new file mode 100644 index 00000000..ae9efffd Binary files /dev/null and b/src/sgdemux/test_data/fastq/Undetermined_S0_L002_I1_001.fastq.gz differ diff --git a/src/sgdemux/test_data/fastq/Undetermined_S0_L002_I2_001.fastq.gz b/src/sgdemux/test_data/fastq/Undetermined_S0_L002_I2_001.fastq.gz new file mode 100644 index 00000000..375cb160 Binary files /dev/null and b/src/sgdemux/test_data/fastq/Undetermined_S0_L002_I2_001.fastq.gz differ diff --git a/src/sgdemux/test_data/fastq/Undetermined_S0_L002_R1_001.fastq.gz b/src/sgdemux/test_data/fastq/Undetermined_S0_L002_R1_001.fastq.gz new file mode 100644 index 00000000..cdbd4cdd Binary files /dev/null and b/src/sgdemux/test_data/fastq/Undetermined_S0_L002_R1_001.fastq.gz differ diff --git a/src/sgdemux/test_data/fastq/Undetermined_S0_L002_R2_001.fastq.gz b/src/sgdemux/test_data/fastq/Undetermined_S0_L002_R2_001.fastq.gz new file mode 100644 index 00000000..367cde43 Binary files /dev/null and b/src/sgdemux/test_data/fastq/Undetermined_S0_L002_R2_001.fastq.gz differ diff --git a/src/sgdemux/test_data/fastq/Undetermined_S0_L003_I1_001.fastq.gz b/src/sgdemux/test_data/fastq/Undetermined_S0_L003_I1_001.fastq.gz new file mode 100644 index 00000000..25260a79 Binary files /dev/null and b/src/sgdemux/test_data/fastq/Undetermined_S0_L003_I1_001.fastq.gz differ diff --git a/src/sgdemux/test_data/fastq/Undetermined_S0_L003_I2_001.fastq.gz 
b/src/sgdemux/test_data/fastq/Undetermined_S0_L003_I2_001.fastq.gz new file mode 100644 index 00000000..941deb06 Binary files /dev/null and b/src/sgdemux/test_data/fastq/Undetermined_S0_L003_I2_001.fastq.gz differ diff --git a/src/sgdemux/test_data/fastq/Undetermined_S0_L003_R1_001.fastq.gz b/src/sgdemux/test_data/fastq/Undetermined_S0_L003_R1_001.fastq.gz new file mode 100644 index 00000000..80d6d8b1 Binary files /dev/null and b/src/sgdemux/test_data/fastq/Undetermined_S0_L003_R1_001.fastq.gz differ diff --git a/src/sgdemux/test_data/fastq/Undetermined_S0_L003_R2_001.fastq.gz b/src/sgdemux/test_data/fastq/Undetermined_S0_L003_R2_001.fastq.gz new file mode 100644 index 00000000..6d7c77fc Binary files /dev/null and b/src/sgdemux/test_data/fastq/Undetermined_S0_L003_R2_001.fastq.gz differ diff --git a/src/sgdemux/test_data/fastq/Undetermined_S0_L004_I1_001.fastq.gz b/src/sgdemux/test_data/fastq/Undetermined_S0_L004_I1_001.fastq.gz new file mode 100644 index 00000000..e6b68227 Binary files /dev/null and b/src/sgdemux/test_data/fastq/Undetermined_S0_L004_I1_001.fastq.gz differ diff --git a/src/sgdemux/test_data/fastq/Undetermined_S0_L004_I2_001.fastq.gz b/src/sgdemux/test_data/fastq/Undetermined_S0_L004_I2_001.fastq.gz new file mode 100644 index 00000000..8be1acd5 Binary files /dev/null and b/src/sgdemux/test_data/fastq/Undetermined_S0_L004_I2_001.fastq.gz differ diff --git a/src/sgdemux/test_data/fastq/Undetermined_S0_L004_R1_001.fastq.gz b/src/sgdemux/test_data/fastq/Undetermined_S0_L004_R1_001.fastq.gz new file mode 100644 index 00000000..5c9e4b13 Binary files /dev/null and b/src/sgdemux/test_data/fastq/Undetermined_S0_L004_R1_001.fastq.gz differ diff --git a/src/sgdemux/test_data/fastq/Undetermined_S0_L004_R2_001.fastq.gz b/src/sgdemux/test_data/fastq/Undetermined_S0_L004_R2_001.fastq.gz new file mode 100644 index 00000000..9bda11c2 Binary files /dev/null and b/src/sgdemux/test_data/fastq/Undetermined_S0_L004_R2_001.fastq.gz differ diff --git 
a/src/sgdemux/test_data/samplesheet.csv b/src/sgdemux/test_data/samplesheet.csv new file mode 100644 index 00000000..9cbf1cdc --- /dev/null +++ b/src/sgdemux/test_data/samplesheet.csv @@ -0,0 +1,409 @@ +[Header],,,,,,,,,,, +Date,2023-01-30,,,,,,,,,, +Run Name,G15_020,,,,,,,,,, +User Name,,,,#Optional,,,,,,, +User Email,,,,#Optional,,,,,,, +Workflow,PE with Dual Indices,,,#Optional,,,,,,, +Assay,Functional Testing Pool of 96,,,#Optional,,,,,,, +Run Notes,12x150x12x150,,,#Optional,,,,,,, +,,,,,,,,,,, +[Settings],,,,,,,,,,, +Read1,150,,,#Enter the number of basepairs per read ,,,,,,, +Read2,150,,,,,,,,,, +,,,,,,,,,,, +Index1,12,,,#Enter the number of basepairs per index from 8-20 bp if using indices,,,,,,, +Index2,12,,,,,,,,,, +,,,,,,,,,,, +Custom Primer Read1 (Y),,,,"#If a custom primer is needed for the run mark Y. Otherwise, leave blank.",,,,,,, +Custom Primer Read2 (Y),,,,,,,,,,, +,,,,,,,,,,, +[Data],,#Required for demux,,#Required for demux,"#Indicate 1,2,3 or 4",#Optional,#Optional,#Optional,#Optional,#Optional,#Optional +Sample_ID,Index1_Name,Index1_Sequence,Index2_Name,Index2_Sequence,Lane,Lane_Name,Project,Loading_Concentration,Application,Notes,Reference +Index1,S1_Index1,TAAGACCCTACT,S2_Index1,GGGACATATTGA,1,,,,,, +Index1,S1_Index1,TAAGACCCTACT,S2_Index1,GGGACATATTGA,2,,,,,, +Index1,S1_Index1,TAAGACCCTACT,S2_Index1,GGGACATATTGA,3,,,,,, +Index1,S1_Index1,TAAGACCCTACT,S2_Index1,GGGACATATTGA,4,,,,,, +Index2,S1_Index2,CGAAGTACATCC,S2_Index2,TAGGACGTAACG,1,,,,,, +Index2,S1_Index2,CGAAGTACATCC,S2_Index2,TAGGACGTAACG,2,,,,,, +Index2,S1_Index2,CGAAGTACATCC,S2_Index2,TAGGACGTAACG,3,,,,,, +Index2,S1_Index2,CGAAGTACATCC,S2_Index2,TAGGACGTAACG,4,,,,,, +Index3,S1_Index3,TAGCCTTCCAAA,S2_Index3,AGTATGGCAAGA,1,,,,,, +Index3,S1_Index3,TAGCCTTCCAAA,S2_Index3,AGTATGGCAAGA,2,,,,,, +Index3,S1_Index3,TAGCCTTCCAAA,S2_Index3,AGTATGGCAAGA,3,,,,,, +Index3,S1_Index3,TAGCCTTCCAAA,S2_Index3,AGTATGGCAAGA,4,,,,,, +Index4,S1_Index4,GCCTTTCAAGTC,S2_Index4,TAGAGTCGTCGT,1,,,,,, 
+Index4,S1_Index4,GCCTTTCAAGTC,S2_Index4,TAGAGTCGTCGT,2,,,,,, +Index4,S1_Index4,GCCTTTCAAGTC,S2_Index4,TAGAGTCGTCGT,3,,,,,, +Index4,S1_Index4,GCCTTTCAAGTC,S2_Index4,TAGAGTCGTCGT,4,,,,,, +Index5,S1_Index5,CAACGGTTCCGG,S2_Index5,ACGTTTCGCTCG,1,,,,,, +Index5,S1_Index5,CAACGGTTCCGG,S2_Index5,ACGTTTCGCTCG,2,,,,,, +Index5,S1_Index5,CAACGGTTCCGG,S2_Index5,ACGTTTCGCTCG,3,,,,,, +Index5,S1_Index5,CAACGGTTCCGG,S2_Index5,ACGTTTCGCTCG,4,,,,,, +Index6,S1_Index6,GTTGCATGGCCC,S2_Index6,TAGGGAACGATG,1,,,,,, +Index6,S1_Index6,GTTGCATGGCCC,S2_Index6,TAGGGAACGATG,2,,,,,, +Index6,S1_Index6,GTTGCATGGCCC,S2_Index6,TAGGGAACGATG,3,,,,,, +Index6,S1_Index6,GTTGCATGGCCC,S2_Index6,TAGGGAACGATG,4,,,,,, +Index7,S1_Index7,ATCGTTGCTATC,S2_Index7,ATGACTCCGCAT,1,,,,,, +Index7,S1_Index7,ATCGTTGCTATC,S2_Index7,ATGACTCCGCAT,2,,,,,, +Index7,S1_Index7,ATCGTTGCTATC,S2_Index7,ATGACTCCGCAT,3,,,,,, +Index7,S1_Index7,ATCGTTGCTATC,S2_Index7,ATGACTCCGCAT,4,,,,,, +Index8,S1_Index8,CCTCGAATTCAT,S2_Index8,GGTTGCTACCGG,1,,,,,, +Index8,S1_Index8,CCTCGAATTCAT,S2_Index8,GGTTGCTACCGG,2,,,,,, +Index8,S1_Index8,CCTCGAATTCAT,S2_Index8,GGTTGCTACCGG,3,,,,,, +Index8,S1_Index8,CCTCGAATTCAT,S2_Index8,GGTTGCTACCGG,4,,,,,, +Index9,S1_Index9,TGAACGTCCGCC,S2_Index9,TCCTCGATTGAA,1,,,,,, +Index9,S1_Index9,TGAACGTCCGCC,S2_Index9,TCCTCGATTGAA,2,,,,,, +Index9,S1_Index9,TGAACGTCCGCC,S2_Index9,TCCTCGATTGAA,3,,,,,, +Index9,S1_Index9,TGAACGTCCGCC,S2_Index9,TCCTCGATTGAA,4,,,,,, +Index10,S1_Index10,CATCTAGCAAGC,S2_Index10,ATGTAGCGTCTC,1,,,,,, +Index10,S1_Index10,CATCTAGCAAGC,S2_Index10,ATGTAGCGTCTC,2,,,,,, +Index10,S1_Index10,CATCTAGCAAGC,S2_Index10,ATGTAGCGTCTC,3,,,,,, +Index10,S1_Index10,CATCTAGCAAGC,S2_Index10,ATGTAGCGTCTC,4,,,,,, +Index11,S1_Index11,TATCGAGGCAAC,S2_Index11,CATCATGCGTAC,1,,,,,, +Index11,S1_Index11,TATCGAGGCAAC,S2_Index11,CATCATGCGTAC,2,,,,,, +Index11,S1_Index11,TATCGAGGCAAC,S2_Index11,CATCATGCGTAC,3,,,,,, +Index11,S1_Index11,TATCGAGGCAAC,S2_Index11,CATCATGCGTAC,4,,,,,, 
+Index12,S1_Index12,GAGACGTAGCAA,S2_Index12,ACCTTGACCGGG,1,,,,,, +Index12,S1_Index12,GAGACGTAGCAA,S2_Index12,ACCTTGACCGGG,2,,,,,, +Index12,S1_Index12,GAGACGTAGCAA,S2_Index12,ACCTTGACCGGG,3,,,,,, +Index12,S1_Index12,GAGACGTAGCAA,S2_Index12,ACCTTGACCGGG,4,,,,,, +Index13,S1_Index13,ATCATGCGCCCG,S2_Index13,TTGACGAGATCT,1,,,,,, +Index13,S1_Index13,ATCATGCGCCCG,S2_Index13,TTGACGAGATCT,2,,,,,, +Index13,S1_Index13,ATCATGCGCCCG,S2_Index13,TTGACGAGATCT,3,,,,,, +Index13,S1_Index13,ATCATGCGCCCG,S2_Index13,TTGACGAGATCT,4,,,,,, +Index14,S1_Index14,AGGAGCTAGGGA,S2_Index14,GGGCTAATGTCA,1,,,,,, +Index14,S1_Index14,AGGAGCTAGGGA,S2_Index14,GGGCTAATGTCA,2,,,,,, +Index14,S1_Index14,AGGAGCTAGGGA,S2_Index14,GGGCTAATGTCA,3,,,,,, +Index14,S1_Index14,AGGAGCTAGGGA,S2_Index14,GGGCTAATGTCA,4,,,,,, +Index15,S1_Index15,ATCGACCATGCT,S2_Index15,TTAGGAGCGAAC,1,,,,,, +Index15,S1_Index15,ATCGACCATGCT,S2_Index15,TTAGGAGCGAAC,2,,,,,, +Index15,S1_Index15,ATCGACCATGCT,S2_Index15,TTAGGAGCGAAC,3,,,,,, +Index15,S1_Index15,ATCGACCATGCT,S2_Index15,TTAGGAGCGAAC,4,,,,,, +Index16,S1_Index16,TGCGAATCGACA,S2_Index16,GTACATCGAGTA,1,,,,,, +Index16,S1_Index16,TGCGAATCGACA,S2_Index16,GTACATCGAGTA,2,,,,,, +Index16,S1_Index16,TGCGAATCGACA,S2_Index16,GTACATCGAGTA,3,,,,,, +Index16,S1_Index16,TGCGAATCGACA,S2_Index16,GTACATCGAGTA,4,,,,,, +Index17,S1_Index17,ATGTTCCCCTCT,S2_Index17,AGGCTTTGTCAT,1,,,,,, +Index17,S1_Index17,ATGTTCCCCTCT,S2_Index17,AGGCTTTGTCAT,2,,,,,, +Index17,S1_Index17,ATGTTCCCCTCT,S2_Index17,AGGCTTTGTCAT,3,,,,,, +Index17,S1_Index17,ATGTTCCCCTCT,S2_Index17,AGGCTTTGTCAT,4,,,,,, +Index18,S1_Index18,TCGCTCATCTAG,S2_Index18,CGACGATATTTG,1,,,,,, +Index18,S1_Index18,TCGCTCATCTAG,S2_Index18,CGACGATATTTG,2,,,,,, +Index18,S1_Index18,TCGCTCATCTAG,S2_Index18,CGACGATATTTG,3,,,,,, +Index18,S1_Index18,TCGCTCATCTAG,S2_Index18,CGACGATATTTG,4,,,,,, +Index19,S1_Index19,CCTAAGGTAAAC,S2_Index19,AAACTCCGTTGT,1,,,,,, +Index19,S1_Index19,CCTAAGGTAAAC,S2_Index19,AAACTCCGTTGT,2,,,,,, 
+Index19,S1_Index19,CCTAAGGTAAAC,S2_Index19,AAACTCCGTTGT,3,,,,,, +Index19,S1_Index19,CCTAAGGTAAAC,S2_Index19,AAACTCCGTTGT,4,,,,,, +Index20,S1_Index20,GAATAGCGCTTA,S2_Index20,ACGTACCAAGAC,1,,,,,, +Index20,S1_Index20,GAATAGCGCTTA,S2_Index20,ACGTACCAAGAC,2,,,,,, +Index20,S1_Index20,GAATAGCGCTTA,S2_Index20,ACGTACCAAGAC,3,,,,,, +Index20,S1_Index20,GAATAGCGCTTA,S2_Index20,ACGTACCAAGAC,4,,,,,, +Index21,S1_Index21,CGATGTACATCC,S2_Index21,TCCGATGTCGGC,1,,,,,, +Index21,S1_Index21,CGATGTACATCC,S2_Index21,TCCGATGTCGGC,2,,,,,, +Index21,S1_Index21,CGATGTACATCC,S2_Index21,TCCGATGTCGGC,3,,,,,, +Index21,S1_Index21,CGATGTACATCC,S2_Index21,TCCGATGTCGGC,4,,,,,, +Index22,S1_Index22,CAAGTCGAAACC,S2_Index22,AGGTTACCGCGT,1,,,,,, +Index22,S1_Index22,CAAGTCGAAACC,S2_Index22,AGGTTACCGCGT,2,,,,,, +Index22,S1_Index22,CAAGTCGAAACC,S2_Index22,AGGTTACCGCGT,3,,,,,, +Index22,S1_Index22,CAAGTCGAAACC,S2_Index22,AGGTTACCGCGT,4,,,,,, +Index23,S1_Index23,GTAACGGATAGC,S2_Index23,ATGCCGAAACGT,1,,,,,, +Index23,S1_Index23,GTAACGGATAGC,S2_Index23,ATGCCGAAACGT,2,,,,,, +Index23,S1_Index23,GTAACGGATAGC,S2_Index23,ATGCCGAAACGT,3,,,,,, +Index23,S1_Index23,GTAACGGATAGC,S2_Index23,ATGCCGAAACGT,4,,,,,, +Index24,S1_Index24,GAAGCTTGGTCA,S2_Index24,ACATACGCGGGG,1,,,,,, +Index24,S1_Index24,GAAGCTTGGTCA,S2_Index24,ACATACGCGGGG,2,,,,,, +Index24,S1_Index24,GAAGCTTGGTCA,S2_Index24,ACATACGCGGGG,3,,,,,, +Index24,S1_Index24,GAAGCTTGGTCA,S2_Index24,ACATACGCGGGG,4,,,,,, +Index25,S1_Index25,AACCCGTAACCA,S2_Index25,GGATCTAGGACG,1,,,,,, +Index25,S1_Index25,AACCCGTAACCA,S2_Index25,GGATCTAGGACG,2,,,,,, +Index25,S1_Index25,AACCCGTAACCA,S2_Index25,GGATCTAGGACG,3,,,,,, +Index25,S1_Index25,AACCCGTAACCA,S2_Index25,GGATCTAGGACG,4,,,,,, +Index26,S1_Index26,AATGCTCCCCTA,S2_Index26,TCGACTCTCCGT,1,,,,,, +Index26,S1_Index26,AATGCTCCCCTA,S2_Index26,TCGACTCTCCGT,2,,,,,, +Index26,S1_Index26,AATGCTCCCCTA,S2_Index26,TCGACTCTCCGT,3,,,,,, +Index26,S1_Index26,AATGCTCCCCTA,S2_Index26,TCGACTCTCCGT,4,,,,,, 
+Index27,S1_Index27,GTATGACGGATG,S2_Index27,TACGCTAGACAA,1,,,,,, +Index27,S1_Index27,GTATGACGGATG,S2_Index27,TACGCTAGACAA,2,,,,,, +Index27,S1_Index27,GTATGACGGATG,S2_Index27,TACGCTAGACAA,3,,,,,, +Index27,S1_Index27,GTATGACGGATG,S2_Index27,TACGCTAGACAA,4,,,,,, +Index28,S1_Index28,GCAAAGCTTGGA,S2_Index28,ATTTCGGGTAAG,1,,,,,, +Index28,S1_Index28,GCAAAGCTTGGA,S2_Index28,ATTTCGGGTAAG,2,,,,,, +Index28,S1_Index28,GCAAAGCTTGGA,S2_Index28,ATTTCGGGTAAG,3,,,,,, +Index28,S1_Index28,GCAAAGCTTGGA,S2_Index28,ATTTCGGGTAAG,4,,,,,, +Index29,S1_Index29,TCTAACCGGCTA,S2_Index29,CTAGCCAACGCC,1,,,,,, +Index29,S1_Index29,TCTAACCGGCTA,S2_Index29,CTAGCCAACGCC,2,,,,,, +Index29,S1_Index29,TCTAACCGGCTA,S2_Index29,CTAGCCAACGCC,3,,,,,, +Index29,S1_Index29,TCTAACCGGCTA,S2_Index29,CTAGCCAACGCC,4,,,,,, +Index30,S1_Index30,ATTGGAGCCCGC,S2_Index30,TGATAGCCGGTT,1,,,,,, +Index30,S1_Index30,ATTGGAGCCCGC,S2_Index30,TGATAGCCGGTT,2,,,,,, +Index30,S1_Index30,ATTGGAGCCCGC,S2_Index30,TGATAGCCGGTT,3,,,,,, +Index30,S1_Index30,ATTGGAGCCCGC,S2_Index30,TGATAGCCGGTT,4,,,,,, +Index31,S1_Index31,TGTCCGATCTAT,S2_Index31,GCATGGTTCCTA,1,,,,,, +Index31,S1_Index31,TGTCCGATCTAT,S2_Index31,GCATGGTTCCTA,2,,,,,, +Index31,S1_Index31,TGTCCGATCTAT,S2_Index31,GCATGGTTCCTA,3,,,,,, +Index31,S1_Index31,TGTCCGATCTAT,S2_Index31,GCATGGTTCCTA,4,,,,,, +Index32,S1_Index32,ACATCGCATGTT,S2_Index32,GACGGTAATGAG,1,,,,,, +Index32,S1_Index32,ACATCGCATGTT,S2_Index32,GACGGTAATGAG,2,,,,,, +Index32,S1_Index32,ACATCGCATGTT,S2_Index32,GACGGTAATGAG,3,,,,,, +Index32,S1_Index32,ACATCGCATGTT,S2_Index32,GACGGTAATGAG,4,,,,,, +Index33,S1_Index33,ACTTCCGACAAT,S2_Index33,CGAAGTCATCAA,1,,,,,, +Index33,S1_Index33,ACTTCCGACAAT,S2_Index33,CGAAGTCATCAA,2,,,,,, +Index33,S1_Index33,ACTTCCGACAAT,S2_Index33,CGAAGTCATCAA,3,,,,,, +Index33,S1_Index33,ACTTCCGACAAT,S2_Index33,CGAAGTCATCAA,4,,,,,, +Index34,S1_Index34,GCTCCTATGCCT,S2_Index34,TGGACTACAAAC,1,,,,,, +Index34,S1_Index34,GCTCCTATGCCT,S2_Index34,TGGACTACAAAC,2,,,,,, 
+Index34,S1_Index34,GCTCCTATGCCT,S2_Index34,TGGACTACAAAC,3,,,,,, +Index34,S1_Index34,GCTCCTATGCCT,S2_Index34,TGGACTACAAAC,4,,,,,, +Index35,S1_Index35,CATAACGCGAAT,S2_Index35,ATACGCGACATT,1,,,,,, +Index35,S1_Index35,CATAACGCGAAT,S2_Index35,ATACGCGACATT,2,,,,,, +Index35,S1_Index35,CATAACGCGAAT,S2_Index35,ATACGCGACATT,3,,,,,, +Index35,S1_Index35,CATAACGCGAAT,S2_Index35,ATACGCGACATT,4,,,,,, +Index36,S1_Index36,CGGACAATGATT,S2_Index36,TCCCTATGATAC,1,,,,,, +Index36,S1_Index36,CGGACAATGATT,S2_Index36,TCCCTATGATAC,2,,,,,, +Index36,S1_Index36,CGGACAATGATT,S2_Index36,TCCCTATGATAC,3,,,,,, +Index36,S1_Index36,CGGACAATGATT,S2_Index36,TCCCTATGATAC,4,,,,,, +Index37,S1_Index37,GCTACTTGGAAA,S2_Index37,GAGCTCTACGCA,1,,,,,, +Index37,S1_Index37,GCTACTTGGAAA,S2_Index37,GAGCTCTACGCA,2,,,,,, +Index37,S1_Index37,GCTACTTGGAAA,S2_Index37,GAGCTCTACGCA,3,,,,,, +Index37,S1_Index37,GCTACTTGGAAA,S2_Index37,GAGCTCTACGCA,4,,,,,, +Index38,S1_Index38,TAAGCGAGTAGT,S2_Index38,CTAGAGGTTACC,1,,,,,, +Index38,S1_Index38,TAAGCGAGTAGT,S2_Index38,CTAGAGGTTACC,2,,,,,, +Index38,S1_Index38,TAAGCGAGTAGT,S2_Index38,CTAGAGGTTACC,3,,,,,, +Index38,S1_Index38,TAAGCGAGTAGT,S2_Index38,CTAGAGGTTACC,4,,,,,, +Index39,S1_Index39,TGCTTGACTCCG,S2_Index39,AACCCTTGTTCG,1,,,,,, +Index39,S1_Index39,TGCTTGACTCCG,S2_Index39,AACCCTTGTTCG,2,,,,,, +Index39,S1_Index39,TGCTTGACTCCG,S2_Index39,AACCCTTGTTCG,3,,,,,, +Index39,S1_Index39,TGCTTGACTCCG,S2_Index39,AACCCTTGTTCG,4,,,,,, +Index40,S1_Index40,GACATCGTCGGG,S2_Index40,GTCGTAAGGGGT,1,,,,,, +Index40,S1_Index40,GACATCGTCGGG,S2_Index40,GTCGTAAGGGGT,2,,,,,, +Index40,S1_Index40,GACATCGTCGGG,S2_Index40,GTCGTAAGGGGT,3,,,,,, +Index40,S1_Index40,GACATCGTCGGG,S2_Index40,GTCGTAAGGGGT,4,,,,,, +Index41,S1_Index41,AGCTATGGGACG,S2_Index41,TATACCCGGCCC,1,,,,,, +Index41,S1_Index41,AGCTATGGGACG,S2_Index41,TATACCCGGCCC,2,,,,,, +Index41,S1_Index41,AGCTATGGGACG,S2_Index41,TATACCCGGCCC,3,,,,,, +Index41,S1_Index41,AGCTATGGGACG,S2_Index41,TATACCCGGCCC,4,,,,,, 
+Index42,S1_Index42,ACGACTAGGCTC,S2_Index42,TGCTAAGCGAGC,1,,,,,, +Index42,S1_Index42,ACGACTAGGCTC,S2_Index42,TGCTAAGCGAGC,2,,,,,, +Index42,S1_Index42,ACGACTAGGCTC,S2_Index42,TGCTAAGCGAGC,3,,,,,, +Index42,S1_Index42,ACGACTAGGCTC,S2_Index42,TGCTAAGCGAGC,4,,,,,, +Index43,S1_Index43,TACCGTACGATA,S2_Index43,CGTCCTAAAACT,1,,,,,, +Index43,S1_Index43,TACCGTACGATA,S2_Index43,CGTCCTAAAACT,2,,,,,, +Index43,S1_Index43,TACCGTACGATA,S2_Index43,CGTCCTAAAACT,3,,,,,, +Index43,S1_Index43,TACCGTACGATA,S2_Index43,CGTCCTAAAACT,4,,,,,, +Index44,S1_Index44,TAGAAGGCGCGT,S2_Index44,ATATCGGGTAAG,1,,,,,, +Index44,S1_Index44,TAGAAGGCGCGT,S2_Index44,ATATCGGGTAAG,2,,,,,, +Index44,S1_Index44,TAGAAGGCGCGT,S2_Index44,ATATCGGGTAAG,3,,,,,, +Index44,S1_Index44,TAGAAGGCGCGT,S2_Index44,ATATCGGGTAAG,4,,,,,, +Index45,S1_Index45,CGTCATCAAGGA,S2_Index45,GAGACGTTCTTA,1,,,,,, +Index45,S1_Index45,CGTCATCAAGGA,S2_Index45,GAGACGTTCTTA,2,,,,,, +Index45,S1_Index45,CGTCATCAAGGA,S2_Index45,GAGACGTTCTTA,3,,,,,, +Index45,S1_Index45,CGTCATCAAGGA,S2_Index45,GAGACGTTCTTA,4,,,,,, +Index46,S1_Index46,CCGTAAGATAGA,S2_Index46,GCTTAACGATCA,1,,,,,, +Index46,S1_Index46,CCGTAAGATAGA,S2_Index46,GCTTAACGATCA,2,,,,,, +Index46,S1_Index46,CCGTAAGATAGA,S2_Index46,GCTTAACGATCA,3,,,,,, +Index46,S1_Index46,CCGTAAGATAGA,S2_Index46,GCTTAACGATCA,4,,,,,, +Index47,S1_Index47,TAGACTCGTTTC,S2_Index47,CGCTAGTACTAT,1,,,,,, +Index47,S1_Index47,TAGACTCGTTTC,S2_Index47,CGCTAGTACTAT,2,,,,,, +Index47,S1_Index47,TAGACTCGTTTC,S2_Index47,CGCTAGTACTAT,3,,,,,, +Index47,S1_Index47,TAGACTCGTTTC,S2_Index47,CGCTAGTACTAT,4,,,,,, +Index48,S1_Index48,TATCGGCTTGGT,S2_Index48,ACGGGTTATTAG,1,,,,,, +Index48,S1_Index48,TATCGGCTTGGT,S2_Index48,ACGGGTTATTAG,2,,,,,, +Index48,S1_Index48,TATCGGCTTGGT,S2_Index48,ACGGGTTATTAG,3,,,,,, +Index48,S1_Index48,TATCGGCTTGGT,S2_Index48,ACGGGTTATTAG,4,,,,,, +Index49,S1_Index49,TCAAGAGCGGAG,S2_Index49,CGGACTTTTGTA,1,,,,,, +Index49,S1_Index49,TCAAGAGCGGAG,S2_Index49,CGGACTTTTGTA,2,,,,,, 
+Index49,S1_Index49,TCAAGAGCGGAG,S2_Index49,CGGACTTTTGTA,3,,,,,, +Index49,S1_Index49,TCAAGAGCGGAG,S2_Index49,CGGACTTTTGTA,4,,,,,, +Index50,S1_Index50,TTACCCGTAGAA,S2_Index50,TCCATTGCTTCT,1,,,,,, +Index50,S1_Index50,TTACCCGTAGAA,S2_Index50,TCCATTGCTTCT,2,,,,,, +Index50,S1_Index50,TTACCCGTAGAA,S2_Index50,TCCATTGCTTCT,3,,,,,, +Index50,S1_Index50,TTACCCGTAGAA,S2_Index50,TCCATTGCTTCT,4,,,,,, +Index51,S1_Index51,GCTCTCAATCGG,S2_Index51,GTCTCATGGCGG,1,,,,,, +Index51,S1_Index51,GCTCTCAATCGG,S2_Index51,GTCTCATGGCGG,2,,,,,, +Index51,S1_Index51,GCTCTCAATCGG,S2_Index51,GTCTCATGGCGG,3,,,,,, +Index51,S1_Index51,GCTCTCAATCGG,S2_Index51,GTCTCATGGCGG,4,,,,,, +Index52,S1_Index52,GTCTACGTTTAC,S2_Index52,TGACTTGGAGAA,1,,,,,, +Index52,S1_Index52,GTCTACGTTTAC,S2_Index52,TGACTTGGAGAA,2,,,,,, +Index52,S1_Index52,GTCTACGTTTAC,S2_Index52,TGACTTGGAGAA,3,,,,,, +Index52,S1_Index52,GTCTACGTTTAC,S2_Index52,TGACTTGGAGAA,4,,,,,, +Index53,S1_Index53,TCCGTATGAGAC,S2_Index53,ACCGTATCCGAT,1,,,,,, +Index53,S1_Index53,TCCGTATGAGAC,S2_Index53,ACCGTATCCGAT,2,,,,,, +Index53,S1_Index53,TCCGTATGAGAC,S2_Index53,ACCGTATCCGAT,3,,,,,, +Index53,S1_Index53,TCCGTATGAGAC,S2_Index53,ACCGTATCCGAT,4,,,,,, +Index54,S1_Index54,CGCCAATACGTC,S2_Index54,CTATGGGACGGT,1,,,,,, +Index54,S1_Index54,CGCCAATACGTC,S2_Index54,CTATGGGACGGT,2,,,,,, +Index54,S1_Index54,CGCCAATACGTC,S2_Index54,CTATGGGACGGT,3,,,,,, +Index54,S1_Index54,CGCCAATACGTC,S2_Index54,CTATGGGACGGT,4,,,,,, +Index55,S1_Index55,GATGGTCTAGCA,S2_Index55,TAGTTCCCATTC,1,,,,,, +Index55,S1_Index55,GATGGTCTAGCA,S2_Index55,TAGTTCCCATTC,2,,,,,, +Index55,S1_Index55,GATGGTCTAGCA,S2_Index55,TAGTTCCCATTC,3,,,,,, +Index55,S1_Index55,GATGGTCTAGCA,S2_Index55,TAGTTCCCATTC,4,,,,,, +Index56,S1_Index56,CTCGCTTAAGGC,S2_Index56,CTCCAAGACATC,1,,,,,, +Index56,S1_Index56,CTCGCTTAAGGC,S2_Index56,CTCCAAGACATC,2,,,,,, +Index56,S1_Index56,CTCGCTTAAGGC,S2_Index56,CTCCAAGACATC,3,,,,,, +Index56,S1_Index56,CTCGCTTAAGGC,S2_Index56,CTCCAAGACATC,4,,,,,, 
+Index57,S1_Index57,GGCAACATGGGT,S2_Index57,ATTACCGCGGTA,1,,,,,, +Index57,S1_Index57,GGCAACATGGGT,S2_Index57,ATTACCGCGGTA,2,,,,,, +Index57,S1_Index57,GGCAACATGGGT,S2_Index57,ATTACCGCGGTA,3,,,,,, +Index57,S1_Index57,GGCAACATGGGT,S2_Index57,ATTACCGCGGTA,4,,,,,, +Index58,S1_Index58,AGACTCTCATCA,S2_Index58,CTAAGCTCCTAA,1,,,,,, +Index58,S1_Index58,AGACTCTCATCA,S2_Index58,CTAAGCTCCTAA,2,,,,,, +Index58,S1_Index58,AGACTCTCATCA,S2_Index58,CTAAGCTCCTAA,3,,,,,, +Index58,S1_Index58,AGACTCTCATCA,S2_Index58,CTAAGCTCCTAA,4,,,,,, +Index59,S1_Index59,TGACAAGGTCAA,S2_Index59,TGAACGTCCTTC,1,,,,,, +Index59,S1_Index59,TGACAAGGTCAA,S2_Index59,TGAACGTCCTTC,2,,,,,, +Index59,S1_Index59,TGACAAGGTCAA,S2_Index59,TGAACGTCCTTC,3,,,,,, +Index59,S1_Index59,TGACAAGGTCAA,S2_Index59,TGAACGTCCTTC,4,,,,,, +Index60,S1_Index60,CGGTATGTCATC,S2_Index60,CCATGCAATCTA,1,,,,,, +Index60,S1_Index60,CGGTATGTCATC,S2_Index60,CCATGCAATCTA,2,,,,,, +Index60,S1_Index60,CGGTATGTCATC,S2_Index60,CCATGCAATCTA,3,,,,,, +Index60,S1_Index60,CGGTATGTCATC,S2_Index60,CCATGCAATCTA,4,,,,,, +Index61,S1_Index61,GACTCATGAATG,S2_Index61,TAGCGTTCATTG,1,,,,,, +Index61,S1_Index61,GACTCATGAATG,S2_Index61,TAGCGTTCATTG,2,,,,,, +Index61,S1_Index61,GACTCATGAATG,S2_Index61,TAGCGTTCATTG,3,,,,,, +Index61,S1_Index61,GACTCATGAATG,S2_Index61,TAGCGTTCATTG,4,,,,,, +Index62,S1_Index62,CGTAGACATTGA,S2_Index62,AGCATTCGAGCC,1,,,,,, +Index62,S1_Index62,CGTAGACATTGA,S2_Index62,AGCATTCGAGCC,2,,,,,, +Index62,S1_Index62,CGTAGACATTGA,S2_Index62,AGCATTCGAGCC,3,,,,,, +Index62,S1_Index62,CGTAGACATTGA,S2_Index62,AGCATTCGAGCC,4,,,,,, +Index63,S1_Index63,CATTCGCTCCCT,S2_Index63,AACCTCGAACAT,1,,,,,, +Index63,S1_Index63,CATTCGCTCCCT,S2_Index63,AACCTCGAACAT,2,,,,,, +Index63,S1_Index63,CATTCGCTCCCT,S2_Index63,AACCTCGAACAT,3,,,,,, +Index63,S1_Index63,CATTCGCTCCCT,S2_Index63,AACCTCGAACAT,4,,,,,, +Index64,S1_Index64,ACAATCGGGGAC,S2_Index64,CAAATCGCGGAA,1,,,,,, +Index64,S1_Index64,ACAATCGGGGAC,S2_Index64,CAAATCGCGGAA,2,,,,,, 
+Index64,S1_Index64,ACAATCGGGGAC,S2_Index64,CAAATCGCGGAA,3,,,,,, +Index64,S1_Index64,ACAATCGGGGAC,S2_Index64,CAAATCGCGGAA,4,,,,,, +Index65,S1_Index65,GGACTTAGAGCG,S2_Index65,GTCAAGAGGTTA,1,,,,,, +Index65,S1_Index65,GGACTTAGAGCG,S2_Index65,GTCAAGAGGTTA,2,,,,,, +Index65,S1_Index65,GGACTTAGAGCG,S2_Index65,GTCAAGAGGTTA,3,,,,,, +Index65,S1_Index65,GGACTTAGAGCG,S2_Index65,GTCAAGAGGTTA,4,,,,,, +Index66,S1_Index66,GACCGATTCTCG,S2_Index66,TTAAGGCCGGGA,1,,,,,, +Index66,S1_Index66,GACCGATTCTCG,S2_Index66,TTAAGGCCGGGA,2,,,,,, +Index66,S1_Index66,GACCGATTCTCG,S2_Index66,TTAAGGCCGGGA,3,,,,,, +Index66,S1_Index66,GACCGATTCTCG,S2_Index66,TTAAGGCCGGGA,4,,,,,, +Index67,S1_Index67,TGGAAACCCGAG,S2_Index67,TCGAAAGGGAAA,1,,,,,, +Index67,S1_Index67,TGGAAACCCGAG,S2_Index67,TCGAAAGGGAAA,2,,,,,, +Index67,S1_Index67,TGGAAACCCGAG,S2_Index67,TCGAAAGGGAAA,3,,,,,, +Index67,S1_Index67,TGGAAACCCGAG,S2_Index67,TCGAAAGGGAAA,4,,,,,, +Index68,S1_Index68,GGCCTAATGGAA,S2_Index68,GGAGTCAAATAG,1,,,,,, +Index68,S1_Index68,GGCCTAATGGAA,S2_Index68,GGAGTCAAATAG,2,,,,,, +Index68,S1_Index68,GGCCTAATGGAA,S2_Index68,GGAGTCAAATAG,3,,,,,, +Index68,S1_Index68,GGCCTAATGGAA,S2_Index68,GGAGTCAAATAG,4,,,,,, +Index69,S1_Index69,TTGTACGCGTAC,S2_Index69,CCGTTCTATACA,1,,,,,, +Index69,S1_Index69,TTGTACGCGTAC,S2_Index69,CCGTTCTATACA,2,,,,,, +Index69,S1_Index69,TTGTACGCGTAC,S2_Index69,CCGTTCTATACA,3,,,,,, +Index69,S1_Index69,TTGTACGCGTAC,S2_Index69,CCGTTCTATACA,4,,,,,, +Index70,S1_Index70,ATGTCGAGTTGC,S2_Index70,TGCGAATGCGAA,1,,,,,, +Index70,S1_Index70,ATGTCGAGTTGC,S2_Index70,TGCGAATGCGAA,2,,,,,, +Index70,S1_Index70,ATGTCGAGTTGC,S2_Index70,TGCGAATGCGAA,3,,,,,, +Index70,S1_Index70,ATGTCGAGTTGC,S2_Index70,TGCGAATGCGAA,4,,,,,, +Index71,S1_Index71,CTTCGTACCTCC,S2_Index71,GCGATCATGACT,1,,,,,, +Index71,S1_Index71,CTTCGTACCTCC,S2_Index71,GCGATCATGACT,2,,,,,, +Index71,S1_Index71,CTTCGTACCTCC,S2_Index71,GCGATCATGACT,3,,,,,, +Index71,S1_Index71,CTTCGTACCTCC,S2_Index71,GCGATCATGACT,4,,,,,, 
+Index72,S1_Index72,TTAGGTCCGAGA,S2_Index72,CCGAAATCCAAC,1,,,,,, +Index72,S1_Index72,TTAGGTCCGAGA,S2_Index72,CCGAAATCCAAC,2,,,,,, +Index72,S1_Index72,TTAGGTCCGAGA,S2_Index72,CCGAAATCCAAC,3,,,,,, +Index72,S1_Index72,TTAGGTCCGAGA,S2_Index72,CCGAAATCCAAC,4,,,,,, +Index73,S1_Index73,CTAGCTCTTCGT,S2_Index73,TCGGAGTTTTAC,1,,,,,, +Index73,S1_Index73,CTAGCTCTTCGT,S2_Index73,TCGGAGTTTTAC,2,,,,,, +Index73,S1_Index73,CTAGCTCTTCGT,S2_Index73,TCGGAGTTTTAC,3,,,,,, +Index73,S1_Index73,CTAGCTCTTCGT,S2_Index73,TCGGAGTTTTAC,4,,,,,, +Index74,S1_Index74,CTTGTCCAACTT,S2_Index74,CTATCCGTCCCG,1,,,,,, +Index74,S1_Index74,CTTGTCCAACTT,S2_Index74,CTATCCGTCCCG,2,,,,,, +Index74,S1_Index74,CTTGTCCAACTT,S2_Index74,CTATCCGTCCCG,3,,,,,, +Index74,S1_Index74,CTTGTCCAACTT,S2_Index74,CTATCCGTCCCG,4,,,,,, +Index75,S1_Index75,CTTAGCGACCCA,S2_Index75,TAACGCGTACCC,1,,,,,, +Index75,S1_Index75,CTTAGCGACCCA,S2_Index75,TAACGCGTACCC,2,,,,,, +Index75,S1_Index75,CTTAGCGACCCA,S2_Index75,TAACGCGTACCC,3,,,,,, +Index75,S1_Index75,CTTAGCGACCCA,S2_Index75,TAACGCGTACCC,4,,,,,, +Index76,S1_Index76,CGTAGGTTAACA,S2_Index76,CTCGTACTTAGC,1,,,,,, +Index76,S1_Index76,CGTAGGTTAACA,S2_Index76,CTCGTACTTAGC,2,,,,,, +Index76,S1_Index76,CGTAGGTTAACA,S2_Index76,CTCGTACTTAGC,3,,,,,, +Index76,S1_Index76,CGTAGGTTAACA,S2_Index76,CTCGTACTTAGC,4,,,,,, +Index77,S1_Index77,AGCATTCCATGT,S2_Index77,GACTCGAAATGA,1,,,,,, +Index77,S1_Index77,AGCATTCCATGT,S2_Index77,GACTCGAAATGA,2,,,,,, +Index77,S1_Index77,AGCATTCCATGT,S2_Index77,GACTCGAAATGA,3,,,,,, +Index77,S1_Index77,AGCATTCCATGT,S2_Index77,GACTCGAAATGA,4,,,,,, +Index78,S1_Index78,TCGTTACCAACG,S2_Index78,TTAACCGGCCGA,1,,,,,, +Index78,S1_Index78,TCGTTACCAACG,S2_Index78,TTAACCGGCCGA,2,,,,,, +Index78,S1_Index78,TCGTTACCAACG,S2_Index78,TTAACCGGCCGA,3,,,,,, +Index78,S1_Index78,TCGTTACCAACG,S2_Index78,TTAACCGGCCGA,4,,,,,, +Index79,S1_Index79,TTGCTAGGACAT,S2_Index79,TTCCTAGCAACC,1,,,,,, +Index79,S1_Index79,TTGCTAGGACAT,S2_Index79,TTCCTAGCAACC,2,,,,,, 
+Index79,S1_Index79,TTGCTAGGACAT,S2_Index79,TTCCTAGCAACC,3,,,,,, +Index79,S1_Index79,TTGCTAGGACAT,S2_Index79,TTCCTAGCAACC,4,,,,,, +Index80,S1_Index80,CGAGACTTCTAC,S2_Index80,GAATAGCGTCCC,1,,,,,, +Index80,S1_Index80,CGAGACTTCTAC,S2_Index80,GAATAGCGTCCC,2,,,,,, +Index80,S1_Index80,CGAGACTTCTAC,S2_Index80,GAATAGCGTCCC,3,,,,,, +Index80,S1_Index80,CGAGACTTCTAC,S2_Index80,GAATAGCGTCCC,4,,,,,, +Index81,S1_Index81,GGTCTATGTTTG,S2_Index81,AATGACGGATGT,1,,,,,, +Index81,S1_Index81,GGTCTATGTTTG,S2_Index81,AATGACGGATGT,2,,,,,, +Index81,S1_Index81,GGTCTATGTTTG,S2_Index81,AATGACGGATGT,3,,,,,, +Index81,S1_Index81,GGTCTATGTTTG,S2_Index81,AATGACGGATGT,4,,,,,, +Index82,S1_Index82,GATGCCATAGTA,S2_Index82,AGTAGCTCGTCC,1,,,,,, +Index82,S1_Index82,GATGCCATAGTA,S2_Index82,AGTAGCTCGTCC,2,,,,,, +Index82,S1_Index82,GATGCCATAGTA,S2_Index82,AGTAGCTCGTCC,3,,,,,, +Index82,S1_Index82,GATGCCATAGTA,S2_Index82,AGTAGCTCGTCC,4,,,,,, +Index83,S1_Index83,GTACGAGTTCCT,S2_Index83,TGGCCATTCTCC,1,,,,,, +Index83,S1_Index83,GTACGAGTTCCT,S2_Index83,TGGCCATTCTCC,2,,,,,, +Index83,S1_Index83,GTACGAGTTCCT,S2_Index83,TGGCCATTCTCC,3,,,,,, +Index83,S1_Index83,GTACGAGTTCCT,S2_Index83,TGGCCATTCTCC,4,,,,,, +Index84,S1_Index84,TTCCATCGGTAG,S2_Index84,ACGCCATAACCG,1,,,,,, +Index84,S1_Index84,TTCCATCGGTAG,S2_Index84,ACGCCATAACCG,2,,,,,, +Index84,S1_Index84,TTCCATCGGTAG,S2_Index84,ACGCCATAACCG,3,,,,,, +Index84,S1_Index84,TTCCATCGGTAG,S2_Index84,ACGCCATAACCG,4,,,,,, +Index85,S1_Index85,ACGCTATCATCT,S2_Index85,CGTTAACCCGCT,1,,,,,, +Index85,S1_Index85,ACGCTATCATCT,S2_Index85,CGTTAACCCGCT,2,,,,,, +Index85,S1_Index85,ACGCTATCATCT,S2_Index85,CGTTAACCCGCT,3,,,,,, +Index85,S1_Index85,ACGCTATCATCT,S2_Index85,CGTTAACCCGCT,4,,,,,, +Index86,S1_Index86,GTCCAAGAGTTC,S2_Index86,TTGAGCCTTGCT,1,,,,,, +Index86,S1_Index86,GTCCAAGAGTTC,S2_Index86,TTGAGCCTTGCT,2,,,,,, +Index86,S1_Index86,GTCCAAGAGTTC,S2_Index86,TTGAGCCTTGCT,3,,,,,, +Index86,S1_Index86,GTCCAAGAGTTC,S2_Index86,TTGAGCCTTGCT,4,,,,,, 
+Index87,S1_Index87,CGAATGGTAAGA,S2_Index87,ACCCGTAACAAG,1,,,,,, +Index87,S1_Index87,CGAATGGTAAGA,S2_Index87,ACCCGTAACAAG,2,,,,,, +Index87,S1_Index87,CGAATGGTAAGA,S2_Index87,ACCCGTAACAAG,3,,,,,, +Index87,S1_Index87,CGAATGGTAAGA,S2_Index87,ACCCGTAACAAG,4,,,,,, +Index88,S1_Index88,AGGTTTCGGTAT,S2_Index88,AAAGGTCGCCCC,1,,,,,, +Index88,S1_Index88,AGGTTTCGGTAT,S2_Index88,AAAGGTCGCCCC,2,,,,,, +Index88,S1_Index88,AGGTTTCGGTAT,S2_Index88,AAAGGTCGCCCC,3,,,,,, +Index88,S1_Index88,AGGTTTCGGTAT,S2_Index88,AAAGGTCGCCCC,4,,,,,, +Index89,S1_Index89,GGTTCAAGATAC,S2_Index89,TACGAGCTCCTC,1,,,,,, +Index89,S1_Index89,GGTTCAAGATAC,S2_Index89,TACGAGCTCCTC,2,,,,,, +Index89,S1_Index89,GGTTCAAGATAC,S2_Index89,TACGAGCTCCTC,3,,,,,, +Index89,S1_Index89,GGTTCAAGATAC,S2_Index89,TACGAGCTCCTC,4,,,,,, +Index90,S1_Index90,GTTAAGCGGGCG,S2_Index90,GGCATAGAGCGA,1,,,,,, +Index90,S1_Index90,GTTAAGCGGGCG,S2_Index90,GGCATAGAGCGA,2,,,,,, +Index90,S1_Index90,GTTAAGCGGGCG,S2_Index90,GGCATAGAGCGA,3,,,,,, +Index90,S1_Index90,GTTAAGCGGGCG,S2_Index90,GGCATAGAGCGA,4,,,,,, +Index91,S1_Index91,AAGCTACGCGAA,S2_Index91,GGTAGCATTCGG,1,,,,,, +Index91,S1_Index91,AAGCTACGCGAA,S2_Index91,GGTAGCATTCGG,2,,,,,, +Index91,S1_Index91,AAGCTACGCGAA,S2_Index91,GGTAGCATTCGG,3,,,,,, +Index91,S1_Index91,AAGCTACGCGAA,S2_Index91,GGTAGCATTCGG,4,,,,,, +Index92,S1_Index92,TTGAGCGTCCGA,S2_Index92,CGACAAGTTCGA,1,,,,,, +Index92,S1_Index92,TTGAGCGTCCGA,S2_Index92,CGACAAGTTCGA,2,,,,,, +Index92,S1_Index92,TTGAGCGTCCGA,S2_Index92,CGACAAGTTCGA,3,,,,,, +Index92,S1_Index92,TTGAGCGTCCGA,S2_Index92,CGACAAGTTCGA,4,,,,,, +Index93,S1_Index93,CCTATGCAATTG,S2_Index93,GGCAATGTACTT,1,,,,,, +Index93,S1_Index93,CCTATGCAATTG,S2_Index93,GGCAATGTACTT,2,,,,,, +Index93,S1_Index93,CCTATGCAATTG,S2_Index93,GGCAATGTACTT,3,,,,,, +Index93,S1_Index93,CCTATGCAATTG,S2_Index93,GGCAATGTACTT,4,,,,,, +Index94,S1_Index94,CTATTGCCTACG,S2_Index94,CCGGTAAACCCA,1,,,,,, +Index94,S1_Index94,CTATTGCCTACG,S2_Index94,CCGGTAAACCCA,2,,,,,, 
+Index94,S1_Index94,CTATTGCCTACG,S2_Index94,CCGGTAAACCCA,3,,,,,, +Index94,S1_Index94,CTATTGCCTACG,S2_Index94,CCGGTAAACCCA,4,,,,,, +Index95,S1_Index95,CCTCTTGAAGAG,S2_Index95,GCCCATTAGATT,1,,,,,, +Index95,S1_Index95,CCTCTTGAAGAG,S2_Index95,GCCCATTAGATT,2,,,,,, +Index95,S1_Index95,CCTCTTGAAGAG,S2_Index95,GCCCATTAGATT,3,,,,,, +Index95,S1_Index95,CCTCTTGAAGAG,S2_Index95,GCCCATTAGATT,4,,,,,, +Index96,S1_Index96,TGGGTTACGGGC,S2_Index96,CAAGGTTCTCTT,1,,,,,, +Index96,S1_Index96,TGGGTTACGGGC,S2_Index96,CAAGGTTCTCTT,2,,,,,, +Index96,S1_Index96,TGGGTTACGGGC,S2_Index96,CAAGGTTCTCTT,3,,,,,, +Index96,S1_Index96,TGGGTTACGGGC,S2_Index96,CAAGGTTCTCTT,4,,,,,, +PhiX,S1-PhiX-index,AAGGTAGCTACA,S2-PhiX-index,GACAACCTACCT,1,,,,,, +PhiX,S1-PhiX-index,AAGGTAGCTACA,S2-PhiX-index,GACAACCTACCT,2,,,,,, +PhiX,S1-PhiX-index,AAGGTAGCTACA,S2-PhiX-index,GACAACCTACCT,3,,,,,, +PhiX,S1-PhiX-index,AAGGTAGCTACA,S2-PhiX-index,GACAACCTACCT,4,,,,,, diff --git a/src/sgdemux/test_data/script.sh b/src/sgdemux/test_data/script.sh new file mode 100755 index 00000000..776977b4 --- /dev/null +++ b/src/sgdemux/test_data/script.sh @@ -0,0 +1,49 @@ +#!/bin/bash +set -eo pipefail + +REPO_ROOT=$(git rev-parse --show-toplevel) +cd "$REPO_ROOT" + +OUT=src/sgdemux/test_data/ + + +TAR_LOC="$OUT/unfiltered_fastq.tar" +if [ ! -f "$TAR_LOC" ]; then + wget https://singular-public-repo.s3.us-west-1.amazonaws.com/example_raw_files/unfiltered_fastq.tar.gz -O "$TAR_LOC" +fi + +tar -xvf "$TAR_LOC" -C "$OUT" + +# NOTE: sgdemux requires block compressed gzip files! +function seqkit_head { + input="$1" + output="$2" + if [[ ! 
-f "$output" ]]; then + echo "> Processing $(basename $input)" + seqkit head -n 10000 "$input" | bgzip --threads 12 > "$output" + fi +} +tar_contents=( + Undetermined_S0_L001_I1_001.fastq.gz + Undetermined_S0_L001_I2_001.fastq.gz + Undetermined_S0_L001_R1_001.fastq.gz + Undetermined_S0_L001_R2_001.fastq.gz + Undetermined_S0_L002_I1_001.fastq.gz + Undetermined_S0_L002_I2_001.fastq.gz + Undetermined_S0_L002_R1_001.fastq.gz + Undetermined_S0_L002_R2_001.fastq.gz + Undetermined_S0_L003_I1_001.fastq.gz + Undetermined_S0_L003_I2_001.fastq.gz + Undetermined_S0_L003_R1_001.fastq.gz + Undetermined_S0_L003_R2_001.fastq.gz + Undetermined_S0_L004_I1_001.fastq.gz + Undetermined_S0_L004_I2_001.fastq.gz + Undetermined_S0_L004_R1_001.fastq.gz + Undetermined_S0_L004_R2_001.fastq.gz +) + +mkdir -p "$OUT/fastq" +for fastq in ${tar_contents[@]}; do + seqkit_head "$OUT/unfiltered_fastq/$fastq" "$OUT/fastq/$fastq" +done +cp "$OUT/unfiltered_fastq/samplesheet.csv" "$OUT/samplesheet.csv" \ No newline at end of file