Skip to content

Commit

Permalink
merge the development branch
Browse files Browse the repository at this point in the history
  • Loading branch information
golobor committed Jan 2, 2019
2 parents 2c1b83b + a587ef1 commit 479d0fc
Show file tree
Hide file tree
Showing 7 changed files with 706 additions and 729 deletions.
44 changes: 44 additions & 0 deletions bin/pyfilesplit
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env python

import sys
import argparse

def build_parser():
    """Build the command-line parser for the round-robin file splitter."""
    parser = argparse.ArgumentParser(
        description='Split a file into pieces in a round robin fashion. '
        'Covers the missing functionality of GNU split by enabling '
        'multi-line round robin splits.')

    parser.add_argument('output_files', nargs='+', help='output files')
    parser.add_argument('--input', default='-', type=str,
        help="input file; read stdin when not specified or when '-'")
    parser.add_argument('--lines', type=int, default=1,
        help='number of lines sent to each output file at each round')
    parser.add_argument('--strict', action="store_true", default=False,
        help='require that lines split evenly ')
    return parser


def split_round_robin(lines, outputs, chunk_size=1):
    """Distribute lines over outputs in round-robin chunks.

    The first ``chunk_size`` lines go to ``outputs[0]``, the next
    ``chunk_size`` lines to ``outputs[1]``, and so on, wrapping around to
    the first output after the last one.

    Args:
        lines: iterable of strings (e.g. an open text file).
        outputs: non-empty sequence of objects with a ``write`` method.
        chunk_size: number of consecutive lines sent to one output before
            moving on to the next; must be at least 1.

    Returns:
        The total number of lines written (0 for empty input).

    Raises:
        ValueError: if ``chunk_size`` is below 1 or ``outputs`` is empty.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    n_files = len(outputs)
    if n_files == 0:
        raise ValueError("at least one output is required")

    # Pre-bind the counter so that empty input yields 0 instead of leaving
    # the loop variable undefined.
    n_lines = 0
    for n_lines, line in enumerate(lines, 1):
        # (n_lines - 1) is the 0-based line index; integer-dividing it by
        # the chunk size gives the chunk number, which cycles over outputs.
        outputs[((n_lines - 1) // chunk_size) % n_files].write(line)
    return n_lines


def main(argv=None):
    """Run the splitter from the command line.

    Args:
        argv: list of argument strings; ``None`` means ``sys.argv[1:]``.

    Raises:
        ValueError: in ``--strict`` mode, when the number of input lines is
            not a multiple of ``lines * number_of_output_files``.
    """
    parser = build_parser()
    args = parser.parse_args(argv)
    if args.lines < 1:
        # A zero chunk size would otherwise surface as ZeroDivisionError.
        parser.error('--lines must be a positive integer')

    # Open the input first so that a bad input path does not truncate
    # the output files.
    in_stream = sys.stdin if args.input == '-' else open(args.input)
    outputs = []
    try:
        for path in args.output_files:
            outputs.append(open(path, 'w'))
        n_lines = split_round_robin(in_stream, outputs, args.lines)
    finally:
        # Close everything that was opened, even on failure.
        for f in outputs:
            f.close()
        if in_stream is not sys.stdin:
            in_stream.close()

    if args.strict and n_lines % (args.lines * len(outputs)) != 0:
        raise ValueError("Input did not divide evenly")


if __name__ == '__main__':
    main()






74 changes: 19 additions & 55 deletions configs/cluster.config
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

process {

// default LSF node config
Expand All @@ -14,6 +15,10 @@ process {
// time='4h'
time = { task.attempt<=1 ? '4h' : '12h' }

// Set the location for temporary folders (mostly used in sorting).
distillerTmpDir='./'
// distillerTmpDir='/tmp'

// use this scope of config
// to specify LSF submission node
// configuration. Use 'params'
Expand All @@ -25,92 +30,51 @@ process {
maxForks = 15
}

withName: fastqc {
withName: download_truncate_chunk_fastqs {
maxForks = 10
cpus = 4
memory = '4 GB'
memory = '2 GB'
queue = 'short'
time = '2h'
}

withName: chunk_fastqs {
withName: local_truncate_chunk_fastqs {
cpus = 4
memory = '2 GB'
queue = 'short'
time = '2h'
}

withName: map_chunks {
withName: fastqc {
cpus = 4
memory = '16 GB'
memory = '4 GB'
queue = 'short'
time = '2h'
}

withName: parse_chunks {
cpus = 4
memory = '8 GB'
queue = 'short'
time = '1h'
}

withName: merge_chunks_into_runs {
cpus = 8
memory = '12 GB'
queue = 'short'
time = '4h'
}

withName: merge_runs_into_libraries {
withName: map_parse_sort_chunks {
cpus = 8
memory = '12 GB'
queue = 'short'
time = '4h'
}

//
// withName: merge_stats_chunks_into_runs
// { use default }
//

//
// withName: merge_stats_runs_into_libraries
// { use default }
//

withName: filter_make_pairs {
cpus = 8
memory = '12 GB'
queue = 'short'
time = '4h'
}

//
// withName: index_pairs
// { use defaults }
//

withName: bin_library_pairs {
cpus = 8
memory = '12 GB'
queue = 'short'
time = '4h'
withName: merge_dedup_splitbam {
cpus = 6
memory = '16 GB'
queue = 'long'
time = '12h'
}

withName: zoom_library_coolers {
withName: bin_zoom_library_pairs {
cpus = 8
memory = '12 GB'
queue = 'short'
time = '4h'
}

withName: make_library_group_coolers {
cpus = 2
memory = '12 GB'
queue = 'short'
time = '2h'
}

withName: zoom_library_group_coolers {
withName: merge_zoom_library_group_coolers {
cpus = 8
memory = '12 GB'
queue = 'short'
Expand Down
58 changes: 15 additions & 43 deletions configs/local.config
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,18 @@ process {
cpus = 4
maxRetries = 2
errorStrategy = 'retry'

// the coolest feature of 'retry' is that
// one can dynamically adjust the config for
// processes - each one individually, or for all -
// using {task.attempt} as the index of the attempt
cpus = { task.attempt<=1 ? 4 : 8 }
// see cluster config for more examples

// Set the location for temporary folders (mostly used in sorting).
distillerTmpDir='./'
// distillerTmpDir='/tmp'

// use this scope of config
// to specify local
// configuration. Use 'params'
Expand All @@ -20,67 +25,33 @@ process {

// process-specific local config

withName: download_sra{
cpus = 2
maxForks = 15
}

withName: fastqc {
withName: download_truncate_chunk_fastqs {
maxForks = 10
cpus = 4
}

withName: chunk_fastqs {
withName: local_truncate_chunk_fastqs {
cpus = 4
}

withName: map_chunks {
cpus = 8
}

withName: parse_chunks {
cpus = 4
}

withName: merge_chunks_into_runs {
cpus = 8
}

withName: merge_runs_into_libraries {
cpus = 8
}

//
// withName: merge_stats_chunks_into_runs
// { use default }
//

//
// withName: merge_stats_runs_into_libraries
// { use default }
//

withName: filter_make_pairs {
cpus = 8
}

//
// withName: index_pairs
// withName: fastqc
// { use defaults }
//

withName: bin_library_pairs {
withName: map_parse_sort_chunks {
cpus = 8
}

withName: zoom_library_coolers {
withName: merge_dedup_splitbam {
cpus = 8
}

withName: make_library_group_coolers {
cpus = 2
withName: bin_zoom_library_pairs {
cpus = 8
}

withName: zoom_library_group_coolers {
withName: merge_zoom_library_group_coolers {
cpus = 8
}

Expand All @@ -99,5 +70,6 @@ executor {
docker {
enabled = true
runOptions = '-u $(id -u):$(id -g)'
temp = 'auto'
}

Loading

0 comments on commit 479d0fc

Please sign in to comment.