Skip to content

Commit

Permalink
Merge pull request #1787 from dib-lab/fix/consume-filename-or-parser
Browse files Browse the repository at this point in the history
[MRG] "Overload" cython consume methods with type introspection
  • Loading branch information
camillescott authored Sep 17, 2017
2 parents ae3c90e + f0f61cd commit 01e826b
Show file tree
Hide file tree
Showing 12 changed files with 113 additions and 131 deletions.
11 changes: 6 additions & 5 deletions khmer/_oxli/graphs.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ from libc.stdint cimport uint8_t, uint32_t, uint64_t, uintptr_t

from khmer._oxli.oxli_types cimport *
from khmer._oxli.hashing cimport Kmer, CpKmer, KmerSet, CpKmerFactory, CpKmerIterator
from khmer._oxli.parsing cimport CpReadParser, CpSequence
from khmer._oxli.parsing cimport CpReadParser, CpSequence, FastxParserPtr
from khmer._oxli.legacy_partitioning cimport (CpSubsetPartition, cp_pre_partition_info,
SubsetPartition)
from khmer._oxli.utils cimport oxli_raise_py_error
Expand Down Expand Up @@ -139,15 +139,15 @@ cdef extern from "oxli/hashgraph.hh" namespace "oxli" nogil:

void consume_seqfile_and_tag[SeqIO](const string &,
unsigned int,
unsigned long long)
unsigned long long)

# Ugly workaround. For some reason, Cython doesn't like *just this*
# templated overload -- it chooses whichever was defined last, breaking
# resolution for either strings or FastxParserPtr. So, we rename it on
# the Cython side and give it a real name substitution for code gen.
void consume_seqfile_and_tag_readparser "consume_seqfile_and_tag" [SeqIO](shared_ptr[CpReadParser[SeqIO]],
unsigned int,
unsigned long long)
unsigned long long)

void consume_sequence_and_tag(const string &,
unsigned long long &)
Expand All @@ -160,7 +160,7 @@ cdef extern from "oxli/hashgraph.hh" namespace "oxli" nogil:
unsigned int &,
unsigned long long &) except +oxli_raise_py_error

uintptr_t trim_on_stoptags(string)
uintptr_t trim_on_stoptags(string)

unsigned int traverse_from_kmer(CpKmer,
uint32_t,
Expand All @@ -177,7 +177,7 @@ cdef extern from "oxli/hashgraph.hh" namespace "oxli" nogil:
void load_stop_tags(string, bool) except +oxli_raise_py_error
void extract_unique_paths(string, uint32_t, float, vector[string])
void calc_connected_graph_size(CpKmer, uint64_t&, KmerSet&,
const uint64_t, bool)
const uint64_t, bool)
uint32_t kmer_degree(HashIntoType, HashIntoType)
uint32_t kmer_degree(const char *)
void find_high_degree_nodes(const char *, set[HashIntoType] &) const
Expand Down Expand Up @@ -246,6 +246,7 @@ cdef class Hashtable:
cdef HashIntoType sanitize_hash_kmer(self, object kmer) except -1
cdef bytes _valid_sequence(self, str sequence)
cdef CpKmer _build_kmer(self, object kmer) except *
cdef FastxParserPtr _get_parser(self, object parser_or_filename) except *
cdef list _get_raw_tables(self, uint8_t **, vector[uint64_t])


Expand Down
189 changes: 87 additions & 102 deletions khmer/_oxli/graphs.pyx

Large diffs are not rendered by default.

10 changes: 3 additions & 7 deletions khmer/_oxli/utils.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,13 @@ def get_n_primes_near_x(n_primes, x):
if len(primes) != n_primes:
msg = "unable to find {0} prime numbers < {1}".format(n_primes, x)
raise RuntimeError(msg)
return primes
return primes


cdef bytes _bstring(s):
if not isinstance(s, (basestring, bytes)):
raise TypeError("Requires a string-like sequence")
raise TypeError("Requires a string-like sequence, "\
" got {0} of type {1}".format(s, type(s)))

if isinstance(s, unicode):
s = s.encode('utf-8')
Expand All @@ -30,9 +31,6 @@ cdef unicode _ustring(s):
if type(s) is unicode:
# fast path for most common case(s)
return <unicode>s
elif PY_MAJOR_VERSION < 3 and isinstance(s, bytes):
# only accept byte strings in Python 2.x, not in Py3
return (<bytes>s).decode('UTF-8')
elif isinstance(s, unicode):
# an evil cast to <unicode> might work here in some(!) cases,
# depending on what the further processing does. to be safe,
Expand All @@ -58,5 +56,3 @@ cdef void _fill(double * fill_to, object fill_from):
'''UNSAFE fill from flat python iterable to C array.'''
for idx, item in enumerate(fill_from):
fill_to[idx] = <double>item


4 changes: 2 additions & 2 deletions oxli/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ def build_graph(ifilenames, graph, num_threads=1, tags=False):
- tags: should there be tags
"""
if tags:
eat = graph.consume_seqfile_and_tag_with_reads_parser
eat = graph.consume_seqfile_and_tag
else:
eat = graph.consume_seqfile_with_reads_parser
eat = graph.consume_seqfile

for _, ifile in enumerate(ifilenames):
rparser = khmer.ReadParser(ifile)
Expand Down
2 changes: 1 addition & 1 deletion sandbox/count-kmers-single.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def main():
for _ in range(args.threads):
thread = \
threading.Thread(
target=countgraph.consume_seqfile_with_reads_parser,
target=countgraph.consume_seqfile,
args=(rparser, )
)
threads.append(thread)
Expand Down
2 changes: 1 addition & 1 deletion sandbox/optimal_args_hashbits.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def main():
file=sys.stderr)

htable = khmer.new_nodegraph(args.ksize, args.max_tablesize, args.n_tables)
target_method = htable.consume_seqfile_with_reads_parser
target_method = htable.consume_seqfile

for _, filename in enumerate(filenames):
rparser = khmer.ReadParser(filename)
Expand Down
4 changes: 2 additions & 2 deletions scripts/abundance-dist-single.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def main(): # pylint: disable=too-many-locals,too-many-branches
for _ in range(args.threads):
thread = \
threading.Thread(
target=countgraph.consume_seqfile_with_reads_parser,
target=countgraph.consume_seqfile,
args=(rparser, )
)
threads.append(thread)
Expand All @@ -162,7 +162,7 @@ def main(): # pylint: disable=too-many-locals,too-many-branches
abundance_lists = []

def __do_abundance_dist__(read_parser):
abundances = countgraph.abundance_distribution_with_reads_parser(
abundances = countgraph.abundance_distribution(
read_parser, tracking)
abundance_lists.append(abundances)

Expand Down
2 changes: 1 addition & 1 deletion scripts/filter-abund-single.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def main():
for _ in range(args.threads):
cur_thread = \
threading.Thread(
target=graph.consume_seqfile_with_reads_parser,
target=graph.consume_seqfile,
args=(rparser, )
)
threads.append(cur_thread)
Expand Down
2 changes: 1 addition & 1 deletion scripts/load-into-counting.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def main():
for _ in range(args.threads):
cur_thrd = \
threading.Thread(
target=countgraph.consume_seqfile_with_reads_parser,
target=countgraph.consume_seqfile,
args=(rparser, )
)
threads.append(cur_thrd)
Expand Down
6 changes: 3 additions & 3 deletions tests/test_countgraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -1186,16 +1186,16 @@ def test_consume_absentfasta():
print(str(err))


def test_consume_absentfasta_with_reads_parser():
def test_consume_absentfasta():
countgraph = khmer.Countgraph(4, 4 ** 4, 4)
try:
countgraph.consume_seqfile_with_reads_parser()
countgraph.consume_seqfile()
assert 0, "this should fail"
except TypeError as err:
print(str(err))
try:
readparser = ReadParser(utils.get_test_data('empty-file'))
countgraph.consume_seqfile_with_reads_parser(readparser)
countgraph.consume_seqfile(readparser)
assert 0, "this should fail"
except OSError as err:
print(str(err))
Expand Down
8 changes: 4 additions & 4 deletions tests/test_nodegraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -905,16 +905,16 @@ def test_bad_primes_list():
print(str(e))


def test_consume_absentfasta_with_reads_parser():
def test_consume_absentfasta():
nodegraph = khmer.Nodegraph(31, 1, 1)
try:
nodegraph.consume_seqfile_with_reads_parser()
nodegraph.consume_seqfile()
assert 0, "this should fail"
except TypeError as err:
print(str(err))
try:
readparser = ReadParser(utils.get_test_data('empty-file'))
nodegraph.consume_seqfile_with_reads_parser(readparser)
nodegraph.consume_seqfile(readparser)
assert 0, "this should fail"
except OSError as err:
print(str(err))
Expand All @@ -934,7 +934,7 @@ def test_consume_seqfile_and_tag_with_badreads_parser():
nodegraph = khmer.Nodegraph(6, 1e6, 2)
try:
readsparser = khmer.ReadParser(utils.get_test_data("test-empty.fa"))
nodegraph.consume_seqfile_and_tag_with_reads_parser(readsparser)
nodegraph.consume_seqfile_and_tag(readsparser)
assert 0, "this should fail"
except OSError as e:
print(str(e))
Expand Down
4 changes: 2 additions & 2 deletions tests/test_tabletype.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ def test_consume_seqfile_reads_parser(AnyTabletype):
kh = AnyTabletype(5)
rparser = ReadParser(utils.get_test_data('test-fastq-reads.fq'))

kh.consume_seqfile_with_reads_parser(rparser)
kh.consume_seqfile(rparser)

kh2 = AnyTabletype(5)
for record in screed.open(utils.get_test_data('test-fastq-reads.fq')):
Expand Down Expand Up @@ -460,7 +460,7 @@ def test_abund_dist_A_readparser(AnyTabletype):
tracking = Nodegraph(4, 1, 1, primes=PRIMES_1m)

kh.consume_seqfile(A_filename)
dist = kh.abundance_distribution_with_reads_parser(rparser, tracking)
dist = kh.abundance_distribution(rparser, tracking)

print(dist[:10])
assert sum(dist) == 1
Expand Down

0 comments on commit 01e826b

Please sign in to comment.