rename chunks -> iterable #28

Closed · wants to merge 1 commit
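This PR renames the chunked-container API throughout the codebase: the chunks function becomes iterable and the Chunks base class becomes IterableOf, with every backend and test updated to match. A minimal before/after sketch of the public spelling, assuming a build of into with this patch applied (usage mirrors the updated tests below):

import numpy as np
from into.chunks import iterable, IterableOf  # formerly: chunks, Chunks

x = np.array([1, 2, 3, 4])
c = iterable(np.ndarray)([x, x])  # formerly: chunks(np.ndarray)([x, x])
assert isinstance(c, IterableOf)  # formerly: isinstance(c, Chunks)
assert len(list(c)) == 2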
into/__init__.py (2 changes: 1 addition & 1 deletion)
@@ -9,7 +9,7 @@
 from .resource import resource
 from .into import into
 from .drop import drop
-from .chunks import chunks, Chunks
+from .chunks import iterable, IterableOf
 from datashape import discover, dshape
 from collections import Iterator
 import numpy as np
into/backends/bcolz.py (10 changes: 5 additions & 5 deletions)
@@ -11,7 +11,7 @@
 from ..convert import convert, ooc_types
 from ..resource import resource
 from ..drop import drop
-from ..chunks import chunks
+from ..chunks import iterable
 
 keywords = ['cparams', 'dflt', 'expectedlen', 'chunklen', 'rootdir']
 
@@ -30,7 +30,7 @@ def numpy_append_to_bcolz(a, b, **kwargs):
 
 @append.register((ctable, carray), object)
 def numpy_append_to_bcolz(a, b, **kwargs):
-    return append(a, convert(chunks(np.ndarray), b, **kwargs), **kwargs)
+    return append(a, convert(iterable(np.ndarray), b, **kwargs), **kwargs)
 
 
 @convert.register(ctable, np.ndarray, cost=2.0)
@@ -48,23 +48,23 @@ def convert_bcolz_to_numpy(x, **kwargs):
     return x[:]
 
 
-@append.register((carray, ctable), chunks(np.ndarray))
+@append.register((carray, ctable), iterable(np.ndarray))
 def append_carray_with_chunks(a, c, **kwargs):
     for chunk in c:
         append(a, chunk)
     a.flush()
     return a
 
 
-@convert.register(chunks(np.ndarray), (ctable, carray), cost=1.2)
+@convert.register(iterable(np.ndarray), (ctable, carray), cost=1.2)
 def bcolz_to_numpy_chunks(x, chunksize=2**20, **kwargs):
     def load():
         first_n = min(1000, chunksize)
         first = x[:first_n]
         yield first
         for i in range(first_n, x.shape[0], chunksize):
             yield x[i: i + chunksize]
-    return chunks(np.ndarray)(load)
+    return iterable(np.ndarray)(load)
 
 
 @resource.register('.*\.bcolz/?')
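Note the shape of bcolz_to_numpy_chunks above: it wraps the load function itself rather than a generator object, so the resulting iterable(np.ndarray) can be iterated more than once. A standalone sketch of that callable-payload idea, no bcolz required (the callable check here is an assumption based on the IterableOf docstring, which permits "an iterable or a function that returns an iterator"):

import numpy as np

x = np.arange(10)

class LazyChunks(object):
    # stand-in for IterableOf's callable-payload behavior
    def __init__(self, data):
        self.data = data

    def __iter__(self):
        # calling data() produces a fresh generator on every pass
        return iter(self.data() if callable(self.data) else self.data)

def load(chunksize=4):
    for i in range(0, x.shape[0], chunksize):
        yield x[i: i + chunksize]

c = LazyChunks(load)
assert [len(chunk) for chunk in c] == [4, 4, 2]
assert [len(chunk) for chunk in c] == [4, 4, 2]  # re-iteration still works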
into/backends/csv.py (20 changes: 10 additions & 10 deletions)
@@ -19,7 +19,7 @@
 from ..append import append
 from ..convert import convert, ooc_types
 from ..resource import resource
-from ..chunks import chunks
+from ..chunks import iterable
 from ..numpy_dtype import dshape_to_pandas
 from .pandas import coerce_datetimes
 
@@ -57,7 +57,7 @@ def __init__(self, path, has_header='no-input', encoding='utf-8', **kwargs):
 
 @append.register(CSV, object)
 def append_object_to_csv(c, seq, **kwargs):
-    append(c, convert(chunks(pd.DataFrame), seq, **kwargs), **kwargs)
+    append(c, convert(iterable(pd.DataFrame), seq, **kwargs), **kwargs)
     return c
 
 
@@ -92,7 +92,7 @@ def append_dataframe_to_csv(c, df, dshape=None, **kwargs):
     return c
 
 
-@append.register(CSV, chunks(pd.DataFrame))
+@append.register(CSV, iterable(pd.DataFrame))
 def append_iterator_to_csv(c, cs, **kwargs):
     for chunk in cs:
         append(c, chunk, **kwargs)
@@ -174,7 +174,7 @@ def _csv_to_DataFrame(c, dshape=None, chunksize=None, **kwargs):
                            **kwargs2)
 
 
-@convert.register(chunks(pd.DataFrame), CSV, cost=10.0)
+@convert.register(iterable(pd.DataFrame), CSV, cost=10.0)
 def CSV_to_chunks_of_dataframes(c, chunksize=2**20, **kwargs):
     # Load a small 1000 line DF to start
     # This helps with rapid viewing of a large CSV file
@@ -187,7 +187,7 @@ def _():
         yield first
         for df in rest:
             yield df
-    return chunks(pd.DataFrame)(_)
+    return iterable(pd.DataFrame)(_)
 
 
 @discover.register(CSV)
@@ -226,19 +226,19 @@ def resource_csv(uri, **kwargs):
 def resource_glob(uri, **kwargs):
     filenames = sorted(glob(uri))
     r = resource(filenames[0], **kwargs)
-    return chunks(type(r))([resource(u, **kwargs) for u in sorted(glob(uri))])
+    return iterable(type(r))([resource(u, **kwargs) for u in sorted(glob(uri))])
 
     # Alternatively check each time we iterate?
     def _():
         return (resource(u, **kwargs) for u in glob(uri))
-    return chunks(type(r))(_)
+    return iterable(type(r))(_)
 
 
-@convert.register(chunks(pd.DataFrame), chunks(CSV), cost=10.0)
+@convert.register(iterable(pd.DataFrame), iterable(CSV), cost=10.0)
 def convert_glob_of_csvs_to_chunks_of_dataframes(csvs, **kwargs):
     def _():
-        return concat(convert(chunks(pd.DataFrame), csv, **kwargs) for csv in csvs)
-    return chunks(pd.DataFrame)(_)
+        return concat(convert(iterable(pd.DataFrame), csv, **kwargs) for csv in csvs)
+    return iterable(pd.DataFrame)(_)
 
 
 @dispatch(CSV)
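The glob support above lets a pattern like 'accounts_*.csv' resolve to an iterable(CSV) whose per-file DataFrame chunks are concatenated lazily. A hedged sketch of the same idea in plain pandas, independent of into (the file pattern is hypothetical):

from glob import glob
from itertools import chain
import pandas as pd

def dataframe_chunks(pattern, chunksize=100000):
    # one lazy stream of DataFrame chunks across all matching files,
    # mirroring convert_glob_of_csvs_to_chunks_of_dataframes above
    return chain.from_iterable(
        pd.read_csv(fn, chunksize=chunksize) for fn in sorted(glob(pattern)))

for chunk in dataframe_chunks('accounts_*.csv'):  # hypothetical files
    print(len(chunk))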
into/backends/h5py.py (10 changes: 5 additions & 5 deletions)
@@ -13,7 +13,7 @@
 from ..convert import convert, ooc_types
 from ..create import create
 from ..resource import resource
-from ..chunks import chunks, Chunks
+from ..chunks import iterable, IterableOf
 from ..compatibility import unicode
 
 h5py_attributes = ['chunks', 'compression', 'compression_opts', 'dtype',
@@ -95,7 +95,7 @@ def append_h5py(dset, x, **kwargs):
     return dset
 
 
-@append.register(h5py.Dataset, chunks(np.ndarray))
+@append.register(h5py.Dataset, iterable(np.ndarray))
 def append_h5py(dset, c, **kwargs):
     for chunk in c:
         append(dset, chunk)
@@ -104,7 +104,7 @@ def append_h5py(dset, c, **kwargs):
 
 @append.register(h5py.Dataset, object)
 def append_h5py(dset, x, **kwargs):
-    return append(dset, convert(chunks(np.ndarray), x, **kwargs), **kwargs)
+    return append(dset, convert(iterable(np.ndarray), x, **kwargs), **kwargs)
 
 
 @convert.register(np.ndarray, h5py.Dataset, cost=3.0)
@@ -116,12 +116,12 @@ def h5py_to_numpy(dset, force=False, **kwargs):
     return dset[:]
 
 
-@convert.register(chunks(np.ndarray), h5py.Dataset, cost=3.0)
+@convert.register(iterable(np.ndarray), h5py.Dataset, cost=3.0)
 def h5py_to_numpy_chunks(dset, chunksize=2**20, **kwargs):
     def load():
         for i in range(0, dset.shape[0], chunksize):
             yield dset[i: i + chunksize]
-    return chunks(np.ndarray)(load)
+    return iterable(np.ndarray)(load)
 
 
 @resource.register('h5py://.+', priority=11)
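Appending chunk-by-chunk, as append_h5py does above, presumes the target dataset can grow. A minimal standalone sketch of that pattern using only the h5py API (file and dataset names are hypothetical; the dataset must be created with a resizable maxshape):

import h5py
import numpy as np

chunks_of_data = [np.arange(5), np.arange(5)]

with h5py.File('example.h5', 'w') as f:  # hypothetical file
    dset = f.create_dataset('x', shape=(0,), maxshape=(None,), dtype='i8')
    for chunk in chunks_of_data:
        n = dset.shape[0]
        dset.resize(n + chunk.shape[0], axis=0)  # grow along axis 0
        dset[n:] = chunk                         # write the new rows
    assert dset.shape == (10,)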
into/backends/hdfstore.py (10 changes: 5 additions & 5 deletions)
@@ -6,7 +6,7 @@
 from datashape import discover
 from ..append import append
 from ..convert import convert, ooc_types
-from ..chunks import chunks, Chunks
+from ..chunks import iterable, IterableOf
 from ..resource import resource
 
 
@@ -37,9 +37,9 @@ def discover_hdfstore_storer(storer):
     return n * measure
 
 
-@convert.register(chunks(pd.DataFrame), pd.io.pytables.AppendableFrameTable)
+@convert.register(iterable(pd.DataFrame), pd.io.pytables.AppendableFrameTable)
 def hdfstore_to_chunks_dataframes(data, chunksize=1000000, **kwargs):
-    return chunks(pd.DataFrame)(data.parent.select(data.pathname, chunksize=chunksize))
+    return iterable(pd.DataFrame)(data.parent.select(data.pathname, chunksize=chunksize))
 
 
 @convert.register(pd.DataFrame, (pd.io.pytables.AppendableFrameTable,
@@ -81,7 +81,7 @@ def append_dataframe_to_hdfstore(store, df, **kwargs):
 
 
 @append.register((pd.io.pytables.Fixed, EmptyHDFStoreDataset),
-                 chunks(pd.DataFrame))
+                 iterable(pd.DataFrame))
 def append_chunks_dataframe_to_hdfstore(store, c, **kwargs):
     parent = store.parent
     for chunk in c:
@@ -91,7 +91,7 @@ def append_chunks_dataframe_to_hdfstore(store, c, **kwargs):
 
 @append.register((pd.io.pytables.Fixed, EmptyHDFStoreDataset), object)
 def append_object_to_hdfstore(store, o, **kwargs):
-    return append(store, convert(chunks(pd.DataFrame), o, **kwargs), **kwargs)
+    return append(store, convert(iterable(pd.DataFrame), o, **kwargs), **kwargs)
 
 
 ooc_types |= set(HDFDataset)
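The conversion above leans on pandas itself for chunking: HDFStore.select(key, chunksize=...) returns an iterator of DataFrames, which is handed to iterable(pd.DataFrame) unchanged. A small sketch of that pandas behavior on its own (requires PyTables; the file name is hypothetical):

import numpy as np
import pandas as pd

df = pd.DataFrame({'x': np.arange(10)})
with pd.HDFStore('example.h5') as store:  # hypothetical file
    store.append('data', df)              # 'table' format supports chunked reads
    sizes = [len(c) for c in store.select('data', chunksize=4)]
assert sizes == [4, 4, 2]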
into/backends/pytables.py (8 changes: 4 additions & 4 deletions)
@@ -5,7 +5,7 @@
 from ..append import append
 from ..convert import convert, ooc_types
 from ..resource import resource
-from ..chunks import chunks, Chunks
+from ..chunks import iterable
 from ..utils import tmpfile
 
 import os
@@ -45,20 +45,20 @@ def numpy_to_pytables(t, x, **kwargs):
 
 @append.register((tables.Array, tables.Table), object)
 def append_h5py(dset, x, **kwargs):
-    return append(dset, convert(chunks(np.ndarray), x, **kwargs), **kwargs)
+    return append(dset, convert(iterable(np.ndarray), x, **kwargs), **kwargs)
 
 
 @convert.register(np.ndarray, tables.Table, cost=3.0)
 def pytables_to_numpy(t, **kwargs):
     return t[:]
 
 
-@convert.register(chunks(np.ndarray), tables.Table, cost=3.0)
+@convert.register(iterable(np.ndarray), tables.Table, cost=3.0)
 def pytables_to_numpy_chunks(t, chunksize=2**20, **kwargs):
     def load():
         for i in range(0, t.shape[0], chunksize):
             yield t[i: i + chunksize]
-    return chunks(np.ndarray)(load)
+    return iterable(np.ndarray)(load)
 
 
 def dtype_to_pytables(dtype):
into/backends/sql.py (4 changes: 2 additions & 2 deletions)
@@ -14,7 +14,7 @@
 from ..convert import convert, ooc_types
 from ..append import append
 from ..resource import resource
-from ..chunks import Chunks
+from ..chunks import IterableOf
 
 # http://docs.sqlalchemy.org/en/latest/core/types.html
 
@@ -243,7 +243,7 @@ def append_iterator_to_table(t, rows, dshape=None, **kwargs):
     return t
 
 
-@append.register(sa.Table, Chunks)
+@append.register(sa.Table, IterableOf)
 def append_anything_to_sql_Table(t, c, **kwargs):
     for item in c:
         append(t, item, **kwargs)
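Unlike the other backends, the SQL backend registers on the IterableOf base class itself, so a single rule covers chunks of any container type: every class produced by iterable(cls) subclasses IterableOf, and dispatch on the base catches them all. A standalone sketch of why that works, mirroring the type(...) call in into/chunks.py (cls_name is simplified to cls.__name__ here):

class IterableOf(object):
    def __init__(self, data):
        self.data = data

    def __iter__(self):
        return iter(self.data)

def iterable(cls):
    # parametrized subclass, as built in into/chunks.py after this PR
    return type('iterable(%s)' % cls.__name__, (IterableOf,), {'container': cls})

c = iterable(list)([[1, 2], [3, 4]])
assert isinstance(c, IterableOf)  # base-class registration still matches
assert c.container is list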
into/backends/tests/test_bcolz.py (8 changes: 4 additions & 4 deletions)
@@ -2,7 +2,7 @@
 
 from into.backends.bcolz import (append, convert, ctable, carray, resource,
                                  discover, drop)
-from into.chunks import chunks
+from into.chunks import iterable
 from into import append, convert, discover, into
 import numpy as np
 from into.utils import tmpfile
@@ -31,8 +31,8 @@ def test_convert():
 
 
 def test_chunks():
-    c = convert(chunks(np.ndarray), a, chunksize=2)
-    assert isinstance(c, chunks(np.ndarray))
+    c = convert(iterable(np.ndarray), a, chunksize=2)
+    assert isinstance(c, iterable(np.ndarray))
     assert len(list(c)) == 2
     assert eq(list(c)[1], [3, 4])
 
@@ -41,7 +41,7 @@ def test_chunks():
 
 def test_append_chunks():
     b = carray(x)
-    append(b, chunks(np.ndarray)([x, x]))
+    append(b, iterable(np.ndarray)([x, x]))
     assert len(b) == len(x) * 3
into/backends/tests/test_h5py.py (6 changes: 3 additions & 3 deletions)
@@ -4,7 +4,7 @@
                                 resource_h5py)
 from contextlib import contextmanager
 from into.utils import tmpfile
-from into.chunks import chunks
+from into.chunks import iterable
 from into import into, append, convert, discover
 import datashape
 import h5py
@@ -61,13 +61,13 @@ def test_numpy():
 
 def test_chunks():
     with file(x) as (fn, f, dset):
-        c = convert(chunks(np.ndarray), dset)
+        c = convert(iterable(np.ndarray), dset)
         assert eq(convert(np.ndarray, c), x)
 
 
 def test_append_chunks():
     with file(x) as (fn, f, dset):
-        append(dset, chunks(np.ndarray)([x, x]))
+        append(dset, iterable(np.ndarray)([x, x]))
 
         assert len(dset) == len(x) * 3
into/backends/tests/test_hdfstore.py (8 changes: 4 additions & 4 deletions)
@@ -2,7 +2,7 @@
 from into.backends.hdfstore import discover
 from contextlib import contextmanager
 from into.utils import tmpfile
-from into.chunks import chunks
+from into.chunks import iterable
 from into import into, append, convert, resource, discover
 import datashape
 import pandas as pd
@@ -60,7 +60,7 @@ def eq(a, b):
 
 def test_chunks():
     with file(df) as (fn, f, dset):
-        c = convert(chunks(pd.DataFrame), dset)
+        c = convert(iterable(pd.DataFrame), dset)
         assert eq(convert(np.ndarray, c), df)
 
 
@@ -97,14 +97,14 @@ def test_convert_pandas():
 
 def test_convert_chunks():
     with file(df) as (fn, f, dset):
-        c = convert(chunks(pd.DataFrame), dset, chunksize=len(df) / 2)
+        c = convert(iterable(pd.DataFrame), dset, chunksize=len(df) / 2)
         assert len(list(c)) == 2
         assert eq(convert(pd.DataFrame, c), df)
 
 
 def test_append_chunks():
     with file(df) as (fn, f, dset):
-        append(dset, chunks(pd.DataFrame)([df, df]))
+        append(dset, iterable(pd.DataFrame)([df, df]))
 
         assert discover(dset).shape[0] == len(df) * 3
into/chunks.py (22 changes: 11 additions & 11 deletions)
@@ -5,20 +5,20 @@
 from .utils import cls_name
 
 
-class Chunks(object):
+class IterableOf(object):
     """ An Iterable of chunked data
 
     Iterates over chunks of in-memory data. Contains an iterable or a function
    that returns an iterator.
 
-    >>> c = Chunks([[1, 2, 3], [4, 5, 6]])
+    >>> c = IterableOf([[1, 2, 3], [4, 5, 6]])
     >>> next(iter(c))
     [1, 2, 3]
 
-    For typed containers see the ``chunks`` function which generates
-    parametrized Chunks classes.
+    For typed containers see the ``iterable`` function which generates
+    parametrized ``IterableOf`` classes.
 
-    >>> c = chunks(list)([[1, 2, 3], [4, 5, 6]])
+    >>> c = iterable(list)([[1, 2, 3], [4, 5, 6]])
     >>> next(iter(c))
     [1, 2, 3]
 
@@ -36,15 +36,15 @@ def __iter__(self):
         return iter(self.data)
 
 
-def chunks(cls):
-    """ Parametrized Chunks Class """
-    return type('Chunks_' + cls_name(cls).replace('.', '_'), (Chunks,), {'container': cls})
+def iterable(cls):
+    """ Parametrized IterableOf Class """
+    return type('iterable(%s)' % cls_name(cls), (IterableOf,), {'container': cls})
 
-chunks.__doc__ = Chunks.__doc__
+iterable.__doc__ = IterableOf.__doc__
 
-chunks = memoize(chunks)
+iterable = memoize(iterable)
 
 
-@discover.register(Chunks)
+@discover.register(IterableOf)
 def discover_chunks(c, **kwargs):
     return var * discover(first(c)).subshape[0]
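Two details of the core module are worth calling out: iterable is memoized, so iterable(np.ndarray) always returns the identical class object (registrations in one backend therefore match isinstance checks in another), and discover reports a variable-length datashape by inspecting only the first chunk. A usage sketch, assuming a build of into with this patch:

import numpy as np
from into.chunks import iterable, IterableOf

# memoization: repeated calls return the same parametrized class
assert iterable(np.ndarray) is iterable(np.ndarray)

c = iterable(np.ndarray)([np.array([1, 2]), np.array([3, 4])])
assert isinstance(c, IterableOf)
assert len(list(c)) == 2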