Skip to content
This repository has been archived by the owner on Feb 2, 2024. It is now read-only.

Commit

Permalink
Merge branch 'master' into feature/series_std
Browse files Browse the repository at this point in the history
  • Loading branch information
shssf authored Oct 18, 2019
2 parents 6ddbf0a + 7f4088b commit f762289
Show file tree
Hide file tree
Showing 7 changed files with 185 additions and 22 deletions.
5 changes: 5 additions & 0 deletions buildscripts/hpat-conda-recipe/run_test.bat
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,8 @@ if errorlevel 1 exit 1

mpiexec -localonly -n 3 python -u -m hpat.runtests -v
if errorlevel 1 exit 1

REM Link check for the documentation using Sphinx's built-in link checker
REM sphinx-build -b linkcheck -j1 usersource _build/html
REM if errorlevel 1 exit 1

3 changes: 3 additions & 0 deletions buildscripts/hpat-conda-recipe/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ export PYTHONFAULTHANDLER=1

python -m hpat.tests.gen_test_data

# Link check for the documentation using Sphinx's built-in link checker
#sphinx-build -b linkcheck -j1 usersource _build/html

# TODO investigate root cause of NumbaPerformanceWarning
# http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics
if [ -z "$HPAT_NUM_PES" ]; then
Expand Down
94 changes: 94 additions & 0 deletions hpat/datatypes/hpat_pandas_series_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,100 @@ def hpat_pandas_series_index_impl(self):
return hpat_pandas_series_index_impl


@overload_attribute(SeriesType, 'size')
def hpat_pandas_series_size(self):
    """
    Pandas Series attribute :attr:`pandas.Series.size` implementation

    .. only:: developer

        Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_size

    Parameters
    ----------
    self: :obj:`pandas.Series`
        input series

    Returns
    -------
    :obj:`int`
        Number of elements in the underlying data (``len(self._data)``)

    Raises
    ------
    TypingError
        If the object being typed is not a ``SeriesType``
    """

    def hpat_pandas_series_size_impl(self):
        return len(self._data)

    # Happy path first: hand the implementation back for Series types.
    if isinstance(self, SeriesType):
        return hpat_pandas_series_size_impl

    attr_label = 'Attribute size.'
    message = '{} The object must be a pandas.series. Given: {}'
    raise TypingError(message.format(attr_label, self))


@overload_attribute(SeriesType, 'ndim')
def hpat_pandas_series_ndim(self):
    """
    Pandas Series attribute :attr:`pandas.Series.ndim` implementation

    .. only:: developer

        Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_getattr_ndim

    Parameters
    ----------
    self: :obj:`pandas.Series`
        input series

    Returns
    -------
    :obj:`int`
        Number of dimensions of the underlying data; always the constant 1

    Raises
    ------
    TypingError
        If the object being typed is not a ``SeriesType``
    """

    def hpat_pandas_series_ndim_impl(self):
        return 1

    # Happy path first: hand the implementation back for Series types.
    if isinstance(self, SeriesType):
        return hpat_pandas_series_ndim_impl

    attr_label = 'Attribute ndim.'
    message = '{} The object must be a pandas.series. Given: {}'
    raise TypingError(message.format(attr_label, self))


@overload_attribute(SeriesType, 'T')
def hpat_pandas_series_T(self):
    """
    Pandas Series attribute :attr:`pandas.Series.T` implementation

    .. only:: developer

        Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_getattr_T

    Parameters
    ----------
    self: :obj:`pandas.Series`
        input series

    Returns
    -------
    :obj:`numpy.ndarray`
        The underlying data of the series (``self._data``)

    Raises
    ------
    TypingError
        If the object being typed is not a ``SeriesType``

    Notes
    -----
    NOTE(review): pandas documents ``Series.T`` as returning the Series itself,
    whereas this implementation returns the underlying data - confirm that the
    difference is intended.
    """

    def hpat_pandas_series_T_impl(self):
        return self._data

    # Happy path first: hand the implementation back for Series types.
    if isinstance(self, SeriesType):
        return hpat_pandas_series_T_impl

    attr_label = 'Attribute T.'
    message = '{} The object must be a pandas.series. Given: {}'
    raise TypingError(message.format(attr_label, self))



@overload(len)
def hpat_pandas_series_len(self):
"""
Expand Down
19 changes: 17 additions & 2 deletions hpat/hiframes/boxing.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,12 +154,21 @@ def _infer_series_list_dtype(S):


def _infer_index_type(index):
# TODO: support proper inference
'''
Convert the type of the input index into a type known to Numba.

Returns an instance of a Numba type class: ``types.none``,
``string_array_type`` or a one-dimensional, C-layout ``types.Array``.
'''

# A missing index (None / NoneType), a RangeIndex, a DatetimeIndex and an
# empty index are all typed as "no index".
if isinstance(index, (types.NoneType, pd.RangeIndex, pd.DatetimeIndex)) or index is None or len(index) == 0:
return types.none

# Object dtype: look at the first element to detect an index of strings.
if index.dtype == np.dtype('O') and len(index) > 0:
first_val = index[0]
if isinstance(first_val, str):
return string_array_type
# NOTE(review): indentation is not visible in this view, so it is unclear
# which branch this return belongs to (or whether it is a removed diff line).
# If it sits at function level, the two statements below are unreachable - confirm.
return types.none

# Any other dtype is translated with numpy_support.from_dtype and wrapped
# in a 1-D, 'C'-layout Numba array type.
numba_index_type = numpy_support.from_dtype(index.dtype)
return types.Array(numba_index_type, 1, 'C')


@box(DataFrameType)
Expand Down Expand Up @@ -275,6 +284,12 @@ def unbox_series(typ, val, c):
if typ.index == string_array_type:
index_obj = c.pyapi.object_getattr_string(val, "index")
series.index = unbox_str_series(string_array_type, index_obj, c).value

if isinstance(typ.index, types.Array):
index_obj = c.pyapi.object_getattr_string(val, "index")
index_data = c.pyapi.object_getattr_string(index_obj, "_data")
series.index = unbox_array(typ.index, index_data, c).value

if typ.is_named:
name_obj = c.pyapi.object_getattr_string(val, "name")
series.name = numba.unicode.unbox_unicode_str(
Expand Down
11 changes: 1 addition & 10 deletions hpat/hiframes/hiframes_typed.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,23 +356,14 @@ def _run_getattr(self, assign, rhs):
if rhs.attr in hpat.hiframes.pd_timestamp_ext.timedelta_fields:
return self._run_Timedelta_field(assign, assign.target, rhs)

if isinstance(rhs_type, SeriesType) and rhs.attr in ('size', 'shape'):
if isinstance(rhs_type, SeriesType) and rhs.attr == 'size':
# simply return the column
nodes = []
var = self._get_series_data(rhs.value, nodes)
rhs.value = var
nodes.append(assign)
return nodes

# TODO: test ndim and T
if isinstance(rhs_type, SeriesType) and rhs.attr == 'ndim':
rhs.value = ir.Const(1, rhs.loc)
return [assign]

if isinstance(rhs_type, SeriesType) and rhs.attr == 'T':
rhs = rhs.value
return [assign]

return [assign]

def _run_binop(self, assign, rhs):
Expand Down
6 changes: 5 additions & 1 deletion hpat/hiframes/pd_series_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,10 @@ class SeriesAttribute(AttributeTemplate):
def resolve_values(self, ary):
    # Typing of the `values` attribute: delegate to series_to_array_type,
    # passing True as its second positional argument.
    values_type = series_to_array_type(ary, True)
    return values_type

# PR135. This needs to be commented out
def resolve_T(self, ary):
    # Typing of the `T` attribute: same delegation as resolve_values, i.e.
    # series_to_array_type with True as its second positional argument.
    transposed_type = series_to_array_type(ary, True)
    return transposed_type

# PR135. This needs to be commented out
# def resolve_shape(self, ary):
# return types.Tuple((types.int64,))
Expand Down Expand Up @@ -448,7 +452,7 @@ def resolve_astype(self, ary, args, kws):
dtype, = args
if ((isinstance(dtype, types.Function) and dtype.typing_key == str)
or (isinstance(dtype, types.StringLiteral) and dtype.literal_value == 'str')):
ret_type = SeriesType(string_type)
ret_type = SeriesType(string_type, index=ary.index)
sig = signature(ret_type, *args)
else:
resolver = ArrayAttribute.resolve_astype.__wrapped__
Expand Down
69 changes: 60 additions & 9 deletions hpat/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,14 +213,22 @@ def test_impl(A):
S = pd.Series([3, 5, 6], ['a', 'b', 'c'], name='A')
pd.testing.assert_series_equal(hpat_func(S), test_impl(S))

def test_series_attr1(self):
def test_impl(A):
return A.size
def test_series_size(self):
# Verifies Series.size under hpat.jit, both against explicit expected
# values and against the result of the un-jitted function.
def test_impl(S):
return S.size
hpat_func = hpat.jit(test_impl)

n = 11
# NOTE(review): this view shows diff lines without +/- markers; the
# DataFrame-column check below may be a pre-change line - confirm.
df = pd.DataFrame({'A': np.arange(n)})
self.assertEqual(hpat_func(df.A), test_impl(df.A))
# Cases: Series built with no data, from an empty list, from an integer
# range of length n, from floats containing NaN, and from strings.
for S, expected in [
(pd.Series(), 0),
(pd.Series([]), 0),
(pd.Series(np.arange(n)), n),
(pd.Series([np.nan, 1, 2]), 3),
(pd.Series(['1', '2', '3']), 3),
]:
# subTest reports each failing case separately instead of stopping at the first.
with self.subTest(S=S, expected=expected):
self.assertEqual(hpat_func(S), expected)
self.assertEqual(hpat_func(S), test_impl(S))

def test_series_attr2(self):
def test_impl(A):
Expand Down Expand Up @@ -276,6 +284,26 @@ def test_impl(A):
df = pd.DataFrame({'A': np.arange(n)})
np.testing.assert_array_equal(hpat_func(df.A), test_impl(df.A))

def test_series_getattr_ndim(self):
    '''Checks that Series.ndim read inside an hpat.jit function equals the plain pandas value'''
    def test_impl(S):
        return S.ndim

    length = 11
    series = pd.Series(np.arange(length))
    jitted = hpat.jit(test_impl)

    # Jitted result first, reference second - same call order as a direct comparison.
    actual = jitted(series)
    reference = test_impl(series)
    self.assertEqual(actual, reference)

def test_series_getattr_T(self):
    '''Checks that Series.T read inside an hpat.jit function is element-wise equal to the pandas value'''
    def test_impl(S):
        return S.T

    length = 11
    series = pd.Series(np.arange(length))
    jitted = hpat.jit(test_impl)

    # Compared as arrays, so only the element values are checked.
    actual = jitted(series)
    reference = test_impl(series)
    np.testing.assert_array_equal(actual, reference)

def test_series_copy_str1(self):
def test_impl(A):
return A.copy()
Expand Down Expand Up @@ -330,6 +358,32 @@ def test_impl(S):
S = pd.Series(['aa', 'bb', 'cc'])
pd.testing.assert_series_equal(hpat_func(S), test_impl(S))

def test_series_astype_str_to_str_index_str(self):
    '''Checks Series.astype(str) on a string series that has a string index:
       the jitted result must equal the pandas result as a Series
    '''

    def test_impl(S):
        return S.astype(str)

    series = pd.Series(['aa', 'bb', 'cc'], index=['d', 'e', 'f'])
    jitted = hpat.jit(test_impl)

    actual = jitted(series)
    reference = test_impl(series)
    pd.testing.assert_series_equal(actual, reference)

def test_series_astype_str_to_str_index_int(self):
    '''Checks Series.astype(str) on a string series that has an integer index:
       the jitted result must equal the pandas result as a Series
    '''

    def test_impl(S):
        return S.astype(str)

    series = pd.Series(['aa', 'bb', 'cc'], index=[1, 2, 3])
    jitted = hpat.jit(test_impl)

    actual = jitted(series)
    reference = test_impl(series)
    pd.testing.assert_series_equal(actual, reference)

@unittest.skip('TODO: requires str(datetime64) support in Numba')
def test_series_astype_dt_to_str1(self):
'''Verifies Series.astype implementation with function 'str' as argument
Expand All @@ -354,9 +408,7 @@ def test_series_astype_float_to_str1(self):
converts float series to series of strings
'''
def test_impl(A):
res = A.astype(str)
print(res)
return res
return A.astype(str)
hpat_func = hpat.jit(test_impl)

n = 11.0
Expand Down Expand Up @@ -1884,7 +1936,6 @@ def pyfunc():
result = cfunc()
pd.testing.assert_series_equal(ref_result, result)

@unittest.skip('Unboxing of integer Series.index as pd.Index is not implemented yet')
def test_series_take_index_int_unboxing(self):
def pyfunc(series, indices):
return series.take(indices)
Expand Down

0 comments on commit f762289

Please sign in to comment.