Skip to content
This repository has been archived by the owner on Feb 2, 2024. It is now read-only.

Implement Series.std() in new style #222

Merged
merged 5 commits into from
Oct 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions hpat/datatypes/hpat_pandas_series_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,74 @@ def hpat_pandas_series_shape_impl(self):
return hpat_pandas_series_shape_impl


@overload_method(SeriesType, 'std')
def hpat_pandas_series_std(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def hpat_pandas_series_std(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None):
def hpat_pandas_series_std(self, axis=None, skipna=True, level=None, ddof=1, numeric_only=None):

skipna : bool, default True

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

@densmirn densmirn Oct 24, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to the pandas documentation, `skipna=None` is the default for `Series.std`, so I followed that.

"""
Pandas Series method :meth:`pandas.Series.std` implementation.

.. only:: developer
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please insert a blank line here; otherwise the documentation generator will treat this as a continuation of the line above.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.

Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_std
Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_std_unboxing
Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_std_str
Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_std_unsupported_params

Parameters
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please insert a blank line here; otherwise the documentation generator will treat this as a continuation of the line above.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.

----------
self: :obj:`pandas.Series`
input series
axis: :obj:`int`, :obj:`str`
Axis along which the operation acts
0/None/'index' - row-wise operation
1/'columns' - column-wise operation
*unsupported*
skipna: :obj:`bool`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

skipna : bool, default True

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But the default value is None according to the pandas documentation for this method.

exclude NA/null values
level: :obj:`int`, :obj:`str`
If the axis is a MultiIndex (hierarchical),
count along a particular level, collapsing into a scalar
*unsupported*
ddof: :obj:`int`
Delta Degrees of Freedom.
The divisor used in calculations is N - ddof,
where N represents the number of elements.
numeric_only: :obj:`bool`
Include only float, int, boolean columns.
If None, will attempt to use everything, then use only numeric data.
Not implemented for Series.
*unsupported*

Returns
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please insert a blank line here; otherwise the documentation generator will treat this as a continuation of the line above.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.

-------
:obj:`scalar`
returns :obj:`scalar`
"""

_func_name = 'Method std().'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please insert blank line.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.


# Argument validation for the std() overload. Every failed check raises
# TypingError with a message prefixed by _func_name.

# The receiver must be typed as a SeriesType.
if not isinstance(self, SeriesType):
raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self))

# Only numeric element types are accepted (dtype must be a types.Number).
if not isinstance(self.data.dtype, types.Number):
msg = '{} The object must be a number. Given self.data.dtype: {}'
raise TypingError(msg.format(_func_name, self.data.dtype))

# skipna may be omitted, a boolean type, a None type, or the plain Python
# None that is the signature default.
if not isinstance(skipna, (types.Omitted, types.Boolean, types.NoneType)) and skipna is not None:
raise TypingError('{} The object must be a boolean. Given skipna: {}'.format(_func_name, skipna))

# ddof may be omitted, a plain Python int (the signature default is 1),
# or an integer type.
if not isinstance(ddof, (types.Omitted, int, types.Integer)):
raise TypingError('{} The object must be an integer. Given ddof: {}'.format(_func_name, ddof))

# axis, level and numeric_only are unsupported: anything other than an
# omitted argument or None is rejected, naming the offending parameter.
for name, arg in [('axis', axis), ('level', level), ('numeric_only', numeric_only)]:
if not isinstance(arg, (types.Omitted, types.NoneType)) and arg is not None:
raise TypingError('{} Unsupported parameters. Given {}: {}'.format(_func_name, name, arg))

def hpat_pandas_series_std_impl(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def hpat_pandas_series_std_impl(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None):
def hpat_pandas_series_std_impl(self, axis=None, skipna=True, level=None, ddof=1, numeric_only=None):

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pandas documentation says otherwise.

# Standard deviation is computed as the square root of the variance; every
# argument is forwarded unchanged to Series.var().
var = self.var(axis=axis, skipna=skipna, level=level, ddof=ddof, numeric_only=numeric_only)
return var ** 0.5

return hpat_pandas_series_std_impl


@overload_attribute(SeriesType, 'values')
def hpat_pandas_series_iloc(self):
"""
Expand Down
3 changes: 2 additions & 1 deletion hpat/hiframes/pd_series_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -996,7 +996,8 @@ def generic_expand_cumulative_series(self, args, kws):
'resolve_shift', 'resolve_sum', 'resolve_copy', 'resolve_mean',
'resolve_take', 'resolve_max', 'resolve_min', 'resolve_nunique',
'resolve_prod', 'resolve_count', 'resolve_dropna']

# When the HPAT default pipeline is disabled, add 'resolve_std' to
# _not_series_array_attrs as well.
# NOTE(review): presumably this keeps the ArrayAttribute resolver for std from
# being registered on Series so the new overload is used - confirm against the
# loop that consumes this list.
if not hpat.config.config_pipeline_hpat_default:
_not_series_array_attrs.append('resolve_std')

# use ArrayAttribute for attributes not defined in SeriesAttribute
for attr, func in numba.typing.arraydecl.ArrayAttribute.__dict__.items():
Expand Down
59 changes: 59 additions & 0 deletions hpat/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2796,6 +2796,65 @@ def test_impl():
result = hpat_func()
np.testing.assert_array_equal(result, ref_result)

def test_series_std(self):
    """Check that jitted Series.std() matches pandas for a series built inside the function.

    The input contains a NaN, a negative value, zero and the value 5e-324.
    """
    def std_of_inline_series():
        series = pd.Series([1.0, np.nan, -1.0, 0.0, 5e-324])
        return series.std()

    jitted = hpat.jit(std_of_inline_series)

    # Evaluate the pure-Python reference first, then the compiled version.
    expected = std_of_inline_series()
    actual = jitted()

    np.testing.assert_equal(expected, actual)

@unittest.skipIf(hpat.config.config_pipeline_hpat_default,
                 'Series.std() parameters "skipna" and "ddof" unsupported')
def test_series_std_unboxing(self):
    """Check jitted Series.std(skipna=..., ddof=...) against pandas for series passed in as arguments.

    Every dataset in test_global_input_data_numeric, plus an empty list, is
    tried with each combination of ddof in (0, 1) and skipna in (True, False).
    Skipped when hpat.config.config_pipeline_hpat_default is set.
    """
    def std_with_options(series, skipna, ddof):
        return series.std(skipna=skipna, ddof=ddof)

    jitted = hpat.jit(std_with_options)

    # Same iteration order as nested loops: ddof outermost, skipna innermost.
    option_pairs = [(ddof, skipna) for ddof in [0, 1] for skipna in [True, False]]
    datasets = test_global_input_data_numeric + [[]]

    for values in datasets:
        series = pd.Series(values)
        for ddof, skipna in option_pairs:
            expected = std_with_options(series, skipna=skipna, ddof=ddof)
            actual = jitted(series, skipna=skipna, ddof=ddof)
            np.testing.assert_equal(expected, actual)

@unittest.skipIf(hpat.config.config_pipeline_hpat_default,
                 'Series.std() strings as input data unsupported')
def test_series_std_str(self):
    """Check that jitted Series.std() on string data raises TypingError naming the dtype.

    Skipped when hpat.config.config_pipeline_hpat_default is set.
    """
    def std_default(series):
        return series.std()

    template = 'Method std(). The object must be a number. Given self.data.dtype: {}'
    expected_fragment = template.format(types.unicode_type)

    jitted = hpat.jit(std_default)
    text_series = pd.Series(test_global_input_data_unicode_kind4)

    with self.assertRaises(TypingError) as caught:
        jitted(text_series)

    self.assertIn(expected_fragment, str(caught.exception))

@unittest.skipIf(hpat.config.config_pipeline_hpat_default,
                 'Series.std() parameters "axis", "level", "numeric_only" unsupported')
def test_series_std_unsupported_params(self):
    """Check that non-None axis, level or numeric_only makes jitted Series.std() raise TypingError.

    Each parameter is set in turn while the other two stay None, and the
    error text must name the parameter together with the given type name.
    Skipped when hpat.config.config_pipeline_hpat_default is set.
    """
    def std_with_unsupported(series, axis, level, numeric_only):
        return series.std(axis=axis, level=level, numeric_only=numeric_only)

    jitted = hpat.jit(std_with_unsupported)
    series = pd.Series(test_global_input_data_float64[0])
    template = 'Method std(). Unsupported parameters. Given {}: {}'

    # (parameter name, type name expected in the message, keyword arguments)
    scenarios = [
        ('axis', 'int', {'axis': 1, 'level': None, 'numeric_only': None}),
        ('level', 'int', {'axis': None, 'level': 1, 'numeric_only': None}),
        ('numeric_only', 'bool', {'axis': None, 'level': None, 'numeric_only': True}),
    ]

    for param, type_name, kwargs in scenarios:
        with self.assertRaises(TypingError) as caught:
            jitted(series, **kwargs)
        self.assertIn(template.format(param, type_name), str(caught.exception))

def test_series_nunique(self):
def test_series_nunique_impl(S):
return S.nunique()
Expand Down