Skip to content
This repository has been archived by the owner on Feb 2, 2024. It is now read-only.

Implement Series.mean() in new style #219

Merged
merged 2 commits into from
Oct 20, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions hpat/datatypes/hpat_pandas_series_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1388,6 +1388,67 @@ def hpat_pandas_series_max_impl(self, axis=None, skipna=True, level=None, numeri
return hpat_pandas_series_max_impl


@overload_method(SeriesType, 'mean')
def hpat_pandas_series_mean(self, axis=None, skipna=None, level=None, numeric_only=None):
PokhodenkoSA marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def hpat_pandas_series_mean(self, axis=None, skipna=None, level=None, numeric_only=None):
def hpat_pandas_series_mean(self, axis=None, skipna=True, level=None, numeric_only=None):

"""
Pandas Series method :meth:`pandas.Series.mean` implementation.

.. only:: developer

Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_mean

Parameters
-----------
axis: {index (0)}
Axis for the function to be applied on.
*unsupported*
skipna: :obj:`bool`, default True
Exclude NA/null values when computing the result.
level: :obj:`int` or level name, default None
If the axis is a MultiIndex (hierarchical), count along a particular level, collapsing into a scalar.
*unsupported*
numeric_only: :obj:`bool`, default None
Include only float, int, boolean columns.
If None, will attempt to use everything, then use only numeric data. Not implemented for Series.
*unsupported*

Returns
-------
:obj:
Return the mean of the values for the requested axis.
"""

_func_name = 'Method mean().'

if not isinstance(self, SeriesType):
raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self))

if not isinstance(self.data.dtype, types.Number):
raise TypingError('{} Currently function supports only numeric values. Given data type: {}'.format(_func_name, self.data.dtype))

if not isinstance(skipna, (types.Omitted, types.Boolean)) and skipna is not None:
raise TypingError(
'{} The parameter must be a boolean type. Given type skipna: {}'.format(_func_name, skipna))

if not (isinstance(axis, types.Omitted) or axis is None) \
or not (isinstance(level, types.Omitted) or level is None) \
or not (isinstance(numeric_only, types.Omitted) or numeric_only is None):
raise TypingError(
'{} Unsupported parameters. Given axis: {}, level: {}, numeric_only: {}'.format(_func_name, axis, level,
numeric_only))

def hpat_pandas_series_mean_impl(self, axis=None, skipna=None, level=None, numeric_only=None):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def hpat_pandas_series_mean_impl(self, axis=None, skipna=None, level=None, numeric_only=None):
def hpat_pandas_series_mean_impl(self, axis=None, skipna=True, level=None, numeric_only=None):

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@shssf could you please explain in issue #233, for me and everyone else, why we should replace the Pandas signature skipna=None with our skipna=True?
I really do not understand which criteria I should use to create signatures for our functions: which default values I should replace and which should remain. My first idea was to do the same as Pandas.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@PokhodenkoSA I would refer to the Pandas sources. Please follow the signature defined there. Documentation might have bugs, just like regular code. Anyway, in the algorithm you implemented, the effective default is True despite the fact that it is declared as None.

if skipna is None:
skipna = True

if skipna:
return numpy.nanmean(self._data)

return self._data.mean()

return hpat_pandas_series_mean_impl


@overload_method(SeriesType, 'mod')
def hpat_pandas_series_mod(self, other, level=None, fill_value=None, axis=0):
"""
Expand Down
2 changes: 1 addition & 1 deletion hpat/hiframes/pd_series_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -992,7 +992,7 @@ def generic_expand_cumulative_series(self, args, kws):

# TODO: add itemsize, strides, etc. when removed from Pandas
_not_series_array_attrs = ['flat', 'ctypes', 'itemset', 'reshape', 'sort', 'flatten',
'resolve_shift', 'resolve_sum', 'resolve_copy',
'resolve_shift', 'resolve_sum', 'resolve_copy', 'resolve_mean',
'resolve_take', 'resolve_max', 'resolve_min', 'resolve_nunique']

# use ArrayAttribute for attributes not defined in SeriesAttribute
Expand Down
52 changes: 49 additions & 3 deletions hpat/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1228,13 +1228,59 @@ def test_impl(S):
S = pd.Series(['aa', 'bb', np.nan])
self.assertEqual(hpat_func(S), test_impl(S))

def test_series_mean1(self):
def test_series_mean(self):
def test_impl(S):
return S.mean()
hpat_func = hpat.jit(test_impl)

S = pd.Series([np.nan, 2., 3.])
self.assertEqual(hpat_func(S), test_impl(S))
data_samples = [
[6, 6, 2, 1, 3, 3, 2, 1, 2],
[1.1, 0.3, 2.1, 1, 3, 0.3, 2.1, 1.1, 2.2],
[6, 6.1, 2.2, 1, 3, 3, 2.2, 1, 2],
[6, 6, np.nan, 2, np.nan, 1, 3, 3, np.inf, 2, 1, 2, np.inf],
[1.1, 0.3, np.nan, 1.0, np.inf, 0.3, 2.1, np.nan, 2.2, np.inf],
[1.1, 0.3, np.nan, 1, np.inf, 0, 1.1, np.nan, 2.2, np.inf, 2, 2],
[np.nan, np.nan, np.nan],
[np.nan, np.nan, np.inf],
]

for data in data_samples:
with self.subTest(data=data):
S = pd.Series(data)
actual = hpat_func(S)
expected = test_impl(S)
if np.isnan(actual) or np.isnan(expected):
self.assertEqual(np.isnan(actual), np.isnan(expected))
else:
self.assertEqual(actual, expected)

@unittest.skipIf(hpat.config.config_pipeline_hpat_default, "Series.mean() any parameters unsupported")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If hpat.config.config_pipeline_hpat_default is True, does it have no support for parameters?
This parameter is True by default.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is correct. This test does not work with the HPAT pipeline.

def test_series_mean_skipna(self):
    """Compare jitted Series.mean(skipna=...) against the plain pandas result.

    Each data sample is evaluated with skipna=True and skipna=False, once
    through ``hpat.jit`` and once through the uncompiled function, and the
    two results must agree.
    """
    def test_impl(S, skipna):
        return S.mean(skipna=skipna)
    hpat_func = hpat.jit(test_impl)

    data_samples = [
        [6, 6, 2, 1, 3, 3, 2, 1, 2],
        [1.1, 0.3, 2.1, 1, 3, 0.3, 2.1, 1.1, 2.2],
        [6, 6.1, 2.2, 1, 3, 3, 2.2, 1, 2],
        [6, 6, np.nan, 2, np.nan, 1, 3, 3, np.inf, 2, 1, 2, np.inf],
        [1.1, 0.3, np.nan, 1.0, np.inf, 0.3, 2.1, np.nan, 2.2, np.inf],
        [1.1, 0.3, np.nan, 1, np.inf, 0, 1.1, np.nan, 2.2, np.inf, 2, 2],
        [np.nan, np.nan, np.nan],
        [np.nan, np.nan, np.inf],
    ]

    for skipna in [True, False]:
        for data in data_samples:
            # subTest labels a failure with the offending (skipna, data) pair
            # and lets the remaining combinations still run.
            with self.subTest(skipna=skipna, data=data):
                S = pd.Series(data)
                actual = hpat_func(S, skipna)
                expected = test_impl(S, skipna)
                if np.isnan(actual) or np.isnan(expected):
                    # NaN never compares equal to itself, so compare
                    # "is NaN" flags instead of the values.
                    self.assertEqual(np.isnan(actual), np.isnan(expected))
                else:
                    self.assertEqual(actual, expected)


def test_series_var1(self):
def test_impl(S):
Expand Down