From 1547b01fc9458ae2377ef69186df629a0ef3a63f Mon Sep 17 00:00:00 2001 From: Denis Date: Mon, 14 Oct 2019 13:41:26 +0300 Subject: [PATCH 1/4] Implement Series.std() in new style --- .../datatypes/hpat_pandas_series_functions.py | 66 ++++++++++++++++++- hpat/hiframes/hiframes_typed.py | 3 +- hpat/hiframes/pd_series_ext.py | 3 +- hpat/tests/test_series.py | 66 +++++++++++++++++++ 4 files changed, 134 insertions(+), 4 deletions(-) diff --git a/hpat/datatypes/hpat_pandas_series_functions.py b/hpat/datatypes/hpat_pandas_series_functions.py index 37fd48060..ebb44f48a 100644 --- a/hpat/datatypes/hpat_pandas_series_functions.py +++ b/hpat/datatypes/hpat_pandas_series_functions.py @@ -28,7 +28,7 @@ | :class:`pandas.Series` functions and operators implementations in HPAT | Also, it contains Numba internal operators which are required for Series type handling """ - +import math import numpy import operator import pandas @@ -165,6 +165,70 @@ def hpat_pandas_series_shape_impl(self): return hpat_pandas_series_shape_impl +@overload_method(SeriesType, 'std') +def hapt_pandas_series_std(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None): + """ + Pandas Series method :meth:`pandas.Series.std` implementation. + .. only:: developer + Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_std + Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_std_unboxing + Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_std_str + Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_std_unsupported_axis + Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_std_unsupported_level + Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_std_unsupported_numeric_only + Parameters + ---------- + self: :obj:`pandas.Series` + input series + axis: :obj:`int`, :obj:`str` + Axis along which the operation acts + 0/None - row-wise operation + 1 - column-wise operation + *unsupported* + skipna: :obj:`bool` + exclude NA/null values + level: :obj:`int`, :obj:`str` + If the axis is a MultiIndex (hierarchical), + count along a particular level, collapsing into a scalar + *unsupported* + ddof: :obj:`int` + Delta Degrees of Freedom. + The divisor used in calculations is N - ddof, + where N represents the number of elements. + numeric_only: :obj:`bool` + Include only float, int, boolean columns. + If None, will attempt to use everything, then use only numeric data. + Not implemented for Series. + *unsupported* + Returns + ------- + :obj:`scalar` + returns :obj:`scalar` + """ + _func_name = 'Method std().' + + if not isinstance(self, SeriesType): + raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self)) + + if not isinstance(self.dtype, types.Number): + raise TypingError('{} The object must be a number. Given self.dtype: {}'.format(_func_name, self.dtype)) + + if not isinstance(skipna, (types.Omitted, types.Boolean, types.NoneType)) and skipna is not None: + raise TypingError('{} The object must be a boolean. Given skipna: {}'.format(_func_name, skipna)) + + if not isinstance(ddof, (types.Omitted, int, types.Integer)): + raise TypingError('{} The object must be an integer. Given ddof: {}'.format(_func_name, ddof)) + + for name, arg in [('axis', axis), ('level', level), ('numeric_only', numeric_only)]: + if not isinstance(arg, (types.Omitted, types.NoneType)) and arg is not None: + raise TypingError('{} Unsupported parameters. Given {}: {}'.format(_func_name, name, arg)) + + def hpat_pandas_series_std_impl(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None): + return math.sqrt(self.var(axis=axis, skipna=skipna, level=level, ddof=ddof, numeric_only=numeric_only)) + + return hpat_pandas_series_std_impl + + @overload_attribute(SeriesType, 'values') def hpat_pandas_series_iloc(self): """ diff --git a/hpat/hiframes/hiframes_typed.py b/hpat/hiframes/hiframes_typed.py index db3284ae9..9afaff755 100644 --- a/hpat/hiframes/hiframes_typed.py +++ b/hpat/hiframes/hiframes_typed.py @@ -857,8 +857,7 @@ def _run_call_series(self, assign, lhs, rhs, series_var, func_name): data = self._get_series_data(series_var, nodes) return self._replace_func(func, [data], pre_nodes=nodes) - if func_name in ('std', 'nunique', 'describe', - 'isnull', 'median', 'unique'): + if func_name in ('nunique', 'describe', 'isnull', 'median', 'unique'): if rhs.args or rhs.kws: raise ValueError("unsupported Series.{}() arguments".format( func_name)) diff --git a/hpat/hiframes/pd_series_ext.py b/hpat/hiframes/pd_series_ext.py index d36986836..8032c1627 100644 --- a/hpat/hiframes/pd_series_ext.py +++ b/hpat/hiframes/pd_series_ext.py @@ -992,7 +992,8 @@ def generic_expand_cumulative_series(self, args, kws): # TODO: add itemsize, strides, etc. when removed from Pandas _not_series_array_attrs = ['flat', 'ctypes', 'itemset', 'reshape', 'sort', 'flatten', - 'resolve_shift', 'resolve_sum', 'resolve_copy', 'resolve_mean', + 'resolve_shift', 'resolve_std', + 'resolve_sum', 'resolve_copy', 'resolve_mean', 'resolve_take', 'resolve_max', 'resolve_min', 'resolve_nunique', 'resolve_prod', 'resolve_count'] diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index 2a27408ea..0533b9470 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -2581,6 +2581,72 @@ def test_impl(): result = hpat_func() np.testing.assert_array_equal(result, ref_result) + def test_series_std(self): + def pyfunc(): + series = pd.Series([1.3, -2.7, np.nan, 0.1, 10.9]) + return series.std() + + cfunc = hpat.jit(pyfunc) + ref_result = pyfunc() + result = cfunc() + np.testing.assert_equal(ref_result, result) + + def test_series_std_unboxing(self): + def pyfunc(series, skipna, ddof): + return series.std(skipna=skipna, ddof=ddof) + + cfunc = hpat.jit(pyfunc) + series = pd.Series([1.3, -2.7, np.nan, 0.1, 10.9]) + for ddof in [0, 1]: + for skipna in [True, False]: + ref_result = pyfunc(series, skipna=skipna, ddof=ddof) + result = cfunc(series, skipna=skipna, ddof=ddof) + np.testing.assert_equal(ref_result, result) + + def test_series_std_str(self): + def pyfunc(series): + return series.std() + + cfunc = hpat.jit(pyfunc) + series = pd.Series(['test', 'series', 'std', 'str']) + with self.assertRaises(TypingError) as raises: + cfunc(series) + msg = 'Method std(). The object must be a number. Given self.dtype: {}' + self.assertIn(msg.format(types.unicode_type), str(raises.exception)) + + def test_series_std_unsupported_axis(self): + def pyfunc(series, axis): + return series.std(axis=axis) + + cfunc = hpat.jit(pyfunc) + series = pd.Series([1.3, -2.7, np.nan, 0.1, 10.9]) + with self.assertRaises(TypingError) as raises: + cfunc(series, axis=1) + msg = 'Method std(). Unsupported parameters. Given axis: int' + self.assertIn(msg, str(raises.exception)) + + def test_series_std_unsupported_level(self): + def pyfunc(series, level): + return series.std(level=level) + + cfunc = hpat.jit(pyfunc) + series = pd.Series([1.3, -2.7, np.nan, 0.1, 10.9]) + with self.assertRaises(TypingError) as raises: + cfunc(series, level=1) + msg = 'Method std(). Unsupported parameters. Given level: int' + self.assertIn(msg, str(raises.exception)) + + def test_series_std_unsupported_numeric_only(self): + def pyfunc(series, numeric_only): + return series.std(numeric_only=numeric_only) + + cfunc = hpat.jit(pyfunc) + series = pd.Series([1.3, -2.7, np.nan, 0.1, 10.9]) + with self.assertRaises(TypingError) as raises: + cfunc(series, numeric_only=True) + msg = 'Method std(). Unsupported parameters. Given numeric_only: bool' + self.assertIn(msg, str(raises.exception)) + def test_series_nunique(self): def test_series_nunique_impl(S): return S.nunique() From d6abd53a78aacfa08a5e1bfb6bed66501694576f Mon Sep 17 00:00:00 2001 From: Denis Date: Tue, 15 Oct 2019 09:04:02 +0300 Subject: [PATCH 2/4] Combine 3 tests to check unsupported params --- .../datatypes/hpat_pandas_series_functions.py | 24 ++++---- hpat/tests/test_series.py | 55 +++++++------------ 2 files changed, 35 insertions(+), 44 deletions(-) diff --git a/hpat/datatypes/hpat_pandas_series_functions.py b/hpat/datatypes/hpat_pandas_series_functions.py index ebb44f48a..c37d054cd 100644 --- a/hpat/datatypes/hpat_pandas_series_functions.py +++ b/hpat/datatypes/hpat_pandas_series_functions.py @@ -28,7 +28,7 @@ | :class:`pandas.Series` functions and operators implementations in HPAT | Also, it contains Numba internal operators which are required for Series type handling """ -import math + import numpy import operator import pandas @@ -166,24 +166,24 @@ def hpat_pandas_series_shape_impl(self): @overload_method(SeriesType, 'std') -def hapt_pandas_series_std(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None): +def hpat_pandas_series_std(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None): """ Pandas Series method :meth:`pandas.Series.std` implementation. + .. only:: developer Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_std Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_std_unboxing Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_std_str - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_std_unsupported_axis - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_std_unsupported_level - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_std_unsupported_numeric_only + Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_std_unsupported_params + Parameters ---------- self: :obj:`pandas.Series` input series axis: :obj:`int`, :obj:`str` Axis along which the operation acts - 0/None - row-wise operation - 1 - column-wise operation + 0/None/'index' - row-wise operation + 1/'columns' - column-wise operation *unsupported* skipna: :obj:`bool` exclude NA/null values @@ -200,18 +200,21 @@ def hapt_pandas_series_std(self, axis=None, skipna=None, level=None, ddof=1, num If None, will attempt to use everything, then use only numeric data. Not implemented for Series. *unsupported* + Returns ------- :obj:`scalar` returns :obj:`scalar` """ + _func_name = 'Method std().' if not isinstance(self, SeriesType): raise TypingError('{} The object must be a pandas.series. Given: {}'.format(_func_name, self)) - if not isinstance(self.dtype, types.Number): - raise TypingError('{} The object must be a number. Given self.dtype: {}'.format(_func_name, self.dtype)) + if not isinstance(self.data.dtype, types.Number): + msg = '{} The object must be a number. Given self.data.dtype: {}' + raise TypingError(msg.format(_func_name, self.data.dtype)) if not isinstance(skipna, (types.Omitted, types.Boolean, types.NoneType)) and skipna is not None: raise TypingError('{} The object must be a boolean. Given skipna: {}'.format(_func_name, skipna)) @@ -224,7 +227,8 @@ def hapt_pandas_series_std(self, axis=None, skipna=None, level=None, ddof=1, num raise TypingError('{} Unsupported parameters. Given {}: {}'.format(_func_name, name, arg)) def hpat_pandas_series_std_impl(self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None): - return math.sqrt(self.var(axis=axis, skipna=skipna, level=level, ddof=ddof, numeric_only=numeric_only)) + var = self.var(axis=axis, skipna=skipna, level=level, ddof=ddof, numeric_only=numeric_only) + return var ** 0.5 return hpat_pandas_series_std_impl diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index 0533b9470..c2f4340aa 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -2583,7 +2583,7 @@ def test_impl(): def test_series_std(self): def pyfunc(): - series = pd.Series([1.3, -2.7, np.nan, 0.1, 10.9]) + series = pd.Series([1.0, np.nan, -1.0, 0.0, 5e-324]) return series.std() cfunc = hpat.jit(pyfunc) @@ -2596,56 +2596,43 @@ def pyfunc(series, skipna, ddof): return series.std(skipna=skipna, ddof=ddof) cfunc = hpat.jit(pyfunc) - series = pd.Series([1.3, -2.7, np.nan, 0.1, 10.9]) - for ddof in [0, 1]: - for skipna in [True, False]: - ref_result = pyfunc(series, skipna=skipna, ddof=ddof) - result = cfunc(series, skipna=skipna, ddof=ddof) - np.testing.assert_equal(ref_result, result) + for data in test_global_input_data_float64: + series = pd.Series(data) + for ddof in [0, 1]: + for skipna in [True, False]: + ref_result = pyfunc(series, skipna=skipna, ddof=ddof) + result = cfunc(series, skipna=skipna, ddof=ddof) + np.testing.assert_equal(ref_result, result) def test_series_std_str(self): def pyfunc(series): return series.std() cfunc = hpat.jit(pyfunc) - series = pd.Series(['test', 'series', 'std', 'str']) + series = pd.Series(test_global_input_data_unicode_kind4) with self.assertRaises(TypingError) as raises: cfunc(series) - msg = 'Method std(). The object must be a number. Given self.dtype: {}' + msg = 'Method std(). The object must be a number. Given self.data.dtype: {}' self.assertIn(msg.format(types.unicode_type), str(raises.exception)) - def test_series_std_unsupported_axis(self): - def pyfunc(series, axis): - return series.std(axis=axis) + def test_series_std_unsupported_params(self): + def pyfunc(series, axis, level, numeric_only): + return series.std(axis=axis, level=level, numeric_only=numeric_only) cfunc = hpat.jit(pyfunc) - series = pd.Series([1.3, -2.7, np.nan, 0.1, 10.9]) + series = pd.Series(test_global_input_data_float64[0]) + msg = 'Method std(). Unsupported parameters. Given {}: {}' with self.assertRaises(TypingError) as raises: - cfunc(series, axis=1) - msg = 'Method std(). Unsupported parameters. Given axis: int' - self.assertIn(msg, str(raises.exception)) + cfunc(series, axis=1, level=None, numeric_only=None) + self.assertIn(msg.format('axis', 'int'), str(raises.exception)) - def test_series_std_unsupported_level(self): - def pyfunc(series, level): - return series.std(level=level) - - cfunc = hpat.jit(pyfunc) - series = pd.Series([1.3, -2.7, np.nan, 0.1, 10.9]) with self.assertRaises(TypingError) as raises: - cfunc(series, level=1) - msg = 'Method std(). Unsupported parameters. Given level: int' - self.assertIn(msg, str(raises.exception)) + cfunc(series, axis=None, level=1, numeric_only=None) + self.assertIn(msg.format('level', 'int'), str(raises.exception)) - def test_series_std_unsupported_numeric_only(self): - def pyfunc(series, numeric_only): - return series.std(numeric_only=numeric_only) - - cfunc = hpat.jit(pyfunc) - series = pd.Series([1.3, -2.7, np.nan, 0.1, 10.9]) with self.assertRaises(TypingError) as raises: - cfunc(series, numeric_only=True) - msg = 'Method std(). Unsupported parameters. Given numeric_only: bool' - self.assertIn(msg, str(raises.exception)) + cfunc(series, axis=None, level=None, numeric_only=True) + self.assertIn(msg.format('numeric_only', 'bool'), str(raises.exception)) def test_series_nunique(self): def test_series_nunique_impl(S): From 52db8cc3df73fa15a2ff3058af9f50d87357a8fa Mon Sep 17 00:00:00 2001 From: Denis Date: Thu, 24 Oct 2019 10:31:06 +0300 Subject: [PATCH 3/4] Change input data for series.std() in tests --- hpat/tests/test_series.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index c2f4340aa..ec20f588e 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -36,18 +36,22 @@ ), ]] +min_float64 = np.finfo('float64').min +max_float64 = np.finfo('float64').max + test_global_input_data_float64 = [ - [1.0, np.nan, -1.0, 0.0, 5e-324], + [1., np.nan, -1., 0., min_float64, max_float64], [np.nan, np.inf, np.NINF, np.NZERO] ] -min_int64 = -9223372036854775808 -max_int64 = 9223372036854775807 -max_uint64 = 18446744073709551615 +min_int64 = np.iinfo('int64').min +max_int64 = np.iinfo('int64').max +max_uint64 = np.iinfo('uint64').max test_global_input_data_integer64 = [ - [1, -1, 0, max_uint64], - [-0, min_int64, max_int64] + [1, -1, 0], + [min_int64, max_int64], + [max_uint64] ] test_global_input_data_numeric = test_global_input_data_integer64 + test_global_input_data_float64 @@ -2596,7 +2600,7 @@ def pyfunc(series, skipna, ddof): return series.std(skipna=skipna, ddof=ddof) cfunc = hpat.jit(pyfunc) - for data in test_global_input_data_float64: + for data in test_global_input_data_numeric + [[]]: series = pd.Series(data) for ddof in [0, 1]: for skipna in [True, False]: From 8fc4b27612e9782b20f9e070ba71bb08f704df8a Mon Sep 17 00:00:00 2001 From: Denis Date: Thu, 24 Oct 2019 16:41:46 +0300 Subject: [PATCH 4/4] Revert multiprocessing parallelism for series.std() --- hpat/hiframes/hiframes_typed.py | 3 ++- hpat/hiframes/pd_series_ext.py | 5 +++-- hpat/tests/test_series.py | 6 ++++++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/hpat/hiframes/hiframes_typed.py b/hpat/hiframes/hiframes_typed.py index 9afaff755..db3284ae9 100644 --- a/hpat/hiframes/hiframes_typed.py +++ b/hpat/hiframes/hiframes_typed.py @@ -857,7 +857,8 @@ def _run_call_series(self, assign, lhs, rhs, series_var, func_name): data = self._get_series_data(series_var, nodes) return self._replace_func(func, [data], pre_nodes=nodes) - if func_name in ('nunique', 'describe', 'isnull', 'median', 'unique'): + if func_name in ('std', 'nunique', 'describe', + 'isnull', 'median', 'unique'): if rhs.args or rhs.kws: raise ValueError("unsupported Series.{}() arguments".format( func_name)) diff --git a/hpat/hiframes/pd_series_ext.py b/hpat/hiframes/pd_series_ext.py index 8032c1627..14d6cffc8 100644 --- a/hpat/hiframes/pd_series_ext.py +++ b/hpat/hiframes/pd_series_ext.py @@ -992,10 +992,11 @@ def generic_expand_cumulative_series(self, args, kws): # TODO: add itemsize, strides, etc. when removed from Pandas _not_series_array_attrs = ['flat', 'ctypes', 'itemset', 'reshape', 'sort', 'flatten', - 'resolve_shift', 'resolve_std', - 'resolve_sum', 'resolve_copy', 'resolve_mean', + 'resolve_shift', 'resolve_sum', 'resolve_copy', 'resolve_mean', 'resolve_take', 'resolve_max', 'resolve_min', 'resolve_nunique', 'resolve_prod', 'resolve_count'] +if not hpat.config.config_pipeline_hpat_default: + _not_series_array_attrs.append('resolve_std') # use ArrayAttribute for attributes not defined in SeriesAttribute for attr, func in numba.typing.arraydecl.ArrayAttribute.__dict__.items(): diff --git a/hpat/tests/test_series.py b/hpat/tests/test_series.py index ec20f588e..bff33239b 100644 --- a/hpat/tests/test_series.py +++ b/hpat/tests/test_series.py @@ -2595,6 +2595,8 @@ def pyfunc(): result = cfunc() np.testing.assert_equal(ref_result, result) + @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + 'Series.std() parameters "skipna" and "ddof" unsupported') def test_series_std_unboxing(self): def pyfunc(series, skipna, ddof): return series.std(skipna=skipna, ddof=ddof) @@ -2608,6 +2610,8 @@ def pyfunc(series, skipna, ddof): result = cfunc(series, skipna=skipna, ddof=ddof) np.testing.assert_equal(ref_result, result) + @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + 'Series.std() strings as input data unsupported') def test_series_std_str(self): def pyfunc(series): return series.std() @@ -2619,6 +2623,8 @@ def pyfunc(series): msg = 'Method std(). The object must be a number. Given self.data.dtype: {}' self.assertIn(msg.format(types.unicode_type), str(raises.exception)) + @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + 'Series.std() parameters "axis", "level", "numeric_only" unsupported') def test_series_std_unsupported_params(self): def pyfunc(series, axis, level, numeric_only): return series.std(axis=axis, level=level, numeric_only=numeric_only)