Skip to content

Commit

Permalink
ENH: Fix some warning caused by deprecation (#804)
Browse files Browse the repository at this point in the history
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
  • Loading branch information
hucorz and mergify[bot] authored Sep 18, 2024
1 parent 9984250 commit c765e57
Show file tree
Hide file tree
Showing 13 changed files with 56 additions and 58 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/asv.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ jobs:
if: ${{ steps.build.outcome == 'success' }}

- name: Publish benchmarks artifact
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
name: Benchmarks log
path: benchmarks/asv_bench/results
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/python.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,10 @@ jobs:
- { os: ubuntu-20.04, module: hadoop, python-version: 3.9 }
- { os: ubuntu-latest, module: vineyard, python-version: 3.9 }
- { os: ubuntu-latest, module: external-storage, python-version: 3.9 }
- { os: ubuntu-latest, module: compatibility, python-version: 3.9 }
# always test compatibility with the latest version
# - { os: ubuntu-latest, module: compatibility, python-version: 3.9 }
- { os: ubuntu-latest, module: doc-build, python-version: 3.9 }
- { os: [self-hosted, gpu], module: gpu, python-version: 3.11}
- { os: self-hosted, module: gpu, python-version: 3.11}
- { os: ubuntu-latest, module: jax, python-version: 3.9 }
# a self-hosted runner which needs computing resources, activate when necessary
# - { os: juicefs-ci, module: kubernetes-juicefs, python-version: 3.9 }
Expand Down
2 changes: 1 addition & 1 deletion python/xorbits/_mars/_utils.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ cdef list tokenize_pandas_dataframe(ob):


cdef list tokenize_pandas_categorical(ob):
l = ob.to_list()
l = ob.tolist()
l.append(ob.shape)
return iterative_tokenize(l)

Expand Down
11 changes: 3 additions & 8 deletions python/xorbits/_mars/dataframe/base/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,7 @@
from typing import Iterable

import pandas as pd
from pandas.api.types import (
is_datetime64_dtype,
is_datetime64tz_dtype,
is_period_dtype,
is_timedelta64_dtype,
)
from pandas.api.types import is_datetime64_dtype, is_timedelta64_dtype

from ...utils import adapt_mars_docstring
from .datetimes import SeriesDatetimeMethod, _datetime_method_to_handlers
Expand Down Expand Up @@ -238,9 +233,9 @@ class DatetimeAccessor:
def __init__(self, series):
if (
not is_datetime64_dtype(series.dtype)
and not is_datetime64tz_dtype(series.dtype)
and not isinstance(series.dtype, pd.DatetimeTZDtype)
and not is_timedelta64_dtype(series.dtype)
and not is_period_dtype(series.dtype)
and not isinstance(series.dtype, pd.PeriodDtype)
):
raise AttributeError("Can only use .dt accessor with datetimelike values")
self._series = series
Expand Down
7 changes: 4 additions & 3 deletions python/xorbits/_mars/dataframe/base/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,10 @@ def execute(cls, ctx, op):
**op.kwds,
)
else:
result = input_data.apply(
func, convert_dtype=op.convert_dtype, args=op.args, **op.kwds
)
if op.convert_dtype:
result = input_data.apply(func, args=op.args, **op.kwds)
else:
result = input_data.apply(func, args=op.args, **op.kwds).astype(object)
ctx[out.key] = result

@classmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ def subtract_custom_value(x, custom_value):
).execute()
assert res.data_params["dtype"] == "object"
pd.testing.assert_series_equal(
res.fetch(), s.apply(apply_func, args=(5,), convert_dtype=False)
res.fetch(), s.apply(apply_func, args=(5,)).astype(object)
)

res = ms.apply(
Expand All @@ -220,9 +220,7 @@ def subtract_custom_value(x, custom_value):
assert res.shape == (4,)
with pytest.raises(AttributeError):
_ = res.dtypes
pd.testing.assert_series_equal(
res.fetch(), s.apply(apply_func, args=(5,), convert_dtype=True)
)
pd.testing.assert_series_equal(res.fetch(), s.apply(apply_func, args=(5,)))


def test_apply_execution_with_multi_chunks(setup):
Expand Down
10 changes: 5 additions & 5 deletions python/xorbits/_mars/dataframe/base/tests/test_base_execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ def test_series_apply_execute(setup):

r = series.apply(lambda x: [x, x + 1], convert_dtype=False)
result = r.execute().fetch()
expected = s_raw.apply(lambda x: [x, x + 1], convert_dtype=False)
expected = s_raw.apply(lambda x: [x, x + 1]).astype(object)
pd.testing.assert_series_equal(result, expected)

s_raw2 = pd.Series([np.array([1, 2, 3]), np.array([4, 5, 6])])
Expand All @@ -502,7 +502,7 @@ def closure(z):

r = series.apply(closure, convert_dtype=False)
result = r.execute().fetch()
expected = s_raw.apply(closure, convert_dtype=False)
expected = s_raw.apply(closure).astype(object)
pd.testing.assert_series_equal(result, expected)

class callable_series:
Expand All @@ -518,7 +518,7 @@ def __call__(self, z):
cs = callable_series()
r = series.apply(cs, convert_dtype=False)
result = r.execute().fetch()
expected = s_raw.apply(cs, convert_dtype=False)
expected = s_raw.apply(cs).astype(object)
pd.testing.assert_series_equal(result, expected)


Expand All @@ -528,9 +528,9 @@ def test_apply_with_arrow_dtype_execution(setup):
df1 = table.to_pandas(types_mapper=pd.ArrowDtype)
df = from_pandas_df(df1)

r = df.apply(lambda row: str(row[0]) + row[1], axis=1)
r = df.apply(lambda row: str(row.iloc[0]) + row.iloc[1], axis=1)
result = r.execute().fetch()
expected = df1.apply(lambda row: str(row[0]) + row[1], axis=1)
expected = df1.apply(lambda row: str(row.iloc[0]) + row.iloc[1], axis=1)
pd.testing.assert_series_equal(result, expected)

s1 = df1["b"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -599,7 +599,7 @@ def test_date_range():
with pytest.raises(ValueError):
_ = date_range(pd.NaT, periods=10)

expected = pd.date_range("2020-1-1", periods=9.0, name="date")
expected = pd.date_range("2020-1-1", periods=9, name="date")

dr = date_range("2020-1-1", periods=9.0, name="date", chunk_size=3)
assert isinstance(dr, DatetimeIndex)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1281,11 +1281,11 @@ def test_date_range_execution(setup):

# start, end and freq
dr = md.date_range(
"2020-1-1", "2020-1-10", freq="12H", chunk_size=chunk_size, **kw
"2020-1-1", "2020-1-10", freq="12h", chunk_size=chunk_size, **kw
)

result = dr.execute().fetch()
expected = pd.date_range("2020-1-1", "2020-1-10", freq="12H", **kw)
expected = pd.date_range("2020-1-1", "2020-1-10", freq="12h", **kw)
pd.testing.assert_index_equal(result, expected)

# test timezone
Expand Down Expand Up @@ -1317,15 +1317,15 @@ def test_date_range_execution(setup):
pd.testing.assert_index_equal(result, expected)

# test freq
dr = md.date_range(start="1/1/2018", periods=5, freq="M", chunk_size=3)
dr = md.date_range(start="1/1/2018", periods=5, freq="ME", chunk_size=3)

result = dr.execute().fetch()
expected = pd.date_range(start="1/1/2018", periods=5, freq="M")
expected = pd.date_range(start="1/1/2018", periods=5, freq="ME")
pd.testing.assert_index_equal(result, expected)

dr = md.date_range(start="2018/01/01", end="2018/07/01", freq="M")
dr = md.date_range(start="2018/01/01", end="2018/07/01", freq="ME")
result = dr.execute().fetch()
expected = pd.date_range(start="2018/01/01", end="2018/07/01", freq="M")
expected = pd.date_range(start="2018/01/01", end="2018/07/01", freq="ME")
pd.testing.assert_index_equal(result, expected)


Expand Down
2 changes: 1 addition & 1 deletion python/xorbits/_mars/dataframe/indexing/index_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -815,7 +815,7 @@ def _create_reorder_chunk(
reorder_indexes[-1]
]
params["columns_value"] = parse_index(reorder_columns, store_data=True)
params["dtypes"] = concat_chunk.dtypes[reorder_indexes[-1]]
params["dtypes"] = concat_chunk.dtypes.iloc[reorder_indexes[-1]]

return reorder_chunk_op.new_chunk([concat_chunk], kws=[params])

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1725,6 +1725,7 @@ def test_sample_execution(setup):

def test_loc_setitem(setup):
raw_df = pd.DataFrame({"a": [1, 2, 3, 4, 2, 4, 5, 7, 2, 8, 9], 1: [10] * 11})
raw_df = raw_df.astype("object")
md_data = md.DataFrame(raw_df, chunk_size=3)
md_data.loc[md_data["a"] <= 4, 1] = "v1"
pd_data = raw_df.copy(True)
Expand Down
50 changes: 26 additions & 24 deletions python/xorbits/_mars/dataframe/missing/fillna.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,18 @@ def _set_inputs(self, inputs):
def output_limit(self):
return self._output_limit or 1

@staticmethod
def _apply_fillna_with_method(df, value, method, axis, limit, inplace=False):
    """
    Fill missing values without using the deprecated ``method`` argument.

    The ``method`` parameter of ``fillna`` is deprecated since pandas 2.1.0,
    so a fill method is dispatched to ``bfill`` / ``ffill`` and only
    value-based filling goes through ``fillna``.

    Parameters
    ----------
    df
        Object to fill; it must provide ``bfill``, ``ffill`` and ``fillna``.
    value
        Fill value. Only used when ``method`` is None.
    method : {"backfill", "bfill", "pad", "ffill", None}
        Fill method. When given, it takes precedence over ``value``.
    axis
        Axis along which to fill.
    limit
        Maximum number of entries to fill; forwarded in every branch.
    inplace : bool, default False
        Whether to modify ``df`` in place.

    Returns
    -------
    The filled object. When the underlying pandas call returns None
    (in-place filling), ``df`` itself is returned so that callers can
    always keep using the result.

    Raises
    ------
    ValueError
        If ``method`` is neither None nor a recognised fill method.
    """
    if method is None:
        # ``limit`` is also meaningful for value-based filling, keep it.
        filled = df.fillna(value=value, axis=axis, limit=limit, inplace=inplace)
    elif method in ("backfill", "bfill"):
        filled = df.bfill(axis=axis, limit=limit, inplace=inplace)
    elif method in ("pad", "ffill"):
        filled = df.ffill(axis=axis, limit=limit, inplace=inplace)
    else:
        # Do not silently ignore a misspelled or unsupported method.
        raise ValueError(
            "Invalid fill method. Expecting pad (ffill) or backfill (bfill). "
            f"Got {method}"
        )
    # In-place pandas calls return None; hand back the mutated input instead.
    return df if filled is None else filled

@staticmethod
def _get_first_slice(op, df, end):
if op.method == "bfill":
Expand All @@ -115,11 +127,7 @@ def _execute_map(cls, ctx, op):
axis = op.axis
method = op.method

filled = input_data.fillna(
method=method,
axis=axis,
limit=limit,
)
filled = cls._apply_fillna_with_method(input_data, None, method, axis, limit)
ctx[op.outputs[0].key] = cls._get_first_slice(op, filled, 1)
del filled

Expand All @@ -137,15 +145,17 @@ def _execute_combine(cls, ctx, op):
summaries = [ctx[inp.key] for inp in op.inputs[1:]]

if not summaries:
ctx[op.outputs[0].key] = input_data.fillna(
method=method,
axis=axis,
limit=limit,
ctx[op.outputs[0].key] = cls._apply_fillna_with_method(
input_data, None, method, axis, limit
)
return

valid_summary = cls._get_first_slice(
op, pd.concat(summaries, axis=axis).fillna(method=method, axis=axis), 1
op,
cls._apply_fillna_with_method(
pd.concat(summaries, axis=axis), None, method, axis, limit
),
1,
)

if method == "bfill":
Expand All @@ -154,17 +164,12 @@ def _execute_combine(cls, ctx, op):
concat_df = pd.concat([valid_summary, input_data], axis=axis)

if is_pandas_2():
concat_df = concat_df.fillna(
method=method,
axis=axis,
limit=limit,
concat_df = cls._apply_fillna_with_method(
concat_df, None, method, axis, limit
)
else:
concat_df.fillna(
method=method,
axis=axis,
inplace=True,
limit=limit,
concat_df = cls._apply_fillna_with_method(
concat_df, None, method, axis, limit, inplace=True
)
ctx[op.outputs[0].key] = cls._get_first_slice(op, concat_df, -1)

Expand All @@ -180,11 +185,8 @@ def execute(cls, ctx, op):
if isinstance(op.value, ENTITY_TYPE):
value = ctx[op.value.key]
if not isinstance(input_data, pd.Index):
ctx[op.outputs[0].key] = input_data.fillna(
value=value,
method=op.method,
axis=op.axis,
limit=op.limit,
ctx[op.outputs[0].key] = cls._apply_fillna_with_method(
input_data, value, op.method, op.axis, op.limit
)
else:
ctx[op.outputs[0].key] = input_data.fillna(value=value)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,11 @@ def test_dataframe_fill_na_execution(setup):

# test forward fill in axis=0 without limit
r = df.fillna(method="pad")
pd.testing.assert_frame_equal(r.execute().fetch(), df_raw.fillna(method="pad"))
pd.testing.assert_frame_equal(r.execute().fetch(), df_raw.ffill())

# test backward fill in axis=0 without limit
r = df.fillna(method="backfill")
pd.testing.assert_frame_equal(r.execute().fetch(), df_raw.fillna(method="backfill"))
pd.testing.assert_frame_equal(r.execute().fetch(), df_raw.bfill())

# test forward fill in axis=1 without limit
r = df.ffill(axis=1)
Expand Down

0 comments on commit c765e57

Please sign in to comment.