From 6786846764cd9cf9d5041e5bdc86430be38eeb8e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 11 Oct 2023 10:54:23 -0700 Subject: [PATCH 01/19] ENH: EA._get_repr_footer (#55478) --- pandas/core/arrays/base.py | 12 ++++++++++-- pandas/core/arrays/categorical.py | 4 ++-- pandas/io/formats/format.py | 13 +++++++------ 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 31c143ee012bb..e7b7ecba60e0b 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1663,7 +1663,14 @@ def __repr__(self) -> str: self, self._formatter(), indent_for_name=False ).rstrip(", \n") class_name = f"<{type(self).__name__}>\n" - return f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}" + footer = self._get_repr_footer() + return f"{class_name}{data}\n{footer}" + + def _get_repr_footer(self) -> str: + # GH#24278 + if self.ndim > 1: + return f"Shape: {self.shape}, dtype: {self.dtype}" + return f"Length: {len(self)}, dtype: {self.dtype}" def _repr_2d(self) -> str: from pandas.io.formats.printing import format_object_summary @@ -1679,7 +1686,8 @@ def _repr_2d(self) -> str: ] data = ",\n".join(lines) class_name = f"<{type(self).__name__}>" - return f"{class_name}\n[\n{data}\n]\nShape: {self.shape}, dtype: {self.dtype}" + footer = self._get_repr_footer() + return f"{class_name}\n[\n{data}\n]\n{footer}" def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]: """ diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 5059f5d000ccd..e19635af6ab0b 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2177,7 +2177,7 @@ def _repr_categories(self) -> list[str]: category_strs = [x.strip() for x in category_strs] return category_strs - def _repr_categories_info(self) -> str: + def _get_repr_footer(self) -> str: """ Returns a string representation of the footer. """ @@ -2229,7 +2229,7 @@ def __repr__(self) -> str: """ String representation. 
""" - footer = self._repr_categories_info() + footer = self._get_repr_footer() length = len(self) max_len = 10 if length > max_len: diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index cac83e2a48972..322f7f88494de 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -259,11 +259,12 @@ def _get_footer(self) -> str: name = self.series.name footer = "" - if getattr(self.series.index, "freq", None) is not None: - assert isinstance( - self.series.index, (DatetimeIndex, PeriodIndex, TimedeltaIndex) - ) - footer += f"Freq: {self.series.index.freqstr}" + index = self.series.index + if ( + isinstance(index, (DatetimeIndex, PeriodIndex, TimedeltaIndex)) + and index.freq is not None + ): + footer += f"Freq: {index.freqstr}" if self.name is not False and name is not None: if footer: @@ -289,7 +290,7 @@ def _get_footer(self) -> str: # level infos are added to the end and in a new line, like it is done # for Categoricals if isinstance(self.tr_series.dtype, CategoricalDtype): - level_info = self.tr_series._values._repr_categories_info() + level_info = self.tr_series._values._get_repr_footer() if footer: footer += "\n" footer += level_info From 943c3cb38d23e60905891724f878b91483597c83 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 11 Oct 2023 10:54:57 -0700 Subject: [PATCH 02/19] BUG: repr of inf values with use_inf_as_na (#55483) * BUG: repr of inf values with use_inf_as_na * GH ref --- doc/source/whatsnew/v2.2.0.rst | 1 + pandas/_libs/missing.pyi | 1 - pandas/_libs/missing.pyx | 25 ------------------------- pandas/core/indexes/base.py | 13 ++----------- pandas/io/formats/format.py | 22 ++++++++++------------ pandas/tests/frame/test_repr_info.py | 2 +- 6 files changed, 14 insertions(+), 50 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 29a2d5c0b5877..eec82ae26afcc 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -394,6 +394,7 @@ Other ^^^^^ - Bug in :func:`cut` incorrectly allowing cutting of timezone-aware datetimes with timezone-naive bins (:issue:`54964`) - Bug in :meth:`DataFrame.apply` where passing ``raw=True`` ignored ``args`` passed to the applied function (:issue:`55009`) +- Bug in rendering ``inf`` values inside a a :class:`DataFrame` with the ``use_inf_as_na`` option enabled (:issue:`55483`) - Bug in rendering a :class:`Series` with a :class:`MultiIndex` when one of the index level's names is 0 not having that name displayed (:issue:`55415`) - diff --git a/pandas/_libs/missing.pyi b/pandas/_libs/missing.pyi index d5c9f1342a089..282dcee3ed6cf 100644 --- a/pandas/_libs/missing.pyi +++ b/pandas/_libs/missing.pyi @@ -14,4 +14,3 @@ def isneginf_scalar(val: object) -> bool: ... def checknull(val: object, inf_as_na: bool = ...) -> bool: ... def isnaobj(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ... def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ... -def is_float_nan(values: np.ndarray) -> npt.NDArray[np.bool_]: ... 
diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index e3e7d8daa03e1..8ef59b46ca25f 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -255,31 +255,6 @@ cdef bint checknull_with_nat_and_na(object obj): return checknull_with_nat(obj) or obj is C_NA -@cython.wraparound(False) -@cython.boundscheck(False) -def is_float_nan(values: ndarray) -> ndarray: - """ - True for elements which correspond to a float nan - - Returns - ------- - ndarray[bool] - """ - cdef: - ndarray[uint8_t] result - Py_ssize_t i, N - object val - - N = len(values) - result = np.zeros(N, dtype=np.uint8) - - for i in range(N): - val = values[i] - if util.is_nan(val): - result[i] = True - return result.view(bool) - - @cython.wraparound(False) @cython.boundscheck(False) def is_numeric_na(values: ndarray) -> ndarray: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c3cab965041e0..d749235e2cd2c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -37,7 +37,6 @@ is_datetime_array, no_default, ) -from pandas._libs.missing import is_float_nan from pandas._libs.tslibs import ( IncompatibleFrequency, OutOfBoundsDatetime, @@ -1390,16 +1389,8 @@ def _format_with_header(self, *, header: list[str_t], na_rep: str_t) -> list[str if is_object_dtype(values.dtype) or is_string_dtype(values.dtype): values = np.asarray(values) - values = lib.maybe_convert_objects(values, safe=True) - - result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values] - - # could have nans - mask = is_float_nan(values) - if mask.any(): - result_arr = np.array(result) - result_arr[mask] = na_rep - result = result_arr.tolist() + # TODO: why do we need different justify for these cases? + result = trim_front(format_array(values, None, justify="all")) else: result = trim_front(format_array(values, None, justify="left")) return header + result diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 322f7f88494de..bb976b3a0208e 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1216,18 +1216,16 @@ def _format_strings(self) -> list[str]: def _format(x): if self.na_rep is not None and is_scalar(x) and isna(x): - try: - # try block for np.isnat specifically - # determine na_rep if x is None or NaT-like - if x is None: - return "None" - elif x is NA: - return str(NA) - elif x is NaT or np.isnat(x): - return "NaT" - except (TypeError, ValueError): - # np.isnat only handles datetime or timedelta objects - pass + if x is None: + return "None" + elif x is NA: + return str(NA) + elif lib.is_float(x) and np.isinf(x): + # TODO(3.0): this will be unreachable when use_inf_as_na + # deprecation is enforced + return str(x) + elif x is NaT or isinstance(x, (np.datetime64, np.timedelta64)): + return "NaT" return self.na_rep elif isinstance(x, PandasObject): return str(x) diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 0634b8268c04c..63ecdfa5e001b 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -411,7 +411,7 @@ def test_to_records_with_na_record(self): def test_to_records_with_inf_as_na_record(self): # GH 48526 expected = """ NaN inf record -0 NaN b [0, inf, b] +0 inf b [0, inf, b] 1 NaN NaN [1, nan, nan] 2 e f [2, e, f]""" msg = "use_inf_as_na option is deprecated" From c9a98f0f0fe3a1ff2ce549b2f2c7551cc9afc58a Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva <91160475+natmokval@users.noreply.github.com> Date: Wed, 11 Oct 2023 19:55:10 +0200 
Subject: [PATCH 03/19] =?UTF-8?q?DEPR:=20=20=E2=80=98AS=E2=80=99,=20?= =?UTF-8?q?=E2=80=98BA=E2=80=99=20and=20=E2=80=98BAS=E2=80=99=20in=20favou?= =?UTF-8?q?r=20of=20=E2=80=98YS=E2=80=99,=20=E2=80=98BY=E2=80=99=20and=20?= =?UTF-8?q?=E2=80=98BYS=E2=80=99=20(#55479)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * deprecate AS/BA/BAS in favour of YS/BY/BYS, fix tests * correct def get_start_end_field, correct docstrings and v0.20.0.rst * fix pre-commit error * add deprecated frequencies to dict DEPR_ABBREVS, add tests --- doc/source/user_guide/timeseries.rst | 14 +++--- doc/source/whatsnew/v0.20.0.rst | 20 +++++++-- pandas/_libs/tslibs/dtypes.pyx | 44 +++++++++++++++++-- pandas/_libs/tslibs/fields.pyx | 4 +- pandas/_libs/tslibs/offsets.pyx | 21 ++++----- pandas/core/arrays/arrow/array.py | 2 +- pandas/core/indexes/datetimes.py | 4 +- pandas/core/resample.py | 4 +- pandas/core/series.py | 2 +- pandas/tests/arithmetic/test_datetime64.py | 6 +-- .../tests/frame/methods/test_to_timestamp.py | 10 ++--- pandas/tests/groupby/test_grouping.py | 2 +- .../datetimes/methods/test_to_period.py | 2 +- .../indexes/datetimes/test_constructors.py | 6 +-- .../indexes/datetimes/test_date_range.py | 14 +++--- .../tests/indexes/datetimes/test_datetime.py | 38 ++++++++++++++++ pandas/tests/indexes/datetimes/test_misc.py | 2 +- .../period/methods/test_to_timestamp.py | 2 +- pandas/tests/resample/test_datetime_index.py | 4 +- pandas/tests/resample/test_period_index.py | 2 +- .../tseries/frequencies/test_inference.py | 4 +- pandas/tests/tseries/offsets/test_offsets.py | 4 +- pandas/tseries/frequencies.py | 4 +- 23 files changed, 148 insertions(+), 67 deletions(-) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index e1415e2ca23ca..9f3077e266e98 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -896,9 +896,9 @@ into ``freq`` keyword arguments. The available date offsets and associated frequ :class:`~pandas.tseries.offsets.BQuarterBegin`, ``'BQS'``, "business quarter begin" :class:`~pandas.tseries.offsets.FY5253Quarter`, ``'REQ'``, "retail (aka 52-53 week) quarter" :class:`~pandas.tseries.offsets.YearEnd`, ``'Y'``, "calendar year end" - :class:`~pandas.tseries.offsets.YearBegin`, ``'AS'`` or ``'BYS'``,"calendar year begin" - :class:`~pandas.tseries.offsets.BYearEnd`, ``'BA'``, "business year end" - :class:`~pandas.tseries.offsets.BYearBegin`, ``'BAS'``, "business year begin" + :class:`~pandas.tseries.offsets.YearBegin`, ``'YS'`` or ``'BYS'``,"calendar year begin" + :class:`~pandas.tseries.offsets.BYearEnd`, ``'BY'``, "business year end" + :class:`~pandas.tseries.offsets.BYearBegin`, ``'BYS'``, "business year begin" :class:`~pandas.tseries.offsets.FY5253`, ``'RE'``, "retail (aka 52-53 week) year" :class:`~pandas.tseries.offsets.Easter`, None, "Easter holiday" :class:`~pandas.tseries.offsets.BusinessHour`, ``'bh'``, "business hour" @@ -1259,9 +1259,9 @@ frequencies. We will refer to these aliases as *offset aliases*. "QS", "quarter start frequency" "BQS", "business quarter start frequency" "Y", "year end frequency" - "BA, BY", "business year end frequency" - "AS, YS", "year start frequency" - "BAS, BYS", "business year start frequency" + "BY", "business year end frequency" + "YS", "year start frequency" + "BYS", "business year start frequency" "h", "hourly frequency" "bh", "business hour frequency" "cbh", "custom business hour frequency" @@ -1692,7 +1692,7 @@ the end of the interval. .. 
warning:: The default values for ``label`` and ``closed`` is '**left**' for all - frequency offsets except for 'ME', 'Y', 'Q', 'BM', 'BA', 'BQ', and 'W' + frequency offsets except for 'ME', 'Y', 'Q', 'BM', 'BY', 'BQ', and 'W' which all have a default of 'right'. This might unintendedly lead to looking ahead, where the value for a later diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst index ae70eb078f6d9..09bf5428d0432 100644 --- a/doc/source/whatsnew/v0.20.0.rst +++ b/doc/source/whatsnew/v0.20.0.rst @@ -886,11 +886,23 @@ This would happen with a ``lexsorted``, but non-monotonic levels. (:issue:`15622 This is *unchanged* from prior versions, but shown for illustration purposes: -.. ipython:: python +.. code-block:: python - df = pd.DataFrame(np.arange(6), columns=['value'], - index=pd.MultiIndex.from_product([list('BA'), range(3)])) - df + In [81]: df = pd.DataFrame(np.arange(6), columns=['value'], + ....: index=pd.MultiIndex.from_product([list('BA'), range(3)])) + ....: + In [82]: df + + Out[82]: + value + B 0 0 + 1 1 + 2 2 + A 0 3 + 1 4 + 2 5 + + [6 rows x 1 columns] .. code-block:: python diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx index 86f620beeec3b..26181d8f15518 100644 --- a/pandas/_libs/tslibs/dtypes.pyx +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -192,9 +192,6 @@ OFFSET_TO_PERIOD_FREQSTR: dict = { "BQS": "Q", "QS": "Q", "BQ": "Q", - "BA": "Y", - "AS": "Y", - "BAS": "Y", "MS": "M", "D": "D", "B": "B", @@ -205,9 +202,9 @@ OFFSET_TO_PERIOD_FREQSTR: dict = { "ns": "ns", "h": "h", "Q": "Q", - "Y": "Y", "W": "W", "ME": "M", + "Y": "Y", "BY": "Y", "YS": "Y", "BYS": "Y", @@ -244,6 +241,45 @@ DEPR_ABBREVS: dict[str, str]= { "A-SEP": "Y-SEP", "A-OCT": "Y-OCT", "A-NOV": "Y-NOV", + "BA": "BY", + "BA-DEC": "BY-DEC", + "BA-JAN": "BY-JAN", + "BA-FEB": "BY-FEB", + "BA-MAR": "BY-MAR", + "BA-APR": "BY-APR", + "BA-MAY": "BY-MAY", + "BA-JUN": "BY-JUN", + "BA-JUL": "BY-JUL", + "BA-AUG": "BY-AUG", + "BA-SEP": "BY-SEP", + "BA-OCT": "BY-OCT", + "BA-NOV": "BY-NOV", + "AS": "YS", + "AS-DEC": "YS-DEC", + "AS-JAN": "YS-JAN", + "AS-FEB": "YS-FEB", + "AS-MAR": "YS-MAR", + "AS-APR": "YS-APR", + "AS-MAY": "YS-MAY", + "AS-JUN": "YS-JUN", + "AS-JUL": "YS-JUL", + "AS-AUG": "YS-AUG", + "AS-SEP": "YS-SEP", + "AS-OCT": "YS-OCT", + "AS-NOV": "YS-NOV", + "BAS": "BYS", + "BAS-DEC": "BYS-DEC", + "BAS-JAN": "BYS-JAN", + "BAS-FEB": "BYS-FEB", + "BAS-MAR": "BYS-MAR", + "BAS-APR": "BYS-APR", + "BAS-MAY": "BYS-MAY", + "BAS-JUN": "BYS-JUN", + "BAS-JUL": "BYS-JUL", + "BAS-AUG": "BYS-AUG", + "BAS-SEP": "BYS-SEP", + "BAS-OCT": "BYS-OCT", + "BAS-NOV": "BYS-NOV", "H": "h", "BH": "bh", "CBH": "cbh", diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index ad37add17967d..a726c735bf9a1 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -253,8 +253,8 @@ def get_start_end_field( # month of year. Other offsets use month, startingMonth as ending # month of year. 
- if (freqstr[0:2] in ["MS", "QS", "AS"]) or ( - freqstr[1:3] in ["MS", "QS", "AS"]): + if (freqstr[0:2] in ["MS", "QS", "YS"]) or ( + freqstr[1:3] in ["MS", "QS", "YS"]): end_month = 12 if month_kw == 1 else month_kw - 1 start_month = month_kw else: diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 042d5dafe3046..6a6f30de8dade 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -2414,7 +2414,7 @@ cdef class BYearEnd(YearOffset): _outputName = "BusinessYearEnd" _default_month = 12 - _prefix = "BA" + _prefix = "BY" _day_opt = "business_end" @@ -2453,7 +2453,7 @@ cdef class BYearBegin(YearOffset): _outputName = "BusinessYearBegin" _default_month = 1 - _prefix = "BAS" + _prefix = "BYS" _day_opt = "business_start" @@ -2552,7 +2552,7 @@ cdef class YearBegin(YearOffset): """ _default_month = 1 - _prefix = "AS" + _prefix = "YS" _day_opt = "start" @@ -4540,10 +4540,10 @@ CDay = CustomBusinessDay prefix_mapping = { offset._prefix: offset for offset in [ - YearBegin, # 'AS' + YearBegin, # 'YS' YearEnd, # 'Y' - BYearBegin, # 'BAS' - BYearEnd, # 'BA' + BYearBegin, # 'BYS' + BYearEnd, # 'BY' BusinessDay, # 'B' BusinessMonthBegin, # 'BMS' BusinessMonthEnd, # 'BM' @@ -4584,12 +4584,9 @@ _lite_rule_alias = { "Q": "Q-DEC", "Y": "Y-DEC", # YearEnd(month=12), - "AS": "AS-JAN", # YearBegin(month=1), - "YS": "AS-JAN", - "BA": "BA-DEC", # BYearEnd(month=12), - "BY": "BA-DEC", - "BAS": "BAS-JAN", # BYearBegin(month=1), - "BYS": "BAS-JAN", + "YS": "YS-JAN", # YearBegin(month=1), + "BY": "BY-DEC", # BYearEnd(month=12), + "BYS": "BYS-JAN", # BYearBegin(month=1), "Min": "min", "min": "min", diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 60c42c01e9f6f..c91f892936640 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -2526,7 +2526,7 @@ def _round_temporally( raise ValueError(f"Must specify a valid frequency: {freq}") pa_supported_unit = { "Y": "year", - "AS": "year", + "YS": "year", "Q": "quarter", "QS": "quarter", "M": "month", diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index f3b2a35f379f4..12f93cf482a1d 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -992,11 +992,11 @@ def date_range( **Specify a unit** - >>> pd.date_range(start="2017-01-01", periods=10, freq="100AS", unit="s") + >>> pd.date_range(start="2017-01-01", periods=10, freq="100YS", unit="s") DatetimeIndex(['2017-01-01', '2117-01-01', '2217-01-01', '2317-01-01', '2417-01-01', '2517-01-01', '2617-01-01', '2717-01-01', '2817-01-01', '2917-01-01'], - dtype='datetime64[s]', freq='100AS-JAN') + dtype='datetime64[s]', freq='100YS-JAN') """ if freq is None and com.any_none(periods, start, end): freq = "D" diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 59e6a20915c18..8b3071a6f8582 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -2101,7 +2101,7 @@ def __init__( else: freq = to_offset(freq) - end_types = {"ME", "Y", "Q", "BM", "BA", "BQ", "W"} + end_types = {"ME", "Y", "Q", "BM", "BY", "BQ", "W"} rule = freq.rule_code if rule in end_types or ("-" in rule and rule[: rule.find("-")] in end_types): if closed is None: @@ -2299,7 +2299,7 @@ def _adjust_bin_edges( if self.freq.name in ("BM", "ME", "W") or self.freq.name.split("-")[0] in ( "BQ", - "BA", + "BY", "Q", "Y", "W", diff --git a/pandas/core/series.py b/pandas/core/series.py index c2eea371ddef3..fdd03debf6de4 100644 --- a/pandas/core/series.py +++ 
b/pandas/core/series.py @@ -5729,7 +5729,7 @@ def to_timestamp( 2023-01-01 1 2024-01-01 2 2025-01-01 3 - Freq: AS-JAN, dtype: int64 + Freq: YS-JAN, dtype: int64 Using `freq` which is the offset that the Timestamps will have diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index df6ccda27ab85..693b8d9483407 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1594,7 +1594,7 @@ def test_dt64arr_add_sub_offset_array( Timestamp("2016-04-01"), Timestamp("2017-04-01"), ], - "AS-APR", + "YS-APR", ), ( "__sub__", @@ -1616,7 +1616,7 @@ def test_dt64arr_add_sub_offset_array( Timestamp("2015-10-01"), Timestamp("2016-10-01"), ], - "AS-OCT", + "YS-OCT", ), ], ) @@ -1625,7 +1625,7 @@ def test_dti_add_sub_nonzero_mth_offset( ): # GH 26258 tz = tz_aware_fixture - date = date_range(start="01 Jan 2014", end="01 Jan 2017", freq="AS", tz=tz) + date = date_range(start="01 Jan 2014", end="01 Jan 2017", freq="YS", tz=tz) date = tm.box_expected(date, box_with_array, False) mth = getattr(date, op) result = mth(offset) diff --git a/pandas/tests/frame/methods/test_to_timestamp.py b/pandas/tests/frame/methods/test_to_timestamp.py index aeb65d98d8ab2..859dc56de4a25 100644 --- a/pandas/tests/frame/methods/test_to_timestamp.py +++ b/pandas/tests/frame/methods/test_to_timestamp.py @@ -44,7 +44,7 @@ def test_to_timestamp(self, frame_or_series): if frame_or_series is Series: assert result.name == "A" - exp_index = date_range("1/1/2001", end="1/1/2009", freq="AS-JAN") + exp_index = date_range("1/1/2001", end="1/1/2009", freq="YS-JAN") result = obj.to_timestamp("D", "start") tm.assert_index_equal(result.index, exp_index) @@ -88,7 +88,7 @@ def test_to_timestamp_columns(self): tm.assert_index_equal(result.columns, exp_index) tm.assert_numpy_array_equal(result.values, df.values) - exp_index = date_range("1/1/2001", end="1/1/2009", freq="AS-JAN") + exp_index = date_range("1/1/2001", end="1/1/2009", freq="YS-JAN") result = df.to_timestamp("D", "start", axis=1) tm.assert_index_equal(result.columns, exp_index) @@ -112,14 +112,14 @@ def test_to_timestamp_columns(self): result1 = df.to_timestamp("5min", axis=1) result2 = df.to_timestamp("min", axis=1) - expected = date_range("2001-01-01", "2009-01-01", freq="AS") + expected = date_range("2001-01-01", "2009-01-01", freq="YS") assert isinstance(result1.columns, DatetimeIndex) assert isinstance(result2.columns, DatetimeIndex) tm.assert_numpy_array_equal(result1.columns.asi8, expected.asi8) tm.assert_numpy_array_equal(result2.columns.asi8, expected.asi8) # PeriodIndex.to_timestamp always use 'infer' - assert result1.columns.freqstr == "AS-JAN" - assert result2.columns.freqstr == "AS-JAN" + assert result1.columns.freqstr == "YS-JAN" + assert result2.columns.freqstr == "YS-JAN" def test_to_timestamp_invalid_axis(self): index = period_range(freq="Y", start="1/1/2001", end="12/1/2009") diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 88ee8a35e5c94..76a543050097d 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -734,7 +734,7 @@ def test_list_grouper_with_nat(self): # GH 14715 df = DataFrame({"date": date_range("1/1/2011", periods=365, freq="D")}) df.iloc[-1] = pd.NaT - grouper = Grouper(key="date", freq="AS") + grouper = Grouper(key="date", freq="YS") # Grouper in a list grouping result = df.groupby([grouper]) diff --git a/pandas/tests/indexes/datetimes/methods/test_to_period.py 
b/pandas/tests/indexes/datetimes/methods/test_to_period.py index 8900c5cdbca14..6839fafcdc114 100644 --- a/pandas/tests/indexes/datetimes/methods/test_to_period.py +++ b/pandas/tests/indexes/datetimes/methods/test_to_period.py @@ -56,7 +56,7 @@ def test_to_period_quarterlyish(self, off): prng = rng.to_period() assert prng.freq == "Q-DEC" - @pytest.mark.parametrize("off", ["BA", "AS", "BAS"]) + @pytest.mark.parametrize("off", ["BY", "YS", "BYS"]) def test_to_period_annualish(self, off): rng = date_range("01-Jan-2012", periods=8, freq=off) prng = rng.to_period() diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 6da215715482d..077b4fa5a0696 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -648,7 +648,7 @@ def test_constructor_coverage(self): with pytest.raises(ValueError, match=msg): date_range(periods=10, freq="D") - @pytest.mark.parametrize("freq", ["AS", "W-SUN"]) + @pytest.mark.parametrize("freq", ["YS", "W-SUN"]) def test_constructor_datetime64_tzformat(self, freq): # see GH#6572: ISO 8601 format results in stdlib timezone object idx = date_range( @@ -981,8 +981,8 @@ def test_dti_constructor_years_only(self, tz_naive_fixture): rng3 = date_range("2014", "2020", freq="Y", tz=tz) expected3 = date_range("2014-12-31", "2019-12-31", freq="Y", tz=tz) - rng4 = date_range("2014", "2020", freq="AS", tz=tz) - expected4 = date_range("2014-01-01", "2020-01-01", freq="AS", tz=tz) + rng4 = date_range("2014", "2020", freq="YS", tz=tz) + expected4 = date_range("2014-01-01", "2020-01-01", freq="YS", tz=tz) for rng, expected in [ (rng1, expected1), diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index ededf78621699..a74d31747fbb0 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -243,13 +243,12 @@ def test_date_range_gen_error(self): rng = date_range("1/1/2000 00:00", "1/1/2000 00:18", freq="5min") assert len(rng) == 4 - @pytest.mark.parametrize("freq", ["AS", "YS"]) - def test_begin_year_alias(self, freq): + def test_begin_year_alias(self): # see gh-9313 - rng = date_range("1/1/2013", "7/1/2017", freq=freq) + rng = date_range("1/1/2013", "7/1/2017", freq="YS") exp = DatetimeIndex( ["2013-01-01", "2014-01-01", "2015-01-01", "2016-01-01", "2017-01-01"], - freq=freq, + freq="YS", ) tm.assert_index_equal(rng, exp) @@ -261,12 +260,11 @@ def test_end_year_alias(self): ) tm.assert_index_equal(rng, exp) - @pytest.mark.parametrize("freq", ["BA", "BY"]) - def test_business_end_year_alias(self, freq): + def test_business_end_year_alias(self): # see gh-9313 - rng = date_range("1/1/2013", "7/1/2017", freq=freq) + rng = date_range("1/1/2013", "7/1/2017", freq="BY") exp = DatetimeIndex( - ["2013-12-31", "2014-12-31", "2015-12-31", "2016-12-30"], freq=freq + ["2013-12-31", "2014-12-31", "2015-12-31", "2016-12-30"], freq="BY" ) tm.assert_index_equal(rng, exp) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 156075e3fafec..a18501a193b60 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -1,5 +1,6 @@ import datetime as dt from datetime import date +import re import dateutil import numpy as np @@ -226,3 +227,40 @@ def test_CBH_deprecated(self): ) tm.assert_index_equal(result, expected) + + 
@pytest.mark.parametrize( + "freq_depr, expected_values, expected_freq", + [ + ( + "2BA", + ["2020-12-31", "2022-12-30"], + "2BY-DEC", + ), + ( + "AS-AUG", + ["2021-08-01", "2022-08-01", "2023-08-01"], + "YS-AUG", + ), + ( + "1BAS-MAY", + ["2021-05-03", "2022-05-02", "2023-05-01"], + "1BYS-MAY", + ), + ], + ) + def test_AS_BA_BAS_deprecated(self, freq_depr, expected_values, expected_freq): + # GH#55479 + freq_msg = re.split("[0-9]*", freq_depr, maxsplit=1)[1] + msg = f"'{freq_msg}' is deprecated and will be removed in a future version." + + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = date_range( + dt.datetime(2020, 12, 1), dt.datetime(2023, 12, 1), freq=freq_depr + ) + result = DatetimeIndex( + expected_values, + dtype="datetime64[ns]", + freq=expected_freq, + ) + + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 185134af165f4..0a5287d154adc 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -146,7 +146,7 @@ def test_datetimeindex_accessors5(self): qsfeb = to_offset("QS-FEB") bq = to_offset("BQ") bqs_apr = to_offset("BQS-APR") - as_nov = to_offset("AS-NOV") + as_nov = to_offset("YS-NOV") tests = [ (freq_m.is_month_start(Timestamp("2013-06-01")), 1), diff --git a/pandas/tests/indexes/period/methods/test_to_timestamp.py b/pandas/tests/indexes/period/methods/test_to_timestamp.py index 2394efb353ab6..977ad8b26a369 100644 --- a/pandas/tests/indexes/period/methods/test_to_timestamp.py +++ b/pandas/tests/indexes/period/methods/test_to_timestamp.py @@ -49,7 +49,7 @@ def test_to_timestamp_non_contiguous(self): def test_to_timestamp_freq(self): idx = period_range("2017", periods=12, freq="Y-DEC") result = idx.to_timestamp() - expected = date_range("2017", periods=12, freq="AS-JAN") + expected = date_range("2017", periods=12, freq="YS-JAN") tm.assert_index_equal(result, expected) def test_to_timestamp_pi_nat(self): diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 28d02576156a0..f66f5bf50974e 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1172,7 +1172,7 @@ def test_resample_anchored_intraday(simple_date_range_series, unit): assert len(resampled) == 1 -@pytest.mark.parametrize("freq", ["MS", "BMS", "QS-MAR", "AS-DEC", "AS-JUN"]) +@pytest.mark.parametrize("freq", ["MS", "BMS", "QS-MAR", "YS-DEC", "YS-JUN"]) def test_resample_anchored_monthstart(simple_date_range_series, freq, unit): ts = simple_date_range_series("1/1/2000", "12/31/2002") ts.index = ts.index.as_unit(unit) @@ -1320,7 +1320,7 @@ def test_resample_unequal_times(unit): df = DataFrame({"close": 1}, index=bad_ind) # it works! 
- df.resample("AS").sum() + df.resample("YS").sum() def test_resample_consistency(unit): diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index d214e1b4ae4ae..6ad09f12525b4 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -660,7 +660,7 @@ def test_default_right_closed_label(self, from_freq, to_freq): @pytest.mark.parametrize( "from_freq, to_freq", - [("D", "MS"), ("Q", "AS"), ("ME", "QS"), ("h", "D"), ("min", "h")], + [("D", "MS"), ("Q", "YS"), ("ME", "QS"), ("h", "D"), ("min", "h")], ) def test_default_left_closed_label(self, from_freq, to_freq): idx = date_range(start="8/15/2012", periods=100, freq=from_freq) diff --git a/pandas/tests/tseries/frequencies/test_inference.py b/pandas/tests/tseries/frequencies/test_inference.py index 51d0dd298f841..22ff7f8405a40 100644 --- a/pandas/tests/tseries/frequencies/test_inference.py +++ b/pandas/tests/tseries/frequencies/test_inference.py @@ -52,7 +52,7 @@ def base_delta_code_pair(request): freqs = ( [f"Q-{month}" for month in MONTHS] - + [f"{annual}-{month}" for annual in ["Y", "BA"] for month in MONTHS] + + [f"{annual}-{month}" for annual in ["Y", "BY"] for month in MONTHS] + ["ME", "BM", "BMS"] + [f"WOM-{count}{day}" for count in range(1, 5) for day in DAYS] + [f"W-{day}" for day in DAYS] @@ -215,7 +215,7 @@ def test_infer_freq_index(freq, expected): "expected,dates", list( { - "AS-JAN": ["2009-01-01", "2010-01-01", "2011-01-01", "2012-01-01"], + "YS-JAN": ["2009-01-01", "2010-01-01", "2011-01-01", "2012-01-01"], "Q-OCT": ["2009-01-31", "2009-04-30", "2009-07-31", "2009-10-31"], "ME": ["2010-11-30", "2010-12-31", "2011-01-31", "2011-02-28"], "W-SAT": ["2010-12-25", "2011-01-01", "2011-01-08", "2011-01-15"], diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 7f96ea98fa047..9389f78c9e672 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -839,7 +839,7 @@ def test_rule_code(self): "NOV", "DEC", ] - base_lst = ["Y", "AS", "BA", "BAS", "Q", "QS", "BQ", "BQS"] + base_lst = ["Y", "YS", "BY", "BYS", "Q", "QS", "BQ", "BQS"] for base in base_lst: for v in suffix_lst: alias = "-".join([base, v]) @@ -858,7 +858,7 @@ def test_freq_offsets(): class TestReprNames: def test_str_for_named_is_name(self): # look at all the amazing combinations! 
- month_prefixes = ["Y", "AS", "BA", "BAS", "Q", "BQ", "BQS", "QS"] + month_prefixes = ["Y", "YS", "BY", "BYS", "Q", "BQ", "BQS", "QS"] names = [ prefix + "-" + month for prefix in month_prefixes diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 0ed0fe4b87576..db4fdf0d24465 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -59,7 +59,7 @@ # -------------------------------------------------------------------- # Offset related functions -_need_suffix = ["QS", "BQ", "BQS", "YS", "AS", "BY", "BA", "BYS", "BAS"] +_need_suffix = ["QS", "BQ", "BQS", "YS", "BY", "BYS"] for _prefix in _need_suffix: for _m in MONTHS: @@ -345,7 +345,7 @@ def _get_annual_rule(self) -> str | None: if pos_check is None: return None else: - return {"cs": "AS", "bs": "BAS", "ce": "Y", "be": "BA"}.get(pos_check) + return {"cs": "YS", "bs": "BYS", "ce": "Y", "be": "BY"}.get(pos_check) def _get_quarterly_rule(self) -> str | None: if len(self.mdiffs) > 1: From 7e8148f4ff44482b8541959d5e095fef00adad23 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 12 Oct 2023 08:08:25 -0700 Subject: [PATCH 04/19] REF: De-special-case _format_with_header (#55491) * REF: De-special-case _format_with_header * simplify --- pandas/core/indexes/base.py | 16 ++++++++++++---- pandas/core/indexes/category.py | 10 ---------- pandas/core/indexes/datetimelike.py | 1 + pandas/core/indexes/interval.py | 7 ------- 4 files changed, 13 insertions(+), 21 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index d749235e2cd2c..515f750f11219 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1387,12 +1387,20 @@ def _format_with_header(self, *, header: list[str_t], na_rep: str_t) -> list[str values = self._values - if is_object_dtype(values.dtype) or is_string_dtype(values.dtype): - values = np.asarray(values) + if ( + is_object_dtype(values.dtype) + or is_string_dtype(values.dtype) + or isinstance(self.dtype, (IntervalDtype, CategoricalDtype)) + ): # TODO: why do we need different justify for these cases? 
- result = trim_front(format_array(values, None, justify="all")) + justify = "all" else: - result = trim_front(format_array(values, None, justify="left")) + justify = "left" + # passing leading_space=False breaks test_format_missing, + # test_index_repr_in_frame_with_nan, but would otherwise make + # trim_front unnecessary + formatted = format_array(values, None, justify=justify) + result = trim_front(formatted) return header + result def _format_native_types( diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 9cf7e861584d9..b307be004ad6e 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -21,7 +21,6 @@ from pandas.core.dtypes.missing import ( is_valid_na_for_dtype, isna, - notna, ) from pandas.core.arrays.categorical import ( @@ -38,8 +37,6 @@ inherit_names, ) -from pandas.io.formats.printing import pprint_thing - if TYPE_CHECKING: from collections.abc import Hashable @@ -356,13 +353,6 @@ def _format_attrs(self): extra = super()._format_attrs() return attrs + extra - def _format_with_header(self, *, header: list[str], na_rep: str) -> list[str]: - result = [ - pprint_thing(x, escape_chars=("\t", "\r", "\n")) if notna(x) else na_rep - for x in self._values - ] - return header + result - # -------------------------------------------------------------------- @property diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 94ad556219b35..f02f7dcb65251 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -216,6 +216,7 @@ def format( def _format_with_header( self, *, header: list[str], na_rep: str, date_format: str | None = None ) -> list[str]: + # TODO: not reached in tests 2023-10-11 # matches base class except for whitespace padding and date_format return header + list( self._format_native_types(na_rep=na_rep, date_format=date_format) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 209ac84869e85..f73e2b5512262 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -842,13 +842,6 @@ def mid(self) -> Index: def length(self) -> Index: return Index(self._data.length, copy=False) - # -------------------------------------------------------------------- - # Rendering Methods - - def _format_with_header(self, *, header: list[str], na_rep: str) -> list[str]: - # matches base class except for whitespace padding - return header + list(self._format_native_types(na_rep=na_rep)) - # -------------------------------------------------------------------- # Set Operations From 7ef617e88e1e51bbfd5d0cf71f57ef4502b91b38 Mon Sep 17 00:00:00 2001 From: shiersansi <143710553+shiersansi@users.noreply.github.com> Date: Thu, 12 Oct 2023 23:10:59 +0800 Subject: [PATCH 05/19] DOC: Add DataFrame.index.levels (#55437) * modified: pandas/core/indexes/multi.py * modified: pandas/core/indexes/multi.py * modified: pandas/core/indexes/multi.py * modified: pandas/core/indexes/multi.py * modified: pandas/core/indexes/multi.py * modified: pandas/core/indexes/multi.py * modified: pandas/core/indexes/multi.py * modified: pandas/core/indexes/multi.py * modified: pandas/core/indexes/multi.py * modified: pandas/core/indexes/multi.py * modified: pandas/core/indexes/multi.py * modified: pandas/core/indexes/multi.py * modified: pandas/core/indexes/multi.py * modified: pandas/core/indexes/multi.py * modified: pandas/core/indexes/multi.py * modified: ../pandas/core/indexes/multi.py * modified: ../pandas/core/indexes/multi.py * 
modified: pandas/core/indexes/multi.py
* modified: pandas/core/indexes/multi.py
* modified: pandas/core/indexes/multi.py
* modified: pandas/core/indexes/multi.py
* modified: pandas/core/indexes/multi.py
* modified: pandas/core/indexes/multi.py
* modified: pandas/core/indexes/multi.py
* modified: pandas/core/indexes/multi.py
* modified: pandas/core/indexes/multi.py
* modified: pandas/core/indexes/multi.py
* modified: pandas/core/indexes/multi.py
* Update pandas/core/indexes/multi.py

---------

Co-authored-by: Marc Garcia
---
 pandas/core/indexes/multi.py | 47 ++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index c5cab225fa7b1..0c3593eca178d 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -843,6 +843,53 @@ def size(self) -> int:
 
     @cache_readonly
     def levels(self) -> FrozenList:
+        """
+        Levels of the MultiIndex.
+
+        Levels refer to the different hierarchical levels or layers in a MultiIndex.
+        In a MultiIndex, each level represents a distinct dimension or category of
+        the index.
+
+        To access the levels, you can use the levels attribute of the MultiIndex,
+        which returns a FrozenList of Index objects. Each Index object represents a
+        level in the MultiIndex and contains the unique values found in that
+        specific level.
+
+        If a MultiIndex is created with levels A, B, C, and the DataFrame using
+        it filters out all rows of level C, MultiIndex.levels will still
+        return A, B, C.
+
+        Examples
+        --------
+        >>> index = pd.MultiIndex.from_product([['mammal'],
+        ...     ('goat', 'human', 'cat', 'dog')], names=['Category', 'Animals'])
+        >>> leg_num = pd.DataFrame(data=(4, 2, 4, 4), index=index, columns=['Legs'])
+        >>> leg_num
+                          Legs
+        Category Animals
+        mammal   goat        4
+                 human       2
+                 cat         4
+                 dog         4
+
+        >>> leg_num.index.levels
+        FrozenList([['mammal'], ['cat', 'dog', 'goat', 'human']])
+
+        MultiIndex levels will not change even if the DataFrame using the MultiIndex
+        does not contain all of them anymore.
+ See how "human" is not in the DataFrame, but it is still in levels: + + >>> large_leg_num = leg_num[leg_num.Legs > 2] + >>> large_leg_num + Legs + Category Animals + mammal goat 4 + cat 4 + dog 4 + + >>> large_leg_num.index.levels + FrozenList([['mammal'], ['cat', 'dog', 'goat', 'human']]) + """ # Use cache_readonly to ensure that self.get_locs doesn't repeatedly # create new IndexEngine # https://github.com/pandas-dev/pandas/issues/31648 From ae177e88472a0f71e4fa8d41e773f6fb7029a8dc Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 12 Oct 2023 05:12:37 -1000 Subject: [PATCH 06/19] TST: Close ad-hoc db connections (#55445) * TST: Close ad-hoc db connections * Add runtime import --- pandas/tests/io/test_sql.py | 227 +++++++++++++++++------------------- 1 file changed, 105 insertions(+), 122 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 82fb98615100f..11f95ff104767 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1,20 +1,3 @@ -"""SQL io tests - -The SQL tests are broken down in different classes: - -- `PandasSQLTest`: base class with common methods for all test classes -- Tests for the public API (only tests with sqlite3) - - `_TestSQLApi` base class - - `TestSQLApi`: test the public API with sqlalchemy engine - - `TestSQLiteFallbackApi`: test the public API with a sqlite DBAPI - connection -- Tests for the different SQL flavors (flavor specific type conversions) - - Tests for the sqlalchemy mode: `_TestSQLAlchemy` is the base class with - common methods. The different tested flavors (sqlite3, MySQL, - PostgreSQL) derive from the base class - - Tests for the fallback mode (`TestSQLiteFallback`) - -""" from __future__ import annotations import contextlib @@ -29,6 +12,7 @@ from io import StringIO from pathlib import Path import sqlite3 +from typing import TYPE_CHECKING import uuid import numpy as np @@ -69,13 +53,9 @@ read_sql_table, ) -try: +if TYPE_CHECKING: import sqlalchemy - SQLALCHEMY_INSTALLED = True -except ImportError: - SQLALCHEMY_INSTALLED = False - @pytest.fixture def sql_strings(): @@ -426,13 +406,16 @@ def drop_table( conn.commit() else: with conn.begin() as con: - sql.SQLDatabase(con).drop_table(table_name) + with sql.SQLDatabase(con) as db: + db.drop_table(table_name) def drop_view( view_name: str, conn: sqlite3.Connection | sqlalchemy.engine.Engine | sqlalchemy.engine.Connection, ): + import sqlalchemy + if isinstance(conn, sqlite3.Connection): conn.execute(f"DROP VIEW IF EXISTS {sql._get_valid_sqlite_name(view_name)}") conn.commit() @@ -1151,10 +1134,9 @@ def test_read_sql_iris_parameter(conn, request, sql_strings, flavor): conn = request.getfixturevalue(conn) query = sql_strings["read_parameters"][flavor(conn_name)] params = ("Iris-setosa", 5.1) - pandasSQL = pandasSQL_builder(conn) - - with pandasSQL.run_transaction(): - iris_frame = pandasSQL.read_query(query, params=params) + with pandasSQL_builder(conn) as pandasSQL: + with pandasSQL.run_transaction(): + iris_frame = pandasSQL.read_query(query, params=params) check_iris_frame(iris_frame) @@ -1179,9 +1161,9 @@ def test_read_sql_iris_no_parameter_with_percent(conn, request, sql_strings, fla conn = request.getfixturevalue(conn) query = sql_strings["read_no_parameters_with_percent"][flavor(conn_name)] - pandasSQL = pandasSQL_builder(conn) - with pandasSQL.run_transaction(): - iris_frame = pandasSQL.read_query(query, params=None) + with pandasSQL_builder(conn) as pandasSQL: + with 
pandasSQL.run_transaction(): + iris_frame = pandasSQL.read_query(query, params=None) check_iris_frame(iris_frame) @@ -1914,7 +1896,8 @@ def test_warning_case_insensitive_table_name(conn, request, test_frame1): r"sensitivity issues. Consider using lower case table names." ), ): - sql.SQLDatabase(conn).check_case_sensitive("TABLE1", "") + with sql.SQLDatabase(conn) as db: + db.check_case_sensitive("TABLE1", "") # Test that the warning is certainly NOT triggered in a normal case. with tm.assert_produces_warning(None): @@ -1930,10 +1913,10 @@ def test_sqlalchemy_type_mapping(conn, request): df = DataFrame( {"time": to_datetime(["2014-12-12 01:54", "2014-12-11 02:54"], utc=True)} ) - db = sql.SQLDatabase(conn) - table = sql.SQLTable("test_type", db, frame=df) - # GH 9086: TIMESTAMP is the suggested type for datetimes with timezones - assert isinstance(table.table.c["time"].type, TIMESTAMP) + with sql.SQLDatabase(conn) as db: + table = sql.SQLTable("test_type", db, frame=df) + # GH 9086: TIMESTAMP is the suggested type for datetimes with timezones + assert isinstance(table.table.c["time"].type, TIMESTAMP) @pytest.mark.parametrize("conn", sqlalchemy_connectable) @@ -1961,10 +1944,10 @@ def test_sqlalchemy_integer_mapping(conn, request, integer, expected): # GH35076 Map pandas integer to optimal SQLAlchemy integer type conn = request.getfixturevalue(conn) df = DataFrame([0, 1], columns=["a"], dtype=integer) - db = sql.SQLDatabase(conn) - table = sql.SQLTable("test_type", db, frame=df) + with sql.SQLDatabase(conn) as db: + table = sql.SQLTable("test_type", db, frame=df) - result = str(table.table.c.a.type) + result = str(table.table.c.a.type) assert result == expected @@ -1974,16 +1957,16 @@ def test_sqlalchemy_integer_overload_mapping(conn, request, integer): conn = request.getfixturevalue(conn) # GH35076 Map pandas integer to optimal SQLAlchemy integer type df = DataFrame([0, 1], columns=["a"], dtype=integer) - db = sql.SQLDatabase(conn) - with pytest.raises( - ValueError, match="Unsigned 64 bit integer datatype is not supported" - ): - sql.SQLTable("test_type", db, frame=df) + with sql.SQLDatabase(conn) as db: + with pytest.raises( + ValueError, match="Unsigned 64 bit integer datatype is not supported" + ): + sql.SQLTable("test_type", db, frame=df) -@pytest.mark.skipif(not SQLALCHEMY_INSTALLED, reason="fails without SQLAlchemy") @pytest.mark.parametrize("conn", all_connectable) def test_database_uri_string(conn, request, test_frame1): + pytest.importorskip("sqlalchemy") conn = request.getfixturevalue(conn) # Test read_sql and .to_sql method with a database URI (GH10654) # db_uri = 'sqlite:///:memory:' # raises @@ -2003,9 +1986,9 @@ def test_database_uri_string(conn, request, test_frame1): @td.skip_if_installed("pg8000") -@pytest.mark.skipif(not SQLALCHEMY_INSTALLED, reason="fails without SQLAlchemy") @pytest.mark.parametrize("conn", all_connectable) def test_pg8000_sqlalchemy_passthrough_error(conn, request): + pytest.importorskip("sqlalchemy") conn = request.getfixturevalue(conn) # using driver that will not be installed on CI to trigger error # in sqlalchemy.create_engine -> test passing of this error to user @@ -2076,7 +2059,7 @@ def test_sql_open_close(test_frame3): tm.assert_frame_equal(test_frame3, result) -@pytest.mark.skipif(SQLALCHEMY_INSTALLED, reason="SQLAlchemy is installed") +@td.skip_if_installed("sqlalchemy") def test_con_string_import_error(): conn = "mysql://root@localhost/pandas" msg = "Using URI string without sqlalchemy installed" @@ -2084,7 +2067,7 @@ def 
test_con_string_import_error(): sql.read_sql("SELECT * FROM iris", conn) -@pytest.mark.skipif(SQLALCHEMY_INSTALLED, reason="SQLAlchemy is installed") +@td.skip_if_installed("sqlalchemy") def test_con_unknown_dbapi2_class_does_not_error_without_sql_alchemy_installed(): class MockSqliteConnection: def __init__(self, *args, **kwargs) -> None: @@ -2167,20 +2150,20 @@ def test_drop_table(conn, request): from sqlalchemy import inspect temp_frame = DataFrame({"one": [1.0, 2.0, 3.0, 4.0], "two": [4.0, 3.0, 2.0, 1.0]}) - pandasSQL = sql.SQLDatabase(conn) - with pandasSQL.run_transaction(): - assert pandasSQL.to_sql(temp_frame, "temp_frame") == 4 + with sql.SQLDatabase(conn) as pandasSQL: + with pandasSQL.run_transaction(): + assert pandasSQL.to_sql(temp_frame, "temp_frame") == 4 - insp = inspect(conn) - assert insp.has_table("temp_frame") + insp = inspect(conn) + assert insp.has_table("temp_frame") - with pandasSQL.run_transaction(): - pandasSQL.drop_table("temp_frame") - try: - insp.clear_cache() # needed with SQLAlchemy 2.0, unavailable prior - except AttributeError: - pass - assert not insp.has_table("temp_frame") + with pandasSQL.run_transaction(): + pandasSQL.drop_table("temp_frame") + try: + insp.clear_cache() # needed with SQLAlchemy 2.0, unavailable prior + except AttributeError: + pass + assert not insp.has_table("temp_frame") @pytest.mark.parametrize("conn", all_connectable) @@ -2206,9 +2189,10 @@ def test_roundtrip(conn, request, test_frame1): def test_execute_sql(conn, request): conn = request.getfixturevalue(conn) pandasSQL = pandasSQL_builder(conn) - with pandasSQL.run_transaction(): - iris_results = pandasSQL.execute("SELECT * FROM iris") - row = iris_results.fetchone() + with pandasSQL_builder(conn) as pandasSQL: + with pandasSQL.run_transaction(): + iris_results = pandasSQL.execute("SELECT * FROM iris") + row = iris_results.fetchone() tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, "Iris-setosa"]) @@ -2616,10 +2600,10 @@ def test_to_sql_save_index(conn, request): [(1, 2.1, "line1"), (2, 1.5, "line2")], columns=["A", "B", "C"], index=["A"] ) - pandasSQL = pandasSQL_builder(conn) tbl_name = "test_to_sql_saves_index" - with pandasSQL.run_transaction(): - assert pandasSQL.to_sql(df, tbl_name) == 2 + with pandasSQL_builder(conn) as pandasSQL: + with pandasSQL.run_transaction(): + assert pandasSQL.to_sql(df, tbl_name) == 2 if conn_name in {"sqlite_buildin", "sqlite_str"}: ixs = sql.read_sql_query( @@ -2648,55 +2632,55 @@ def test_transactions(conn, request): conn = request.getfixturevalue(conn) stmt = "CREATE TABLE test_trans (A INT, B TEXT)" - pandasSQL = pandasSQL_builder(conn) if conn_name != "sqlite_buildin": from sqlalchemy import text stmt = text(stmt) - with pandasSQL.run_transaction() as trans: - trans.execute(stmt) + with pandasSQL_builder(conn) as pandasSQL: + with pandasSQL.run_transaction() as trans: + trans.execute(stmt) @pytest.mark.parametrize("conn", all_connectable) def test_transaction_rollback(conn, request): conn = request.getfixturevalue(conn) - pandasSQL = pandasSQL_builder(conn) - with pandasSQL.run_transaction() as trans: - stmt = "CREATE TABLE test_trans (A INT, B TEXT)" - if isinstance(pandasSQL, SQLiteDatabase): - trans.execute(stmt) - else: - from sqlalchemy import text + with pandasSQL_builder(conn) as pandasSQL: + with pandasSQL.run_transaction() as trans: + stmt = "CREATE TABLE test_trans (A INT, B TEXT)" + if isinstance(pandasSQL, SQLiteDatabase): + trans.execute(stmt) + else: + from sqlalchemy import text - stmt = text(stmt) - trans.execute(stmt) + stmt = 
text(stmt) + trans.execute(stmt) - class DummyException(Exception): - pass + class DummyException(Exception): + pass - # Make sure when transaction is rolled back, no rows get inserted - ins_sql = "INSERT INTO test_trans (A,B) VALUES (1, 'blah')" - if isinstance(pandasSQL, SQLDatabase): - from sqlalchemy import text + # Make sure when transaction is rolled back, no rows get inserted + ins_sql = "INSERT INTO test_trans (A,B) VALUES (1, 'blah')" + if isinstance(pandasSQL, SQLDatabase): + from sqlalchemy import text + + ins_sql = text(ins_sql) + try: + with pandasSQL.run_transaction() as trans: + trans.execute(ins_sql) + raise DummyException("error") + except DummyException: + # ignore raised exception + pass + with pandasSQL.run_transaction(): + res = pandasSQL.read_query("SELECT * FROM test_trans") + assert len(res) == 0 - ins_sql = text(ins_sql) - try: + # Make sure when transaction is committed, rows do get inserted with pandasSQL.run_transaction() as trans: trans.execute(ins_sql) - raise DummyException("error") - except DummyException: - # ignore raised exception - pass - with pandasSQL.run_transaction(): - res = pandasSQL.read_query("SELECT * FROM test_trans") - assert len(res) == 0 - - # Make sure when transaction is committed, rows do get inserted - with pandasSQL.run_transaction() as trans: - trans.execute(ins_sql) - res2 = pandasSQL.read_query("SELECT * FROM test_trans") - assert len(res2) == 1 + res2 = pandasSQL.read_query("SELECT * FROM test_trans") + assert len(res2) == 1 @pytest.mark.parametrize("conn", sqlalchemy_connectable) @@ -2975,9 +2959,9 @@ def test_invalid_engine(conn, request, test_frame1): conn = request.getfixturevalue(conn) msg = "engine must be one of 'auto', 'sqlalchemy'" - pandasSQL = pandasSQL_builder(conn) - with pytest.raises(ValueError, match=msg): - pandasSQL.to_sql(test_frame1, "test_frame1", engine="bad_engine") + with pandasSQL_builder(conn) as pandasSQL: + with pytest.raises(ValueError, match=msg): + pandasSQL.to_sql(test_frame1, "test_frame1", engine="bad_engine") @pytest.mark.parametrize("conn", all_connectable) @@ -2985,10 +2969,10 @@ def test_to_sql_with_sql_engine(conn, request, test_frame1): """`to_sql` with the `engine` param""" # mostly copied from this class's `_to_sql()` method conn = request.getfixturevalue(conn) - pandasSQL = pandasSQL_builder(conn) - with pandasSQL.run_transaction(): - assert pandasSQL.to_sql(test_frame1, "test_frame1", engine="auto") == 4 - assert pandasSQL.has_table("test_frame1") + with pandasSQL_builder(conn) as pandasSQL: + with pandasSQL.run_transaction(): + assert pandasSQL.to_sql(test_frame1, "test_frame1", engine="auto") == 4 + assert pandasSQL.has_table("test_frame1") num_entries = len(test_frame1) num_rows = count_rows(conn, "test_frame1") @@ -3000,10 +2984,10 @@ def test_options_sqlalchemy(conn, request, test_frame1): # use the set option conn = request.getfixturevalue(conn) with pd.option_context("io.sql.engine", "sqlalchemy"): - pandasSQL = pandasSQL_builder(conn) - with pandasSQL.run_transaction(): - assert pandasSQL.to_sql(test_frame1, "test_frame1") == 4 - assert pandasSQL.has_table("test_frame1") + with pandasSQL_builder(conn) as pandasSQL: + with pandasSQL.run_transaction(): + assert pandasSQL.to_sql(test_frame1, "test_frame1") == 4 + assert pandasSQL.has_table("test_frame1") num_entries = len(test_frame1) num_rows = count_rows(conn, "test_frame1") @@ -3015,18 +2999,18 @@ def test_options_auto(conn, request, test_frame1): # use the set option conn = request.getfixturevalue(conn) with 
pd.option_context("io.sql.engine", "auto"): - pandasSQL = pandasSQL_builder(conn) - with pandasSQL.run_transaction(): - assert pandasSQL.to_sql(test_frame1, "test_frame1") == 4 - assert pandasSQL.has_table("test_frame1") + with pandasSQL_builder(conn) as pandasSQL: + with pandasSQL.run_transaction(): + assert pandasSQL.to_sql(test_frame1, "test_frame1") == 4 + assert pandasSQL.has_table("test_frame1") num_entries = len(test_frame1) num_rows = count_rows(conn, "test_frame1") assert num_rows == num_entries -@pytest.mark.skipif(not SQLALCHEMY_INSTALLED, reason="fails without SQLAlchemy") def test_options_get_engine(): + pytest.importorskip("sqlalchemy") assert isinstance(get_engine("sqlalchemy"), SQLAlchemyEngine) with pd.option_context("io.sql.engine", "sqlalchemy"): @@ -3463,17 +3447,16 @@ def test_self_join_date_columns(postgresql_psycopg2_engine): def test_create_and_drop_table(sqlite_sqlalchemy_memory_engine): conn = sqlite_sqlalchemy_memory_engine temp_frame = DataFrame({"one": [1.0, 2.0, 3.0, 4.0], "two": [4.0, 3.0, 2.0, 1.0]}) - pandasSQL = sql.SQLDatabase(conn) - - with pandasSQL.run_transaction(): - assert pandasSQL.to_sql(temp_frame, "drop_test_frame") == 4 + with sql.SQLDatabase(conn) as pandasSQL: + with pandasSQL.run_transaction(): + assert pandasSQL.to_sql(temp_frame, "drop_test_frame") == 4 - assert pandasSQL.has_table("drop_test_frame") + assert pandasSQL.has_table("drop_test_frame") - with pandasSQL.run_transaction(): - pandasSQL.drop_table("drop_test_frame") + with pandasSQL.run_transaction(): + pandasSQL.drop_table("drop_test_frame") - assert not pandasSQL.has_table("drop_test_frame") + assert not pandasSQL.has_table("drop_test_frame") def test_sqlite_datetime_date(sqlite_buildin): From 9de2a19b9651336cc14a2830c8460e9ad1e2d505 Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Thu, 12 Oct 2023 18:31:34 -0400 Subject: [PATCH 07/19] REF: Add tests.groupby.methods (#55312) * REF: Add tests.groupby.methods * Merge cleanup * Refactor * Refactor * Show value of ymin * fixup * Revert * Revert --- pandas/tests/groupby/methods/__init__.py | 0 pandas/tests/groupby/methods/test_corrwith.py | 24 + pandas/tests/groupby/methods/test_describe.py | 221 +++++ .../{ => methods}/test_groupby_shift_diff.py | 0 .../groupby/methods/test_is_monotonic.py | 78 ++ .../methods/test_nlargest_nsmallest.py | 115 +++ .../tests/groupby/{ => methods}/test_nth.py | 0 .../groupby/{ => methods}/test_quantile.py | 0 .../tests/groupby/{ => methods}/test_rank.py | 0 .../groupby/{ => methods}/test_sample.py | 0 .../tests/groupby/{ => methods}/test_size.py | 0 .../tests/groupby/{ => methods}/test_skew.py | 0 .../{ => methods}/test_value_counts.py | 0 pandas/tests/groupby/test_any_all.py | 188 ---- pandas/tests/groupby/test_cumulative.py | 291 ++++++ pandas/tests/groupby/test_function.py | 916 ------------------ pandas/tests/groupby/test_min_max.py | 272 ------ pandas/tests/groupby/test_nunique.py | 190 ---- pandas/tests/groupby/test_reductions.py | 838 ++++++++++++++++ 19 files changed, 1567 insertions(+), 1566 deletions(-) create mode 100644 pandas/tests/groupby/methods/__init__.py create mode 100644 pandas/tests/groupby/methods/test_corrwith.py create mode 100644 pandas/tests/groupby/methods/test_describe.py rename pandas/tests/groupby/{ => methods}/test_groupby_shift_diff.py (100%) create mode 100644 pandas/tests/groupby/methods/test_is_monotonic.py create mode 100644 pandas/tests/groupby/methods/test_nlargest_nsmallest.py rename pandas/tests/groupby/{ => 
methods}/test_nth.py (100%) rename pandas/tests/groupby/{ => methods}/test_quantile.py (100%) rename pandas/tests/groupby/{ => methods}/test_rank.py (100%) rename pandas/tests/groupby/{ => methods}/test_sample.py (100%) rename pandas/tests/groupby/{ => methods}/test_size.py (100%) rename pandas/tests/groupby/{ => methods}/test_skew.py (100%) rename pandas/tests/groupby/{ => methods}/test_value_counts.py (100%) delete mode 100644 pandas/tests/groupby/test_any_all.py create mode 100644 pandas/tests/groupby/test_cumulative.py delete mode 100644 pandas/tests/groupby/test_min_max.py delete mode 100644 pandas/tests/groupby/test_nunique.py create mode 100644 pandas/tests/groupby/test_reductions.py diff --git a/pandas/tests/groupby/methods/__init__.py b/pandas/tests/groupby/methods/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/groupby/methods/test_corrwith.py b/pandas/tests/groupby/methods/test_corrwith.py new file mode 100644 index 0000000000000..53e8bdc4534dc --- /dev/null +++ b/pandas/tests/groupby/methods/test_corrwith.py @@ -0,0 +1,24 @@ +import numpy as np + +from pandas import ( + DataFrame, + Index, + Series, +) +import pandas._testing as tm + + +def test_corrwith_with_1_axis(): + # GH 47723 + df = DataFrame({"a": [1, 1, 2], "b": [3, 7, 4]}) + gb = df.groupby("a") + + msg = "DataFrameGroupBy.corrwith with axis=1 is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = gb.corrwith(df, axis=1) + index = Index( + data=[(1, 0), (1, 1), (1, 2), (2, 2), (2, 0), (2, 1)], + name=("a", None), + ) + expected = Series([np.nan] * 6, index=index) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/methods/test_describe.py b/pandas/tests/groupby/methods/test_describe.py new file mode 100644 index 0000000000000..f38de8faddb59 --- /dev/null +++ b/pandas/tests/groupby/methods/test_describe.py @@ -0,0 +1,221 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Timestamp, +) +import pandas._testing as tm + + +def test_apply_describe_bug(mframe): + grouped = mframe.groupby(level="first") + grouped.describe() # it works! 
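+
+# Illustrative sketch, not part of the original test file: the tests below
+# assert against the frame that GroupBy.describe() returns, e.g.
+#
+#   >>> df = DataFrame({"key": ["a", "a", "b"], "val": [1.0, 3.0, 5.0]})
+#   >>> df.groupby("key")["val"].describe().loc["a", ["count", "mean"]]
+#   count    2.0
+#   mean     2.0
+#   Name: a, dtype: float64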
+ + +def test_series_describe_multikey(): + ts = tm.makeTimeSeries() + grouped = ts.groupby([lambda x: x.year, lambda x: x.month]) + result = grouped.describe() + tm.assert_series_equal(result["mean"], grouped.mean(), check_names=False) + tm.assert_series_equal(result["std"], grouped.std(), check_names=False) + tm.assert_series_equal(result["min"], grouped.min(), check_names=False) + + +def test_series_describe_single(): + ts = tm.makeTimeSeries() + grouped = ts.groupby(lambda x: x.month) + result = grouped.apply(lambda x: x.describe()) + expected = grouped.describe().stack(future_stack=True) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("keys", ["key1", ["key1", "key2"]]) +def test_series_describe_as_index(as_index, keys): + # GH#49256 + df = DataFrame( + { + "key1": ["one", "two", "two", "three", "two"], + "key2": ["one", "two", "two", "three", "two"], + "foo2": [1, 2, 4, 4, 6], + } + ) + gb = df.groupby(keys, as_index=as_index)["foo2"] + result = gb.describe() + expected = DataFrame( + { + "key1": ["one", "three", "two"], + "count": [1.0, 1.0, 3.0], + "mean": [1.0, 4.0, 4.0], + "std": [np.nan, np.nan, 2.0], + "min": [1.0, 4.0, 2.0], + "25%": [1.0, 4.0, 3.0], + "50%": [1.0, 4.0, 4.0], + "75%": [1.0, 4.0, 5.0], + "max": [1.0, 4.0, 6.0], + } + ) + if len(keys) == 2: + expected.insert(1, "key2", expected["key1"]) + if as_index: + expected = expected.set_index(keys) + tm.assert_frame_equal(result, expected) + + +def test_frame_describe_multikey(tsframe): + grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month]) + result = grouped.describe() + desc_groups = [] + for col in tsframe: + group = grouped[col].describe() + # GH 17464 - Remove duplicate MultiIndex levels + group_col = MultiIndex( + levels=[[col], group.columns], + codes=[[0] * len(group.columns), range(len(group.columns))], + ) + group = DataFrame(group.values, columns=group_col, index=group.index) + desc_groups.append(group) + expected = pd.concat(desc_groups, axis=1) + tm.assert_frame_equal(result, expected) + + msg = "DataFrame.groupby with axis=1 is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + groupedT = tsframe.groupby({"A": 0, "B": 0, "C": 1, "D": 1}, axis=1) + result = groupedT.describe() + expected = tsframe.describe().T + # reverting the change from https://github.com/pandas-dev/pandas/pull/35441/ + expected.index = MultiIndex( + levels=[[0, 1], expected.index], + codes=[[0, 0, 1, 1], range(len(expected.index))], + ) + tm.assert_frame_equal(result, expected) + + +def test_frame_describe_tupleindex(): + # GH 14848 - regression from 0.19.0 to 0.19.1 + df1 = DataFrame( + { + "x": [1, 2, 3, 4, 5] * 3, + "y": [10, 20, 30, 40, 50] * 3, + "z": [100, 200, 300, 400, 500] * 3, + } + ) + df1["k"] = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] * 5 + df2 = df1.rename(columns={"k": "key"}) + msg = "Names should be list-like for a MultiIndex" + with pytest.raises(ValueError, match=msg): + df1.groupby("k").describe() + with pytest.raises(ValueError, match=msg): + df2.groupby("key").describe() + + +def test_frame_describe_unstacked_format(): + # GH 4792 + prices = { + Timestamp("2011-01-06 10:59:05", tz=None): 24990, + Timestamp("2011-01-06 12:43:33", tz=None): 25499, + Timestamp("2011-01-06 12:54:09", tz=None): 25499, + } + volumes = { + Timestamp("2011-01-06 10:59:05", tz=None): 1500000000, + Timestamp("2011-01-06 12:43:33", tz=None): 5000000000, + Timestamp("2011-01-06 12:54:09", tz=None): 100000000, + } + df = DataFrame({"PRICE": prices, "VOLUME": volumes}) + result = 
df.groupby("PRICE").VOLUME.describe() + data = [ + df[df.PRICE == 24990].VOLUME.describe().values.tolist(), + df[df.PRICE == 25499].VOLUME.describe().values.tolist(), + ] + expected = DataFrame( + data, + index=Index([24990, 25499], name="PRICE"), + columns=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.filterwarnings( + "ignore:" + "indexing past lexsort depth may impact performance:" + "pandas.errors.PerformanceWarning" +) +@pytest.mark.parametrize("as_index", [True, False]) +@pytest.mark.parametrize("keys", [["a1"], ["a1", "a2"]]) +def test_describe_with_duplicate_output_column_names(as_index, keys): + # GH 35314 + df = DataFrame( + { + "a1": [99, 99, 99, 88, 88, 88], + "a2": [99, 99, 99, 88, 88, 88], + "b": [1, 2, 3, 4, 5, 6], + "c": [10, 20, 30, 40, 50, 60], + }, + columns=["a1", "a2", "b", "b"], + copy=False, + ) + if keys == ["a1"]: + df = df.drop(columns="a2") + + expected = ( + DataFrame.from_records( + [ + ("b", "count", 3.0, 3.0), + ("b", "mean", 5.0, 2.0), + ("b", "std", 1.0, 1.0), + ("b", "min", 4.0, 1.0), + ("b", "25%", 4.5, 1.5), + ("b", "50%", 5.0, 2.0), + ("b", "75%", 5.5, 2.5), + ("b", "max", 6.0, 3.0), + ("b", "count", 3.0, 3.0), + ("b", "mean", 5.0, 2.0), + ("b", "std", 1.0, 1.0), + ("b", "min", 4.0, 1.0), + ("b", "25%", 4.5, 1.5), + ("b", "50%", 5.0, 2.0), + ("b", "75%", 5.5, 2.5), + ("b", "max", 6.0, 3.0), + ], + ) + .set_index([0, 1]) + .T + ) + expected.columns.names = [None, None] + if len(keys) == 2: + expected.index = MultiIndex( + levels=[[88, 99], [88, 99]], codes=[[0, 1], [0, 1]], names=["a1", "a2"] + ) + else: + expected.index = Index([88, 99], name="a1") + + if not as_index: + expected = expected.reset_index() + + result = df.groupby(keys, as_index=as_index).describe() + + tm.assert_frame_equal(result, expected) + + +def test_describe_duplicate_columns(): + # GH#50806 + df = DataFrame([[0, 1, 2, 3]]) + df.columns = [0, 1, 2, 0] + gb = df.groupby(df[1]) + result = gb.describe(percentiles=[]) + + columns = ["count", "mean", "std", "min", "50%", "max"] + frames = [ + DataFrame([[1.0, val, np.nan, val, val, val]], index=[1], columns=columns) + for val in (0.0, 2.0, 3.0) + ] + expected = pd.concat(frames, axis=1) + expected.columns = MultiIndex( + levels=[[0, 2], columns], + codes=[6 * [0] + 6 * [1] + 6 * [0], 3 * list(range(6))], + ) + expected.index.names = [1] + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby_shift_diff.py b/pandas/tests/groupby/methods/test_groupby_shift_diff.py similarity index 100% rename from pandas/tests/groupby/test_groupby_shift_diff.py rename to pandas/tests/groupby/methods/test_groupby_shift_diff.py diff --git a/pandas/tests/groupby/methods/test_is_monotonic.py b/pandas/tests/groupby/methods/test_is_monotonic.py new file mode 100644 index 0000000000000..3428fc90f6e51 --- /dev/null +++ b/pandas/tests/groupby/methods/test_is_monotonic.py @@ -0,0 +1,78 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + Series, +) +import pandas._testing as tm + + +@pytest.mark.parametrize( + "in_vals, out_vals", + [ + # Basics: strictly increasing (T), strictly decreasing (F), + # abs val increasing (F), non-strictly increasing (T) + ([1, 2, 5, 3, 2, 0, 4, 5, -6, 1, 1], [True, False, False, True]), + # Test with inf vals + ( + [1, 2.1, np.inf, 3, 2, np.inf, -np.inf, 5, 11, 1, -np.inf], + [True, False, True, False], + ), + # Test with nan vals; should always be False + ( + [1, 2, np.nan, 3, 2, np.nan, 
np.nan, 5, -np.inf, 1, np.nan],
+            [False, False, False, False],
+        ),
+    ],
+)
+def test_is_monotonic_increasing(in_vals, out_vals):
+    # GH 17015
+    source_dict = {
+        "A": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11"],
+        "B": ["a", "a", "a", "b", "b", "b", "c", "c", "c", "d", "d"],
+        "C": in_vals,
+    }
+    df = DataFrame(source_dict)
+    result = df.groupby("B").C.is_monotonic_increasing
+    index = Index(list("abcd"), name="B")
+    expected = Series(index=index, data=out_vals, name="C")
+    tm.assert_series_equal(result, expected)
+
+    # Also check result equal to manually taking x.is_monotonic_increasing.
+    expected = df.groupby(["B"]).C.apply(lambda x: x.is_monotonic_increasing)
+    tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "in_vals, out_vals",
+    [
+        # Basics: strictly decreasing (T), strictly increasing (F),
+        # abs val decreasing (F), non-strictly decreasing (T)
+        ([10, 9, 7, 3, 4, 5, -3, 2, 0, 1, 1], [True, False, False, True]),
+        # Test with inf vals
+        (
+            [np.inf, 1, -np.inf, np.inf, 2, -3, -np.inf, 5, -3, -np.inf, -np.inf],
+            [True, True, False, True],
+        ),
+        # Test with nan vals; should always be False
+        (
+            [1, 2, np.nan, 3, 2, np.nan, np.nan, 5, -np.inf, 1, np.nan],
+            [False, False, False, False],
+        ),
+    ],
+)
+def test_is_monotonic_decreasing(in_vals, out_vals):
+    # GH 17015
+    source_dict = {
+        "A": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11"],
+        "B": ["a", "a", "a", "b", "b", "b", "c", "c", "c", "d", "d"],
+        "C": in_vals,
+    }
+
+    df = DataFrame(source_dict)
+    result = df.groupby("B").C.is_monotonic_decreasing
+    index = Index(list("abcd"), name="B")
+    expected = Series(index=index, data=out_vals, name="C")
+    tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/groupby/methods/test_nlargest_nsmallest.py b/pandas/tests/groupby/methods/test_nlargest_nsmallest.py
new file mode 100644
index 0000000000000..bf983f04a3f3f
--- /dev/null
+++ b/pandas/tests/groupby/methods/test_nlargest_nsmallest.py
@@ -0,0 +1,115 @@
+import numpy as np
+import pytest
+
+from pandas import (
+    MultiIndex,
+    Series,
+    date_range,
+)
+import pandas._testing as tm
+
+
+def test_nlargest():
+    a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10])
+    b = Series(list("a" * 5 + "b" * 5))
+    gb = a.groupby(b)
+    r = gb.nlargest(3)
+    e = Series(
+        [7, 5, 3, 10, 9, 6],
+        index=MultiIndex.from_arrays([list("aaabbb"), [3, 2, 1, 9, 5, 8]]),
+    )
+    tm.assert_series_equal(r, e)
+
+    a = Series([1, 1, 3, 2, 0, 3, 3, 2, 1, 0])
+    gb = a.groupby(b)
+    e = Series(
+        [3, 2, 1, 3, 3, 2],
+        index=MultiIndex.from_arrays([list("aaabbb"), [2, 3, 1, 6, 5, 7]]),
+    )
+    tm.assert_series_equal(gb.nlargest(3, keep="last"), e)
+
+
+def test_nlargest_mi_grouper():
+    # see gh-21411
+    npr = np.random.default_rng(2)
+
+    dts = date_range("20180101", periods=10)
+    iterables = [dts, ["one", "two"]]
+
+    idx = MultiIndex.from_product(iterables, names=["first", "second"])
+    s = Series(npr.standard_normal(20), index=idx)
+
+    result = s.groupby("first").nlargest(1)
+
+    exp_idx = MultiIndex.from_tuples(
+        [
+            (dts[0], dts[0], "one"),
+            (dts[1], dts[1], "one"),
+            (dts[2], dts[2], "one"),
+            (dts[3], dts[3], "two"),
+            (dts[4], dts[4], "one"),
+            (dts[5], dts[5], "one"),
+            (dts[6], dts[6], "one"),
+            (dts[7], dts[7], "one"),
+            (dts[8], dts[8], "one"),
+            (dts[9], dts[9], "one"),
+        ],
+        names=["first", "first", "second"],
+    )
+
+    exp_values = [
+        0.18905338179353307,
+        -0.41306354339189344,
+        1.799707382720902,
+        0.7738065867276614,
+        0.28121066979764925,
+        0.9775674511260357,
+        -0.3288239040579627,
+
0.45495807124085547, + 0.5452887139646817, + 0.12682784711186987, + ] + + expected = Series(exp_values, index=exp_idx) + tm.assert_series_equal(result, expected, check_exact=False, rtol=1e-3) + + +def test_nsmallest(): + a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10]) + b = Series(list("a" * 5 + "b" * 5)) + gb = a.groupby(b) + r = gb.nsmallest(3) + e = Series( + [1, 2, 3, 0, 4, 6], + index=MultiIndex.from_arrays([list("aaabbb"), [0, 4, 1, 6, 7, 8]]), + ) + tm.assert_series_equal(r, e) + + a = Series([1, 1, 3, 2, 0, 3, 3, 2, 1, 0]) + gb = a.groupby(b) + e = Series( + [0, 1, 1, 0, 1, 2], + index=MultiIndex.from_arrays([list("aaabbb"), [4, 1, 0, 9, 8, 7]]), + ) + tm.assert_series_equal(gb.nsmallest(3, keep="last"), e) + + +@pytest.mark.parametrize( + "data, groups", + [([0, 1, 2, 3], [0, 0, 1, 1]), ([0], [0])], +) +@pytest.mark.parametrize("dtype", [None, *tm.ALL_INT_NUMPY_DTYPES]) +@pytest.mark.parametrize("method", ["nlargest", "nsmallest"]) +def test_nlargest_and_smallest_noop(data, groups, dtype, method): + # GH 15272, GH 16345, GH 29129 + # Test nlargest/smallest when it results in a noop, + # i.e. input is sorted and group size <= n + if dtype is not None: + data = np.array(data, dtype=dtype) + if method == "nlargest": + data = list(reversed(data)) + ser = Series(data, name="a") + result = getattr(ser.groupby(groups), method)(n=2) + expidx = np.array(groups, dtype=int) if isinstance(groups, list) else groups + expected = Series(data, index=MultiIndex.from_arrays([expidx, ser.index]), name="a") + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/methods/test_nth.py similarity index 100% rename from pandas/tests/groupby/test_nth.py rename to pandas/tests/groupby/methods/test_nth.py diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/methods/test_quantile.py similarity index 100% rename from pandas/tests/groupby/test_quantile.py rename to pandas/tests/groupby/methods/test_quantile.py diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/methods/test_rank.py similarity index 100% rename from pandas/tests/groupby/test_rank.py rename to pandas/tests/groupby/methods/test_rank.py diff --git a/pandas/tests/groupby/test_sample.py b/pandas/tests/groupby/methods/test_sample.py similarity index 100% rename from pandas/tests/groupby/test_sample.py rename to pandas/tests/groupby/methods/test_sample.py diff --git a/pandas/tests/groupby/test_size.py b/pandas/tests/groupby/methods/test_size.py similarity index 100% rename from pandas/tests/groupby/test_size.py rename to pandas/tests/groupby/methods/test_size.py diff --git a/pandas/tests/groupby/test_skew.py b/pandas/tests/groupby/methods/test_skew.py similarity index 100% rename from pandas/tests/groupby/test_skew.py rename to pandas/tests/groupby/methods/test_skew.py diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py similarity index 100% rename from pandas/tests/groupby/test_value_counts.py rename to pandas/tests/groupby/methods/test_value_counts.py diff --git a/pandas/tests/groupby/test_any_all.py b/pandas/tests/groupby/test_any_all.py deleted file mode 100644 index 57a83335be849..0000000000000 --- a/pandas/tests/groupby/test_any_all.py +++ /dev/null @@ -1,188 +0,0 @@ -import builtins - -import numpy as np -import pytest - -import pandas as pd -from pandas import ( - DataFrame, - Index, - Series, - isna, -) -import pandas._testing as tm - - -@pytest.mark.parametrize("agg_func", ["any", "all"]) 
-@pytest.mark.parametrize( - "vals", - [ - ["foo", "bar", "baz"], - ["foo", "", ""], - ["", "", ""], - [1, 2, 3], - [1, 0, 0], - [0, 0, 0], - [1.0, 2.0, 3.0], - [1.0, 0.0, 0.0], - [0.0, 0.0, 0.0], - [True, True, True], - [True, False, False], - [False, False, False], - [np.nan, np.nan, np.nan], - ], -) -def test_groupby_bool_aggs(skipna, agg_func, vals): - df = DataFrame({"key": ["a"] * 3 + ["b"] * 3, "val": vals * 2}) - - # Figure out expectation using Python builtin - exp = getattr(builtins, agg_func)(vals) - - # edge case for missing data with skipna and 'any' - if skipna and all(isna(vals)) and agg_func == "any": - exp = False - - expected = DataFrame( - [exp] * 2, columns=["val"], index=Index(["a", "b"], name="key") - ) - result = getattr(df.groupby("key"), agg_func)(skipna=skipna) - tm.assert_frame_equal(result, expected) - - -def test_any(): - df = DataFrame( - [[1, 2, "foo"], [1, np.nan, "bar"], [3, np.nan, "baz"]], - columns=["A", "B", "C"], - ) - expected = DataFrame( - [[True, True], [False, True]], columns=["B", "C"], index=[1, 3] - ) - expected.index.name = "A" - result = df.groupby("A").any() - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize("bool_agg_func", ["any", "all"]) -def test_bool_aggs_dup_column_labels(bool_agg_func): - # GH#21668 - df = DataFrame([[True, True]], columns=["a", "a"]) - grp_by = df.groupby([0]) - result = getattr(grp_by, bool_agg_func)() - - expected = df.set_axis(np.array([0])) - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize("bool_agg_func", ["any", "all"]) -@pytest.mark.parametrize( - "data", - [ - [False, False, False], - [True, True, True], - [pd.NA, pd.NA, pd.NA], - [False, pd.NA, False], - [True, pd.NA, True], - [True, pd.NA, False], - ], -) -def test_masked_kleene_logic(bool_agg_func, skipna, data): - # GH#37506 - ser = Series(data, dtype="boolean") - - # The result should match aggregating on the whole series. 
Correctness - # there is verified in test_reductions.py::test_any_all_boolean_kleene_logic - expected_data = getattr(ser, bool_agg_func)(skipna=skipna) - expected = Series(expected_data, index=np.array([0]), dtype="boolean") - - result = ser.groupby([0, 0, 0]).agg(bool_agg_func, skipna=skipna) - tm.assert_series_equal(result, expected) - - -@pytest.mark.parametrize( - "dtype1,dtype2,exp_col1,exp_col2", - [ - ( - "float", - "Float64", - np.array([True], dtype=bool), - pd.array([pd.NA], dtype="boolean"), - ), - ( - "Int64", - "float", - pd.array([pd.NA], dtype="boolean"), - np.array([True], dtype=bool), - ), - ( - "Int64", - "Int64", - pd.array([pd.NA], dtype="boolean"), - pd.array([pd.NA], dtype="boolean"), - ), - ( - "Float64", - "boolean", - pd.array([pd.NA], dtype="boolean"), - pd.array([pd.NA], dtype="boolean"), - ), - ], -) -def test_masked_mixed_types(dtype1, dtype2, exp_col1, exp_col2): - # GH#37506 - data = [1.0, np.nan] - df = DataFrame( - {"col1": pd.array(data, dtype=dtype1), "col2": pd.array(data, dtype=dtype2)} - ) - result = df.groupby([1, 1]).agg("all", skipna=False) - - expected = DataFrame({"col1": exp_col1, "col2": exp_col2}, index=np.array([1])) - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize("bool_agg_func", ["any", "all"]) -@pytest.mark.parametrize("dtype", ["Int64", "Float64", "boolean"]) -def test_masked_bool_aggs_skipna(bool_agg_func, dtype, skipna, frame_or_series): - # GH#40585 - obj = frame_or_series([pd.NA, 1], dtype=dtype) - expected_res = True - if not skipna and bool_agg_func == "all": - expected_res = pd.NA - expected = frame_or_series([expected_res], index=np.array([1]), dtype="boolean") - - result = obj.groupby([1, 1]).agg(bool_agg_func, skipna=skipna) - tm.assert_equal(result, expected) - - -@pytest.mark.parametrize( - "bool_agg_func,data,expected_res", - [ - ("any", [pd.NA, np.nan], False), - ("any", [pd.NA, 1, np.nan], True), - ("all", [pd.NA, pd.NaT], True), - ("all", [pd.NA, False, pd.NaT], False), - ], -) -def test_object_type_missing_vals(bool_agg_func, data, expected_res, frame_or_series): - # GH#37501 - obj = frame_or_series(data, dtype=object) - result = obj.groupby([1] * len(data)).agg(bool_agg_func) - expected = frame_or_series([expected_res], index=np.array([1]), dtype="bool") - tm.assert_equal(result, expected) - - -@pytest.mark.parametrize("bool_agg_func", ["any", "all"]) -def test_object_NA_raises_with_skipna_false(bool_agg_func): - # GH#37501 - ser = Series([pd.NA], dtype=object) - with pytest.raises(TypeError, match="boolean value of NA is ambiguous"): - ser.groupby([1]).agg(bool_agg_func, skipna=False) - - -@pytest.mark.parametrize("bool_agg_func", ["any", "all"]) -def test_empty(frame_or_series, bool_agg_func): - # GH 45231 - kwargs = {"columns": ["a"]} if frame_or_series is DataFrame else {"name": "a"} - obj = frame_or_series(**kwargs, dtype=object) - result = getattr(obj.groupby(obj.index), bool_agg_func)() - expected = frame_or_series(**kwargs, dtype=bool) - tm.assert_equal(result, expected) diff --git a/pandas/tests/groupby/test_cumulative.py b/pandas/tests/groupby/test_cumulative.py new file mode 100644 index 0000000000000..eecb82cd5050b --- /dev/null +++ b/pandas/tests/groupby/test_cumulative.py @@ -0,0 +1,291 @@ +import numpy as np +import pytest + +from pandas.errors import UnsupportedFunctionCall +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +@pytest.fixture( + params=[np.int32, np.int64, np.float32, 
np.float64, "Int64", "Float64"], + ids=["np.int32", "np.int64", "np.float32", "np.float64", "Int64", "Float64"], +) +def dtypes_for_minmax(request): + """ + Fixture of dtypes with min and max values used for testing + cummin and cummax + """ + dtype = request.param + + np_type = dtype + if dtype == "Int64": + np_type = np.int64 + elif dtype == "Float64": + np_type = np.float64 + + min_val = ( + np.iinfo(np_type).min + if np.dtype(np_type).kind == "i" + else np.finfo(np_type).min + ) + max_val = ( + np.iinfo(np_type).max + if np.dtype(np_type).kind == "i" + else np.finfo(np_type).max + ) + + return (dtype, min_val, max_val) + + +def test_groupby_cumprod(): + # GH 4095 + df = DataFrame({"key": ["b"] * 10, "value": 2}) + + actual = df.groupby("key")["value"].cumprod() + expected = df.groupby("key", group_keys=False)["value"].apply(lambda x: x.cumprod()) + expected.name = "value" + tm.assert_series_equal(actual, expected) + + df = DataFrame({"key": ["b"] * 100, "value": 2}) + df["value"] = df["value"].astype(float) + actual = df.groupby("key")["value"].cumprod() + expected = df.groupby("key", group_keys=False)["value"].apply(lambda x: x.cumprod()) + expected.name = "value" + tm.assert_series_equal(actual, expected) + + +def test_groupby_cumprod_overflow(): + # GH#37493 if we overflow we return garbage consistent with numpy + df = DataFrame({"key": ["b"] * 4, "value": 100_000}) + actual = df.groupby("key")["value"].cumprod() + expected = Series( + [100_000, 10_000_000_000, 1_000_000_000_000_000, 7766279631452241920], + name="value", + ) + tm.assert_series_equal(actual, expected) + + numpy_result = df.groupby("key", group_keys=False)["value"].apply( + lambda x: x.cumprod() + ) + numpy_result.name = "value" + tm.assert_series_equal(actual, numpy_result) + + +def test_groupby_cumprod_nan_influences_other_columns(): + # GH#48064 + df = DataFrame( + { + "a": 1, + "b": [1, np.nan, 2], + "c": [1, 2, 3.0], + } + ) + result = df.groupby("a").cumprod(numeric_only=True, skipna=False) + expected = DataFrame({"b": [1, np.nan, np.nan], "c": [1, 2, 6.0]}) + tm.assert_frame_equal(result, expected) + + +def test_cummin(dtypes_for_minmax): + dtype = dtypes_for_minmax[0] + min_val = dtypes_for_minmax[1] + + # GH 15048 + base_df = DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [3, 4, 3, 2, 2, 3, 2, 1]}) + expected_mins = [3, 3, 3, 2, 2, 2, 2, 1] + + df = base_df.astype(dtype) + + expected = DataFrame({"B": expected_mins}).astype(dtype) + result = df.groupby("A").cummin() + tm.assert_frame_equal(result, expected) + result = df.groupby("A", group_keys=False).B.apply(lambda x: x.cummin()).to_frame() + tm.assert_frame_equal(result, expected) + + # Test w/ min value for dtype + df.loc[[2, 6], "B"] = min_val + df.loc[[1, 5], "B"] = min_val + 1 + expected.loc[[2, 3, 6, 7], "B"] = min_val + expected.loc[[1, 5], "B"] = min_val + 1 # should not be rounded to min_val + result = df.groupby("A").cummin() + tm.assert_frame_equal(result, expected, check_exact=True) + expected = ( + df.groupby("A", group_keys=False).B.apply(lambda x: x.cummin()).to_frame() + ) + tm.assert_frame_equal(result, expected, check_exact=True) + + # Test nan in some values + # Explicit cast to float to avoid implicit cast when setting nan + base_df = base_df.astype({"B": "float"}) + base_df.loc[[0, 2, 4, 6], "B"] = np.nan + expected = DataFrame({"B": [np.nan, 4, np.nan, 2, np.nan, 3, np.nan, 1]}) + result = base_df.groupby("A").cummin() + tm.assert_frame_equal(result, expected) + expected = ( + base_df.groupby("A", group_keys=False).B.apply(lambda x: 
x.cummin()).to_frame() + ) + tm.assert_frame_equal(result, expected) + + # GH 15561 + df = DataFrame({"a": [1], "b": pd.to_datetime(["2001"])}) + expected = Series(pd.to_datetime("2001"), index=[0], name="b") + + result = df.groupby("a")["b"].cummin() + tm.assert_series_equal(expected, result) + + # GH 15635 + df = DataFrame({"a": [1, 2, 1], "b": [1, 2, 2]}) + result = df.groupby("a").b.cummin() + expected = Series([1, 2, 1], name="b") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("method", ["cummin", "cummax"]) +@pytest.mark.parametrize("dtype", ["UInt64", "Int64", "Float64", "float", "boolean"]) +def test_cummin_max_all_nan_column(method, dtype): + base_df = DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [np.nan] * 8}) + base_df["B"] = base_df["B"].astype(dtype) + grouped = base_df.groupby("A") + + expected = DataFrame({"B": [np.nan] * 8}, dtype=dtype) + result = getattr(grouped, method)() + tm.assert_frame_equal(expected, result) + + result = getattr(grouped["B"], method)().to_frame() + tm.assert_frame_equal(expected, result) + + +def test_cummax(dtypes_for_minmax): + dtype = dtypes_for_minmax[0] + max_val = dtypes_for_minmax[2] + + # GH 15048 + base_df = DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [3, 4, 3, 2, 2, 3, 2, 1]}) + expected_maxs = [3, 4, 4, 4, 2, 3, 3, 3] + + df = base_df.astype(dtype) + + expected = DataFrame({"B": expected_maxs}).astype(dtype) + result = df.groupby("A").cummax() + tm.assert_frame_equal(result, expected) + result = df.groupby("A", group_keys=False).B.apply(lambda x: x.cummax()).to_frame() + tm.assert_frame_equal(result, expected) + + # Test w/ max value for dtype + df.loc[[2, 6], "B"] = max_val + expected.loc[[2, 3, 6, 7], "B"] = max_val + result = df.groupby("A").cummax() + tm.assert_frame_equal(result, expected) + expected = ( + df.groupby("A", group_keys=False).B.apply(lambda x: x.cummax()).to_frame() + ) + tm.assert_frame_equal(result, expected) + + # Test nan in some values + # Explicit cast to float to avoid implicit cast when setting nan + base_df = base_df.astype({"B": "float"}) + base_df.loc[[0, 2, 4, 6], "B"] = np.nan + expected = DataFrame({"B": [np.nan, 4, np.nan, 4, np.nan, 3, np.nan, 3]}) + result = base_df.groupby("A").cummax() + tm.assert_frame_equal(result, expected) + expected = ( + base_df.groupby("A", group_keys=False).B.apply(lambda x: x.cummax()).to_frame() + ) + tm.assert_frame_equal(result, expected) + + # GH 15561 + df = DataFrame({"a": [1], "b": pd.to_datetime(["2001"])}) + expected = Series(pd.to_datetime("2001"), index=[0], name="b") + + result = df.groupby("a")["b"].cummax() + tm.assert_series_equal(expected, result) + + # GH 15635 + df = DataFrame({"a": [1, 2, 1], "b": [2, 1, 1]}) + result = df.groupby("a").b.cummax() + expected = Series([2, 1, 2], name="b") + tm.assert_series_equal(result, expected) + + +def test_cummax_i8_at_implementation_bound(): + # the minimum value used to be treated as NPY_NAT+1 instead of NPY_NAT + # for int64 dtype GH#46382 + ser = Series([pd.NaT._value + n for n in range(5)]) + df = DataFrame({"A": 1, "B": ser, "C": ser.view("M8[ns]")}) + gb = df.groupby("A") + + res = gb.cummax() + exp = df[["B", "C"]] + tm.assert_frame_equal(res, exp) + + +@pytest.mark.parametrize("method", ["cummin", "cummax"]) +@pytest.mark.parametrize("dtype", ["float", "Int64", "Float64"]) +@pytest.mark.parametrize( + "groups,expected_data", + [ + ([1, 1, 1], [1, None, None]), + ([1, 2, 3], [1, None, 2]), + ([1, 3, 3], [1, None, None]), + ], +) +def test_cummin_max_skipna(method, dtype, groups, 
expected_data): + # GH-34047 + df = DataFrame({"a": Series([1, None, 2], dtype=dtype)}) + orig = df.copy() + gb = df.groupby(groups)["a"] + + result = getattr(gb, method)(skipna=False) + expected = Series(expected_data, dtype=dtype, name="a") + + # check we didn't accidentally alter df + tm.assert_frame_equal(df, orig) + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("method", ["cummin", "cummax"]) +def test_cummin_max_skipna_multiple_cols(method): + # Ensure missing value in "a" doesn't cause "b" to be nan-filled + df = DataFrame({"a": [np.nan, 2.0, 2.0], "b": [2.0, 2.0, 2.0]}) + gb = df.groupby([1, 1, 1])[["a", "b"]] + + result = getattr(gb, method)(skipna=False) + expected = DataFrame({"a": [np.nan, np.nan, np.nan], "b": [2.0, 2.0, 2.0]}) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("func", ["cumprod", "cumsum"]) +def test_numpy_compat(func): + # see gh-12811 + df = DataFrame({"A": [1, 2, 1], "B": [1, 2, 3]}) + g = df.groupby("A") + + msg = "numpy operations are not valid with groupby" + + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(g, func)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(g, func)(foo=1) + + +@td.skip_if_32bit +@pytest.mark.parametrize("method", ["cummin", "cummax"]) +@pytest.mark.parametrize( + "dtype,val", [("UInt64", np.iinfo("uint64").max), ("Int64", 2**53 + 1)] +) +def test_nullable_int_not_cast_as_float(method, dtype, val): + data = [val, pd.NA] + df = DataFrame({"grp": [1, 1], "b": data}, dtype=dtype) + grouped = df.groupby("grp") + + result = grouped.transform(method) + expected = DataFrame({"b": data}, dtype=dtype) + + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 08372541988d0..4876267c72f12 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1,12 +1,10 @@ import builtins -from io import StringIO import re import numpy as np import pytest from pandas._libs import lib -from pandas.errors import UnsupportedFunctionCall import pandas as pd from pandas import ( @@ -22,37 +20,6 @@ from pandas.util import _test_decorators as td -@pytest.fixture( - params=[np.int32, np.int64, np.float32, np.float64, "Int64", "Float64"], - ids=["np.int32", "np.int64", "np.float32", "np.float64", "Int64", "Float64"], -) -def dtypes_for_minmax(request): - """ - Fixture of dtypes with min and max values used for testing - cummin and cummax - """ - dtype = request.param - - np_type = dtype - if dtype == "Int64": - np_type = np.int64 - elif dtype == "Float64": - np_type = np.float64 - - min_val = ( - np.iinfo(np_type).min - if np.dtype(np_type).kind == "i" - else np.finfo(np_type).min - ) - max_val = ( - np.iinfo(np_type).max - if np.dtype(np_type).kind == "i" - else np.finfo(np_type).max - ) - - return (dtype, min_val, max_val) - - def test_intercept_builtin_sum(): s = Series([1.0, 2.0, np.nan, 3.0]) grouped = s.groupby([0, 1, 2, 2]) @@ -372,39 +339,6 @@ def test_cython_api2(): tm.assert_frame_equal(result, expected) -def test_cython_median(): - arr = np.random.default_rng(2).standard_normal(1000) - arr[::2] = np.nan - df = DataFrame(arr) - - labels = np.random.default_rng(2).integers(0, 50, size=1000).astype(float) - labels[::17] = np.nan - - result = df.groupby(labels).median() - msg = "using DataFrameGroupBy.median" - with tm.assert_produces_warning(FutureWarning, match=msg): - exp = df.groupby(labels).agg(np.nanmedian) - 
tm.assert_frame_equal(result, exp) - - df = DataFrame(np.random.default_rng(2).standard_normal((1000, 5))) - msg = "using DataFrameGroupBy.median" - with tm.assert_produces_warning(FutureWarning, match=msg): - rs = df.groupby(labels).agg(np.median) - xp = df.groupby(labels).median() - tm.assert_frame_equal(rs, xp) - - -def test_median_empty_bins(observed): - df = DataFrame(np.random.default_rng(2).integers(0, 44, 500)) - - grps = range(0, 55, 5) - bins = pd.cut(df[0], grps) - - result = df.groupby(bins, observed=observed).median() - expected = df.groupby(bins, observed=observed).agg(lambda x: x.median()) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( "dtype", ["int8", "int16", "int32", "int64", "float32", "float64", "uint64"] ) @@ -478,105 +412,6 @@ def test_groupby_non_arithmetic_agg_int_like_precision(i): assert res.iloc[0].b == data["expected"] -@pytest.mark.parametrize( - "func, values", - [ - ("idxmin", {"c_int": [0, 2], "c_float": [1, 3], "c_date": [1, 2]}), - ("idxmax", {"c_int": [1, 3], "c_float": [0, 2], "c_date": [0, 3]}), - ], -) -@pytest.mark.parametrize("numeric_only", [True, False]) -def test_idxmin_idxmax_returns_int_types(func, values, numeric_only): - # GH 25444 - df = DataFrame( - { - "name": ["A", "A", "B", "B"], - "c_int": [1, 2, 3, 4], - "c_float": [4.02, 3.03, 2.04, 1.05], - "c_date": ["2019", "2018", "2016", "2017"], - } - ) - df["c_date"] = pd.to_datetime(df["c_date"]) - df["c_date_tz"] = df["c_date"].dt.tz_localize("US/Pacific") - df["c_timedelta"] = df["c_date"] - df["c_date"].iloc[0] - df["c_period"] = df["c_date"].dt.to_period("W") - df["c_Integer"] = df["c_int"].astype("Int64") - df["c_Floating"] = df["c_float"].astype("Float64") - - result = getattr(df.groupby("name"), func)(numeric_only=numeric_only) - - expected = DataFrame(values, index=Index(["A", "B"], name="name")) - if numeric_only: - expected = expected.drop(columns=["c_date"]) - else: - expected["c_date_tz"] = expected["c_date"] - expected["c_timedelta"] = expected["c_date"] - expected["c_period"] = expected["c_date"] - expected["c_Integer"] = expected["c_int"] - expected["c_Floating"] = expected["c_float"] - - tm.assert_frame_equal(result, expected) - - -def test_idxmin_idxmax_axis1(): - df = DataFrame( - np.random.default_rng(2).standard_normal((10, 4)), columns=["A", "B", "C", "D"] - ) - df["A"] = [1, 2, 3, 1, 2, 3, 1, 2, 3, 4] - - gb = df.groupby("A") - - warn_msg = "DataFrameGroupBy.idxmax with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=warn_msg): - res = gb.idxmax(axis=1) - - alt = df.iloc[:, 1:].idxmax(axis=1) - indexer = res.index.get_level_values(1) - - tm.assert_series_equal(alt[indexer], res.droplevel("A")) - - df["E"] = date_range("2016-01-01", periods=10) - gb2 = df.groupby("A") - - msg = "'>' not supported between instances of 'Timestamp' and 'float'" - with pytest.raises(TypeError, match=msg): - with tm.assert_produces_warning(FutureWarning, match=warn_msg): - gb2.idxmax(axis=1) - - -@pytest.mark.parametrize( - "func, values, expected_values, warn", - [ - ("idxmin", [0, 1, 2], [0, 2], None), - ("idxmax", [0, 1, 2], [1, 2], None), - ("idxmin", [0, np.nan, 2], [np.nan, 2], FutureWarning), - ("idxmax", [0, np.nan, 2], [np.nan, 2], FutureWarning), - ("idxmin", [1, 0, np.nan], [1, np.nan], FutureWarning), - ("idxmax", [1, 0, np.nan], [0, np.nan], FutureWarning), - ], -) -@pytest.mark.parametrize("test_series", [True, False]) -def test_idxmin_idxmax_skipna_false(func, values, expected_values, warn, test_series): - # GH#54234 - df = 
DataFrame( - { - "a": [1, 1, 2], - "b": values, - } - ) - gb = df.groupby("a") - index = Index([1, 2], name="a") - expected = DataFrame({"b": expected_values}, index=index) - if test_series: - gb = gb["b"] - expected = expected["b"] - klass = "Series" if test_series else "DataFrame" - msg = f"The behavior of {klass}GroupBy.{func} with all-NA values" - with tm.assert_produces_warning(warn, match=msg): - result = getattr(gb, func)(skipna=False) - tm.assert_equal(result, expected) - - @pytest.mark.parametrize("numeric_only", [True, False, None]) def test_axis1_numeric_only(request, groupby_func, numeric_only): if groupby_func in ("idxmax", "idxmin"): @@ -658,54 +493,6 @@ def test_axis1_numeric_only(request, groupby_func, numeric_only): tm.assert_equal(result, expected) -def test_groupby_cumprod(): - # GH 4095 - df = DataFrame({"key": ["b"] * 10, "value": 2}) - - actual = df.groupby("key")["value"].cumprod() - expected = df.groupby("key", group_keys=False)["value"].apply(lambda x: x.cumprod()) - expected.name = "value" - tm.assert_series_equal(actual, expected) - - df = DataFrame({"key": ["b"] * 100, "value": 2}) - df["value"] = df["value"].astype(float) - actual = df.groupby("key")["value"].cumprod() - expected = df.groupby("key", group_keys=False)["value"].apply(lambda x: x.cumprod()) - expected.name = "value" - tm.assert_series_equal(actual, expected) - - -def test_groupby_cumprod_overflow(): - # GH#37493 if we overflow we return garbage consistent with numpy - df = DataFrame({"key": ["b"] * 4, "value": 100_000}) - actual = df.groupby("key")["value"].cumprod() - expected = Series( - [100_000, 10_000_000_000, 1_000_000_000_000_000, 7766279631452241920], - name="value", - ) - tm.assert_series_equal(actual, expected) - - numpy_result = df.groupby("key", group_keys=False)["value"].apply( - lambda x: x.cumprod() - ) - numpy_result.name = "value" - tm.assert_series_equal(actual, numpy_result) - - -def test_groupby_cumprod_nan_influences_other_columns(): - # GH#48064 - df = DataFrame( - { - "a": 1, - "b": [1, np.nan, 2], - "c": [1, 2, 3.0], - } - ) - result = df.groupby("a").cumprod(numeric_only=True, skipna=False) - expected = DataFrame({"b": [1, np.nan, np.nan], "c": [1, 2, 6.0]}) - tm.assert_frame_equal(result, expected) - - def scipy_sem(*args, **kwargs): from scipy.stats import sem @@ -741,627 +528,12 @@ def test_ops_general(op, targop): tm.assert_frame_equal(result, expected) -def test_max_nan_bug(): - raw = """,Date,app,File --04-23,2013-04-23 00:00:00,,log080001.log --05-06,2013-05-06 00:00:00,,log.log --05-07,2013-05-07 00:00:00,OE,xlsx""" - - with tm.assert_produces_warning(UserWarning, match="Could not infer format"): - df = pd.read_csv(StringIO(raw), parse_dates=[0]) - gb = df.groupby("Date") - r = gb[["File"]].max() - e = gb["File"].max().to_frame() - tm.assert_frame_equal(r, e) - assert not r["File"].isna().any() - - -def test_nlargest(): - a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10]) - b = Series(list("a" * 5 + "b" * 5)) - gb = a.groupby(b) - r = gb.nlargest(3) - e = Series( - [7, 5, 3, 10, 9, 6], - index=MultiIndex.from_arrays([list("aaabbb"), [3, 2, 1, 9, 5, 8]]), - ) - tm.assert_series_equal(r, e) - - a = Series([1, 1, 3, 2, 0, 3, 3, 2, 1, 0]) - gb = a.groupby(b) - e = Series( - [3, 2, 1, 3, 3, 2], - index=MultiIndex.from_arrays([list("aaabbb"), [2, 3, 1, 6, 5, 7]]), - ) - tm.assert_series_equal(gb.nlargest(3, keep="last"), e) - - -def test_nlargest_mi_grouper(): - # see gh-21411 - npr = np.random.default_rng(2) - - dts = date_range("20180101", periods=10) - iterables = [dts, 
["one", "two"]] - - idx = MultiIndex.from_product(iterables, names=["first", "second"]) - s = Series(npr.standard_normal(20), index=idx) - - result = s.groupby("first").nlargest(1) - - exp_idx = MultiIndex.from_tuples( - [ - (dts[0], dts[0], "one"), - (dts[1], dts[1], "one"), - (dts[2], dts[2], "one"), - (dts[3], dts[3], "two"), - (dts[4], dts[4], "one"), - (dts[5], dts[5], "one"), - (dts[6], dts[6], "one"), - (dts[7], dts[7], "one"), - (dts[8], dts[8], "one"), - (dts[9], dts[9], "one"), - ], - names=["first", "first", "second"], - ) - - exp_values = [ - 0.18905338179353307, - -0.41306354339189344, - 1.799707382720902, - 0.7738065867276614, - 0.28121066979764925, - 0.9775674511260357, - -0.3288239040579627, - 0.45495807124085547, - 0.5452887139646817, - 0.12682784711186987, - ] - - expected = Series(exp_values, index=exp_idx) - tm.assert_series_equal(result, expected, check_exact=False, rtol=1e-3) - - -def test_nsmallest(): - a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10]) - b = Series(list("a" * 5 + "b" * 5)) - gb = a.groupby(b) - r = gb.nsmallest(3) - e = Series( - [1, 2, 3, 0, 4, 6], - index=MultiIndex.from_arrays([list("aaabbb"), [0, 4, 1, 6, 7, 8]]), - ) - tm.assert_series_equal(r, e) - - a = Series([1, 1, 3, 2, 0, 3, 3, 2, 1, 0]) - gb = a.groupby(b) - e = Series( - [0, 1, 1, 0, 1, 2], - index=MultiIndex.from_arrays([list("aaabbb"), [4, 1, 0, 9, 8, 7]]), - ) - tm.assert_series_equal(gb.nsmallest(3, keep="last"), e) - - -@pytest.mark.parametrize( - "data, groups", - [([0, 1, 2, 3], [0, 0, 1, 1]), ([0], [0])], -) -@pytest.mark.parametrize("dtype", [None, *tm.ALL_INT_NUMPY_DTYPES]) -@pytest.mark.parametrize("method", ["nlargest", "nsmallest"]) -def test_nlargest_and_smallest_noop(data, groups, dtype, method): - # GH 15272, GH 16345, GH 29129 - # Test nlargest/smallest when it results in a noop, - # i.e. 
input is sorted and group size <= n - if dtype is not None: - data = np.array(data, dtype=dtype) - if method == "nlargest": - data = list(reversed(data)) - ser = Series(data, name="a") - result = getattr(ser.groupby(groups), method)(n=2) - expidx = np.array(groups, dtype=int) if isinstance(groups, list) else groups - expected = Series(data, index=MultiIndex.from_arrays([expidx, ser.index]), name="a") - tm.assert_series_equal(result, expected) - - -@pytest.mark.parametrize("func", ["cumprod", "cumsum"]) -def test_numpy_compat(func): - # see gh-12811 - df = DataFrame({"A": [1, 2, 1], "B": [1, 2, 3]}) - g = df.groupby("A") - - msg = "numpy operations are not valid with groupby" - - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(g, func)(1, 2, 3) - with pytest.raises(UnsupportedFunctionCall, match=msg): - getattr(g, func)(foo=1) - - -def test_cummin(dtypes_for_minmax): - dtype = dtypes_for_minmax[0] - min_val = dtypes_for_minmax[1] - - # GH 15048 - base_df = DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [3, 4, 3, 2, 2, 3, 2, 1]}) - expected_mins = [3, 3, 3, 2, 2, 2, 2, 1] - - df = base_df.astype(dtype) - - expected = DataFrame({"B": expected_mins}).astype(dtype) - result = df.groupby("A").cummin() - tm.assert_frame_equal(result, expected) - result = df.groupby("A", group_keys=False).B.apply(lambda x: x.cummin()).to_frame() - tm.assert_frame_equal(result, expected) - - # Test w/ min value for dtype - df.loc[[2, 6], "B"] = min_val - df.loc[[1, 5], "B"] = min_val + 1 - expected.loc[[2, 3, 6, 7], "B"] = min_val - expected.loc[[1, 5], "B"] = min_val + 1 # should not be rounded to min_val - result = df.groupby("A").cummin() - tm.assert_frame_equal(result, expected, check_exact=True) - expected = ( - df.groupby("A", group_keys=False).B.apply(lambda x: x.cummin()).to_frame() - ) - tm.assert_frame_equal(result, expected, check_exact=True) - - # Test nan in some values - # Explicit cast to float to avoid implicit cast when setting nan - base_df = base_df.astype({"B": "float"}) - base_df.loc[[0, 2, 4, 6], "B"] = np.nan - expected = DataFrame({"B": [np.nan, 4, np.nan, 2, np.nan, 3, np.nan, 1]}) - result = base_df.groupby("A").cummin() - tm.assert_frame_equal(result, expected) - expected = ( - base_df.groupby("A", group_keys=False).B.apply(lambda x: x.cummin()).to_frame() - ) - tm.assert_frame_equal(result, expected) - - # GH 15561 - df = DataFrame({"a": [1], "b": pd.to_datetime(["2001"])}) - expected = Series(pd.to_datetime("2001"), index=[0], name="b") - - result = df.groupby("a")["b"].cummin() - tm.assert_series_equal(expected, result) - - # GH 15635 - df = DataFrame({"a": [1, 2, 1], "b": [1, 2, 2]}) - result = df.groupby("a").b.cummin() - expected = Series([1, 2, 1], name="b") - tm.assert_series_equal(result, expected) - - -@pytest.mark.parametrize("method", ["cummin", "cummax"]) -@pytest.mark.parametrize("dtype", ["UInt64", "Int64", "Float64", "float", "boolean"]) -def test_cummin_max_all_nan_column(method, dtype): - base_df = DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [np.nan] * 8}) - base_df["B"] = base_df["B"].astype(dtype) - grouped = base_df.groupby("A") - - expected = DataFrame({"B": [np.nan] * 8}, dtype=dtype) - result = getattr(grouped, method)() - tm.assert_frame_equal(expected, result) - - result = getattr(grouped["B"], method)().to_frame() - tm.assert_frame_equal(expected, result) - - -def test_cummax(dtypes_for_minmax): - dtype = dtypes_for_minmax[0] - max_val = dtypes_for_minmax[2] - - # GH 15048 - base_df = DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [3, 4, 3, 
2, 2, 3, 2, 1]}) - expected_maxs = [3, 4, 4, 4, 2, 3, 3, 3] - - df = base_df.astype(dtype) - - expected = DataFrame({"B": expected_maxs}).astype(dtype) - result = df.groupby("A").cummax() - tm.assert_frame_equal(result, expected) - result = df.groupby("A", group_keys=False).B.apply(lambda x: x.cummax()).to_frame() - tm.assert_frame_equal(result, expected) - - # Test w/ max value for dtype - df.loc[[2, 6], "B"] = max_val - expected.loc[[2, 3, 6, 7], "B"] = max_val - result = df.groupby("A").cummax() - tm.assert_frame_equal(result, expected) - expected = ( - df.groupby("A", group_keys=False).B.apply(lambda x: x.cummax()).to_frame() - ) - tm.assert_frame_equal(result, expected) - - # Test nan in some values - # Explicit cast to float to avoid implicit cast when setting nan - base_df = base_df.astype({"B": "float"}) - base_df.loc[[0, 2, 4, 6], "B"] = np.nan - expected = DataFrame({"B": [np.nan, 4, np.nan, 4, np.nan, 3, np.nan, 3]}) - result = base_df.groupby("A").cummax() - tm.assert_frame_equal(result, expected) - expected = ( - base_df.groupby("A", group_keys=False).B.apply(lambda x: x.cummax()).to_frame() - ) - tm.assert_frame_equal(result, expected) - - # GH 15561 - df = DataFrame({"a": [1], "b": pd.to_datetime(["2001"])}) - expected = Series(pd.to_datetime("2001"), index=[0], name="b") - - result = df.groupby("a")["b"].cummax() - tm.assert_series_equal(expected, result) - - # GH 15635 - df = DataFrame({"a": [1, 2, 1], "b": [2, 1, 1]}) - result = df.groupby("a").b.cummax() - expected = Series([2, 1, 2], name="b") - tm.assert_series_equal(result, expected) - - -def test_cummax_i8_at_implementation_bound(): - # the minimum value used to be treated as NPY_NAT+1 instead of NPY_NAT - # for int64 dtype GH#46382 - ser = Series([pd.NaT._value + n for n in range(5)]) - df = DataFrame({"A": 1, "B": ser, "C": ser.view("M8[ns]")}) - gb = df.groupby("A") - - res = gb.cummax() - exp = df[["B", "C"]] - tm.assert_frame_equal(res, exp) - - -@pytest.mark.parametrize("method", ["cummin", "cummax"]) -@pytest.mark.parametrize("dtype", ["float", "Int64", "Float64"]) -@pytest.mark.parametrize( - "groups,expected_data", - [ - ([1, 1, 1], [1, None, None]), - ([1, 2, 3], [1, None, 2]), - ([1, 3, 3], [1, None, None]), - ], -) -def test_cummin_max_skipna(method, dtype, groups, expected_data): - # GH-34047 - df = DataFrame({"a": Series([1, None, 2], dtype=dtype)}) - orig = df.copy() - gb = df.groupby(groups)["a"] - - result = getattr(gb, method)(skipna=False) - expected = Series(expected_data, dtype=dtype, name="a") - - # check we didn't accidentally alter df - tm.assert_frame_equal(df, orig) - - tm.assert_series_equal(result, expected) - - -@pytest.mark.parametrize("method", ["cummin", "cummax"]) -def test_cummin_max_skipna_multiple_cols(method): - # Ensure missing value in "a" doesn't cause "b" to be nan-filled - df = DataFrame({"a": [np.nan, 2.0, 2.0], "b": [2.0, 2.0, 2.0]}) - gb = df.groupby([1, 1, 1])[["a", "b"]] - - result = getattr(gb, method)(skipna=False) - expected = DataFrame({"a": [np.nan, np.nan, np.nan], "b": [2.0, 2.0, 2.0]}) - - tm.assert_frame_equal(result, expected) - - -@td.skip_if_32bit -@pytest.mark.parametrize("method", ["cummin", "cummax"]) -@pytest.mark.parametrize( - "dtype,val", [("UInt64", np.iinfo("uint64").max), ("Int64", 2**53 + 1)] -) -def test_nullable_int_not_cast_as_float(method, dtype, val): - data = [val, pd.NA] - df = DataFrame({"grp": [1, 1], "b": data}, dtype=dtype) - grouped = df.groupby("grp") - - result = grouped.transform(method) - expected = DataFrame({"b": data}, 
dtype=dtype)
-
-    tm.assert_frame_equal(result, expected)
-
-
-@pytest.mark.parametrize(
-    "in_vals, out_vals",
-    [
-        # Basics: strictly increasing (T), strictly decreasing (F),
-        # abs val increasing (F), non-strictly increasing (T)
-        ([1, 2, 5, 3, 2, 0, 4, 5, -6, 1, 1], [True, False, False, True]),
-        # Test with inf vals
-        (
-            [1, 2.1, np.inf, 3, 2, np.inf, -np.inf, 5, 11, 1, -np.inf],
-            [True, False, True, False],
-        ),
-        # Test with nan vals; should always be False
-        (
-            [1, 2, np.nan, 3, 2, np.nan, np.nan, 5, -np.inf, 1, np.nan],
-            [False, False, False, False],
-        ),
-    ],
-)
-def test_is_monotonic_increasing(in_vals, out_vals):
-    # GH 17015
-    source_dict = {
-        "A": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11"],
-        "B": ["a", "a", "a", "b", "b", "b", "c", "c", "c", "d", "d"],
-        "C": in_vals,
-    }
-    df = DataFrame(source_dict)
-    result = df.groupby("B").C.is_monotonic_increasing
-    index = Index(list("abcd"), name="B")
-    expected = Series(index=index, data=out_vals, name="C")
-    tm.assert_series_equal(result, expected)
-
-    # Also check result equal to manually taking x.is_monotonic_increasing.
-    expected = df.groupby(["B"]).C.apply(lambda x: x.is_monotonic_increasing)
-    tm.assert_series_equal(result, expected)
-
-
-@pytest.mark.parametrize(
-    "in_vals, out_vals",
-    [
-        # Basics: strictly decreasing (T), strictly increasing (F),
-        # abs val decreasing (F), non-strictly increasing (T)
-        ([10, 9, 7, 3, 4, 5, -3, 2, 0, 1, 1], [True, False, False, True]),
-        # Test with inf vals
-        (
-            [np.inf, 1, -np.inf, np.inf, 2, -3, -np.inf, 5, -3, -np.inf, -np.inf],
-            [True, True, False, True],
-        ),
-        # Test with nan vals; should always be False
-        (
-            [1, 2, np.nan, 3, 2, np.nan, np.nan, 5, -np.inf, 1, np.nan],
-            [False, False, False, False],
-        ),
-    ],
-)
-def test_is_monotonic_decreasing(in_vals, out_vals):
-    # GH 17015
-    source_dict = {
-        "A": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11"],
-        "B": ["a", "a", "a", "b", "b", "b", "c", "c", "c", "d", "d"],
-        "C": in_vals,
-    }
-
-    df = DataFrame(source_dict)
-    result = df.groupby("B").C.is_monotonic_decreasing
-    index = Index(list("abcd"), name="B")
-    expected = Series(index=index, data=out_vals, name="C")
-    tm.assert_series_equal(result, expected)
-
-
-# describe
-# --------------------------------
-
-
-def test_apply_describe_bug(mframe):
-    grouped = mframe.groupby(level="first")
-    grouped.describe()  # it works!
- - -def test_series_describe_multikey(): - ts = tm.makeTimeSeries() - grouped = ts.groupby([lambda x: x.year, lambda x: x.month]) - result = grouped.describe() - tm.assert_series_equal(result["mean"], grouped.mean(), check_names=False) - tm.assert_series_equal(result["std"], grouped.std(), check_names=False) - tm.assert_series_equal(result["min"], grouped.min(), check_names=False) - - -def test_series_describe_single(): - ts = tm.makeTimeSeries() - grouped = ts.groupby(lambda x: x.month) - result = grouped.apply(lambda x: x.describe()) - expected = grouped.describe().stack(future_stack=True) - tm.assert_series_equal(result, expected) - - -@pytest.mark.parametrize("keys", ["key1", ["key1", "key2"]]) -def test_series_describe_as_index(as_index, keys): - # GH#49256 - df = DataFrame( - { - "key1": ["one", "two", "two", "three", "two"], - "key2": ["one", "two", "two", "three", "two"], - "foo2": [1, 2, 4, 4, 6], - } - ) - gb = df.groupby(keys, as_index=as_index)["foo2"] - result = gb.describe() - expected = DataFrame( - { - "key1": ["one", "three", "two"], - "count": [1.0, 1.0, 3.0], - "mean": [1.0, 4.0, 4.0], - "std": [np.nan, np.nan, 2.0], - "min": [1.0, 4.0, 2.0], - "25%": [1.0, 4.0, 3.0], - "50%": [1.0, 4.0, 4.0], - "75%": [1.0, 4.0, 5.0], - "max": [1.0, 4.0, 6.0], - } - ) - if len(keys) == 2: - expected.insert(1, "key2", expected["key1"]) - if as_index: - expected = expected.set_index(keys) - tm.assert_frame_equal(result, expected) - - def test_series_index_name(df): grouped = df.loc[:, ["C"]].groupby(df["A"]) result = grouped.agg(lambda x: x.mean()) assert result.index.name == "A" -def test_frame_describe_multikey(tsframe): - grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month]) - result = grouped.describe() - desc_groups = [] - for col in tsframe: - group = grouped[col].describe() - # GH 17464 - Remove duplicate MultiIndex levels - group_col = MultiIndex( - levels=[[col], group.columns], - codes=[[0] * len(group.columns), range(len(group.columns))], - ) - group = DataFrame(group.values, columns=group_col, index=group.index) - desc_groups.append(group) - expected = pd.concat(desc_groups, axis=1) - tm.assert_frame_equal(result, expected) - - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - groupedT = tsframe.groupby({"A": 0, "B": 0, "C": 1, "D": 1}, axis=1) - result = groupedT.describe() - expected = tsframe.describe().T - # reverting the change from https://github.com/pandas-dev/pandas/pull/35441/ - expected.index = MultiIndex( - levels=[[0, 1], expected.index], - codes=[[0, 0, 1, 1], range(len(expected.index))], - ) - tm.assert_frame_equal(result, expected) - - -def test_frame_describe_tupleindex(): - # GH 14848 - regression from 0.19.0 to 0.19.1 - df1 = DataFrame( - { - "x": [1, 2, 3, 4, 5] * 3, - "y": [10, 20, 30, 40, 50] * 3, - "z": [100, 200, 300, 400, 500] * 3, - } - ) - df1["k"] = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] * 5 - df2 = df1.rename(columns={"k": "key"}) - msg = "Names should be list-like for a MultiIndex" - with pytest.raises(ValueError, match=msg): - df1.groupby("k").describe() - with pytest.raises(ValueError, match=msg): - df2.groupby("key").describe() - - -def test_frame_describe_unstacked_format(): - # GH 4792 - prices = { - Timestamp("2011-01-06 10:59:05", tz=None): 24990, - Timestamp("2011-01-06 12:43:33", tz=None): 25499, - Timestamp("2011-01-06 12:54:09", tz=None): 25499, - } - volumes = { - Timestamp("2011-01-06 10:59:05", tz=None): 1500000000, - Timestamp("2011-01-06 12:43:33", tz=None): 
5000000000, - Timestamp("2011-01-06 12:54:09", tz=None): 100000000, - } - df = DataFrame({"PRICE": prices, "VOLUME": volumes}) - result = df.groupby("PRICE").VOLUME.describe() - data = [ - df[df.PRICE == 24990].VOLUME.describe().values.tolist(), - df[df.PRICE == 25499].VOLUME.describe().values.tolist(), - ] - expected = DataFrame( - data, - index=Index([24990, 25499], name="PRICE"), - columns=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], - ) - tm.assert_frame_equal(result, expected) - - -@pytest.mark.filterwarnings( - "ignore:" - "indexing past lexsort depth may impact performance:" - "pandas.errors.PerformanceWarning" -) -@pytest.mark.parametrize("as_index", [True, False]) -@pytest.mark.parametrize("keys", [["a1"], ["a1", "a2"]]) -def test_describe_with_duplicate_output_column_names(as_index, keys): - # GH 35314 - df = DataFrame( - { - "a1": [99, 99, 99, 88, 88, 88], - "a2": [99, 99, 99, 88, 88, 88], - "b": [1, 2, 3, 4, 5, 6], - "c": [10, 20, 30, 40, 50, 60], - }, - columns=["a1", "a2", "b", "b"], - copy=False, - ) - if keys == ["a1"]: - df = df.drop(columns="a2") - - expected = ( - DataFrame.from_records( - [ - ("b", "count", 3.0, 3.0), - ("b", "mean", 5.0, 2.0), - ("b", "std", 1.0, 1.0), - ("b", "min", 4.0, 1.0), - ("b", "25%", 4.5, 1.5), - ("b", "50%", 5.0, 2.0), - ("b", "75%", 5.5, 2.5), - ("b", "max", 6.0, 3.0), - ("b", "count", 3.0, 3.0), - ("b", "mean", 5.0, 2.0), - ("b", "std", 1.0, 1.0), - ("b", "min", 4.0, 1.0), - ("b", "25%", 4.5, 1.5), - ("b", "50%", 5.0, 2.0), - ("b", "75%", 5.5, 2.5), - ("b", "max", 6.0, 3.0), - ], - ) - .set_index([0, 1]) - .T - ) - expected.columns.names = [None, None] - if len(keys) == 2: - expected.index = MultiIndex( - levels=[[88, 99], [88, 99]], codes=[[0, 1], [0, 1]], names=["a1", "a2"] - ) - else: - expected.index = Index([88, 99], name="a1") - - if not as_index: - expected = expected.reset_index() - - result = df.groupby(keys, as_index=as_index).describe() - - tm.assert_frame_equal(result, expected) - - -def test_describe_duplicate_columns(): - # GH#50806 - df = DataFrame([[0, 1, 2, 3]]) - df.columns = [0, 1, 2, 0] - gb = df.groupby(df[1]) - result = gb.describe(percentiles=[]) - - columns = ["count", "mean", "std", "min", "50%", "max"] - frames = [ - DataFrame([[1.0, val, np.nan, val, val, val]], index=[1], columns=columns) - for val in (0.0, 2.0, 3.0) - ] - expected = pd.concat(frames, axis=1) - expected.columns = MultiIndex( - levels=[[0, 2], columns], - codes=[6 * [0] + 6 * [1] + 6 * [0], 3 * list(range(6))], - ) - expected.index.names = [1] - tm.assert_frame_equal(result, expected) - - -def test_groupby_mean_no_overflow(): - # Regression test for (#22487) - df = DataFrame( - { - "user": ["A", "A", "A", "A", "A"], - "connections": [4970, 4749, 4719, 4704, 18446744073699999744], - } - ) - assert df.groupby("user")["connections"].mean()["A"] == 3689348814740003840 - - @pytest.mark.parametrize( "values", [ @@ -1393,78 +565,6 @@ def test_apply_to_nullable_integer_returns_float(values, function): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("min_count", [0, 10]) -def test_groupby_sum_mincount_boolean(min_count): - b = True - a = False - na = np.nan - dfg = pd.array([b, b, na, na, a, a, b], dtype="boolean") - - df = DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": dfg}) - result = df.groupby("A").sum(min_count=min_count) - if min_count == 0: - expected = DataFrame( - {"B": pd.array([3, 0, 0], dtype="Int64")}, - index=Index([1, 2, 3], name="A"), - ) - tm.assert_frame_equal(result, expected) - else: - expected = 
DataFrame( - {"B": pd.array([pd.NA] * 3, dtype="Int64")}, - index=Index([1, 2, 3], name="A"), - ) - tm.assert_frame_equal(result, expected) - - -def test_groupby_sum_below_mincount_nullable_integer(): - # https://github.com/pandas-dev/pandas/issues/32861 - df = DataFrame({"a": [0, 1, 2], "b": [0, 1, 2], "c": [0, 1, 2]}, dtype="Int64") - grouped = df.groupby("a") - idx = Index([0, 1, 2], name="a", dtype="Int64") - - result = grouped["b"].sum(min_count=2) - expected = Series([pd.NA] * 3, dtype="Int64", index=idx, name="b") - tm.assert_series_equal(result, expected) - - result = grouped.sum(min_count=2) - expected = DataFrame({"b": [pd.NA] * 3, "c": [pd.NA] * 3}, dtype="Int64", index=idx) - tm.assert_frame_equal(result, expected) - - -def test_mean_on_timedelta(): - # GH 17382 - df = DataFrame({"time": pd.to_timedelta(range(10)), "cat": ["A", "B"] * 5}) - result = df.groupby("cat")["time"].mean() - expected = Series( - pd.to_timedelta([4, 5]), name="time", index=Index(["A", "B"], name="cat") - ) - tm.assert_series_equal(result, expected) - - -def test_groupby_sum_timedelta_with_nat(): - # GH#42659 - df = DataFrame( - { - "a": [1, 1, 2, 2], - "b": [pd.Timedelta("1d"), pd.Timedelta("2d"), pd.Timedelta("3d"), pd.NaT], - } - ) - td3 = pd.Timedelta(days=3) - - gb = df.groupby("a") - - res = gb.sum() - expected = DataFrame({"b": [td3, td3]}, index=Index([1, 2], name="a")) - tm.assert_frame_equal(res, expected) - - res = gb["b"].sum() - tm.assert_series_equal(res, expected["b"]) - - res = gb["b"].sum(min_count=2) - expected = Series([td3, pd.NaT], dtype="m8[ns]", name="b", index=expected.index) - tm.assert_series_equal(res, expected) - - @pytest.mark.parametrize( "kernel, has_arg", [ @@ -1706,22 +806,6 @@ def test_groupby_empty_dataset(dtype, kwargs): tm.assert_frame_equal(result, expected) -def test_corrwith_with_1_axis(): - # GH 47723 - df = DataFrame({"a": [1, 1, 2], "b": [3, 7, 4]}) - gb = df.groupby("a") - - msg = "DataFrameGroupBy.corrwith with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = gb.corrwith(df, axis=1) - index = Index( - data=[(1, 0), (1, 1), (1, 2), (2, 2), (2, 0), (2, 1)], - name=("a", None), - ) - expected = Series([np.nan] * 6, index=index) - tm.assert_series_equal(result, expected) - - def test_multiindex_group_all_columns_when_empty(groupby_func): # GH 32464 df = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"]) diff --git a/pandas/tests/groupby/test_min_max.py b/pandas/tests/groupby/test_min_max.py deleted file mode 100644 index 30c7e1df1e691..0000000000000 --- a/pandas/tests/groupby/test_min_max.py +++ /dev/null @@ -1,272 +0,0 @@ -import numpy as np -import pytest - -from pandas._libs.tslibs import iNaT - -import pandas as pd -from pandas import ( - DataFrame, - Index, - Series, -) -import pandas._testing as tm - - -def test_max_min_non_numeric(): - # #2700 - aa = DataFrame({"nn": [11, 11, 22, 22], "ii": [1, 2, 3, 4], "ss": 4 * ["mama"]}) - - result = aa.groupby("nn").max() - assert "ss" in result - - result = aa.groupby("nn").max(numeric_only=False) - assert "ss" in result - - result = aa.groupby("nn").min() - assert "ss" in result - - result = aa.groupby("nn").min(numeric_only=False) - assert "ss" in result - - -def test_max_min_object_multiple_columns(using_array_manager): - # GH#41111 case where the aggregation is valid for some columns but not - # others; we split object blocks column-wise, consistent with - # DataFrame._reduce - - df = DataFrame( - { - "A": [1, 1, 2, 2, 3], - "B": [1, "foo", 2, "bar", 
False],
-            "C": ["a", "b", "c", "d", "e"],
-        }
-    )
-    df._consolidate_inplace()  # should already be consolidated, but double-check
-    if not using_array_manager:
-        assert len(df._mgr.blocks) == 2
-
-    gb = df.groupby("A")
-
-    result = gb[["C"]].max()
-    # "max" is valid for column "C" but not for "B"
-    ei = Index([1, 2, 3], name="A")
-    expected = DataFrame({"C": ["b", "d", "e"]}, index=ei)
-    tm.assert_frame_equal(result, expected)
-
-    result = gb[["C"]].min()
-    # "min" is valid for column "C" but not for "B"
-    ei = Index([1, 2, 3], name="A")
-    expected = DataFrame({"C": ["a", "c", "e"]}, index=ei)
-    tm.assert_frame_equal(result, expected)
-
-
-def test_min_date_with_nans():
-    # GH26321
-    dates = pd.to_datetime(
-        Series(["2019-05-09", "2019-05-09", "2019-05-09"]), format="%Y-%m-%d"
-    ).dt.date
-    df = DataFrame({"a": [np.nan, "1", np.nan], "b": [0, 1, 1], "c": dates})
-
-    result = df.groupby("b", as_index=False)["c"].min()["c"]
-    expected = pd.to_datetime(
-        Series(["2019-05-09", "2019-05-09"], name="c"), format="%Y-%m-%d"
-    ).dt.date
-    tm.assert_series_equal(result, expected)
-
-    result = df.groupby("b")["c"].min()
-    expected.index.name = "b"
-    tm.assert_series_equal(result, expected)
-
-
-def test_max_inat():
-    # GH#40767 don't interpret iNaT as NaN
-    ser = Series([1, iNaT])
-    key = np.array([1, 1], dtype=np.int64)
-    gb = ser.groupby(key)
-
-    result = gb.max(min_count=2)
-    expected = Series({1: 1}, dtype=np.int64)
-    tm.assert_series_equal(result, expected, check_exact=True)
-
-    result = gb.min(min_count=2)
-    expected = Series({1: iNaT}, dtype=np.int64)
-    tm.assert_series_equal(result, expected, check_exact=True)
-
-    # not enough entries -> gets masked to NaN
-    result = gb.min(min_count=3)
-    expected = Series({1: np.nan})
-    tm.assert_series_equal(result, expected, check_exact=True)
-
-
-def test_max_inat_not_all_na():
-    # GH#40767 don't interpret iNaT as NaN
-
-    # make sure we don't round iNaT+1 to iNaT
-    ser = Series([1, iNaT, 2, iNaT + 1])
-    gb = ser.groupby([1, 2, 3, 3])
-    result = gb.min(min_count=2)
-
-    # Note: in converting to float64, the iNaT + 1 maps to iNaT, i.e.
is lossy - expected = Series({1: np.nan, 2: np.nan, 3: iNaT + 1}) - expected.index = expected.index.astype(int) - tm.assert_series_equal(result, expected, check_exact=True) - - -@pytest.mark.parametrize("func", ["min", "max"]) -def test_groupby_aggregate_period_column(func): - # GH 31471 - groups = [1, 2] - periods = pd.period_range("2020", periods=2, freq="Y") - df = DataFrame({"a": groups, "b": periods}) - - result = getattr(df.groupby("a")["b"], func)() - idx = Index([1, 2], name="a") - expected = Series(periods, index=idx, name="b") - - tm.assert_series_equal(result, expected) - - -@pytest.mark.parametrize("func", ["min", "max"]) -def test_groupby_aggregate_period_frame(func): - # GH 31471 - groups = [1, 2] - periods = pd.period_range("2020", periods=2, freq="Y") - df = DataFrame({"a": groups, "b": periods}) - - result = getattr(df.groupby("a"), func)() - idx = Index([1, 2], name="a") - expected = DataFrame({"b": periods}, index=idx) - - tm.assert_frame_equal(result, expected) - - -def test_aggregate_numeric_object_dtype(): - # https://github.com/pandas-dev/pandas/issues/39329 - # simplified case: multiple object columns where one is all-NaN - # -> gets split as the all-NaN is inferred as float - df = DataFrame( - {"key": ["A", "A", "B", "B"], "col1": list("abcd"), "col2": [np.nan] * 4}, - ).astype(object) - result = df.groupby("key").min() - expected = ( - DataFrame( - {"key": ["A", "B"], "col1": ["a", "c"], "col2": [np.nan, np.nan]}, - ) - .set_index("key") - .astype(object) - ) - tm.assert_frame_equal(result, expected) - - # same but with numbers - df = DataFrame( - {"key": ["A", "A", "B", "B"], "col1": list("abcd"), "col2": range(4)}, - ).astype(object) - result = df.groupby("key").min() - expected = ( - DataFrame({"key": ["A", "B"], "col1": ["a", "c"], "col2": [0, 2]}) - .set_index("key") - .astype(object) - ) - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize("func", ["min", "max"]) -def test_aggregate_categorical_lost_index(func: str): - # GH: 28641 groupby drops index, when grouping over categorical column with min/max - ds = Series(["b"], dtype="category").cat.as_ordered() - df = DataFrame({"A": [1997], "B": ds}) - result = df.groupby("A").agg({"B": func}) - expected = DataFrame({"B": ["b"]}, index=Index([1997], name="A")) - - # ordered categorical dtype should be preserved - expected["B"] = expected["B"].astype(ds.dtype) - - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize("dtype", ["Int64", "Int32", "Float64", "Float32", "boolean"]) -def test_groupby_min_max_nullable(dtype): - if dtype == "Int64": - # GH#41743 avoid precision loss - ts = 1618556707013635762 - elif dtype == "boolean": - ts = 0 - else: - ts = 4.0 - - df = DataFrame({"id": [2, 2], "ts": [ts, ts + 1]}) - df["ts"] = df["ts"].astype(dtype) - - gb = df.groupby("id") - - result = gb.min() - expected = df.iloc[:1].set_index("id") - tm.assert_frame_equal(result, expected) - - res_max = gb.max() - expected_max = df.iloc[1:].set_index("id") - tm.assert_frame_equal(res_max, expected_max) - - result2 = gb.min(min_count=3) - expected2 = DataFrame({"ts": [pd.NA]}, index=expected.index, dtype=dtype) - tm.assert_frame_equal(result2, expected2) - - res_max2 = gb.max(min_count=3) - tm.assert_frame_equal(res_max2, expected2) - - # Case with NA values - df2 = DataFrame({"id": [2, 2, 2], "ts": [ts, pd.NA, ts + 1]}) - df2["ts"] = df2["ts"].astype(dtype) - gb2 = df2.groupby("id") - - result3 = gb2.min() - tm.assert_frame_equal(result3, expected) - - res_max3 = gb2.max() - 
tm.assert_frame_equal(res_max3, expected_max) - - result4 = gb2.min(min_count=100) - tm.assert_frame_equal(result4, expected2) - - res_max4 = gb2.max(min_count=100) - tm.assert_frame_equal(res_max4, expected2) - - -def test_min_max_nullable_uint64_empty_group(): - # don't raise NotImplementedError from libgroupby - cat = pd.Categorical([0] * 10, categories=[0, 1]) - df = DataFrame({"A": cat, "B": pd.array(np.arange(10, dtype=np.uint64))}) - gb = df.groupby("A", observed=False) - - res = gb.min() - - idx = pd.CategoricalIndex([0, 1], dtype=cat.dtype, name="A") - expected = DataFrame({"B": pd.array([0, pd.NA], dtype="UInt64")}, index=idx) - tm.assert_frame_equal(res, expected) - - res = gb.max() - expected.iloc[0, 0] = 9 - tm.assert_frame_equal(res, expected) - - -@pytest.mark.parametrize("func", ["first", "last", "min", "max"]) -def test_groupby_min_max_categorical(func): - # GH: 52151 - df = DataFrame( - { - "col1": pd.Categorical(["A"], categories=list("AB"), ordered=True), - "col2": pd.Categorical([1], categories=[1, 2], ordered=True), - "value": 0.1, - } - ) - result = getattr(df.groupby("col1", observed=False), func)() - - idx = pd.CategoricalIndex(data=["A", "B"], name="col1", ordered=True) - expected = DataFrame( - { - "col2": pd.Categorical([1, None], categories=[1, 2], ordered=True), - "value": [0.1, None], - }, - index=idx, - ) - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_nunique.py b/pandas/tests/groupby/test_nunique.py deleted file mode 100644 index 9c9e32d9ce226..0000000000000 --- a/pandas/tests/groupby/test_nunique.py +++ /dev/null @@ -1,190 +0,0 @@ -import datetime as dt -from string import ascii_lowercase - -import numpy as np -import pytest - -import pandas as pd -from pandas import ( - DataFrame, - MultiIndex, - NaT, - Series, - Timestamp, - date_range, -) -import pandas._testing as tm - - -@pytest.mark.slow -@pytest.mark.parametrize("sort", [False, True]) -@pytest.mark.parametrize("dropna", [False, True]) -@pytest.mark.parametrize("as_index", [True, False]) -@pytest.mark.parametrize("with_nan", [True, False]) -@pytest.mark.parametrize("keys", [["joe"], ["joe", "jim"]]) -def test_series_groupby_nunique(sort, dropna, as_index, with_nan, keys): - n = 100 - m = 10 - days = date_range("2015-08-23", periods=10) - df = DataFrame( - { - "jim": np.random.default_rng(2).choice(list(ascii_lowercase), n), - "joe": np.random.default_rng(2).choice(days, n), - "julie": np.random.default_rng(2).integers(0, m, n), - } - ) - if with_nan: - df = df.astype({"julie": float}) # Explicit cast to avoid implicit cast below - df.loc[1::17, "jim"] = None - df.loc[3::37, "joe"] = None - df.loc[7::19, "julie"] = None - df.loc[8::19, "julie"] = None - df.loc[9::19, "julie"] = None - original_df = df.copy() - gr = df.groupby(keys, as_index=as_index, sort=sort) - left = gr["julie"].nunique(dropna=dropna) - - gr = df.groupby(keys, as_index=as_index, sort=sort) - right = gr["julie"].apply(Series.nunique, dropna=dropna) - if not as_index: - right = right.reset_index(drop=True) - - if as_index: - tm.assert_series_equal(left, right, check_names=False) - else: - tm.assert_frame_equal(left, right, check_names=False) - tm.assert_frame_equal(df, original_df) - - -def test_nunique(): - df = DataFrame({"A": list("abbacc"), "B": list("abxacc"), "C": list("abbacx")}) - - expected = DataFrame({"A": list("abc"), "B": [1, 2, 1], "C": [1, 1, 2]}) - result = df.groupby("A", as_index=False).nunique() - tm.assert_frame_equal(result, expected) - - # as_index - expected.index = list("abc") 
- expected.index.name = "A" - expected = expected.drop(columns="A") - result = df.groupby("A").nunique() - tm.assert_frame_equal(result, expected) - - # with na - result = df.replace({"x": None}).groupby("A").nunique(dropna=False) - tm.assert_frame_equal(result, expected) - - # dropna - expected = DataFrame({"B": [1] * 3, "C": [1] * 3}, index=list("abc")) - expected.index.name = "A" - result = df.replace({"x": None}).groupby("A").nunique() - tm.assert_frame_equal(result, expected) - - -def test_nunique_with_object(): - # GH 11077 - data = DataFrame( - [ - [100, 1, "Alice"], - [200, 2, "Bob"], - [300, 3, "Charlie"], - [-400, 4, "Dan"], - [500, 5, "Edith"], - ], - columns=["amount", "id", "name"], - ) - - result = data.groupby(["id", "amount"])["name"].nunique() - index = MultiIndex.from_arrays([data.id, data.amount]) - expected = Series([1] * 5, name="name", index=index) - tm.assert_series_equal(result, expected) - - -def test_nunique_with_empty_series(): - # GH 12553 - data = Series(name="name", dtype=object) - result = data.groupby(level=0).nunique() - expected = Series(name="name", dtype="int64") - tm.assert_series_equal(result, expected) - - -def test_nunique_with_timegrouper(): - # GH 13453 - test = DataFrame( - { - "time": [ - Timestamp("2016-06-28 09:35:35"), - Timestamp("2016-06-28 16:09:30"), - Timestamp("2016-06-28 16:46:28"), - ], - "data": ["1", "2", "3"], - } - ).set_index("time") - result = test.groupby(pd.Grouper(freq="h"))["data"].nunique() - expected = test.groupby(pd.Grouper(freq="h"))["data"].apply(Series.nunique) - tm.assert_series_equal(result, expected) - - -@pytest.mark.parametrize( - "key, data, dropna, expected", - [ - ( - ["x", "x", "x"], - [Timestamp("2019-01-01"), NaT, Timestamp("2019-01-01")], - True, - Series([1], index=pd.Index(["x"], name="key"), name="data"), - ), - ( - ["x", "x", "x"], - [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)], - True, - Series([1], index=pd.Index(["x"], name="key"), name="data"), - ), - ( - ["x", "x", "x", "y", "y"], - [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)], - False, - Series([2, 2], index=pd.Index(["x", "y"], name="key"), name="data"), - ), - ( - ["x", "x", "x", "x", "y"], - [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)], - False, - Series([2, 1], index=pd.Index(["x", "y"], name="key"), name="data"), - ), - ], -) -def test_nunique_with_NaT(key, data, dropna, expected): - # GH 27951 - df = DataFrame({"key": key, "data": data}) - result = df.groupby(["key"])["data"].nunique(dropna=dropna) - tm.assert_series_equal(result, expected) - - -def test_nunique_preserves_column_level_names(): - # GH 23222 - test = DataFrame([1, 2, 2], columns=pd.Index(["A"], name="level_0")) - result = test.groupby([0, 0, 0]).nunique() - expected = DataFrame([2], index=np.array([0]), columns=test.columns) - tm.assert_frame_equal(result, expected) - - -def test_nunique_transform_with_datetime(): - # GH 35109 - transform with nunique on datetimes results in integers - df = DataFrame(date_range("2008-12-31", "2009-01-02"), columns=["date"]) - result = df.groupby([0, 0, 1])["date"].transform("nunique") - expected = Series([2, 2, 1], name="date") - tm.assert_series_equal(result, expected) - - -def test_empty_categorical(observed): - # GH#21334 - cat = Series([1]).astype("category") - ser = cat[:0] - gb = ser.groupby(ser, observed=observed) - result = gb.nunique() - if observed: - expected = Series([], index=cat[:0], dtype="int64") - else: - expected = Series([0], index=cat, dtype="int64") - 
tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py new file mode 100644 index 0000000000000..fdfb211ac2269 --- /dev/null +++ b/pandas/tests/groupby/test_reductions.py @@ -0,0 +1,838 @@ +import builtins +import datetime as dt +from io import StringIO +from string import ascii_lowercase + +import numpy as np +import pytest + +from pandas._libs.tslibs import iNaT + +import pandas as pd +from pandas import ( + DataFrame, + MultiIndex, + Series, + Timestamp, + date_range, + isna, +) +import pandas._testing as tm + + +@pytest.mark.parametrize("agg_func", ["any", "all"]) +@pytest.mark.parametrize( + "vals", + [ + ["foo", "bar", "baz"], + ["foo", "", ""], + ["", "", ""], + [1, 2, 3], + [1, 0, 0], + [0, 0, 0], + [1.0, 2.0, 3.0], + [1.0, 0.0, 0.0], + [0.0, 0.0, 0.0], + [True, True, True], + [True, False, False], + [False, False, False], + [np.nan, np.nan, np.nan], + ], +) +def test_groupby_bool_aggs(skipna, agg_func, vals): + df = DataFrame({"key": ["a"] * 3 + ["b"] * 3, "val": vals * 2}) + + # Figure out expectation using Python builtin + exp = getattr(builtins, agg_func)(vals) + + # edge case for missing data with skipna and 'any' + if skipna and all(isna(vals)) and agg_func == "any": + exp = False + + expected = DataFrame( + [exp] * 2, columns=["val"], index=pd.Index(["a", "b"], name="key") + ) + result = getattr(df.groupby("key"), agg_func)(skipna=skipna) + tm.assert_frame_equal(result, expected) + + +def test_any(): + df = DataFrame( + [[1, 2, "foo"], [1, np.nan, "bar"], [3, np.nan, "baz"]], + columns=["A", "B", "C"], + ) + expected = DataFrame( + [[True, True], [False, True]], columns=["B", "C"], index=[1, 3] + ) + expected.index.name = "A" + result = df.groupby("A").any() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("bool_agg_func", ["any", "all"]) +def test_bool_aggs_dup_column_labels(bool_agg_func): + # GH#21668 + df = DataFrame([[True, True]], columns=["a", "a"]) + grp_by = df.groupby([0]) + result = getattr(grp_by, bool_agg_func)() + + expected = df.set_axis(np.array([0])) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("bool_agg_func", ["any", "all"]) +@pytest.mark.parametrize( + "data", + [ + [False, False, False], + [True, True, True], + [pd.NA, pd.NA, pd.NA], + [False, pd.NA, False], + [True, pd.NA, True], + [True, pd.NA, False], + ], +) +def test_masked_kleene_logic(bool_agg_func, skipna, data): + # GH#37506 + ser = Series(data, dtype="boolean") + + # The result should match aggregating on the whole series. 
Correctness + # there is verified in test_reductions.py::test_any_all_boolean_kleene_logic + expected_data = getattr(ser, bool_agg_func)(skipna=skipna) + expected = Series(expected_data, index=np.array([0]), dtype="boolean") + + result = ser.groupby([0, 0, 0]).agg(bool_agg_func, skipna=skipna) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "dtype1,dtype2,exp_col1,exp_col2", + [ + ( + "float", + "Float64", + np.array([True], dtype=bool), + pd.array([pd.NA], dtype="boolean"), + ), + ( + "Int64", + "float", + pd.array([pd.NA], dtype="boolean"), + np.array([True], dtype=bool), + ), + ( + "Int64", + "Int64", + pd.array([pd.NA], dtype="boolean"), + pd.array([pd.NA], dtype="boolean"), + ), + ( + "Float64", + "boolean", + pd.array([pd.NA], dtype="boolean"), + pd.array([pd.NA], dtype="boolean"), + ), + ], +) +def test_masked_mixed_types(dtype1, dtype2, exp_col1, exp_col2): + # GH#37506 + data = [1.0, np.nan] + df = DataFrame( + {"col1": pd.array(data, dtype=dtype1), "col2": pd.array(data, dtype=dtype2)} + ) + result = df.groupby([1, 1]).agg("all", skipna=False) + + expected = DataFrame({"col1": exp_col1, "col2": exp_col2}, index=np.array([1])) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("bool_agg_func", ["any", "all"]) +@pytest.mark.parametrize("dtype", ["Int64", "Float64", "boolean"]) +def test_masked_bool_aggs_skipna(bool_agg_func, dtype, skipna, frame_or_series): + # GH#40585 + obj = frame_or_series([pd.NA, 1], dtype=dtype) + expected_res = True + if not skipna and bool_agg_func == "all": + expected_res = pd.NA + expected = frame_or_series([expected_res], index=np.array([1]), dtype="boolean") + + result = obj.groupby([1, 1]).agg(bool_agg_func, skipna=skipna) + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "bool_agg_func,data,expected_res", + [ + ("any", [pd.NA, np.nan], False), + ("any", [pd.NA, 1, np.nan], True), + ("all", [pd.NA, pd.NaT], True), + ("all", [pd.NA, False, pd.NaT], False), + ], +) +def test_object_type_missing_vals(bool_agg_func, data, expected_res, frame_or_series): + # GH#37501 + obj = frame_or_series(data, dtype=object) + result = obj.groupby([1] * len(data)).agg(bool_agg_func) + expected = frame_or_series([expected_res], index=np.array([1]), dtype="bool") + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize("bool_agg_func", ["any", "all"]) +def test_object_NA_raises_with_skipna_false(bool_agg_func): + # GH#37501 + ser = Series([pd.NA], dtype=object) + with pytest.raises(TypeError, match="boolean value of NA is ambiguous"): + ser.groupby([1]).agg(bool_agg_func, skipna=False) + + +@pytest.mark.parametrize("bool_agg_func", ["any", "all"]) +def test_empty(frame_or_series, bool_agg_func): + # GH 45231 + kwargs = {"columns": ["a"]} if frame_or_series is DataFrame else {"name": "a"} + obj = frame_or_series(**kwargs, dtype=object) + result = getattr(obj.groupby(obj.index), bool_agg_func)() + expected = frame_or_series(**kwargs, dtype=bool) + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "func, values", + [ + ("idxmin", {"c_int": [0, 2], "c_float": [1, 3], "c_date": [1, 2]}), + ("idxmax", {"c_int": [1, 3], "c_float": [0, 2], "c_date": [0, 3]}), + ], +) +@pytest.mark.parametrize("numeric_only", [True, False]) +def test_idxmin_idxmax_returns_int_types(func, values, numeric_only): + # GH 25444 + df = DataFrame( + { + "name": ["A", "A", "B", "B"], + "c_int": [1, 2, 3, 4], + "c_float": [4.02, 3.03, 2.04, 1.05], + "c_date": ["2019", "2018", "2016", "2017"], + } + ) + df["c_date"] 
= pd.to_datetime(df["c_date"]) + df["c_date_tz"] = df["c_date"].dt.tz_localize("US/Pacific") + df["c_timedelta"] = df["c_date"] - df["c_date"].iloc[0] + df["c_period"] = df["c_date"].dt.to_period("W") + df["c_Integer"] = df["c_int"].astype("Int64") + df["c_Floating"] = df["c_float"].astype("Float64") + + result = getattr(df.groupby("name"), func)(numeric_only=numeric_only) + + expected = DataFrame(values, index=pd.Index(["A", "B"], name="name")) + if numeric_only: + expected = expected.drop(columns=["c_date"]) + else: + expected["c_date_tz"] = expected["c_date"] + expected["c_timedelta"] = expected["c_date"] + expected["c_period"] = expected["c_date"] + expected["c_Integer"] = expected["c_int"] + expected["c_Floating"] = expected["c_float"] + + tm.assert_frame_equal(result, expected) + + +def test_idxmin_idxmax_axis1(): + df = DataFrame( + np.random.default_rng(2).standard_normal((10, 4)), columns=["A", "B", "C", "D"] + ) + df["A"] = [1, 2, 3, 1, 2, 3, 1, 2, 3, 4] + + gb = df.groupby("A") + + warn_msg = "DataFrameGroupBy.idxmax with axis=1 is deprecated" + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + res = gb.idxmax(axis=1) + + alt = df.iloc[:, 1:].idxmax(axis=1) + indexer = res.index.get_level_values(1) + + tm.assert_series_equal(alt[indexer], res.droplevel("A")) + + df["E"] = date_range("2016-01-01", periods=10) + gb2 = df.groupby("A") + + msg = "'>' not supported between instances of 'Timestamp' and 'float'" + with pytest.raises(TypeError, match=msg): + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + gb2.idxmax(axis=1) + + +def test_groupby_mean_no_overflow(): + # Regression test for (#22487) + df = DataFrame( + { + "user": ["A", "A", "A", "A", "A"], + "connections": [4970, 4749, 4719, 4704, 18446744073699999744], + } + ) + assert df.groupby("user")["connections"].mean()["A"] == 3689348814740003840 + + +def test_mean_on_timedelta(): + # GH 17382 + df = DataFrame({"time": pd.to_timedelta(range(10)), "cat": ["A", "B"] * 5}) + result = df.groupby("cat")["time"].mean() + expected = Series( + pd.to_timedelta([4, 5]), name="time", index=pd.Index(["A", "B"], name="cat") + ) + tm.assert_series_equal(result, expected) + + +def test_cython_median(): + arr = np.random.default_rng(2).standard_normal(1000) + arr[::2] = np.nan + df = DataFrame(arr) + + labels = np.random.default_rng(2).integers(0, 50, size=1000).astype(float) + labels[::17] = np.nan + + result = df.groupby(labels).median() + msg = "using DataFrameGroupBy.median" + with tm.assert_produces_warning(FutureWarning, match=msg): + exp = df.groupby(labels).agg(np.nanmedian) + tm.assert_frame_equal(result, exp) + + df = DataFrame(np.random.default_rng(2).standard_normal((1000, 5))) + msg = "using DataFrameGroupBy.median" + with tm.assert_produces_warning(FutureWarning, match=msg): + rs = df.groupby(labels).agg(np.median) + xp = df.groupby(labels).median() + tm.assert_frame_equal(rs, xp) + + +def test_median_empty_bins(observed): + df = DataFrame(np.random.default_rng(2).integers(0, 44, 500)) + + grps = range(0, 55, 5) + bins = pd.cut(df[0], grps) + + result = df.groupby(bins, observed=observed).median() + expected = df.groupby(bins, observed=observed).agg(lambda x: x.median()) + tm.assert_frame_equal(result, expected) + + +def test_max_min_non_numeric(): + # #2700 + aa = DataFrame({"nn": [11, 11, 22, 22], "ii": [1, 2, 3, 4], "ss": 4 * ["mama"]}) + + result = aa.groupby("nn").max() + assert "ss" in result + + result = aa.groupby("nn").max(numeric_only=False) + assert "ss" in result + + result = 
aa.groupby("nn").min() + assert "ss" in result + + result = aa.groupby("nn").min(numeric_only=False) + assert "ss" in result + + +def test_max_min_object_multiple_columns(using_array_manager): + # GH#41111 case where the aggregation is valid for some columns but not + # others; we split object blocks column-wise, consistent with + # DataFrame._reduce + + df = DataFrame( + { + "A": [1, 1, 2, 2, 3], + "B": [1, "foo", 2, "bar", False], + "C": ["a", "b", "c", "d", "e"], + } + ) + df._consolidate_inplace() # should already be consolidate, but double-check + if not using_array_manager: + assert len(df._mgr.blocks) == 2 + + gb = df.groupby("A") + + result = gb[["C"]].max() + # "max" is valid for column "C" but not for "B" + ei = pd.Index([1, 2, 3], name="A") + expected = DataFrame({"C": ["b", "d", "e"]}, index=ei) + tm.assert_frame_equal(result, expected) + + result = gb[["C"]].min() + # "min" is valid for column "C" but not for "B" + ei = pd.Index([1, 2, 3], name="A") + expected = DataFrame({"C": ["a", "c", "e"]}, index=ei) + tm.assert_frame_equal(result, expected) + + +def test_min_date_with_nans(): + # GH26321 + dates = pd.to_datetime( + Series(["2019-05-09", "2019-05-09", "2019-05-09"]), format="%Y-%m-%d" + ).dt.date + df = DataFrame({"a": [np.nan, "1", np.nan], "b": [0, 1, 1], "c": dates}) + + result = df.groupby("b", as_index=False)["c"].min()["c"] + expected = pd.to_datetime( + Series(["2019-05-09", "2019-05-09"], name="c"), format="%Y-%m-%d" + ).dt.date + tm.assert_series_equal(result, expected) + + result = df.groupby("b")["c"].min() + expected.index.name = "b" + tm.assert_series_equal(result, expected) + + +def test_max_inat(): + # GH#40767 dont interpret iNaT as NaN + ser = Series([1, iNaT]) + key = np.array([1, 1], dtype=np.int64) + gb = ser.groupby(key) + + result = gb.max(min_count=2) + expected = Series({1: 1}, dtype=np.int64) + tm.assert_series_equal(result, expected, check_exact=True) + + result = gb.min(min_count=2) + expected = Series({1: iNaT}, dtype=np.int64) + tm.assert_series_equal(result, expected, check_exact=True) + + # not enough entries -> gets masked to NaN + result = gb.min(min_count=3) + expected = Series({1: np.nan}) + tm.assert_series_equal(result, expected, check_exact=True) + + +def test_max_inat_not_all_na(): + # GH#40767 dont interpret iNaT as NaN + + # make sure we dont round iNaT+1 to iNaT + ser = Series([1, iNaT, 2, iNaT + 1]) + gb = ser.groupby([1, 2, 3, 3]) + result = gb.min(min_count=2) + + # Note: in converting to float64, the iNaT + 1 maps to iNaT, i.e. 
is lossy + expected = Series({1: np.nan, 2: np.nan, 3: iNaT + 1}) + expected.index = expected.index.astype(int) + tm.assert_series_equal(result, expected, check_exact=True) + + +@pytest.mark.parametrize("func", ["min", "max"]) +def test_groupby_aggregate_period_column(func): + # GH 31471 + groups = [1, 2] + periods = pd.period_range("2020", periods=2, freq="Y") + df = DataFrame({"a": groups, "b": periods}) + + result = getattr(df.groupby("a")["b"], func)() + idx = pd.Index([1, 2], name="a") + expected = Series(periods, index=idx, name="b") + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("func", ["min", "max"]) +def test_groupby_aggregate_period_frame(func): + # GH 31471 + groups = [1, 2] + periods = pd.period_range("2020", periods=2, freq="Y") + df = DataFrame({"a": groups, "b": periods}) + + result = getattr(df.groupby("a"), func)() + idx = pd.Index([1, 2], name="a") + expected = DataFrame({"b": periods}, index=idx) + + tm.assert_frame_equal(result, expected) + + +def test_aggregate_numeric_object_dtype(): + # https://github.com/pandas-dev/pandas/issues/39329 + # simplified case: multiple object columns where one is all-NaN + # -> gets split as the all-NaN is inferred as float + df = DataFrame( + {"key": ["A", "A", "B", "B"], "col1": list("abcd"), "col2": [np.nan] * 4}, + ).astype(object) + result = df.groupby("key").min() + expected = ( + DataFrame( + {"key": ["A", "B"], "col1": ["a", "c"], "col2": [np.nan, np.nan]}, + ) + .set_index("key") + .astype(object) + ) + tm.assert_frame_equal(result, expected) + + # same but with numbers + df = DataFrame( + {"key": ["A", "A", "B", "B"], "col1": list("abcd"), "col2": range(4)}, + ).astype(object) + result = df.groupby("key").min() + expected = ( + DataFrame({"key": ["A", "B"], "col1": ["a", "c"], "col2": [0, 2]}) + .set_index("key") + .astype(object) + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("func", ["min", "max"]) +def test_aggregate_categorical_lost_index(func: str): + # GH: 28641 groupby drops index, when grouping over categorical column with min/max + ds = Series(["b"], dtype="category").cat.as_ordered() + df = DataFrame({"A": [1997], "B": ds}) + result = df.groupby("A").agg({"B": func}) + expected = DataFrame({"B": ["b"]}, index=pd.Index([1997], name="A")) + + # ordered categorical dtype should be preserved + expected["B"] = expected["B"].astype(ds.dtype) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["Int64", "Int32", "Float64", "Float32", "boolean"]) +def test_groupby_min_max_nullable(dtype): + if dtype == "Int64": + # GH#41743 avoid precision loss + ts = 1618556707013635762 + elif dtype == "boolean": + ts = 0 + else: + ts = 4.0 + + df = DataFrame({"id": [2, 2], "ts": [ts, ts + 1]}) + df["ts"] = df["ts"].astype(dtype) + + gb = df.groupby("id") + + result = gb.min() + expected = df.iloc[:1].set_index("id") + tm.assert_frame_equal(result, expected) + + res_max = gb.max() + expected_max = df.iloc[1:].set_index("id") + tm.assert_frame_equal(res_max, expected_max) + + result2 = gb.min(min_count=3) + expected2 = DataFrame({"ts": [pd.NA]}, index=expected.index, dtype=dtype) + tm.assert_frame_equal(result2, expected2) + + res_max2 = gb.max(min_count=3) + tm.assert_frame_equal(res_max2, expected2) + + # Case with NA values + df2 = DataFrame({"id": [2, 2, 2], "ts": [ts, pd.NA, ts + 1]}) + df2["ts"] = df2["ts"].astype(dtype) + gb2 = df2.groupby("id") + + result3 = gb2.min() + tm.assert_frame_equal(result3, expected) + + res_max3 = gb2.max() + 
tm.assert_frame_equal(res_max3, expected_max) + + result4 = gb2.min(min_count=100) + tm.assert_frame_equal(result4, expected2) + + res_max4 = gb2.max(min_count=100) + tm.assert_frame_equal(res_max4, expected2) + + +def test_min_max_nullable_uint64_empty_group(): + # don't raise NotImplementedError from libgroupby + cat = pd.Categorical([0] * 10, categories=[0, 1]) + df = DataFrame({"A": cat, "B": pd.array(np.arange(10, dtype=np.uint64))}) + gb = df.groupby("A", observed=False) + + res = gb.min() + + idx = pd.CategoricalIndex([0, 1], dtype=cat.dtype, name="A") + expected = DataFrame({"B": pd.array([0, pd.NA], dtype="UInt64")}, index=idx) + tm.assert_frame_equal(res, expected) + + res = gb.max() + expected.iloc[0, 0] = 9 + tm.assert_frame_equal(res, expected) + + +@pytest.mark.parametrize("func", ["first", "last", "min", "max"]) +def test_groupby_min_max_categorical(func): + # GH: 52151 + df = DataFrame( + { + "col1": pd.Categorical(["A"], categories=list("AB"), ordered=True), + "col2": pd.Categorical([1], categories=[1, 2], ordered=True), + "value": 0.1, + } + ) + result = getattr(df.groupby("col1", observed=False), func)() + + idx = pd.CategoricalIndex(data=["A", "B"], name="col1", ordered=True) + expected = DataFrame( + { + "col2": pd.Categorical([1, None], categories=[1, 2], ordered=True), + "value": [0.1, None], + }, + index=idx, + ) + tm.assert_frame_equal(result, expected) + + +def test_max_nan_bug(): + raw = """,Date,app,File +-04-23,2013-04-23 00:00:00,,log080001.log +-05-06,2013-05-06 00:00:00,,log.log +-05-07,2013-05-07 00:00:00,OE,xlsx""" + + with tm.assert_produces_warning(UserWarning, match="Could not infer format"): + df = pd.read_csv(StringIO(raw), parse_dates=[0]) + gb = df.groupby("Date") + r = gb[["File"]].max() + e = gb["File"].max().to_frame() + tm.assert_frame_equal(r, e) + assert not r["File"].isna().any() + + +@pytest.mark.slow +@pytest.mark.parametrize("sort", [False, True]) +@pytest.mark.parametrize("dropna", [False, True]) +@pytest.mark.parametrize("as_index", [True, False]) +@pytest.mark.parametrize("with_nan", [True, False]) +@pytest.mark.parametrize("keys", [["joe"], ["joe", "jim"]]) +def test_series_groupby_nunique(sort, dropna, as_index, with_nan, keys): + n = 100 + m = 10 + days = date_range("2015-08-23", periods=10) + df = DataFrame( + { + "jim": np.random.default_rng(2).choice(list(ascii_lowercase), n), + "joe": np.random.default_rng(2).choice(days, n), + "julie": np.random.default_rng(2).integers(0, m, n), + } + ) + if with_nan: + df = df.astype({"julie": float}) # Explicit cast to avoid implicit cast below + df.loc[1::17, "jim"] = None + df.loc[3::37, "joe"] = None + df.loc[7::19, "julie"] = None + df.loc[8::19, "julie"] = None + df.loc[9::19, "julie"] = None + original_df = df.copy() + gr = df.groupby(keys, as_index=as_index, sort=sort) + left = gr["julie"].nunique(dropna=dropna) + + gr = df.groupby(keys, as_index=as_index, sort=sort) + right = gr["julie"].apply(Series.nunique, dropna=dropna) + if not as_index: + right = right.reset_index(drop=True) + + if as_index: + tm.assert_series_equal(left, right, check_names=False) + else: + tm.assert_frame_equal(left, right, check_names=False) + tm.assert_frame_equal(df, original_df) + + +def test_nunique(): + df = DataFrame({"A": list("abbacc"), "B": list("abxacc"), "C": list("abbacx")}) + + expected = DataFrame({"A": list("abc"), "B": [1, 2, 1], "C": [1, 1, 2]}) + result = df.groupby("A", as_index=False).nunique() + tm.assert_frame_equal(result, expected) + + # as_index + expected.index = list("abc") + 
expected.index.name = "A" + expected = expected.drop(columns="A") + result = df.groupby("A").nunique() + tm.assert_frame_equal(result, expected) + + # with na + result = df.replace({"x": None}).groupby("A").nunique(dropna=False) + tm.assert_frame_equal(result, expected) + + # dropna + expected = DataFrame({"B": [1] * 3, "C": [1] * 3}, index=list("abc")) + expected.index.name = "A" + result = df.replace({"x": None}).groupby("A").nunique() + tm.assert_frame_equal(result, expected) + + +def test_nunique_with_object(): + # GH 11077 + data = DataFrame( + [ + [100, 1, "Alice"], + [200, 2, "Bob"], + [300, 3, "Charlie"], + [-400, 4, "Dan"], + [500, 5, "Edith"], + ], + columns=["amount", "id", "name"], + ) + + result = data.groupby(["id", "amount"])["name"].nunique() + index = MultiIndex.from_arrays([data.id, data.amount]) + expected = Series([1] * 5, name="name", index=index) + tm.assert_series_equal(result, expected) + + +def test_nunique_with_empty_series(): + # GH 12553 + data = Series(name="name", dtype=object) + result = data.groupby(level=0).nunique() + expected = Series(name="name", dtype="int64") + tm.assert_series_equal(result, expected) + + +def test_nunique_with_timegrouper(): + # GH 13453 + test = DataFrame( + { + "time": [ + Timestamp("2016-06-28 09:35:35"), + Timestamp("2016-06-28 16:09:30"), + Timestamp("2016-06-28 16:46:28"), + ], + "data": ["1", "2", "3"], + } + ).set_index("time") + result = test.groupby(pd.Grouper(freq="h"))["data"].nunique() + expected = test.groupby(pd.Grouper(freq="h"))["data"].apply(Series.nunique) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "key, data, dropna, expected", + [ + ( + ["x", "x", "x"], + [Timestamp("2019-01-01"), pd.NaT, Timestamp("2019-01-01")], + True, + Series([1], index=pd.Index(["x"], name="key"), name="data"), + ), + ( + ["x", "x", "x"], + [dt.date(2019, 1, 1), pd.NaT, dt.date(2019, 1, 1)], + True, + Series([1], index=pd.Index(["x"], name="key"), name="data"), + ), + ( + ["x", "x", "x", "y", "y"], + [ + dt.date(2019, 1, 1), + pd.NaT, + dt.date(2019, 1, 1), + pd.NaT, + dt.date(2019, 1, 1), + ], + False, + Series([2, 2], index=pd.Index(["x", "y"], name="key"), name="data"), + ), + ( + ["x", "x", "x", "x", "y"], + [ + dt.date(2019, 1, 1), + pd.NaT, + dt.date(2019, 1, 1), + pd.NaT, + dt.date(2019, 1, 1), + ], + False, + Series([2, 1], index=pd.Index(["x", "y"], name="key"), name="data"), + ), + ], +) +def test_nunique_with_NaT(key, data, dropna, expected): + # GH 27951 + df = DataFrame({"key": key, "data": data}) + result = df.groupby(["key"])["data"].nunique(dropna=dropna) + tm.assert_series_equal(result, expected) + + +def test_nunique_preserves_column_level_names(): + # GH 23222 + test = DataFrame([1, 2, 2], columns=pd.Index(["A"], name="level_0")) + result = test.groupby([0, 0, 0]).nunique() + expected = DataFrame([2], index=np.array([0]), columns=test.columns) + tm.assert_frame_equal(result, expected) + + +def test_nunique_transform_with_datetime(): + # GH 35109 - transform with nunique on datetimes results in integers + df = DataFrame(date_range("2008-12-31", "2009-01-02"), columns=["date"]) + result = df.groupby([0, 0, 1])["date"].transform("nunique") + expected = Series([2, 2, 1], name="date") + tm.assert_series_equal(result, expected) + + +def test_empty_categorical(observed): + # GH#21334 + cat = Series([1]).astype("category") + ser = cat[:0] + gb = ser.groupby(ser, observed=observed) + result = gb.nunique() + if observed: + expected = Series([], index=cat[:0], dtype="int64") + else: + expected = 
Series([0], index=cat, dtype="int64") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("min_count", [0, 10]) +def test_groupby_sum_mincount_boolean(min_count): + b = True + a = False + na = np.nan + dfg = pd.array([b, b, na, na, a, a, b], dtype="boolean") + + df = DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": dfg}) + result = df.groupby("A").sum(min_count=min_count) + if min_count == 0: + expected = DataFrame( + {"B": pd.array([3, 0, 0], dtype="Int64")}, + index=pd.Index([1, 2, 3], name="A"), + ) + tm.assert_frame_equal(result, expected) + else: + expected = DataFrame( + {"B": pd.array([pd.NA] * 3, dtype="Int64")}, + index=pd.Index([1, 2, 3], name="A"), + ) + tm.assert_frame_equal(result, expected) + + +def test_groupby_sum_below_mincount_nullable_integer(): + # https://github.com/pandas-dev/pandas/issues/32861 + df = DataFrame({"a": [0, 1, 2], "b": [0, 1, 2], "c": [0, 1, 2]}, dtype="Int64") + grouped = df.groupby("a") + idx = pd.Index([0, 1, 2], name="a", dtype="Int64") + + result = grouped["b"].sum(min_count=2) + expected = Series([pd.NA] * 3, dtype="Int64", index=idx, name="b") + tm.assert_series_equal(result, expected) + + result = grouped.sum(min_count=2) + expected = DataFrame({"b": [pd.NA] * 3, "c": [pd.NA] * 3}, dtype="Int64", index=idx) + tm.assert_frame_equal(result, expected) + + +def test_groupby_sum_timedelta_with_nat(): + # GH#42659 + df = DataFrame( + { + "a": [1, 1, 2, 2], + "b": [pd.Timedelta("1d"), pd.Timedelta("2d"), pd.Timedelta("3d"), pd.NaT], + } + ) + td3 = pd.Timedelta(days=3) + + gb = df.groupby("a") + + res = gb.sum() + expected = DataFrame({"b": [td3, td3]}, index=pd.Index([1, 2], name="a")) + tm.assert_frame_equal(res, expected) + + res = gb["b"].sum() + tm.assert_series_equal(res, expected["b"]) + + res = gb["b"].sum(min_count=2) + expected = Series([td3, pd.NaT], dtype="m8[ns]", name="b", index=expected.index) + tm.assert_series_equal(res, expected) From fe07fd5a201c551df679053fbe068302a1337a4a Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Thu, 12 Oct 2023 18:32:58 -0400 Subject: [PATCH 08/19] BLD: Pin numpy to < 2 (#55488) * BLD: Pin numpy to < 2 * typo * Update action.yml * Update unit-tests.yml --- .github/actions/build_pandas/action.yml | 4 ++-- .github/workflows/unit-tests.yml | 2 +- ci/deps/actions-310.yaml | 2 +- ci/deps/actions-311-downstream_compat.yaml | 2 +- ci/deps/actions-311-pyarrownightly.yaml | 2 +- ci/deps/actions-311.yaml | 2 +- ci/deps/actions-39-minimum_versions.yaml | 2 +- ci/deps/actions-39.yaml | 2 +- ci/deps/actions-pypy-39.yaml | 2 +- ci/deps/circle-310-arm64.yaml | 2 +- environment.yml | 2 +- pyproject.toml | 8 ++++---- requirements-dev.txt | 2 +- 13 files changed, 17 insertions(+), 17 deletions(-) diff --git a/.github/actions/build_pandas/action.yml b/.github/actions/build_pandas/action.yml index 73d7723e2fb49..3ee10efaaf96f 100644 --- a/.github/actions/build_pandas/action.yml +++ b/.github/actions/build_pandas/action.yml @@ -25,8 +25,8 @@ runs: - name: Build Pandas run: | if [[ ${{ inputs.editable }} == "true" ]]; then - pip install -e . --no-build-isolation -v + pip install -e . --no-build-isolation -v --no-deps else - pip install . --no-build-isolation -v + pip install . 
--no-build-isolation -v --no-deps fi shell: bash -el {0} diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 96010a4a0227d..53a1f5b95374d 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -348,7 +348,7 @@ jobs: python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy python -m pip install versioneer[toml] python -m pip install python-dateutil pytz tzdata "cython<3.0.3" hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17 - python -m pip install -ve . --no-build-isolation --no-index + python -m pip install -ve . --no-build-isolation --no-index --no-deps python -m pip list - name: Run Tests diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml index ebd1556b8a5f5..180d425b07d82 100644 --- a/ci/deps/actions-310.yaml +++ b/ci/deps/actions-310.yaml @@ -20,7 +20,7 @@ dependencies: # required dependencies - python-dateutil - - numpy + - numpy<2 - pytz # optional dependencies diff --git a/ci/deps/actions-311-downstream_compat.yaml b/ci/deps/actions-311-downstream_compat.yaml index 4d0406814c873..c8a5d8cfb7640 100644 --- a/ci/deps/actions-311-downstream_compat.yaml +++ b/ci/deps/actions-311-downstream_compat.yaml @@ -21,7 +21,7 @@ dependencies: # required dependencies - python-dateutil - - numpy + - numpy<2 - pytz # optional dependencies diff --git a/ci/deps/actions-311-pyarrownightly.yaml b/ci/deps/actions-311-pyarrownightly.yaml index c259286a5359c..1a770d74043bf 100644 --- a/ci/deps/actions-311-pyarrownightly.yaml +++ b/ci/deps/actions-311-pyarrownightly.yaml @@ -19,7 +19,7 @@ dependencies: # required dependencies - python-dateutil - - numpy + - numpy<2 - pytz - pip diff --git a/ci/deps/actions-311.yaml b/ci/deps/actions-311.yaml index f6df5a6e894a7..3d1df9e3bcd59 100644 --- a/ci/deps/actions-311.yaml +++ b/ci/deps/actions-311.yaml @@ -20,7 +20,7 @@ dependencies: # required dependencies - python-dateutil - - numpy + - numpy<2 - pytz # optional dependencies diff --git a/ci/deps/actions-39-minimum_versions.yaml b/ci/deps/actions-39-minimum_versions.yaml index 586768765325e..691bdf25d4d96 100644 --- a/ci/deps/actions-39-minimum_versions.yaml +++ b/ci/deps/actions-39-minimum_versions.yaml @@ -22,7 +22,7 @@ dependencies: # required dependencies - python-dateutil=2.8.2 - - numpy=1.22.4 + - numpy=1.22.4, <2 - pytz=2020.1 # optional dependencies diff --git a/ci/deps/actions-39.yaml b/ci/deps/actions-39.yaml index 3751651a2a2f2..199ce4dea1ac0 100644 --- a/ci/deps/actions-39.yaml +++ b/ci/deps/actions-39.yaml @@ -20,7 +20,7 @@ dependencies: # required dependencies - python-dateutil - - numpy + - numpy<2 - pytz # optional dependencies diff --git a/ci/deps/actions-pypy-39.yaml b/ci/deps/actions-pypy-39.yaml index db0723cd3b8fa..a2f4d6395783a 100644 --- a/ci/deps/actions-pypy-39.yaml +++ b/ci/deps/actions-pypy-39.yaml @@ -21,7 +21,7 @@ dependencies: - hypothesis>=6.46.1 # required - - numpy + - numpy<2 - python-dateutil - pytz - pip: diff --git a/ci/deps/circle-310-arm64.yaml b/ci/deps/circle-310-arm64.yaml index 65f72278a0291..c9f46b98273b3 100644 --- a/ci/deps/circle-310-arm64.yaml +++ b/ci/deps/circle-310-arm64.yaml @@ -20,7 +20,7 @@ dependencies: # required dependencies - python-dateutil - - numpy + - numpy<2 - pytz # optional dependencies diff --git a/environment.yml b/environment.yml index db6138c34f37c..a9648f3298198 100644 --- a/environment.yml +++ b/environment.yml @@ -21,7 +21,7 @@ dependencies: # required dependencies - 
python-dateutil - - numpy + - numpy<2 - pytz # optional dependencies diff --git a/pyproject.toml b/pyproject.toml index a8388a9ff52de..a5aaa72289209 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ requires = [ # we don't want to force users to compile with 1.25 though # (Ideally, in the future, though, oldest-supported-numpy can be dropped when our min numpy is 1.25.x) "oldest-supported-numpy>=2022.8.16; python_version<'3.12'", - "numpy>=1.26.0; python_version>='3.12'", + "numpy>=1.26.0,<2; python_version>='3.12'", "versioneer[toml]" ] @@ -29,9 +29,9 @@ authors = [ license = {file = 'LICENSE'} requires-python = '>=3.9' dependencies = [ - "numpy>=1.22.4; python_version<'3.11'", - "numpy>=1.23.2; python_version=='3.11'", - "numpy>=1.26.0; python_version>='3.12'", + "numpy>=1.22.4,<2; python_version<'3.11'", + "numpy>=1.23.2,<2; python_version=='3.11'", + "numpy>=1.26.0,<2; python_version>='3.12'", "python-dateutil>=2.8.2", "pytz>=2020.1", "tzdata>=2022.1" diff --git a/requirements-dev.txt b/requirements-dev.txt index 98339f45a5052..6e1a6058dce0e 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -12,7 +12,7 @@ pytest-xdist>=2.2.0 pytest-asyncio>=0.17.0 coverage python-dateutil -numpy +numpy<2 pytz beautifulsoup4>=4.11.1 blosc From 579b8268474c5c89133ae7106a323dd94e1e33fa Mon Sep 17 00:00:00 2001 From: Paras Gupta Date: Fri, 13 Oct 2023 21:57:36 +0530 Subject: [PATCH 09/19] BUG: categorical dtype equality for level in different type (#55486) * BUG: categorical dtype equality for level in different type * BUG: categorical dtype equality for level in different type - Comments#1 --- doc/source/whatsnew/v2.2.0.rst | 1 + pandas/core/dtypes/dtypes.py | 2 +- pandas/tests/dtypes/test_dtypes.py | 18 ++++++++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index eec82ae26afcc..ef1a34c4c27c6 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -285,6 +285,7 @@ Bug fixes Categorical ^^^^^^^^^^^ - :meth:`Categorical.isin` raising ``InvalidIndexError`` for categorical containing overlapping :class:`Interval` values (:issue:`34974`) +- Bug in :meth:`CategoricalDtype.__eq__` returning false for unordered categorical data with mixed types (:issue:`55468`) - Datetimelike diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 196c95c3673e9..9f85f82df666f 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -456,7 +456,7 @@ def __eq__(self, other: object) -> bool: # With object-dtype we need a comparison that identifies # e.g. 
int(2) as distinct from float(2) - return hash(self) == hash(other) + return set(left) == set(right) def __repr__(self) -> str_type: if self.categories is None: diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 1f9c371c50ad4..27994708d2bdb 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -917,6 +917,24 @@ def test_equal_but_different(self): assert c1 is not c2 assert c1 != c2 + def test_equal_but_different_mixed_dtypes(self): + c1 = CategoricalDtype([1, 2, "3"]) + c2 = CategoricalDtype(["3", 1, 2]) + assert c1 is not c2 + assert c1 == c2 + + def test_equal_empty_ordered(self): + c1 = CategoricalDtype([], ordered=True) + c2 = CategoricalDtype([], ordered=True) + assert c1 is not c2 + assert c1 == c2 + + def test_equal_empty_unordered(self): + c1 = CategoricalDtype([]) + c2 = CategoricalDtype([]) + assert c1 is not c2 + assert c1 == c2 + @pytest.mark.parametrize("v1, v2", [([1, 2, 3], [1, 2, 3]), ([1, 2, 3], [3, 2, 1])]) def test_order_hashes_different(self, v1, v2): c1 = CategoricalDtype(v1, ordered=False) From c1d36044bd80e987105518a8986b016f7b54d584 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Fri, 13 Oct 2023 12:32:41 -0400 Subject: [PATCH 10/19] Update get_utc declarations (#55507) * updated parameter definitions * revert memview -> ndarray declaration --- pandas/_libs/tslibs/tzconversion.pyx | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index a96779aa33255..9c8865fbdf428 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -425,7 +425,11 @@ timedelta-like} return result.base # .base to get underlying ndarray -cdef Py_ssize_t bisect_right_i8(int64_t *data, int64_t val, Py_ssize_t n): +cdef Py_ssize_t bisect_right_i8( + const int64_t *data, + int64_t val, + Py_ssize_t n +) noexcept: # Caller is responsible for checking n > 0 # This looks very similar to local_search_right in the ndarray.searchsorted # implementation. 
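+    # i.e. return the index of the first element of data strictly greater
+    # than val, given data sorted ascending. For reference, a rough
+    # pure-Python sketch of such a right bisection (not the exact body):
+    #
+    #     lo, hi = 0, n
+    #     while lo < hi:
+    #         mid = (lo + hi) // 2
+    #         if val < data[mid]:
+    #             hi = mid
+    #         else:
+    #             lo = mid + 1
+    #     return lo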
@@ -463,7 +467,7 @@ cdef str _render_tstamp(int64_t val, NPY_DATETIMEUNIT creso): cdef _get_utc_bounds( ndarray[int64_t] vals, - int64_t* tdata, + const int64_t* tdata, Py_ssize_t ntrans, const int64_t[::1] deltas, NPY_DATETIMEUNIT creso, From e1368cfd506a782f386a0c3afe02b32c31e2856e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 13 Oct 2023 09:35:01 -0700 Subject: [PATCH 11/19] BUG: tslibs uncaught overflows (#55503) * BUG: tslibs uncaught overflows * GH refs * windows/32bit builds --- doc/source/whatsnew/v2.2.0.rst | 8 +++--- pandas/_libs/tslibs/offsets.pyx | 7 ++++- pandas/_libs/tslibs/period.pyx | 7 ++--- pandas/_libs/tslibs/timedeltas.pyx | 26 ++++++++++++++----- pandas/tests/scalar/period/test_period.py | 20 ++++++++++++++ .../scalar/timedelta/test_constructors.py | 10 +++++++ .../tests/scalar/timestamp/test_arithmetic.py | 9 ++----- pandas/tests/tseries/offsets/test_ticks.py | 9 +++++++ 8 files changed, 75 insertions(+), 21 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index ef1a34c4c27c6..e495c3c204de5 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -291,12 +291,14 @@ Categorical Datetimelike ^^^^^^^^^^^^ - Bug in :meth:`DatetimeIndex.union` returning object dtype for tz-aware indexes with the same timezone but different units (:issue:`55238`) -- +- Bug in :meth:`Tick.delta` with very large ticks raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`) +- Bug in addition or subtraction of very large :class:`Tick` objects with :class:`Timestamp` or :class:`Timedelta` objects raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`) + Timedelta ^^^^^^^^^ +- Bug in :class:`Timedelta` construction raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`) - Bug in rendering (``__repr__``) of :class:`TimedeltaIndex` and :class:`Series` with timedelta64 values with non-nanosecond resolution entries that are all multiples of 24 hours failing to use the compact representation used in the nanosecond cases (:issue:`55405`) -- Timezones ^^^^^^^^^ @@ -353,7 +355,7 @@ I/O Period ^^^^^^ -- +- Bug in :class:`Period` addition silently wrapping around instead of raising ``OverflowError`` (:issue:`55503`) - Plotting diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 6a6f30de8dade..6c5cdde20da5f 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -961,7 +961,12 @@ cdef class Tick(SingleConstructorOffset): @property def delta(self): - return self.n * Timedelta(self._nanos_inc) + try: + return self.n * Timedelta(self._nanos_inc) + except OverflowError as err: + # GH#55503 as_unit will raise a more useful OutOfBoundsTimedelta + Timedelta(self).as_unit("ns") + raise AssertionError("This should not be reached.") @property def nanos(self) -> int64_t: diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index cacfe43b236d8..d305f27dd1090 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1814,7 +1814,7 @@ cdef class _Period(PeriodMixin): def _add_timedeltalike_scalar(self, other) -> "Period": cdef: - int64_t inc + int64_t inc, ordinal if not self._dtype._is_tick_like(): raise IncompatibleFrequency("Input cannot be converted to " @@ -1832,8 +1832,8 @@ cdef class _Period(PeriodMixin): except ValueError as err: raise IncompatibleFrequency("Input cannot be converted to " f"Period(freq={self.freqstr})") from err - # TODO: overflow-check 
here - ordinal = self.ordinal + inc + with cython.overflowcheck(True): + ordinal = self.ordinal + inc return Period(ordinal=ordinal, freq=self.freq) def _add_offset(self, other) -> "Period": @@ -1846,6 +1846,7 @@ cdef class _Period(PeriodMixin): ordinal = self.ordinal + other.n return Period(ordinal=ordinal, freq=self.freq) + @cython.overflowcheck(True) def __add__(self, other): if not is_period_object(self): # cython semantics; this is analogous to a call to __radd__ diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index e5d81bd5928b9..a573d9a8ed0c0 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1784,7 +1784,7 @@ class Timedelta(_Timedelta): ) # GH43764, convert any input to nanoseconds first and then - # create the timestamp. This ensures that any potential + # create the timedelta. This ensures that any potential # nanosecond contributions from kwargs parsed as floats # are taken into consideration. seconds = int(( @@ -1797,12 +1797,24 @@ class Timedelta(_Timedelta): ) * 1_000_000_000 ) - value = np.timedelta64( - int(kwargs.get("nanoseconds", 0)) - + int(kwargs.get("microseconds", 0) * 1_000) - + int(kwargs.get("milliseconds", 0) * 1_000_000) - + seconds - ) + ns = kwargs.get("nanoseconds", 0) + us = kwargs.get("microseconds", 0) + ms = kwargs.get("milliseconds", 0) + try: + value = np.timedelta64( + int(ns) + + int(us * 1_000) + + int(ms * 1_000_000) + + seconds + ) + except OverflowError as err: + # GH#55503 + msg = ( + f"seconds={seconds}, milliseconds={ms}, " + f"microseconds={us}, nanoseconds={ns}" + ) + raise OutOfBoundsTimedelta(msg) from err + if unit in {"Y", "y", "M"}: raise ValueError( "Units 'M', 'Y', and 'y' are no longer supported, as they do not " diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 6c27881e44b56..dc2938ec345f3 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -1182,6 +1182,26 @@ def test_comparison_numpy_zerodim_arr(self, zerodim_arr, expected): class TestArithmetic: + def test_add_overflow_raises(self): + # GH#55503 + per = Timestamp.max.to_period("ns") + + msg = "|".join( + [ + "Python int too large to convert to C long", + # windows, 32bit linux builds + "int too big to convert", + ] + ) + with pytest.raises(OverflowError, match=msg): + per + 1 + + msg = "value too large" + with pytest.raises(OverflowError, match=msg): + per + Timedelta(1) + with pytest.raises(OverflowError, match=msg): + per + offsets.Nano(1) + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "m"]) def test_add_sub_td64_nat(self, unit): # GH#47196 diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index 0d876fbb9bde8..858ab29d79c5e 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -15,6 +15,16 @@ ) +def test_construct_from_kwargs_overflow(): + # GH#55503 + msg = "seconds=86400000000000000000, milliseconds=0, microseconds=0, nanoseconds=0" + with pytest.raises(OutOfBoundsTimedelta, match=msg): + Timedelta(days=10**6) + msg = "seconds=60000000000000000000, milliseconds=0, microseconds=0, nanoseconds=0" + with pytest.raises(OutOfBoundsTimedelta, match=msg): + Timedelta(minutes=10**9) + + def test_construct_with_weeks_unit_overflow(): # GH#47268 don't silently wrap around with pytest.raises(OutOfBoundsTimedelta, match="without overflow"): diff --git 
a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py index f5c9c576abc24..9c24d364841d1 100644 --- a/pandas/tests/scalar/timestamp/test_arithmetic.py +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -40,17 +40,12 @@ def test_overflow_offset_raises(self): stamp = Timestamp("2017-01-13 00:00:00").as_unit("ns") offset_overflow = 20169940 * offsets.Day(1) - msg = ( - "the add operation between " - r"\<-?\d+ \* Days\> and \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} " - "will overflow" - ) lmsg2 = r"Cannot cast -?20169940 days \+?00:00:00 to unit='ns' without overflow" with pytest.raises(OutOfBoundsTimedelta, match=lmsg2): stamp + offset_overflow - with pytest.raises(OverflowError, match=msg): + with pytest.raises(OutOfBoundsTimedelta, match=lmsg2): offset_overflow + stamp with pytest.raises(OutOfBoundsTimedelta, match=lmsg2): @@ -68,7 +63,7 @@ def test_overflow_offset_raises(self): with pytest.raises(OutOfBoundsTimedelta, match=lmsg3): stamp + offset_overflow - with pytest.raises(OverflowError, match=msg): + with pytest.raises(OutOfBoundsTimedelta, match=lmsg3): offset_overflow + stamp with pytest.raises(OutOfBoundsTimedelta, match=lmsg3): diff --git a/pandas/tests/tseries/offsets/test_ticks.py b/pandas/tests/tseries/offsets/test_ticks.py index 69953955ebbce..abf187ace7cb3 100644 --- a/pandas/tests/tseries/offsets/test_ticks.py +++ b/pandas/tests/tseries/offsets/test_ticks.py @@ -15,6 +15,7 @@ import pytest from pandas._libs.tslibs.offsets import delta_to_tick +from pandas.errors import OutOfBoundsTimedelta from pandas import ( Timedelta, @@ -237,6 +238,14 @@ def test_tick_addition(kls, expected): assert result == expected +def test_tick_delta_overflow(): + # GH#55503 raise OutOfBoundsTimedelta, not OverflowError + tick = offsets.Day(10**9) + msg = "Cannot cast 1000000000 days 00:00:00 to unit='ns' without overflow" + with pytest.raises(OutOfBoundsTimedelta, match=msg): + tick.delta + + @pytest.mark.parametrize("cls", tick_classes) def test_tick_division(cls): off = cls(10) From abba4e2df1ba1651f89ca4b02f8eeeffc17375c2 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 13 Oct 2023 09:41:22 -0700 Subject: [PATCH 12/19] REF: share Index/Block get_values_for_csv (#55485) * REF: share Index/Block get_values_for_csv * mypy fixup --- pandas/core/indexes/base.py | 154 +++++++++++++++--- pandas/core/indexes/datetimelike.py | 2 +- pandas/core/indexes/datetimes.py | 1 - pandas/core/indexes/multi.py | 6 +- pandas/core/indexes/period.py | 2 +- pandas/core/indexes/timedeltas.py | 1 - pandas/core/internals/array_manager.py | 2 +- pandas/core/internals/blocks.py | 93 +---------- pandas/io/formats/csvs.py | 15 +- pandas/io/formats/format.py | 2 +- .../tests/indexes/datetimes/test_formats.py | 16 +- pandas/tests/indexes/interval/test_formats.py | 2 +- pandas/tests/indexes/period/test_formats.py | 10 +- 13 files changed, 158 insertions(+), 148 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 515f750f11219..f53c5606db4d3 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -30,6 +30,7 @@ algos as libalgos, index as libindex, lib, + writers, ) from pandas._libs.internals import BlockValuesRefs import pandas._libs.join as libjoin @@ -97,7 +98,6 @@ is_bool_dtype, is_ea_or_datetimelike_dtype, is_float, - is_float_dtype, is_hashable, is_integer, is_iterator, @@ -119,6 +119,7 @@ ExtensionDtype, IntervalDtype, PeriodDtype, + SparseDtype, ) from pandas.core.dtypes.generic import ( ABCDataFrame, @@ 
-151,7 +152,9 @@ ArrowExtensionArray, BaseMaskedArray, Categorical, + DatetimeArray, ExtensionArray, + TimedeltaArray, ) from pandas.core.arrays.string_ import StringArray from pandas.core.base import ( @@ -199,7 +202,10 @@ MultiIndex, Series, ) - from pandas.core.arrays import PeriodArray + from pandas.core.arrays import ( + IntervalArray, + PeriodArray, + ) __all__ = ["Index"] @@ -1403,7 +1409,7 @@ def _format_with_header(self, *, header: list[str_t], na_rep: str_t) -> list[str result = trim_front(formatted) return header + result - def _format_native_types( + def _get_values_for_csv( self, *, na_rep: str_t = "", @@ -1412,30 +1418,14 @@ def _format_native_types( date_format=None, quoting=None, ) -> npt.NDArray[np.object_]: - """ - Actually format specific types of the index. - """ - from pandas.io.formats.format import FloatArrayFormatter - - if is_float_dtype(self.dtype) and not isinstance(self.dtype, ExtensionDtype): - formatter = FloatArrayFormatter( - self._values, - na_rep=na_rep, - float_format=float_format, - decimal=decimal, - quoting=quoting, - fixed_width=False, - ) - return formatter.get_result_as_array() - - mask = isna(self) - if self.dtype != object and not quoting: - values = np.asarray(self).astype(str) - else: - values = np.array(self, dtype=object, copy=True) - - values[mask] = na_rep - return values + return get_values_for_csv( + self._values, + na_rep=na_rep, + decimal=decimal, + float_format=float_format, + date_format=date_format, + quoting=quoting, + ) def _summary(self, name=None) -> str_t: """ @@ -7629,3 +7619,113 @@ def _maybe_try_sort(result: Index | ArrayLike, sort: bool | None): stacklevel=find_stack_level(), ) return result + + +def get_values_for_csv( + values: ArrayLike, + *, + date_format, + na_rep: str = "nan", + quoting=None, + float_format=None, + decimal: str = ".", +) -> npt.NDArray[np.object_]: + """ + Convert to types which can be consumed by the standard library's + csv.writer.writerows. 
+    """
+    if isinstance(values, Categorical) and values.categories.dtype.kind in "Mm":
+        # GH#40754 Convert categorical datetimes to datetime array
+        values = algos.take_nd(
+            values.categories._values,
+            ensure_platform_int(values._codes),
+            fill_value=na_rep,
+        )
+
+    values = ensure_wrapped_if_datetimelike(values)
+
+    if isinstance(values, (DatetimeArray, TimedeltaArray)):
+        if values.ndim == 1:
+            result = values._format_native_types(na_rep=na_rep, date_format=date_format)
+            result = result.astype(object, copy=False)
+            return result
+
+        # GH#21734 Process every column separately, they might have different formats
+        results_converted = []
+        for i in range(len(values)):
+            result = values[i, :]._format_native_types(
+                na_rep=na_rep, date_format=date_format
+            )
+            results_converted.append(result.astype(object, copy=False))
+        return np.vstack(results_converted)
+
+    elif isinstance(values.dtype, PeriodDtype):
+        # TODO: tests that get here in column path
+        values = cast("PeriodArray", values)
+        res = values._format_native_types(na_rep=na_rep, date_format=date_format)
+        return res
+
+    elif isinstance(values.dtype, IntervalDtype):
+        # TODO: tests that get here in column path
+        values = cast("IntervalArray", values)
+        mask = values.isna()
+        if not quoting:
+            result = np.asarray(values).astype(str)
+        else:
+            result = np.array(values, dtype=object, copy=True)
+
+        result[mask] = na_rep
+        return result
+
+    elif values.dtype.kind == "f" and not isinstance(values.dtype, SparseDtype):
+        # see GH#13418: no special formatting is desired at the
+        # output (important for appropriate 'quoting' behaviour),
+        # so do not pass it through the FloatArrayFormatter
+        if float_format is None and decimal == ".":
+            mask = isna(values)
+
+            if not quoting:
+                values = values.astype(str)
+            else:
+                values = np.array(values, dtype="object")
+
+            values[mask] = na_rep
+            values = values.astype(object, copy=False)
+            return values
+
+        from pandas.io.formats.format import FloatArrayFormatter
+
+        formatter = FloatArrayFormatter(
+            values,
+            na_rep=na_rep,
+            float_format=float_format,
+            decimal=decimal,
+            quoting=quoting,
+            fixed_width=False,
+        )
+        res = formatter.get_result_as_array()
+        res = res.astype(object, copy=False)
+        return res
+
+    elif isinstance(values, ExtensionArray):
+        mask = isna(values)
+
+        new_values = np.asarray(values.astype(object))
+        new_values[mask] = na_rep
+        return new_values
+
+    else:
+        mask = isna(values)
+        itemsize = writers.word_len(na_rep)
+
+        if values.dtype != _dtype_obj and not quoting and itemsize:
+            values = values.astype(str)
+            if values.dtype.itemsize / np.dtype("U1").itemsize < itemsize:
+                # enlarge for the na_rep
+                values = values.astype(f"<U{itemsize}")
+        else:
+            values = np.array(values, dtype="object")
+
+        values[mask] = na_rep
+        values = values.astype(object, copy=False)
+        return values
diff --git a/pandas/core/indexes/multi.py
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
-    def _format_native_types(
+    def _get_values_for_csv(
         self, *, na_rep: str = "nan", **kwargs
     ) -> npt.NDArray[np.object_]:
         new_levels = []
@@ -1392,7 +1392,7 @@ def _format_native_types(
 
         # go through the levels and format them
         for level, level_codes in zip(self.levels, self.codes):
-            level_strs = level._format_native_types(na_rep=na_rep, **kwargs)
+            level_strs = level._get_values_for_csv(na_rep=na_rep, **kwargs)
             # add nan values, if there are any
             mask = level_codes == -1
             if mask.any():
@@ -1408,7 +1408,7 @@ def _format_native_types(
 
         if len(new_levels) == 1:
             # a single-level multi-index
-            return Index(new_levels[0].take(new_codes[0]))._format_native_types()
+            return Index(new_levels[0].take(new_codes[0]))._get_values_for_csv()
         else:
             # reconstruct the multi-index
             mi = MultiIndex(
diff --git a/pandas/core/indexes/period.py
index b1023febe813d..09b41d9c32ec2 100644
--- a/pandas/core/indexes/period.py
+++
b/pandas/core/indexes/period.py @@ -80,7 +80,7 @@ def _new_PeriodIndex(cls, **d): PeriodArray, wrap=True, ) -@inherit_names(["is_leap_year", "_format_native_types"], PeriodArray) +@inherit_names(["is_leap_year"], PeriodArray) class PeriodIndex(DatetimeIndexOpsMixin): """ Immutable ndarray holding ordinal values indicating regular periods in time. diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 498fe56a7ae7f..b1d8d0efb60e8 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -48,7 +48,6 @@ "sum", "std", "median", - "_format_native_types", ], TimedeltaArray, ) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 99af4f51661b1..9987908f407b3 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -68,6 +68,7 @@ Index, ensure_index, ) +from pandas.core.indexes.base import get_values_for_csv from pandas.core.internals.base import ( DataManager, SingleDataManager, @@ -79,7 +80,6 @@ ensure_block_shape, external_values, extract_pandas_array, - get_values_for_csv, maybe_coerce_values, new_block, ) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index f0c14eec81c3c..330effe0f0a9f 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -24,7 +24,6 @@ NaT, internals as libinternals, lib, - writers, ) from pandas._libs.internals import ( BlockPlacement, @@ -61,7 +60,6 @@ np_can_hold_element, ) from pandas.core.dtypes.common import ( - ensure_platform_int, is_1d_only_ea_dtype, is_float_dtype, is_integer_dtype, @@ -75,7 +73,6 @@ IntervalDtype, NumpyEADtype, PeriodDtype, - SparseDtype, ) from pandas.core.dtypes.generic import ( ABCDataFrame, @@ -122,6 +119,7 @@ extract_array, ) from pandas.core.indexers import check_setitem_lengths +from pandas.core.indexes.base import get_values_for_csv if TYPE_CHECKING: from collections.abc import ( @@ -2602,95 +2600,6 @@ def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike: return values -def get_values_for_csv( - values: ArrayLike, - *, - date_format, - na_rep: str = "nan", - quoting=None, - float_format=None, - decimal: str = ".", -) -> npt.NDArray[np.object_]: - """convert to our native types format""" - if isinstance(values, Categorical) and values.categories.dtype.kind in "Mm": - # GH#40754 Convert categorical datetimes to datetime array - values = algos.take_nd( - values.categories._values, - ensure_platform_int(values._codes), - fill_value=na_rep, - ) - - values = ensure_wrapped_if_datetimelike(values) - - if isinstance(values, (DatetimeArray, TimedeltaArray)): - if values.ndim == 1: - result = values._format_native_types(na_rep=na_rep, date_format=date_format) - result = result.astype(object, copy=False) - return result - - # GH#21734 Process every column separately, they might have different formats - results_converted = [] - for i in range(len(values)): - result = values[i, :]._format_native_types( - na_rep=na_rep, date_format=date_format - ) - results_converted.append(result.astype(object, copy=False)) - return np.vstack(results_converted) - - elif values.dtype.kind == "f" and not isinstance(values.dtype, SparseDtype): - # see GH#13418: no special formatting is desired at the - # output (important for appropriate 'quoting' behaviour), - # so do not pass it through the FloatArrayFormatter - if float_format is None and decimal == ".": - mask = isna(values) - - if not quoting: - values = values.astype(str) - else: - 
values = np.array(values, dtype="object")
-
-            values[mask] = na_rep
-            values = values.astype(object, copy=False)
-            return values
-
-        from pandas.io.formats.format import FloatArrayFormatter
-
-        formatter = FloatArrayFormatter(
-            values,
-            na_rep=na_rep,
-            float_format=float_format,
-            decimal=decimal,
-            quoting=quoting,
-            fixed_width=False,
-        )
-        res = formatter.get_result_as_array()
-        res = res.astype(object, copy=False)
-        return res
-
-    elif isinstance(values, ExtensionArray):
-        mask = isna(values)
-
-        new_values = np.asarray(values.astype(object))
-        new_values[mask] = na_rep
-        return new_values
-
-    else:
-        mask = isna(values)
-        itemsize = writers.word_len(na_rep)
-
-        if values.dtype != _dtype_obj and not quoting and itemsize:
-            values = values.astype(str)
-            if values.dtype.itemsize / np.dtype("U1").itemsize < itemsize:
-                # enlarge for the na_rep
-                values = values.astype(f"<U{itemsize}")
-        else:
-            values = np.array(values, dtype="object")
-
-        values[mask] = na_rep
-        values = values.astype(object, copy=False)
-        return values
-
-
 def external_values(values: ArrayLike) -> ArrayLike:
     """
     The array that Series.values returns (public attribute).
diff --git a/pandas/io/formats/csvs.py
index 717dae6eea97c..50503e862ef43 100644
--- a/pandas/io/formats/csvs.py
+++ b/pandas/io/formats/csvs.py
@@ -44,6 +44,7 @@
     IndexLabel,
     StorageOptions,
     WriteBuffer,
+    npt,
 )
 
 from pandas.io.formats.format import DataFrameFormatter
@@ -53,7 +54,7 @@
 
 
 class CSVFormatter:
-    cols: np.ndarray
+    cols: npt.NDArray[np.object_]
 
     def __init__(
         self,
@@ -149,7 +150,9 @@ def _initialize_quotechar(self, quotechar: str | None) -> str | None:
     def has_mi_columns(self) -> bool:
         return bool(isinstance(self.obj.columns, ABCMultiIndex))
 
-    def _initialize_columns(self, cols: Iterable[Hashable] | None) -> np.ndarray:
+    def _initialize_columns(
+        self, cols: Iterable[Hashable] | None
+    ) -> npt.NDArray[np.object_]:
         # validate mi options
         if self.has_mi_columns:
             if cols is not None:
@@ -158,7 +161,7 @@ def _initialize_columns(self, cols: Iterable[Hashable] | None) -> np.ndarray:
 
         if cols is not None:
             if isinstance(cols, ABCIndex):
-                cols = cols._format_native_types(**self._number_format)
+                cols = cols._get_values_for_csv(**self._number_format)
             else:
                 cols = list(cols)
             self.obj = self.obj.loc[:, cols]
@@ -166,7 +169,7 @@ def _initialize_columns(self, cols: Iterable[Hashable] | None) -> np.ndarray:
         # update columns to include possible multiplicity of dupes
         # and make sure cols is just a list of labels
         new_cols = self.obj.columns
-        return new_cols._format_native_types(**self._number_format)
+        return new_cols._get_values_for_csv(**self._number_format)
 
     def _initialize_chunksize(self, chunksize: int | None) -> int:
         if chunksize is None:
@@ -223,7 +226,7 @@ def write_cols(self) -> SequenceNotStr[Hashable]:
                 )
             return self.header
         else:
-            # self.cols is an ndarray derived from Index._format_native_types,
+            # self.cols is an ndarray derived from Index._get_values_for_csv,
             # so its entries are strings, i.e. hashable
             return cast(SequenceNotStr[Hashable], self.cols)
 
@@ -317,7 +320,7 @@ def _save_chunk(self, start_i: int, end_i: int) -> None:
         res = df._get_values_for_csv(**self._number_format)
         data = list(res._iter_column_arrays())
 
-        ix = self.data_index[slicer]._format_native_types(**self._number_format)
+        ix = self.data_index[slicer]._get_values_for_csv(**self._number_format)
         libwriters.write_csv_rows(
             data,
             ix,
diff --git a/pandas/io/formats/format.py
index bb976b3a0208e..c87b8261916f2 100644
--- a/pandas/io/formats/format.py
+++ b/pandas/io/formats/format.py
@@ -1105,7 +1105,7 @@ def format_array(
     the leading space to pad between columns.
 
     When formatting an Index subclass
-    (e.g. IntervalIndex._format_native_types), we don't want the
+    (e.g. 
IntervalIndex._format_native_types), we don't want the + (e.g. IntervalIndex._get_values_for_csv), we don't want the leading space since it should be left-aligned. fallback_formatter diff --git a/pandas/tests/indexes/datetimes/test_formats.py b/pandas/tests/indexes/datetimes/test_formats.py index 9fb5db9e034ee..caeb7fcb86f49 100644 --- a/pandas/tests/indexes/datetimes/test_formats.py +++ b/pandas/tests/indexes/datetimes/test_formats.py @@ -13,38 +13,38 @@ import pandas._testing as tm -def test_format_native_types(): +def test_get_values_for_csv(): index = pd.date_range(freq="1D", periods=3, start="2017-01-01") # First, with no arguments. expected = np.array(["2017-01-01", "2017-01-02", "2017-01-03"], dtype=object) - result = index._format_native_types() + result = index._get_values_for_csv() tm.assert_numpy_array_equal(result, expected) # No NaN values, so na_rep has no effect - result = index._format_native_types(na_rep="pandas") + result = index._get_values_for_csv(na_rep="pandas") tm.assert_numpy_array_equal(result, expected) # Make sure date formatting works expected = np.array(["01-2017-01", "01-2017-02", "01-2017-03"], dtype=object) - result = index._format_native_types(date_format="%m-%Y-%d") + result = index._get_values_for_csv(date_format="%m-%Y-%d") tm.assert_numpy_array_equal(result, expected) # NULL object handling should work index = DatetimeIndex(["2017-01-01", pd.NaT, "2017-01-03"]) expected = np.array(["2017-01-01", "NaT", "2017-01-03"], dtype=object) - result = index._format_native_types() + result = index._get_values_for_csv(na_rep="NaT") tm.assert_numpy_array_equal(result, expected) expected = np.array(["2017-01-01", "pandas", "2017-01-03"], dtype=object) - result = index._format_native_types(na_rep="pandas") + result = index._get_values_for_csv(na_rep="pandas") tm.assert_numpy_array_equal(result, expected) - result = index._format_native_types(date_format="%Y-%m-%d %H:%M:%S.%f") + result = index._get_values_for_csv(na_rep="NaT", date_format="%Y-%m-%d %H:%M:%S.%f") expected = np.array( ["2017-01-01 00:00:00.000000", "NaT", "2017-01-03 00:00:00.000000"], dtype=object, @@ -52,7 +52,7 @@ def test_format_native_types(): tm.assert_numpy_array_equal(result, expected) # invalid format - result = index._format_native_types(date_format="foo") + result = index._get_values_for_csv(na_rep="NaT", date_format="foo") expected = np.array(["foo", "NaT", "foo"], dtype=object) tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/interval/test_formats.py b/pandas/tests/indexes/interval/test_formats.py index acb330c190d6f..5b509edc9ff88 100644 --- a/pandas/tests/indexes/interval/test_formats.py +++ b/pandas/tests/indexes/interval/test_formats.py @@ -104,7 +104,7 @@ def test_repr_floats(self): def test_to_native_types(self, tuples, closed, expected_data): # GH 28210 index = IntervalIndex.from_tuples(tuples, closed=closed) - result = index._format_native_types(na_rep="NaN") + result = index._get_values_for_csv(na_rep="NaN") expected = np.array(expected_data) tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/period/test_formats.py b/pandas/tests/indexes/period/test_formats.py index 9441f56a75f03..7245c6a7116fc 100644 --- a/pandas/tests/indexes/period/test_formats.py +++ b/pandas/tests/indexes/period/test_formats.py @@ -15,29 +15,29 @@ def test_to_native_types(): # First, with no arguments. 
expected = np.array(["2017-01-01", "2017-01-02", "2017-01-03"], dtype=object) - result = index._format_native_types() + result = index._get_values_for_csv() tm.assert_numpy_array_equal(result, expected) # No NaN values, so na_rep has no effect - result = index._format_native_types(na_rep="pandas") + result = index._get_values_for_csv(na_rep="pandas") tm.assert_numpy_array_equal(result, expected) # Make sure date formatting works expected = np.array(["01-2017-01", "01-2017-02", "01-2017-03"], dtype=object) - result = index._format_native_types(date_format="%m-%Y-%d") + result = index._get_values_for_csv(date_format="%m-%Y-%d") tm.assert_numpy_array_equal(result, expected) # NULL object handling should work index = PeriodIndex(["2017-01-01", pd.NaT, "2017-01-03"], freq="D") expected = np.array(["2017-01-01", "NaT", "2017-01-03"], dtype=object) - result = index._format_native_types() + result = index._get_values_for_csv(na_rep="NaT") tm.assert_numpy_array_equal(result, expected) expected = np.array(["2017-01-01", "pandas", "2017-01-03"], dtype=object) - result = index._format_native_types(na_rep="pandas") + result = index._get_values_for_csv(na_rep="pandas") tm.assert_numpy_array_equal(result, expected) From 2f3b0eda7231f33131c5fabf065483e61ac1ea59 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 13 Oct 2023 09:57:40 -0700 Subject: [PATCH 13/19] DEPR: Index.format (#55439) * REF: implement Index._format_flat, _format_multi * de-duplicate, change keyword * DEPR: Index.format * add formatter kwd * Post-merge fixup --- doc/source/whatsnew/v2.2.0.rst | 2 + pandas/core/indexes/base.py | 33 +++++++++ pandas/core/indexes/datetimelike.py | 11 +++ pandas/core/indexes/multi.py | 72 +++++++++++++++++++ pandas/io/formats/excel.py | 8 +-- pandas/io/formats/format.py | 23 +++--- pandas/io/formats/html.py | 14 ++-- pandas/io/formats/style_render.py | 4 +- .../tests/indexes/base_class/test_formats.py | 9 ++- .../tests/indexes/categorical/test_formats.py | 5 +- .../indexes/datetimes/test_datetimelike.py | 5 +- .../tests/indexes/datetimes/test_formats.py | 8 ++- pandas/tests/indexes/multi/test_formats.py | 20 ++++-- pandas/tests/indexes/period/test_period.py | 7 +- pandas/tests/indexes/ranges/test_range.py | 11 ++- pandas/tests/indexes/test_base.py | 12 +++- pandas/tests/indexes/test_old_base.py | 15 ++-- pandas/tests/io/excel/test_writers.py | 2 +- pandas/tests/io/formats/test_format.py | 57 ++++++++++----- pandas/tests/series/test_repr.py | 4 +- 20 files changed, 259 insertions(+), 63 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index e495c3c204de5..93ca2541d7ecd 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -234,6 +234,7 @@ For example: Other Deprecations ^^^^^^^^^^^^^^^^^^ - Changed :meth:`Timedelta.resolution_string` to return ``h``, ``min``, ``s``, ``ms``, ``us``, and ``ns`` instead of ``H``, ``T``, ``S``, ``L``, ``U``, and ``N``, for compatibility with respective deprecations in frequency aliases (:issue:`52536`) +- Deprecated :meth:`Index.format`, use ``index.astype(str)`` or ``index.map(formatter)`` instead (:issue:`55413`) - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_clipboard`. (:issue:`54229`) - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_csv` except ``path_or_buf``. (:issue:`54229`) - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_dict`. 
(:issue:`54229`) @@ -261,6 +262,7 @@ Other Deprecations - Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`) - Deprecated the option ``mode.data_manager`` and the ``ArrayManager``; only the ``BlockManager`` will be available in future versions (:issue:`55043`) - Deprecating downcasting the results of :meth:`DataFrame.fillna`, :meth:`Series.fillna`, :meth:`DataFrame.ffill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill`, :meth:`Series.bfill` in object-dtype cases. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`54261`) +- .. --------------------------------------------------------------------------- .. _whatsnew_220.performance: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f53c5606db4d3..252e88d7c7d51 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1375,6 +1375,14 @@ def format( """ Render a string representation of the Index. """ + warnings.warn( + # GH#55413 + f"{type(self).__name__}.format is deprecated and will be removed " + "in a future version. Convert using index.astype(str) or " + "index.map(formatter) instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) header = [] if name: header.append( @@ -1388,6 +1396,31 @@ def format( return self._format_with_header(header=header, na_rep=na_rep) + _default_na_rep = "NaN" + + @final + def _format_flat( + self, + *, + include_name: bool, + formatter: Callable | None = None, + ) -> list[str_t]: + """ + Render a string representation of the Index. + """ + header = [] + if include_name: + header.append( + pprint_thing(self.name, escape_chars=("\t", "\r", "\n")) + if self.name is not None + else "" + ) + + if formatter is not None: + return header + list(self.map(formatter)) + + return self._format_with_header(header=header, na_rep=self._default_na_rep) + def _format_with_header(self, *, header: list[str_t], na_rep: str_t) -> list[str_t]: from pandas.io.formats.format import format_array diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 9ab3f26f88152..a3e6c50b21642 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -14,6 +14,7 @@ cast, final, ) +import warnings import numpy as np @@ -42,6 +43,7 @@ cache_readonly, doc, ) +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( is_integer, @@ -187,6 +189,7 @@ def _convert_tolerance(self, tolerance, target): # -------------------------------------------------------------------- # Rendering Methods + _default_na_rep = "NaT" def format( self, @@ -198,6 +201,14 @@ def format( """ Render a string representation of the Index. """ + warnings.warn( + # GH#55413 + f"{type(self).__name__}.format is deprecated and will be removed " + "in a future version. Convert using index.astype(str) or " + "index.map(formatter) instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) header = [] if name: header.append( diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index d88eb3f18a3bd..86693f241ddb1 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1430,6 +1430,15 @@ def format( sparsify=None, adjoin: bool = True, ) -> list: + warnings.warn( + # GH#55413 + f"{type(self).__name__}.format is deprecated and will be removed " + "in a future version. 
Convert using index.astype(str) or " + "index.map(formatter) instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if name is not None: names = name @@ -1492,6 +1501,69 @@ def format( else: return result_levels + def _format_multi( + self, + *, + include_names: bool, + sparsify: bool | None | lib.NoDefault, + formatter: Callable | None = None, + ) -> list: + if len(self) == 0: + return [] + + stringified_levels = [] + for lev, level_codes in zip(self.levels, self.codes): + na = _get_na_rep(lev.dtype) + + if len(lev) > 0: + taken = formatted = lev.take(level_codes) + formatted = taken._format_flat(include_name=False, formatter=formatter) + + # we have some NA + mask = level_codes == -1 + if mask.any(): + formatted = np.array(formatted, dtype=object) + formatted[mask] = na + formatted = formatted.tolist() + + else: + # weird all NA case + formatted = [ + pprint_thing(na if isna(x) else x, escape_chars=("\t", "\r", "\n")) + for x in algos.take_nd(lev._values, level_codes) + ] + stringified_levels.append(formatted) + + result_levels = [] + for lev, lev_name in zip(stringified_levels, self.names): + level = [] + + if include_names: + level.append( + pprint_thing(lev_name, escape_chars=("\t", "\r", "\n")) + if lev_name is not None + else "" + ) + + level.extend(np.array(lev, dtype=object)) + result_levels.append(level) + + if sparsify is None: + sparsify = get_option("display.multi_sparse") + + if sparsify: + sentinel: Literal[""] | bool | lib.NoDefault = "" + # GH3547 use value of sparsify as sentinel if it's "Falsey" + assert isinstance(sparsify, bool) or sparsify is lib.no_default + if sparsify is lib.no_default: + sentinel = sparsify + # little bit of a kludge job for #1217 + result_levels = sparsify_labels( + result_levels, start=int(include_names), sentinel=sentinel + ) + + return result_levels + # -------------------------------------------------------------------- # Names Methods diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index b344d9849f16c..684cd4340cd2b 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -623,8 +623,8 @@ def _format_header_mi(self) -> Iterable[ExcelCell]: return columns = self.columns - level_strs = columns.format( - sparsify=self.merge_cells, adjoin=False, names=False + level_strs = columns._format_multi( + sparsify=self.merge_cells, include_names=False ) level_lengths = get_level_lengths(level_strs) coloffset = 0 @@ -813,8 +813,8 @@ def _format_hierarchical_rows(self) -> Iterable[ExcelCell]: if self.merge_cells: # Format hierarchical rows as merged cells. 
- level_strs = self.df.index.format( - sparsify=True, adjoin=False, names=False + level_strs = self.df.index._format_multi( + sparsify=True, include_names=False ) level_lengths = get_level_lengths(level_strs) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index c87b8261916f2..1a7e4d7a80e13 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -313,8 +313,14 @@ def to_string(self) -> str: if len(series) == 0: return f"{type(self.series).__name__}([], {footer})" - have_header = _has_names(series.index) - fmt_index = self.tr_series.index.format(name=True) + index = series.index + have_header = _has_names(index) + if isinstance(index, MultiIndex): + fmt_index = index._format_multi(include_names=True, sparsify=None) + adj = printing.get_adjustment() + fmt_index = adj.adjoin(2, *fmt_index).split("\n") + else: + fmt_index = index._format_flat(include_name=True) fmt_values = self._get_formatted_values() if self.is_truncated_vertically: @@ -777,7 +783,7 @@ def _get_formatted_column_labels(self, frame: DataFrame) -> list[list[str]]: columns = frame.columns if isinstance(columns, MultiIndex): - fmt_columns = columns.format(sparsify=False, adjoin=False) + fmt_columns = columns._format_multi(sparsify=False, include_names=False) fmt_columns = list(zip(*fmt_columns)) dtypes = self.frame.dtypes._values @@ -802,7 +808,7 @@ def space_format(x, y): str_columns = [list(x) for x in zip(*str_columns)] else: - fmt_columns = columns.format() + fmt_columns = columns._format_flat(include_name=False) dtypes = self.frame.dtypes need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes))) str_columns = [ @@ -821,14 +827,15 @@ def _get_formatted_index(self, frame: DataFrame) -> list[str]: fmt = self._get_formatter("__index__") if isinstance(index, MultiIndex): - fmt_index = index.format( + fmt_index = index._format_multi( sparsify=self.sparsify, - adjoin=False, - names=self.show_row_idx_names, + include_names=self.show_row_idx_names, formatter=fmt, ) else: - fmt_index = [index.format(name=self.show_row_idx_names, formatter=fmt)] + fmt_index = [ + index._format_flat(include_name=self.show_row_idx_names, formatter=fmt) + ] fmt_index = [ tuple( diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index b1a3504d46b27..794ce77b3b45e 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -282,7 +282,7 @@ def _write_col_header(self, indent: int) -> None: sentinel = lib.no_default else: sentinel = False - levels = self.columns.format(sparsify=sentinel, adjoin=False, names=False) + levels = self.columns._format_multi(sparsify=sentinel, include_names=False) level_lengths = get_level_lengths(levels, sentinel) inner_lvl = len(level_lengths) - 1 for lnum, (records, values) in enumerate(zip(level_lengths, levels)): @@ -437,7 +437,8 @@ def _write_regular_rows( if fmt is not None: index_values = self.fmt.tr_frame.index.map(fmt) else: - index_values = self.fmt.tr_frame.index.format() + # only reached with non-Multi index + index_values = self.fmt.tr_frame.index._format_flat(include_name=False) row: list[str] = [] for i in range(nrows): @@ -480,13 +481,13 @@ def _write_hierarchical_rows( nrows = len(frame) assert isinstance(frame.index, MultiIndex) - idx_values = frame.index.format(sparsify=False, adjoin=False, names=False) + idx_values = frame.index._format_multi(sparsify=False, include_names=False) idx_values = list(zip(*idx_values)) if self.fmt.sparsify: # GH3547 sentinel = lib.no_default - levels = frame.index.format(sparsify=sentinel, 
adjoin=False, names=False) + levels = frame.index._format_multi(sparsify=sentinel, include_names=False) level_lengths = get_level_lengths(levels, sentinel) inner_lvl = len(level_lengths) - 1 @@ -579,7 +580,7 @@ def _write_hierarchical_rows( ) idx_values = list( - zip(*frame.index.format(sparsify=False, adjoin=False, names=False)) + zip(*frame.index._format_multi(sparsify=False, include_names=False)) ) row = [] row.extend(idx_values[i]) @@ -606,7 +607,8 @@ def _get_formatted_values(self) -> dict[int, list[str]]: return {i: self.fmt.format_col(i) for i in range(self.ncols)} def _get_columns_formatted_values(self) -> list[str]: - return self.columns.format() + # only reached with non-Multi Index + return self.columns._format_flat(include_name=False) def write_style(self) -> None: # We use the "scoped" attribute here so that the desired diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 829ed4a33f6a4..416b263ba8497 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -1652,9 +1652,9 @@ def _get_level_lengths( Result is a dictionary of (level, initial_position): span """ if isinstance(index, MultiIndex): - levels = index.format(sparsify=lib.no_default, adjoin=False) + levels = index._format_multi(sparsify=lib.no_default, include_names=False) else: - levels = index.format() + levels = index._format_flat(include_name=False) if hidden_elements is None: hidden_elements = [] diff --git a/pandas/tests/indexes/base_class/test_formats.py b/pandas/tests/indexes/base_class/test_formats.py index 9053d45dee623..20f94010f56f8 100644 --- a/pandas/tests/indexes/base_class/test_formats.py +++ b/pandas/tests/indexes/base_class/test_formats.py @@ -4,6 +4,7 @@ import pandas._config.config as cf from pandas import Index +import pandas._testing as tm class TestIndexRendering: @@ -133,7 +134,9 @@ def test_summary_bug(self): def test_index_repr_bool_nan(self): # GH32146 arr = Index([True, False, np.nan], dtype=object) - exp1 = arr.format() + msg = "Index.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + exp1 = arr.format() out1 = ["True", "False", "NaN"] assert out1 == exp1 @@ -145,4 +148,6 @@ def test_format_different_scalar_lengths(self): # GH#35439 idx = Index(["aaaaaaaaa", "b"]) expected = ["aaaaaaaaa", "b"] - assert idx.format() == expected + msg = r"Index\.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + assert idx.format() == expected diff --git a/pandas/tests/indexes/categorical/test_formats.py b/pandas/tests/indexes/categorical/test_formats.py index 7dbcaaa8d4ba6..ea3e4ce213e67 100644 --- a/pandas/tests/indexes/categorical/test_formats.py +++ b/pandas/tests/indexes/categorical/test_formats.py @@ -4,6 +4,7 @@ import pandas._config.config as cf from pandas import CategoricalIndex +import pandas._testing as tm class TestCategoricalIndexRepr: @@ -11,7 +12,9 @@ def test_format_different_scalar_lengths(self): # GH#35439 idx = CategoricalIndex(["aaaaaaaaa", "b"]) expected = ["aaaaaaaaa", "b"] - assert idx.format() == expected + msg = r"CategoricalIndex\.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + assert idx.format() == expected def test_string_categorical_index_repr(self): # short diff --git a/pandas/tests/indexes/datetimes/test_datetimelike.py b/pandas/tests/indexes/datetimes/test_datetimelike.py index a6bee20d3d3ec..a012a2985b41c 100644 --- a/pandas/tests/indexes/datetimes/test_datetimelike.py +++ 
b/pandas/tests/indexes/datetimes/test_datetimelike.py @@ -1,5 +1,6 @@ """ generic tests from the Datetimelike class """ from pandas import date_range +import pandas._testing as tm class TestDatetimeIndex: @@ -7,4 +8,6 @@ def test_format(self): # GH35439 idx = date_range("20130101", periods=5) expected = [f"{x:%Y-%m-%d}" for x in idx] - assert idx.format() == expected + msg = r"DatetimeIndex\.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + assert idx.format() == expected diff --git a/pandas/tests/indexes/datetimes/test_formats.py b/pandas/tests/indexes/datetimes/test_formats.py index caeb7fcb86f49..6f75ac1b569c0 100644 --- a/pandas/tests/indexes/datetimes/test_formats.py +++ b/pandas/tests/indexes/datetimes/test_formats.py @@ -285,13 +285,17 @@ def test_format_with_name_time_info(self): # bug I fixed 12/20/2011 dates = pd.date_range("2011-01-01 04:00:00", periods=10, name="something") - formatted = dates.format(name=True) + msg = "DatetimeIndex.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + formatted = dates.format(name=True) assert formatted[0] == "something" def test_format_datetime_with_time(self): dti = DatetimeIndex([datetime(2012, 2, 7), datetime(2012, 2, 7, 23)]) - result = dti.format() + msg = "DatetimeIndex.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = dti.format() expected = ["2012-02-07 00:00:00", "2012-02-07 23:00:00"] assert len(result) == 2 assert result == expected diff --git a/pandas/tests/indexes/multi/test_formats.py b/pandas/tests/indexes/multi/test_formats.py index 011f61fac90e8..bbe94824eefa1 100644 --- a/pandas/tests/indexes/multi/test_formats.py +++ b/pandas/tests/indexes/multi/test_formats.py @@ -6,24 +6,31 @@ Index, MultiIndex, ) +import pandas._testing as tm def test_format(idx): - idx.format() - idx[:0].format() + msg = "MultiIndex.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + idx.format() + idx[:0].format() def test_format_integer_names(): index = MultiIndex( levels=[[0, 1], [0, 1]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1] ) - index.format(names=True) + msg = "MultiIndex.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + index.format(names=True) def test_format_sparse_config(idx): # GH1538 + msg = "MultiIndex.format is deprecated" with pd.option_context("display.multi_sparse", False): - result = idx.format() + with tm.assert_produces_warning(FutureWarning, match=msg): + result = idx.format() assert result[1] == "foo two" @@ -37,8 +44,9 @@ def test_format_sparse_display(): [0, 0, 0, 0, 0, 0], ], ) - - result = index.format() + msg = "MultiIndex.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = index.format() assert result[3] == "1 0 0 0" diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 22bb63d67f57f..1bb8d66332cd0 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -275,8 +275,11 @@ def test_map(self): def test_format_empty(self): # GH35712 empty_idx = PeriodIndex([], freq="Y") - assert empty_idx.format() == [] - assert empty_idx.format(name=True) == [""] + msg = r"PeriodIndex\.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + assert empty_idx.format() == [] + with tm.assert_produces_warning(FutureWarning, match=msg): + assert empty_idx.format(name=True) == [""] def 
test_period_index_frequency_ME_error_message(self): msg = "Invalid frequency: 2ME" diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index 132704434829e..95756b04bca69 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -240,7 +240,9 @@ def test_cache(self): pass assert idx._cache == {} - idx.format() + msg = "RangeIndex.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + idx.format() assert idx._cache == {} df = pd.DataFrame({"a": range(10)}, index=idx) @@ -566,8 +568,11 @@ def test_engineless_lookup(self): def test_format_empty(self): # GH35712 empty_idx = RangeIndex(0) - assert empty_idx.format() == [] - assert empty_idx.format(name=True) == [""] + msg = r"RangeIndex\.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + assert empty_idx.format() == [] + with tm.assert_produces_warning(FutureWarning, match=msg): + assert empty_idx.format(name=True) == [""] @pytest.mark.parametrize( "ri", diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 6afab569797f2..04ab2020b4c7a 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -666,13 +666,17 @@ def test_format_bug(self): # include us since the default for Timestamp shows these but Index # formatting does not we are skipping) now = datetime.now() + msg = r"Index\.format is deprecated" + if not str(now).endswith("000"): index = Index([now]) - formatted = index.format() + with tm.assert_produces_warning(FutureWarning, match=msg): + formatted = index.format() expected = [str(index[0])] assert formatted == expected - Index([]).format() + with tm.assert_produces_warning(FutureWarning, match=msg): + Index([]).format() @pytest.mark.parametrize("vals", [[1, 2.0 + 3.0j, 4.0], ["a", "b", "c"]]) def test_format_missing(self, vals, nulls_fixture): @@ -682,7 +686,9 @@ def test_format_missing(self, vals, nulls_fixture): index = Index(vals, dtype=object) # TODO: case with complex dtype? 
- formatted = index.format() + msg = r"Index\.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + formatted = index.format() null_repr = "NaN" if isinstance(nulls_fixture, float) else str(nulls_fixture) expected = [str(index[0]), str(index[1]), str(index[2]), null_repr] diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py index 79dc423f12a85..32adbc693390b 100644 --- a/pandas/tests/indexes/test_old_base.py +++ b/pandas/tests/indexes/test_old_base.py @@ -559,15 +559,20 @@ def test_format(self, simple_index): pytest.skip("Tested elsewhere.") idx = simple_index expected = [str(x) for x in idx] - assert idx.format() == expected + msg = r"Index\.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + assert idx.format() == expected def test_format_empty(self, simple_index): # GH35712 if isinstance(simple_index, (PeriodIndex, RangeIndex)): pytest.skip("Tested elsewhere") empty_idx = type(simple_index)([]) - assert empty_idx.format() == [] - assert empty_idx.format(name=True) == [""] + msg = r"Index\.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + assert empty_idx.format() == [] + with tm.assert_produces_warning(FutureWarning, match=msg): + assert empty_idx.format(name=True) == [""] def test_fillna(self, index): # GH 11343 @@ -955,7 +960,9 @@ def test_format(self, simple_index): idx = simple_index max_width = max(len(str(x)) for x in idx) expected = [str(x).ljust(max_width) for x in idx] - assert idx.format() == expected + msg = r"Index\.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + assert idx.format() == expected def test_insert_non_na(self, simple_index): # GH#43921 inserting an element that we know we can hold should diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index e4ce969daab53..18af18ade85f4 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -809,7 +809,7 @@ def test_to_excel_multiindex_cols(self, merge_cells, frame, path): reader, sheet_name="test1", header=header, index_col=[0, 1] ) if not merge_cells: - fm = frame.columns.format(sparsify=False, adjoin=False, names=False) + fm = frame.columns._format_multi(sparsify=False, include_names=False) frame.columns = [".".join(map(str, q)) for q in zip(*fm)] tm.assert_frame_equal(frame, df) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 57f1f082708ae..d18c333d79244 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3333,7 +3333,9 @@ def test_period_format_and_strftime_default(self): per = pd.PeriodIndex([datetime(2003, 1, 1, 12), None], freq="h") # Default formatting - formatted = per.format() + msg = "PeriodIndex.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + formatted = per.format() assert formatted[0] == "2003-01-01 12:00" # default: minutes not shown assert formatted[1] == "NaT" # format is equivalent to strftime(None)... 
@@ -3342,35 +3344,40 @@ def test_period_format_and_strftime_default(self): # Same test with nanoseconds freq per = pd.period_range("2003-01-01 12:01:01.123456789", periods=2, freq="ns") - formatted = per.format() + with tm.assert_produces_warning(FutureWarning, match=msg): + formatted = per.format() assert (formatted == per.strftime(None)).all() assert formatted[0] == "2003-01-01 12:01:01.123456789" assert formatted[1] == "2003-01-01 12:01:01.123456790" def test_period_custom(self): # GH#46252 custom formatting directives %l (ms) and %u (us) + msg = "PeriodIndex.format is deprecated" # 3 digits per = pd.period_range("2003-01-01 12:01:01.123", periods=2, freq="ms") - formatted = per.format(date_format="%y %I:%M:%S (ms=%l us=%u ns=%n)") + with tm.assert_produces_warning(FutureWarning, match=msg): + formatted = per.format(date_format="%y %I:%M:%S (ms=%l us=%u ns=%n)") assert formatted[0] == "03 12:01:01 (ms=123 us=123000 ns=123000000)" assert formatted[1] == "03 12:01:01 (ms=124 us=124000 ns=124000000)" # 6 digits per = pd.period_range("2003-01-01 12:01:01.123456", periods=2, freq="us") - formatted = per.format(date_format="%y %I:%M:%S (ms=%l us=%u ns=%n)") + with tm.assert_produces_warning(FutureWarning, match=msg): + formatted = per.format(date_format="%y %I:%M:%S (ms=%l us=%u ns=%n)") assert formatted[0] == "03 12:01:01 (ms=123 us=123456 ns=123456000)" assert formatted[1] == "03 12:01:01 (ms=123 us=123457 ns=123457000)" # 9 digits per = pd.period_range("2003-01-01 12:01:01.123456789", periods=2, freq="ns") - formatted = per.format(date_format="%y %I:%M:%S (ms=%l us=%u ns=%n)") + with tm.assert_produces_warning(FutureWarning, match=msg): + formatted = per.format(date_format="%y %I:%M:%S (ms=%l us=%u ns=%n)") assert formatted[0] == "03 12:01:01 (ms=123 us=123456 ns=123456789)" assert formatted[1] == "03 12:01:01 (ms=123 us=123456 ns=123456790)" def test_period_tz(self): # Formatting periods created from a datetime with timezone. 
- + msg = r"PeriodIndex\.format is deprecated" # This timestamp is in 2013 in Europe/Paris but is 2012 in UTC dt = pd.to_datetime(["2013-01-01 00:00:00+01:00"], utc=True) @@ -3378,13 +3385,15 @@ def test_period_tz(self): # Since tz is currently set as utc, we'll see 2012 with tm.assert_produces_warning(UserWarning, match="will drop timezone"): per = dt.to_period(freq="h") - assert per.format()[0] == "2012-12-31 23:00" + with tm.assert_produces_warning(FutureWarning, match=msg): + assert per.format()[0] == "2012-12-31 23:00" # If tz is currently set as paris before conversion, we'll see 2013 dt = dt.tz_convert("Europe/Paris") with tm.assert_produces_warning(UserWarning, match="will drop timezone"): per = dt.to_period(freq="h") - assert per.format()[0] == "2013-01-01 00:00" + with tm.assert_produces_warning(FutureWarning, match=msg): + assert per.format()[0] == "2013-01-01 00:00" @pytest.mark.parametrize( "locale_str", @@ -3411,7 +3420,9 @@ def test_period_non_ascii_fmt(self, locale_str): # Index per = pd.period_range("2003-01-01 01:00:00", periods=2, freq="12h") - formatted = per.format(date_format="%y é") + msg = "PeriodIndex.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + formatted = per.format(date_format="%y é") assert formatted[0] == "03 é" assert formatted[1] == "03 é" @@ -3443,33 +3454,45 @@ def test_period_custom_locale_directive(self, locale_str): # Index per = pd.period_range("2003-01-01 01:00:00", periods=2, freq="12h") - formatted = per.format(date_format="%y %I:%M:%S%p") + msg = "PeriodIndex.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + formatted = per.format(date_format="%y %I:%M:%S%p") assert formatted[0] == f"03 01:00:00{am_local}" assert formatted[1] == f"03 01:00:00{pm_local}" class TestDatetimeIndexFormat: def test_datetime(self): - formatted = pd.to_datetime([datetime(2003, 1, 1, 12), NaT]).format() + msg = "DatetimeIndex.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + formatted = pd.to_datetime([datetime(2003, 1, 1, 12), NaT]).format() assert formatted[0] == "2003-01-01 12:00:00" assert formatted[1] == "NaT" def test_date(self): - formatted = pd.to_datetime([datetime(2003, 1, 1), NaT]).format() + msg = "DatetimeIndex.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + formatted = pd.to_datetime([datetime(2003, 1, 1), NaT]).format() assert formatted[0] == "2003-01-01" assert formatted[1] == "NaT" def test_date_tz(self): - formatted = pd.to_datetime([datetime(2013, 1, 1)], utc=True).format() + dti = pd.to_datetime([datetime(2013, 1, 1)], utc=True) + msg = "DatetimeIndex.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + formatted = dti.format() assert formatted[0] == "2013-01-01 00:00:00+00:00" - formatted = pd.to_datetime([datetime(2013, 1, 1), NaT], utc=True).format() + dti = pd.to_datetime([datetime(2013, 1, 1), NaT], utc=True) + with tm.assert_produces_warning(FutureWarning, match=msg): + formatted = dti.format() assert formatted[0] == "2013-01-01 00:00:00+00:00" def test_date_explicit_date_format(self): - formatted = pd.to_datetime([datetime(2003, 2, 1), NaT]).format( - date_format="%m-%d-%Y", na_rep="UT" - ) + dti = pd.to_datetime([datetime(2003, 2, 1), NaT]) + msg = "DatetimeIndex.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + formatted = dti.format(date_format="%m-%d-%Y", na_rep="UT") assert formatted[0] == "02-01-2003" assert formatted[1] == "UT" 
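As a quick illustration of the migration this deprecation asks for, independent of the test changes above (a minimal sketch, not part of the patch; the strftime pattern is only an example):

    import pandas as pd

    idx = pd.date_range("2017-01-01", periods=3)

    # before: idx.format()  -- now emits a FutureWarning
    labels = idx.astype(str).tolist()

    # before: idx.format(formatter=f)  -- use Index.map instead
    custom = idx.map(lambda ts: ts.strftime("%m-%Y-%d")).tolist()
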
diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index 86474a38d29fb..86addb9dadfad 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -258,7 +258,9 @@ def test_index_repr_in_frame_with_nan(self): def test_format_pre_1900_dates(self): rng = date_range("1/1/1850", "1/1/1950", freq="Y-DEC") - rng.format() + msg = "DatetimeIndex.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + rng.format() ts = Series(1, index=rng) repr(ts) From b5b8be04b2a63fc02e89650a740779e718d7a99b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 13 Oct 2023 11:41:56 -0700 Subject: [PATCH 14/19] REF: de-duplicate some Timedelta helpers (#55501) * REF: implement disallow_ambiguous_unit * REF: implement get_unit_for_round --- pandas/_libs/tslibs/timedeltas.pxd | 1 + pandas/_libs/tslibs/timedeltas.pyi | 2 ++ pandas/_libs/tslibs/timedeltas.pyx | 27 ++++++++++++++++++--------- pandas/_libs/tslibs/timestamps.pyx | 7 ++----- pandas/core/arrays/datetimelike.py | 6 ++---- pandas/core/indexes/timedeltas.py | 7 ++----- pandas/core/tools/timedeltas.py | 8 ++------ 7 files changed, 29 insertions(+), 29 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd index fb6e29a8932a1..f3473e46b6699 100644 --- a/pandas/_libs/tslibs/timedeltas.pxd +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -4,6 +4,7 @@ from numpy cimport int64_t from .np_datetime cimport NPY_DATETIMEUNIT +cpdef int64_t get_unit_for_round(freq, NPY_DATETIMEUNIT creso) except? -1 # Exposed for tslib, not intended for outside use. cpdef int64_t delta_to_nanoseconds( delta, NPY_DATETIMEUNIT reso=*, bint round_ok=* diff --git a/pandas/_libs/tslibs/timedeltas.pyi b/pandas/_libs/tslibs/timedeltas.pyi index 6d993722ce1d4..181703c5f55b2 100644 --- a/pandas/_libs/tslibs/timedeltas.pyi +++ b/pandas/_libs/tslibs/timedeltas.pyi @@ -68,6 +68,8 @@ UnitChoices: TypeAlias = Literal[ ] _S = TypeVar("_S", bound=timedelta) +def get_unit_for_round(freq, creso: int) -> int: ... +def disallow_ambiguous_unit(unit: str | None) -> None: ... def ints_to_pytimedelta( arr: npt.NDArray[np.timedelta64], box: bool = ..., diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index a573d9a8ed0c0..5e124b89eab5e 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -827,6 +827,14 @@ def _binary_op_method_timedeltalike(op, name): # ---------------------------------------------------------------------- # Timedelta Construction +cpdef disallow_ambiguous_unit(unit): + if unit in {"Y", "y", "M"}: + raise ValueError( + "Units 'M', 'Y', and 'y' are no longer supported, as they do not " + "represent unambiguous timedelta values durations." + ) + + cdef int64_t parse_iso_format_string(str ts) except? -1: """ Extracts and cleanses the appropriate values from a match object with @@ -1815,11 +1823,7 @@ class Timedelta(_Timedelta): ) raise OutOfBoundsTimedelta(msg) from err - if unit in {"Y", "y", "M"}: - raise ValueError( - "Units 'M', 'Y', and 'y' are no longer supported, as they do not " - "represent unambiguous timedelta values durations." 
- ) + disallow_ambiguous_unit(unit) # GH 30543 if pd.Timedelta already passed, return it # check that only value is passed @@ -1932,10 +1936,7 @@ class Timedelta(_Timedelta): int64_t result, unit ndarray[int64_t] arr - from pandas._libs.tslibs.offsets import to_offset - - to_offset(freq).nanos # raises on non-fixed freq - unit = delta_to_nanoseconds(to_offset(freq), self._creso) + unit = get_unit_for_round(freq, self._creso) arr = np.array([self._value], dtype="i8") try: @@ -2292,3 +2293,11 @@ cdef bint _should_cast_to_timedelta(object obj): return ( is_any_td_scalar(obj) or obj is None or obj is NaT or isinstance(obj, str) ) + + +cpdef int64_t get_unit_for_round(freq, NPY_DATETIMEUNIT creso) except? -1: + from pandas._libs.tslibs.offsets import to_offset + + freq = to_offset(freq) + freq.nanos # raises on non-fixed freq + return delta_to_nanoseconds(freq, creso) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index edd061fd8cdf1..66b1cec63e9e9 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -107,7 +107,7 @@ from pandas._libs.tslibs.np_datetime import ( from pandas._libs.tslibs.offsets cimport to_offset from pandas._libs.tslibs.timedeltas cimport ( _Timedelta, - delta_to_nanoseconds, + get_unit_for_round, is_any_td_scalar, ) @@ -1896,8 +1896,7 @@ class Timestamp(_Timestamp): int64_t nanos freq = to_offset(freq, is_period=False) - freq.nanos # raises on non-fixed freq - nanos = delta_to_nanoseconds(freq, self._creso) + nanos = get_unit_for_round(freq, self._creso) if nanos == 0: if freq.nanos == 0: raise ValueError("Division by zero in rounding") @@ -1905,8 +1904,6 @@ class Timestamp(_Timestamp): # e.g. self.unit == "s" and sub-second freq return self - # TODO: problem if nanos==0 - if self.tz is not None: value = self.tz_localize(None)._value else: diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index fd51303ebd55f..22ffaceeff1bb 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -36,7 +36,6 @@ Timedelta, Timestamp, astype_overflowsafe, - delta_to_nanoseconds, get_unit_from_dtype, iNaT, ints_to_pydatetime, @@ -49,6 +48,7 @@ round_nsint64, ) from pandas._libs.tslibs.np_datetime import compare_mismatched_resolutions +from pandas._libs.tslibs.timedeltas import get_unit_for_round from pandas._libs.tslibs.timestamps import integer_op_not_supported from pandas._typing import ( ArrayLike, @@ -2129,9 +2129,7 @@ def _round(self, freq, mode, ambiguous, nonexistent): values = self.view("i8") values = cast(np.ndarray, values) - offset = to_offset(freq) - offset.nanos # raises on non-fixed frequencies - nanos = delta_to_nanoseconds(offset, self._creso) + nanos = get_unit_for_round(freq, self._creso) if nanos == 0: # GH 52761 return self.copy() diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index b1d8d0efb60e8..9d8ef5b0a69cd 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -13,6 +13,7 @@ Timedelta, to_offset, ) +from pandas._libs.tslibs.timedeltas import disallow_ambiguous_unit from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( @@ -170,11 +171,7 @@ def __new__( if is_scalar(data): cls._raise_scalar_data_error(data) - if unit in {"Y", "y", "M"}: - raise ValueError( - "Units 'M', 'Y', and 'y' are no longer supported, as they do not " - "represent unambiguous timedelta values durations." 
-        )
 
     if arg is None:
         return arg
     elif isinstance(arg, ABCSeries):

From 0021d241c6aa1b8db91151361b48d7864201fd01 Mon Sep 17 00:00:00 2001
From: Natalia Mokeeva <91160475+natmokval@users.noreply.github.com>
Date: Sat, 14 Oct 2023 09:58:45 +0200
Subject: [PATCH 15/19] DEPR: rename ‘BM’/‘CBM’ to ‘BME’/‘CBME’ for offsets
 (#55496)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* rename BM, CBM to BME, CBME for offsets
* fix tests
* correct docs
* add tests, add notes to 2.2.0 whatsnew
---
 doc/source/user_guide/timeseries.rst             | 16 ++++++++--------
 doc/source/whatsnew/v2.2.0.rst                   |  6 +++++-
 pandas/_libs/tslibs/dtypes.pyx                   |  4 +++-
 pandas/_libs/tslibs/offsets.pyx                  | 10 +++++-----
 pandas/core/generic.py                           |  4 ++--
 pandas/core/resample.py                          |  4 ++--
 pandas/tests/frame/methods/test_asfreq.py        | 10 +++++-----
 .../indexes/datetimes/methods/test_shift.py      |  2 +-
 .../indexes/datetimes/methods/test_to_period.py  |  2 +-
 pandas/tests/indexes/datetimes/test_datetime.py  | 14 ++++++++++++++
 pandas/tests/indexes/datetimes/test_misc.py      |  2 +-
 .../tests/indexes/multi/test_get_level_values.py |  2 +-
 pandas/tests/plotting/test_datetimelike.py       |  2 +-
 pandas/tests/resample/test_datetime_index.py     | 15 +++++++++++++--
 .../tests/tseries/frequencies/test_inference.py  |  2 +-
 pandas/tests/tseries/offsets/test_offsets.py     |  8 ++++----
 pandas/tseries/frequencies.py                    |  2 +-
 17 files changed, 68 insertions(+), 37 deletions(-)

diff --git a/doc/source/user_guide/timeseries.rst
index 9f3077e266e98..7ffbd9b2d740a 100644
--- a/doc/source/user_guide/timeseries.rst
+++ b/doc/source/user_guide/timeseries.rst
@@ -461,7 +461,7 @@ of those specified will not be generated:
 
 .. ipython:: python
 
-   pd.date_range(start, end, freq="BM")
+   pd.date_range(start, end, freq="BME")
 
    pd.date_range(start, end, freq="W")
 
@@ -557,7 +557,7 @@ intelligent functionality like selection, slicing, etc.
 
 .. ipython:: python
 
-   rng = pd.date_range(start, end, freq="BM")
+   rng = pd.date_range(start, end, freq="BME")
    ts = pd.Series(np.random.randn(len(rng)), index=rng)
    ts.index
    ts[:5].index
@@ -884,9 +884,9 @@ into ``freq`` keyword arguments. 
The available date offsets and associated frequ
     :class:`~pandas.tseries.offsets.LastWeekOfMonth`, ``'LWOM'``, "the x-th day of the last week of each month"
     :class:`~pandas.tseries.offsets.MonthEnd`, ``'ME'``, "calendar month end"
     :class:`~pandas.tseries.offsets.MonthBegin`, ``'MS'``, "calendar month begin"
-    :class:`~pandas.tseries.offsets.BMonthEnd` or :class:`~pandas.tseries.offsets.BusinessMonthEnd`, ``'BM'``, "business month end"
+    :class:`~pandas.tseries.offsets.BMonthEnd` or :class:`~pandas.tseries.offsets.BusinessMonthEnd`, ``'BME'``, "business month end"
     :class:`~pandas.tseries.offsets.BMonthBegin` or :class:`~pandas.tseries.offsets.BusinessMonthBegin`, ``'BMS'``, "business month begin"
-    :class:`~pandas.tseries.offsets.CBMonthEnd` or :class:`~pandas.tseries.offsets.CustomBusinessMonthEnd`, ``'CBM'``, "custom business month end"
+    :class:`~pandas.tseries.offsets.CBMonthEnd` or :class:`~pandas.tseries.offsets.CustomBusinessMonthEnd`, ``'CBME'``, "custom business month end"
     :class:`~pandas.tseries.offsets.CBMonthBegin` or :class:`~pandas.tseries.offsets.CustomBusinessMonthBegin`, ``'CBMS'``, "custom business month begin"
     :class:`~pandas.tseries.offsets.SemiMonthEnd`, ``'SM'``, "15th (or other day_of_month) and calendar month end"
     :class:`~pandas.tseries.offsets.SemiMonthBegin`, ``'SMS'``, "15th (or other day_of_month) and calendar month begin"
@@ -1248,8 +1248,8 @@ frequencies. We will refer to these aliases as *offset aliases*.
     "W", "weekly frequency"
     "ME", "month end frequency"
     "SM", "semi-month end frequency (15th and end of month)"
-    "BM", "business month end frequency"
-    "CBM", "custom business month end frequency"
+    "BME", "business month end frequency"
+    "CBME", "custom business month end frequency"
     "MS", "month start frequency"
     "SMS", "semi-month start frequency (1st and 15th)"
     "BMS", "business month start frequency"
@@ -1586,7 +1586,7 @@ rather than changing the alignment of the data and the index:
 
     ts.shift(5, freq="D")
     ts.shift(5, freq=pd.offsets.BDay())
-    ts.shift(5, freq="BM")
+    ts.shift(5, freq="BME")
 
 Note that when ``freq`` is specified, the leading entry is no longer NaN
 because the data is not being realigned.
@@ -1692,7 +1692,7 @@ the end of the interval.
 .. warning::
 
     The default value for ``label`` and ``closed`` is '**left**' for all
-    frequency offsets except for 'ME', 'Y', 'Q', 'BM', 'BY', 'BQ', and 'W'
+    frequency offsets except for 'ME', 'Y', 'Q', 'BME', 'BY', 'BQ', and 'W'
     which all have a default of 'right'.
 
     This might unintentionally lead to looking ahead, where the value for a later
diff --git a/doc/source/whatsnew/v2.2.0.rst
index 93ca2541d7ecd..ef59c86a21598 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -253,7 +253,11 @@ Other Deprecations
 - Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. 
 - Deprecated including the groups in computations when using :meth:`DataFrameGroupBy.apply` and :meth:`DataFrameGroupBy.resample`; pass ``include_groups=False`` to exclude the groups (:issue:`7155`)
 - Deprecated not passing a tuple to :class:`DataFrameGroupBy.get_group` or :class:`SeriesGroupBy.get_group` when grouping by a length-1 list-like (:issue:`25971`)
-- Deprecated string ``A`` denoting frequency in :class:`YearEnd` and strings ``A-DEC``, ``A-JAN``, etc. denoting annual frequencies with various fiscal year ends (:issue:`52536`)
+- Deprecated string ``AS`` denoting frequency in :class:`YearBegin` and strings ``AS-DEC``, ``AS-JAN``, etc. denoting annual frequencies with various fiscal year starts (:issue:`54275`)
+- Deprecated string ``A`` denoting frequency in :class:`YearEnd` and strings ``A-DEC``, ``A-JAN``, etc. denoting annual frequencies with various fiscal year ends (:issue:`54275`)
+- Deprecated string ``BAS`` denoting frequency in :class:`BYearBegin` and strings ``BAS-DEC``, ``BAS-JAN``, etc. denoting annual frequencies with various fiscal year starts (:issue:`54275`)
+- Deprecated string ``BA`` denoting frequency in :class:`BYearEnd` and strings ``BA-DEC``, ``BA-JAN``, etc. denoting annual frequencies with various fiscal year ends (:issue:`54275`)
+- Deprecated strings ``BM`` and ``CBM`` denoting frequencies in :class:`BusinessMonthEnd`, :class:`CustomBusinessMonthEnd` (:issue:`52064`)
 - Deprecated strings ``H``, ``BH``, and ``CBH`` denoting frequencies in :class:`Hour`, :class:`BusinessHour`, :class:`CustomBusinessHour` (:issue:`52536`)
 - Deprecated strings ``H``, ``S``, ``U``, and ``N`` denoting units in :func:`to_timedelta` (:issue:`52536`)
 - Deprecated strings ``H``, ``T``, ``S``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`52536`)
diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx
index 26181d8f15518..370917df4dca6 100644
--- a/pandas/_libs/tslibs/dtypes.pyx
+++ b/pandas/_libs/tslibs/dtypes.pyx
@@ -188,7 +188,7 @@ cdef dict _abbrev_to_attrnames = {v: k for k, v in attrname_to_abbrevs.items()}
 OFFSET_TO_PERIOD_FREQSTR: dict = {
     "WEEKDAY": "D",
     "EOM": "M",
-    "BM": "M",
+    "BME": "M",
     "BQS": "Q",
     "QS": "Q",
     "BQ": "Q",
@@ -280,6 +280,8 @@ DEPR_ABBREVS: dict[str, str]= {
     "BAS-SEP": "BYS-SEP",
     "BAS-OCT": "BYS-OCT",
     "BAS-NOV": "BYS-NOV",
+    "BM": "BME",
+    "CBM": "CBME",
     "H": "h",
     "BH": "bh",
     "CBH": "cbh",
diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx
index 6c5cdde20da5f..f20073766bc3a 100644
--- a/pandas/_libs/tslibs/offsets.pyx
+++ b/pandas/_libs/tslibs/offsets.pyx
@@ -2935,7 +2935,7 @@ cdef class BusinessMonthEnd(MonthOffset):
     >>> pd.offsets.BMonthEnd().rollforward(ts)
     Timestamp('2022-11-30 00:00:00')
     """
-    _prefix = "BM"
+    _prefix = "BME"
     _day_opt = "business_end"
 
 
@@ -4465,10 +4465,10 @@ cdef class CustomBusinessMonthEnd(_CustomBusinessMonth):
     >>> freq = pd.offsets.CustomBusinessMonthEnd(calendar=bdc)
     >>> pd.date_range(dt.datetime(2022, 7, 10), dt.datetime(2022, 11, 10), freq=freq)
     DatetimeIndex(['2022-07-29', '2022-08-31', '2022-09-29', '2022-10-28'],
-                  dtype='datetime64[ns]', freq='CBM')
+                  dtype='datetime64[ns]', freq='CBME')
     """
 
-    _prefix = "CBM"
+    _prefix = "CBME"
 
 
 cdef class CustomBusinessMonthBegin(_CustomBusinessMonth):
@@ -4551,12 +4551,12 @@ prefix_mapping = {
     BYearEnd,  # 'BY'
     BusinessDay,  # 'B'
     BusinessMonthBegin,  # 'BMS'
-    BusinessMonthEnd,  # 'BM'
+    BusinessMonthEnd,  # 
'BME' BQuarterEnd, # 'BQ' BQuarterBegin, # 'BQS' BusinessHour, # 'bh' CustomBusinessDay, # 'C' - CustomBusinessMonthEnd, # 'CBM' + CustomBusinessMonthEnd, # 'CBME' CustomBusinessMonthBegin, # 'CBMS' CustomBusinessHour, # 'cbh' MonthEnd, # 'ME' diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a7183a9d9498a..f1ecc57335a51 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9187,11 +9187,11 @@ def resample( Use frame.T.resample(...) instead. closed : {{'right', 'left'}}, default None Which side of bin interval is closed. The default is 'left' - for all frequency offsets except for 'ME', 'Y', 'Q', 'BM', + for all frequency offsets except for 'ME', 'Y', 'Q', 'BME', 'BA', 'BQ', and 'W' which all have a default of 'right'. label : {{'right', 'left'}}, default None Which bin edge label to label bucket with. The default is 'left' - for all frequency offsets except for 'ME', 'Y', 'Q', 'BM', + for all frequency offsets except for 'ME', 'Y', 'Q', 'BME', 'BA', 'BQ', and 'W' which all have a default of 'right'. convention : {{'start', 'end', 's', 'e'}}, default 'start' For `PeriodIndex` only, controls whether to use the start or diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 8b3071a6f8582..969748209772a 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -2101,7 +2101,7 @@ def __init__( else: freq = to_offset(freq) - end_types = {"ME", "Y", "Q", "BM", "BY", "BQ", "W"} + end_types = {"ME", "Y", "Q", "BME", "BY", "BQ", "W"} rule = freq.rule_code if rule in end_types or ("-" in rule and rule[: rule.find("-")] in end_types): if closed is None: @@ -2297,7 +2297,7 @@ def _adjust_bin_edges( ) -> tuple[DatetimeIndex, npt.NDArray[np.int64]]: # Some hacks for > daily data, see #1471, #1458, #1483 - if self.freq.name in ("BM", "ME", "W") or self.freq.name.split("-")[0] in ( + if self.freq.name in ("BME", "ME", "W") or self.freq.name.split("-")[0] in ( "BQ", "BY", "Q", diff --git a/pandas/tests/frame/methods/test_asfreq.py b/pandas/tests/frame/methods/test_asfreq.py index b3ab11d07bd7e..bc6e74d5b1f00 100644 --- a/pandas/tests/frame/methods/test_asfreq.py +++ b/pandas/tests/frame/methods/test_asfreq.py @@ -32,16 +32,16 @@ def test_asfreq2(self, frame_or_series): datetime(2009, 11, 30), datetime(2009, 12, 31), ], - freq="BM", + freq="BME", ), ) daily_ts = ts.asfreq("B") - monthly_ts = daily_ts.asfreq("BM") + monthly_ts = daily_ts.asfreq("BME") tm.assert_equal(monthly_ts, ts) daily_ts = ts.asfreq("B", method="pad") - monthly_ts = daily_ts.asfreq("BM") + monthly_ts = daily_ts.asfreq("BME") tm.assert_equal(monthly_ts, ts) daily_ts = ts.asfreq(offsets.BDay()) @@ -140,12 +140,12 @@ def test_asfreq_resample_set_correct_freq(self, frame_or_series): def test_asfreq_empty(self, datetime_frame): # test does not blow up on length-0 DataFrame zero_length = datetime_frame.reindex([]) - result = zero_length.asfreq("BM") + result = zero_length.asfreq("BME") assert result is not zero_length def test_asfreq(self, datetime_frame): offset_monthly = datetime_frame.asfreq(offsets.BMonthEnd()) - rule_monthly = datetime_frame.asfreq("BM") + rule_monthly = datetime_frame.asfreq("BME") tm.assert_frame_equal(offset_monthly, rule_monthly) diff --git a/pandas/tests/indexes/datetimes/methods/test_shift.py b/pandas/tests/indexes/datetimes/methods/test_shift.py index 064f664a4de10..c50f75894d810 100644 --- a/pandas/tests/indexes/datetimes/methods/test_shift.py +++ b/pandas/tests/indexes/datetimes/methods/test_shift.py @@ -157,6 +157,6 @@ def test_shift_bmonth(self): def 
test_shift_empty(self): # GH#14811 - dti = date_range(start="2016-10-21", end="2016-10-21", freq="BM") + dti = date_range(start="2016-10-21", end="2016-10-21", freq="BME") result = dti.shift(1) tm.assert_index_equal(result, dti) diff --git a/pandas/tests/indexes/datetimes/methods/test_to_period.py b/pandas/tests/indexes/datetimes/methods/test_to_period.py index 6839fafcdc114..d95cd6f3a2cc5 100644 --- a/pandas/tests/indexes/datetimes/methods/test_to_period.py +++ b/pandas/tests/indexes/datetimes/methods/test_to_period.py @@ -63,7 +63,7 @@ def test_to_period_annualish(self, off): assert prng.freq == "Y-DEC" def test_to_period_monthish(self): - offsets = ["MS", "BM"] + offsets = ["MS", "BME"] for off in offsets: rng = date_range("01-Jan-2012", periods=8, freq=off) prng = rng.to_period() diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index a18501a193b60..3b0b856d07673 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -264,3 +264,17 @@ def test_AS_BA_BAS_deprecated(self, freq_depr, expected_values, expected_freq): ) tm.assert_index_equal(result, expected) + + def test_BM_deprecated(self): + # GH#52064 + msg = "'BM' is deprecated and will be removed in a future version." + + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = date_range(start="2016-02-21", end="2016-08-21", freq="2BM") + result = DatetimeIndex( + ["2016-02-29", "2016-04-29", "2016-06-30"], + dtype="datetime64[ns]", + freq="2BME", + ) + + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 0a5287d154adc..d86d78ba47c5b 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -141,7 +141,7 @@ def test_datetimeindex_accessors4(self): def test_datetimeindex_accessors5(self): freq_m = to_offset("ME") - bm = to_offset("BM") + bm = to_offset("BME") qfeb = to_offset("Q-FEB") qsfeb = to_offset("QS-FEB") bq = to_offset("BQ") diff --git a/pandas/tests/indexes/multi/test_get_level_values.py b/pandas/tests/indexes/multi/test_get_level_values.py index 84907f5279876..28c77e78924cb 100644 --- a/pandas/tests/indexes/multi/test_get_level_values.py +++ b/pandas/tests/indexes/multi/test_get_level_values.py @@ -115,7 +115,7 @@ def test_get_level_values_when_periods(): def test_values_loses_freq_of_underlying_index(): # GH#49054 - idx = pd.DatetimeIndex(date_range("20200101", periods=3, freq="BM")) + idx = pd.DatetimeIndex(date_range("20200101", periods=3, freq="BME")) expected = idx.copy(deep=True) idx2 = Index([1, 2, 3]) midx = MultiIndex(levels=[idx, idx2], codes=[[0, 1, 2], [0, 1, 2]]) diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index db7c0cec09e6c..a384fd9cdc8f2 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -360,7 +360,7 @@ def test_business_freq(self): assert PeriodIndex(data=idx).freqstr == "B" def test_business_freq_convert(self): - bts = tm.makeTimeSeries(300).asfreq("BM") + bts = tm.makeTimeSeries(300).asfreq("BME") ts = bts.to_period("M") _, ax = mpl.pyplot.subplots() bts.plot(ax=ax) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index f66f5bf50974e..e0ba7902a8a6c 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py 
@@ -498,12 +498,12 @@ def test_resample_how_method(unit): def test_resample_extra_index_point(unit): # GH#9756 - index = date_range(start="20150101", end="20150331", freq="BM").as_unit(unit) + index = date_range(start="20150101", end="20150331", freq="BME").as_unit(unit) expected = DataFrame({"A": Series([21, 41, 63], index=index)}) index = date_range(start="20150101", end="20150331", freq="B").as_unit(unit) df = DataFrame({"A": Series(range(len(index)), index=index)}, dtype="int64") - result = df.resample("BM").last() + result = df.resample("BME").last() tm.assert_frame_equal(result, expected) @@ -2020,6 +2020,17 @@ def test_resample_M_deprecated(): tm.assert_series_equal(result, expected) +def test_resample_BM_deprecated(): + # GH#52064 + depr_msg = "'BM' is deprecated and will be removed in a future version." + + s = Series(range(10), index=date_range("20130101", freq="d", periods=10)) + expected = s.resample("2BME").mean() + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + result = s.resample("2BM").mean() + tm.assert_series_equal(result, expected) + + def test_resample_ms_closed_right(): # https://github.com/pandas-dev/pandas/issues/55271 dti = date_range(start="2020-01-31", freq="1min", periods=6000) diff --git a/pandas/tests/tseries/frequencies/test_inference.py b/pandas/tests/tseries/frequencies/test_inference.py index 22ff7f8405a40..aec9915d69e3c 100644 --- a/pandas/tests/tseries/frequencies/test_inference.py +++ b/pandas/tests/tseries/frequencies/test_inference.py @@ -53,7 +53,7 @@ def base_delta_code_pair(request): freqs = ( [f"Q-{month}" for month in MONTHS] + [f"{annual}-{month}" for annual in ["Y", "BY"] for month in MONTHS] - + ["ME", "BM", "BMS"] + + ["ME", "BME", "BMS"] + [f"WOM-{count}{day}" for count in range(1, 5) for day in DAYS] + [f"W-{day}" for day in DAYS] ) diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 9389f78c9e672..5678dd1fb511e 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -757,7 +757,7 @@ class TestOffsetNames: def test_get_offset_name(self): assert BDay().freqstr == "B" assert BDay(2).freqstr == "2B" - assert BMonthEnd().freqstr == "BM" + assert BMonthEnd().freqstr == "BME" assert Week(weekday=0).freqstr == "W-MON" assert Week(weekday=1).freqstr == "W-TUE" assert Week(weekday=2).freqstr == "W-WED" @@ -776,8 +776,8 @@ def test_get_offset(): pairs = [ ("B", BDay()), ("b", BDay()), - ("bm", BMonthEnd()), - ("Bm", BMonthEnd()), + ("bme", BMonthEnd()), + ("Bme", BMonthEnd()), ("W-MON", Week(weekday=0)), ("W-TUE", Week(weekday=1)), ("W-WED", Week(weekday=2)), @@ -811,7 +811,7 @@ def test_alias_equality(self): assert k == v.copy() def test_rule_code(self): - lst = ["ME", "MS", "BM", "BMS", "D", "B", "h", "min", "s", "ms", "us"] + lst = ["ME", "MS", "BME", "BMS", "D", "B", "h", "min", "s", "ms", "us"] for k in lst: assert k == _get_offset(k).rule_code # should be cached - this is kind of an internals test... 
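The net effect of this patch is that "BM" and "CBM" become deprecated aliases that
resolve to "BME" and "CBME" through DEPR_ABBREVS. A minimal sketch of the resulting
user-facing behavior, for illustration only, assuming a pandas build that includes
this patch; the warning text is the one asserted in the tests above:

    import warnings

    import pandas as pd

    # Deprecated alias: still resolves to business month end, but emits a
    # FutureWarning recommending 'BME' (behavior assumes this patch is applied).
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        old = pd.date_range("2023-01-01", periods=3, freq="BM")
    assert any("'BM' is deprecated" in str(m.message) for m in w)

    # Renamed alias: same business-month-end dates, no warning.
    new = pd.date_range("2023-01-01", periods=3, freq="BME")
    assert old.equals(new)
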
diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index db4fdf0d24465..4bd558a30c92f 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -369,7 +369,7 @@ def _get_monthly_rule(self) -> str | None: if pos_check is None: return None else: - return {"cs": "MS", "bs": "BMS", "ce": "ME", "be": "BM"}.get(pos_check) + return {"cs": "MS", "bs": "BMS", "ce": "ME", "be": "BME"}.get(pos_check) def _is_business_daily(self) -> bool: # quick check: cannot be business daily From 10cf330662b34a2686722abe5fab35009fb3ee9a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 15 Oct 2023 03:27:59 +0200 Subject: [PATCH 16/19] CLN/TST: clean logic of old datetime test_indexing test (#55523) --- pandas/tests/series/indexing/test_datetime.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index 84cf80fa1ffce..fc1c80eb4dec6 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -427,10 +427,10 @@ def test_indexing(): # getting # GH 3070, make sure semantics work on Series/Frame - expected = ts["2001"] - expected.name = "A" + result = ts["2001"] + tm.assert_series_equal(result, ts.iloc[:12]) - df = DataFrame({"A": ts}) + df = DataFrame({"A": ts.copy()}) # GH#36179 pre-2.0 df["2001"] operated as slicing on rows. in 2.0 it behaves # like any other key, so raises @@ -438,14 +438,16 @@ def test_indexing(): df["2001"] # setting + ts = Series(np.random.default_rng(2).random(len(idx)), index=idx) + expected = ts.copy() + expected.iloc[:12] = 1 ts["2001"] = 1 - expected = ts["2001"] - expected.name = "A" + tm.assert_series_equal(ts, expected) + expected = df.copy() + expected.iloc[:12, 0] = 1 df.loc["2001", "A"] = 1 - - with pytest.raises(KeyError, match="2001"): - df["2001"] + tm.assert_frame_equal(df, expected) def test_getitem_str_month_with_datetimeindex(): From 68e3c4b2f855e6e9a8469aeca6eb73ae60327160 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 15 Oct 2023 15:39:09 +0200 Subject: [PATCH 17/19] BUG: idxmax raising for arrow strings (#55384) --- pandas/core/arrays/arrow/array.py | 11 ++++++++++- pandas/core/arrays/string_arrow.py | 11 +++++++++++ pandas/tests/frame/test_reductions.py | 9 +++++++++ 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index c91f892936640..a00640a88d7fb 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1627,6 +1627,15 @@ def _reduce( ------ TypeError : subclass does not define reductions """ + result = self._reduce_calc(name, skipna=skipna, keepdims=keepdims, **kwargs) + if isinstance(result, pa.Array): + return type(self)(result) + else: + return result + + def _reduce_calc( + self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs + ): pa_result = self._reduce_pyarrow(name, skipna=skipna, **kwargs) if keepdims: @@ -1637,7 +1646,7 @@ def _reduce( [pa_result], type=to_pyarrow_type(infer_dtype_from_scalar(pa_result)[0]), ) - return type(self)(result) + return result if pc.is_null(pa_result).as_py(): return self.dtype.na_value diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 24b99b5d4852e..2a10e87981bc3 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -502,6 +502,17 
@@ def _str_find(self, sub: str, start: int = 0, end: int | None = None):
     def _convert_int_dtype(self, result):
         return Int64Dtype().__from_arrow__(result)
 
+    def _reduce(
+        self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs
+    ):
+        result = self._reduce_calc(name, skipna=skipna, keepdims=keepdims, **kwargs)
+        if name in ("argmin", "argmax") and isinstance(result, pa.Array):
+            return self._convert_int_dtype(result)
+        elif isinstance(result, pa.Array):
+            return type(self)(result)
+        else:
+            return result
+
     def _rank(
         self,
         *,
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index 0d5c2e3cd6c13..a17dc4a789fe3 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -1073,6 +1073,15 @@ def test_idxmax_arrow_types(self):
         expected = Series([2, 1], index=["a", "b"])
         tm.assert_series_equal(result, expected)
 
+        df = DataFrame({"a": ["b", "c", "a"]}, dtype="string[pyarrow]")
+        result = df.idxmax(numeric_only=False)
+        expected = Series([1], index=["a"])
+        tm.assert_series_equal(result, expected)
+
+        result = df.idxmin(numeric_only=False)
+        expected = Series([2], index=["a"])
+        tm.assert_series_equal(result, expected)
+
     def test_idxmax_axis_2(self, float_frame):
         frame = float_frame
         msg = "No axis named 2 for object type DataFrame"

From 7b8c6f6b410331f7b83348cb5f8812a58dab2b39 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sun, 15 Oct 2023 15:43:58 +0200
Subject: [PATCH 18/19] DOC: Adjust user guide for CoW docs (#55337)

---
 doc/source/user_guide/copy_on_write.rst | 130 ++++++++++++++----------
 1 file changed, 75 insertions(+), 55 deletions(-)

diff --git a/doc/source/user_guide/copy_on_write.rst b/doc/source/user_guide/copy_on_write.rst
index 59bdb1926895f..d0c57b56585db 100644
--- a/doc/source/user_guide/copy_on_write.rst
+++ b/doc/source/user_guide/copy_on_write.rst
@@ -7,8 +7,8 @@ Copy-on-Write (CoW)
 *******************
 
 Copy-on-Write was first introduced in version 1.5.0. Starting from version 2.0 most of the
-optimizations that become possible through CoW are implemented and supported. A complete list
-can be found at :ref:`Copy-on-Write optimizations <copy_on_write.optimizations>`.
+optimizations that become possible through CoW are implemented and supported. All possible
+optimizations are supported starting from pandas 2.1.
 
 We expect that CoW will be enabled by default in version 3.0.
 
@@ -154,6 +154,77 @@ With copy on write this can be done by using ``loc``.
 
     df.loc[df["bar"] > 5, "foo"] = 100
 
+Read-only NumPy arrays
+----------------------
+
+Accessing the underlying NumPy array of a DataFrame will return a read-only array if the array
+shares data with the initial DataFrame.
+
+The array is a copy if the initial DataFrame consists of more than one array:
+
+
+.. ipython:: python
+
+    df = pd.DataFrame({"a": [1, 2], "b": [1.5, 2.5]})
+    df.to_numpy()
+
+The array shares data with the DataFrame if the DataFrame consists of only one NumPy array:
+
+.. ipython:: python
+
+    df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
+    df.to_numpy()
+
+This array is read-only, which means that it can't be modified inplace:
+
+.. ipython:: python
+    :okexcept:
+
+    arr = df.to_numpy()
+    arr[0, 0] = 100
+
+The same holds true for a Series, since a Series always consists of a single array.
+
+There are two potential solutions to this:
+
+- Trigger a copy manually if you want to avoid updating DataFrames that share memory with your array.
+- Make the array writeable. 
This is a more performant solution but circumvents Copy-on-Write rules, so + it should be used with caution. + +.. ipython:: python + + arr = df.to_numpy() + arr.flags.writeable = True + arr[0, 0] = 100 + arr + +Patterns to avoid +----------------- + +No defensive copy will be performed if two objects share the same data while +you are modifying one object inplace. + +.. ipython:: python + + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df2 = df.reset_index() + df2.iloc[0, 0] = 100 + +This creates two objects that share data and thus the setitem operation will trigger a +copy. This is not necessary if the initial object ``df`` isn't needed anymore. +Simply reassigning to the same variable will invalidate the reference that is +held by the object. + +.. ipython:: python + + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df = df.reset_index() + df.iloc[0, 0] = 100 + +No copy is necessary in this example. +Creating multiple references keeps unnecessary references alive +and thus will hurt performance with Copy-on-Write. + .. _copy_on_write.optimizations: Copy-on-Write optimizations @@ -161,59 +232,8 @@ Copy-on-Write optimizations A new lazy copy mechanism that defers the copy until the object in question is modified and only if this object shares data with another object. This mechanism was added to -following methods: - - - :meth:`DataFrame.reset_index` / :meth:`Series.reset_index` - - :meth:`DataFrame.set_index` - - :meth:`DataFrame.set_axis` / :meth:`Series.set_axis` - - :meth:`DataFrame.set_flags` / :meth:`Series.set_flags` - - :meth:`DataFrame.rename_axis` / :meth:`Series.rename_axis` - - :meth:`DataFrame.reindex` / :meth:`Series.reindex` - - :meth:`DataFrame.reindex_like` / :meth:`Series.reindex_like` - - :meth:`DataFrame.assign` - - :meth:`DataFrame.drop` - - :meth:`DataFrame.dropna` / :meth:`Series.dropna` - - :meth:`DataFrame.select_dtypes` - - :meth:`DataFrame.align` / :meth:`Series.align` - - :meth:`Series.to_frame` - - :meth:`DataFrame.rename` / :meth:`Series.rename` - - :meth:`DataFrame.add_prefix` / :meth:`Series.add_prefix` - - :meth:`DataFrame.add_suffix` / :meth:`Series.add_suffix` - - :meth:`DataFrame.drop_duplicates` / :meth:`Series.drop_duplicates` - - :meth:`DataFrame.droplevel` / :meth:`Series.droplevel` - - :meth:`DataFrame.reorder_levels` / :meth:`Series.reorder_levels` - - :meth:`DataFrame.between_time` / :meth:`Series.between_time` - - :meth:`DataFrame.filter` / :meth:`Series.filter` - - :meth:`DataFrame.head` / :meth:`Series.head` - - :meth:`DataFrame.tail` / :meth:`Series.tail` - - :meth:`DataFrame.isetitem` - - :meth:`DataFrame.pipe` / :meth:`Series.pipe` - - :meth:`DataFrame.pop` / :meth:`Series.pop` - - :meth:`DataFrame.replace` / :meth:`Series.replace` - - :meth:`DataFrame.shift` / :meth:`Series.shift` - - :meth:`DataFrame.sort_index` / :meth:`Series.sort_index` - - :meth:`DataFrame.sort_values` / :meth:`Series.sort_values` - - :meth:`DataFrame.squeeze` / :meth:`Series.squeeze` - - :meth:`DataFrame.swapaxes` - - :meth:`DataFrame.swaplevel` / :meth:`Series.swaplevel` - - :meth:`DataFrame.take` / :meth:`Series.take` - - :meth:`DataFrame.to_timestamp` / :meth:`Series.to_timestamp` - - :meth:`DataFrame.to_period` / :meth:`Series.to_period` - - :meth:`DataFrame.truncate` - - :meth:`DataFrame.iterrows` - - :meth:`DataFrame.tz_convert` / :meth:`Series.tz_localize` - - :meth:`DataFrame.fillna` / :meth:`Series.fillna` - - :meth:`DataFrame.interpolate` / :meth:`Series.interpolate` - - :meth:`DataFrame.ffill` / :meth:`Series.ffill` - - 
:meth:`DataFrame.bfill` / :meth:`Series.bfill` - - :meth:`DataFrame.where` / :meth:`Series.where` - - :meth:`DataFrame.infer_objects` / :meth:`Series.infer_objects` - - :meth:`DataFrame.astype` / :meth:`Series.astype` - - :meth:`DataFrame.convert_dtypes` / :meth:`Series.convert_dtypes` - - :meth:`DataFrame.join` - - :meth:`DataFrame.eval` - - :func:`concat` - - :func:`merge` +methods that don't require a copy of the underlying data. Popular examples are :meth:`DataFrame.drop` for ``axis=1`` +and :meth:`DataFrame.rename`. These methods return views when Copy-on-Write is enabled, which provides a significant performance improvement compared to the regular execution. From e0d6051f985994e594b07a2b93b9ca2eff43eae4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sun, 15 Oct 2023 09:00:34 -1000 Subject: [PATCH 19/19] TST: Replace node.add_marker with applymarker (#55513) --- .../development/contributing_codebase.rst | 2 +- pandas/tests/apply/test_frame_apply.py | 2 +- pandas/tests/apply/test_frame_transform.py | 4 +- pandas/tests/apply/test_series_apply.py | 2 +- pandas/tests/apply/test_str.py | 6 +- pandas/tests/arithmetic/test_datetime64.py | 2 +- pandas/tests/arrays/string_/test_string.py | 6 +- pandas/tests/base/test_conversion.py | 2 +- pandas/tests/base/test_misc.py | 4 +- pandas/tests/computation/test_eval.py | 6 +- .../copy_view/test_core_functionalities.py | 2 +- .../tests/extension/decimal/test_decimal.py | 4 +- pandas/tests/extension/json/test_json.py | 6 +- pandas/tests/extension/test_arrow.py | 62 +++++++++---------- pandas/tests/extension/test_categorical.py | 4 +- pandas/tests/extension/test_masked.py | 2 +- pandas/tests/extension/test_numpy.py | 8 +-- pandas/tests/extension/test_sparse.py | 4 +- .../tests/frame/methods/test_interpolate.py | 2 +- pandas/tests/frame/methods/test_nlargest.py | 2 +- pandas/tests/frame/methods/test_quantile.py | 48 ++++---------- .../tests/frame/methods/test_sort_values.py | 4 +- .../frame/methods/test_to_dict_of_blocks.py | 2 +- pandas/tests/frame/test_arithmetic.py | 2 +- pandas/tests/frame/test_constructors.py | 4 +- pandas/tests/frame/test_reductions.py | 6 +- pandas/tests/frame/test_ufunc.py | 6 +- pandas/tests/generic/test_duplicate_labels.py | 2 +- pandas/tests/generic/test_finalize.py | 12 ++-- pandas/tests/groupby/methods/test_quantile.py | 2 +- .../groupby/methods/test_value_counts.py | 14 ++--- pandas/tests/groupby/test_categorical.py | 8 +-- pandas/tests/groupby/test_function.py | 4 +- pandas/tests/groupby/test_groupby_dropna.py | 2 +- .../tests/groupby/transform/test_transform.py | 4 +- pandas/tests/indexes/datetimes/test_ops.py | 2 +- pandas/tests/indexes/test_common.py | 4 +- pandas/tests/indexes/test_index_new.py | 2 +- pandas/tests/indexes/test_setops.py | 4 +- pandas/tests/indexing/test_loc.py | 2 +- pandas/tests/io/excel/test_readers.py | 54 ++++++++-------- pandas/tests/io/excel/test_writers.py | 2 +- pandas/tests/io/json/test_pandas.py | 4 +- pandas/tests/io/json/test_readlines.py | 16 ++--- pandas/tests/io/parser/common/test_float.py | 2 +- .../io/parser/common/test_read_errors.py | 4 +- pandas/tests/io/parser/conftest.py | 2 +- .../io/parser/dtypes/test_dtypes_basic.py | 2 +- pandas/tests/io/parser/test_comment.py | 4 +- pandas/tests/io/parser/test_compression.py | 2 +- pandas/tests/io/parser/test_encoding.py | 4 +- pandas/tests/io/parser/test_index_col.py | 2 +- pandas/tests/io/parser/test_parse_dates.py | 6 +- pandas/tests/io/parser/test_skiprows.py | 2 +- 
pandas/tests/io/parser/test_unsupported.py | 2 +- pandas/tests/io/pytables/test_round_trip.py | 2 +- pandas/tests/io/test_parquet.py | 6 +- pandas/tests/io/test_sql.py | 34 +++++----- .../reshape/concat/test_append_common.py | 4 +- .../scalar/timestamp/test_constructors.py | 2 +- pandas/tests/series/methods/test_astype.py | 4 +- .../tests/series/methods/test_interpolate.py | 2 +- pandas/tests/series/methods/test_map.py | 2 +- pandas/tests/series/test_arithmetic.py | 2 +- pandas/tests/series/test_constructors.py | 4 +- pandas/tests/series/test_ufunc.py | 2 +- pandas/tests/strings/test_api.py | 2 +- pandas/tests/tools/test_to_datetime.py | 2 +- pandas/tests/tools/test_to_numeric.py | 2 +- pandas/tests/tseries/offsets/test_common.py | 4 +- pandas/tests/tseries/offsets/test_offsets.py | 4 +- .../test_moments_consistency_expanding.py | 2 +- .../test_moments_consistency_rolling.py | 2 +- pandas/util/_test_decorators.py | 2 +- 74 files changed, 216 insertions(+), 242 deletions(-) diff --git a/doc/source/development/contributing_codebase.rst b/doc/source/development/contributing_codebase.rst index e0aa8be066914..e22b57dbbff17 100644 --- a/doc/source/development/contributing_codebase.rst +++ b/doc/source/development/contributing_codebase.rst @@ -540,7 +540,7 @@ xfail during the testing phase. To do so, use the ``request`` fixture: def test_xfail(request): mark = pytest.mark.xfail(raises=TypeError, reason="Indicate why here") - request.node.add_marker(mark) + request.applymarker(mark) xfail is not to be used for tests involving failure due to invalid user arguments. For these tests, we need to verify the correct exception type and error message diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index be988594ebf58..232cfceb3b6d6 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -325,7 +325,7 @@ def test_apply_empty_infer_type(ax, func, raw, axis, engine, request): mark = pytest.mark.xfail( reason="numba engine only supports raw=True at the moment" ) - request.node.add_marker(mark) + request.applymarker(mark) result = df.apply(func, axis=axis, engine=engine, raw=raw) if is_reduction: diff --git a/pandas/tests/apply/test_frame_transform.py b/pandas/tests/apply/test_frame_transform.py index 2d57515882aed..558d76ae8fdc4 100644 --- a/pandas/tests/apply/test_frame_transform.py +++ b/pandas/tests/apply/test_frame_transform.py @@ -156,7 +156,7 @@ def func(x): def test_transform_bad_dtype(op, frame_or_series, request): # GH 35964 if op == "ngroup": - request.node.add_marker( + request.applymarker( pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame") ) @@ -185,7 +185,7 @@ def test_transform_failure_typeerror(request, op): # GH 35964 if op == "ngroup": - request.node.add_marker( + request.applymarker( pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame") ) diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 643b9220999f7..b8026d771baed 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -321,7 +321,7 @@ def test_transform(string_series, by_row): def test_transform_partial_failure(op, request): # GH 35964 if op in ("ffill", "bfill", "pad", "backfill", "shift"): - request.node.add_marker( + request.applymarker( pytest.mark.xfail(reason=f"{op} is successful on any dtype") ) diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py index 
363d0285cabbc..c046c60e174b3 100644
--- a/pandas/tests/apply/test_str.py
+++ b/pandas/tests/apply/test_str.py
@@ -31,7 +31,7 @@
 @pytest.mark.parametrize("how", ["agg", "apply"])
 def test_apply_with_string_funcs(request, float_frame, func, args, kwds, how):
     if len(args) > 1 and how == "agg":
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(
                 raises=TypeError,
                 reason="agg/apply signature mismatch - agg passes 2nd "
@@ -256,7 +256,7 @@ def test_agg_cython_table_transform_frame(df, func, expected, axis):
 def test_transform_groupby_kernel_series(request, string_series, op):
     # GH 35964
     if op == "ngroup":
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame")
         )
     args = [0.0] if op == "fillna" else []
@@ -269,7 +269,7 @@ def test_transform_groupby_kernel_series(request, string_series, op):
 @pytest.mark.parametrize("op", frame_transform_kernels)
 def test_transform_groupby_kernel_frame(request, axis, float_frame, op):
     if op == "ngroup":
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame")
         )
 
diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py
index 693b8d9483407..67bcafd583086 100644
--- a/pandas/tests/arithmetic/test_datetime64.py
+++ b/pandas/tests/arithmetic/test_datetime64.py
@@ -1084,7 +1084,7 @@ def test_dt64arr_addsub_intlike(
     # GH#19959, GH#19123, GH#19012
     tz = tz_naive_fixture
     if box_with_array is pd.DataFrame:
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(raises=ValueError, reason="Axis alignment fails")
         )
 
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index 89cc31ec5ecc8..451136225a612 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -151,7 +151,7 @@ def test_add_2d(dtype, request, arrow_string_storage):
     if dtype.storage in arrow_string_storage:
         reason = "Failed: DID NOT RAISE <class 'TypeError'>"
         mark = pytest.mark.xfail(raises=None, reason=reason)
-        request.node.add_marker(mark)
+        request.applymarker(mark)
 
     a = pd.array(["a", "b", "c"], dtype=dtype)
     b = np.array([["a", "b", "c"]], dtype=object)
@@ -180,7 +180,7 @@ def test_mul(dtype, request, arrow_string_storage):
     if dtype.storage in arrow_string_storage:
         reason = "unsupported operand type(s) for *: 'ArrowStringArray' and 'int'"
         mark = pytest.mark.xfail(raises=NotImplementedError, reason=reason)
-        request.node.add_marker(mark)
+        request.applymarker(mark)
 
     a = pd.array(["a", "b", None], dtype=dtype)
     result = a * 2
@@ -446,7 +446,7 @@ def test_min_max_numpy(method, box, dtype, request, arrow_string_storage):
         else:
             reason = "'ArrowStringArray' object has no attribute 'max'"
         mark = pytest.mark.xfail(raises=TypeError, reason=reason)
-        request.node.add_marker(mark)
+        request.applymarker(mark)
 
     arr = box(["a", "b", "c", None], dtype=dtype)
     result = getattr(np, method)(arr)
diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py
index 3e0b0dbeb5624..c0f65c23c6d35 100644
--- a/pandas/tests/base/test_conversion.py
+++ b/pandas/tests/base/test_conversion.py
@@ -337,7 +337,7 @@ def test_to_numpy(arr, expected, index_or_series_or_array, request):
 
     if arr.dtype.name == "int64" and box is pd.array:
         mark = pytest.mark.xfail(reason="thing is Int64 and to_numpy() returns object")
-        request.node.add_marker(mark)
+        request.applymarker(mark)
 
     result = thing.to_numpy()
     tm.assert_numpy_array_equal(result, 
expected) diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py index 3ca53c4010449..c6fd4955d2d63 100644 --- a/pandas/tests/base/test_misc.py +++ b/pandas/tests/base/test_misc.py @@ -141,7 +141,7 @@ def test_searchsorted(request, index_or_series_obj): if isinstance(obj, pd.MultiIndex): # See gh-14833 - request.node.add_marker( + request.applymarker( pytest.mark.xfail( reason="np.searchsorted doesn't work on pd.MultiIndex: GH 14833" ) @@ -150,7 +150,7 @@ def test_searchsorted(request, index_or_series_obj): # TODO: Should Series cases also raise? Looks like they use numpy # comparison semantics https://github.com/numpy/numpy/issues/15981 mark = pytest.mark.xfail(reason="complex objects are not comparable") - request.node.add_marker(mark) + request.applymarker(mark) max_obj = max(obj, default=0) index = np.searchsorted(obj, max_obj) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 9c630e29ea8e6..f336ef34cae0a 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -194,7 +194,7 @@ def test_compound_invert_op(self, op, lhs, rhs, request, engine, parser): reason="Looks like expected is negative, unclear whether " "expected is incorrect or result is incorrect" ) - request.node.add_marker(mark) + request.applymarker(mark) skip_these = ["in", "not in"] ex = f"~(lhs {op} rhs)" @@ -860,7 +860,7 @@ def test_basic_series_frame_alignment( f"parser={parser}, index_name={index_name}, " f"r_idx_type={r_idx_type}, c_idx_type={c_idx_type}" ) - request.node.add_marker(pytest.mark.xfail(reason=reason, strict=False)) + request.applymarker(pytest.mark.xfail(reason=reason, strict=False)) df = tm.makeCustomDataframe( 10, 7, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type ) @@ -1883,7 +1883,7 @@ def test_negate_lt_eq_le(engine, parser): def test_eval_no_support_column_name(request, column): # GH 44603 if column in ["True", "False", "inf", "Inf"]: - request.node.add_marker( + request.applymarker( pytest.mark.xfail( raises=KeyError, reason=f"GH 47859 DataFrame eval not supported with {column}", diff --git a/pandas/tests/copy_view/test_core_functionalities.py b/pandas/tests/copy_view/test_core_functionalities.py index 5c177465d2fa4..bfdf2b92fd326 100644 --- a/pandas/tests/copy_view/test_core_functionalities.py +++ b/pandas/tests/copy_view/test_core_functionalities.py @@ -57,7 +57,7 @@ def test_setitem_with_view_invalidated_does_not_copy(using_copy_on_write, reques mark = pytest.mark.xfail( reason="blk.delete does not track references correctly" ) - request.node.add_marker(mark) + request.applymarker(mark) assert np.shares_memory(arr, get_array(df, "a")) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 8dbd1c4c511d0..5ccffd1d25b3d 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -85,14 +85,14 @@ def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool): def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna, request): if all_numeric_reductions in ["kurt", "skew", "sem", "median"]: mark = pytest.mark.xfail(raises=NotImplementedError) - request.node.add_marker(mark) + request.applymarker(mark) super().test_reduce_series_numeric(data, all_numeric_reductions, skipna) def test_reduce_frame(self, data, all_numeric_reductions, skipna, request): op_name = all_numeric_reductions if op_name in ["skew", "median"]: mark = 
pytest.mark.xfail(raises=NotImplementedError) - request.node.add_marker(mark) + request.applymarker(mark) return super().test_reduce_frame(data, all_numeric_reductions, skipna) diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 68a27a28b160f..71133030a5c18 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -237,7 +237,7 @@ def test_equals_same_data_different_object( ): if using_copy_on_write: mark = pytest.mark.xfail(reason="Fails with CoW") - request.node.add_marker(mark) + request.applymarker(mark) super().test_equals_same_data_different_object(data) @@ -300,7 +300,7 @@ class TestArithmeticOps(BaseJSON, base.BaseArithmeticOpsTests): def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): if len(data[0]) != 1: mark = pytest.mark.xfail(reason="raises in coercing to Series") - request.node.add_marker(mark) + request.applymarker(mark) super().test_arith_frame_with_scalar(data, all_arithmetic_operators) @@ -308,7 +308,7 @@ class TestComparisonOps(BaseJSON, base.BaseComparisonOpsTests): def test_compare_array(self, data, comparison_op, request): if comparison_op.__name__ in ["eq", "ne"]: mark = pytest.mark.xfail(reason="Comparison methods not implemented") - request.node.add_marker(mark) + request.applymarker(mark) super().test_compare_array(data, comparison_op) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 41312f45838a9..86aef2642750e 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -78,7 +78,7 @@ def _require_timezone_database(request): "on CI to path to the tzdata for pyarrow." ), ) - request.node.add_marker(mark) + request.applymarker(mark) @pytest.fixture(params=tm.ALL_PYARROW_DTYPES, ids=str) @@ -271,7 +271,7 @@ class TestArrowArray(base.ExtensionTests): def test_astype_str(self, data, request): pa_dtype = data.dtype.pyarrow_dtype if pa.types.is_binary(pa_dtype): - request.node.add_marker( + request.applymarker( pytest.mark.xfail( reason=f"For {pa_dtype} .astype(str) decodes.", ) @@ -286,7 +286,7 @@ def test_from_dtype(self, data, request): else: reason = f"pyarrow.type_for_alias cannot infer {pa_dtype}" - request.node.add_marker( + request.applymarker( pytest.mark.xfail( reason=reason, ) @@ -313,7 +313,7 @@ def test_from_sequence_pa_array_notimplemented(self, request): def test_from_sequence_of_strings_pa_array(self, data, request): pa_dtype = data.dtype.pyarrow_dtype if pa.types.is_time64(pa_dtype) and pa_dtype.equals("time64[ns]") and not PY311: - request.node.add_marker( + request.applymarker( pytest.mark.xfail( reason="Nanosecond time parsing not supported.", ) @@ -321,7 +321,7 @@ def test_from_sequence_of_strings_pa_array(self, data, request): elif pa_version_under11p0 and ( pa.types.is_duration(pa_dtype) or pa.types.is_decimal(pa_dtype) ): - request.node.add_marker( + request.applymarker( pytest.mark.xfail( raises=pa.ArrowNotImplementedError, reason=f"pyarrow doesn't support parsing {pa_dtype}", @@ -402,12 +402,12 @@ def test_accumulate_series(self, data, all_numeric_accumulations, skipna, reques mark = pytest.mark.xfail( reason=f"{all_numeric_accumulations} not implemented for pyarrow < 9" ) - request.node.add_marker(mark) + request.applymarker(mark) elif all_numeric_accumulations == "cumsum" and ( pa.types.is_boolean(pa_type) or pa.types.is_decimal(pa_type) ): - request.node.add_marker( + request.applymarker( pytest.mark.xfail( 
reason=f"{all_numeric_accumulations} not implemented for {pa_type}", raises=NotImplementedError, @@ -496,19 +496,19 @@ def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna, reque if all_numeric_reductions in {"skew", "kurt"} and ( dtype._is_numeric or dtype.kind == "b" ): - request.node.add_marker(xfail_mark) + request.applymarker(xfail_mark) elif ( all_numeric_reductions in {"var", "std", "median"} and pa_version_under7p0 and pa.types.is_decimal(pa_dtype) ): - request.node.add_marker(xfail_mark) + request.applymarker(xfail_mark) elif ( all_numeric_reductions == "sem" and pa_version_under8p0 and (dtype._is_numeric or pa.types.is_temporal(pa_dtype)) ): - request.node.add_marker(xfail_mark) + request.applymarker(xfail_mark) elif pa.types.is_boolean(pa_dtype) and all_numeric_reductions in { "sem", @@ -516,7 +516,7 @@ def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna, reque "var", "median", }: - request.node.add_marker(xfail_mark) + request.applymarker(xfail_mark) super().test_reduce_series_numeric(data, all_numeric_reductions, skipna) @pytest.mark.parametrize("skipna", [True, False]) @@ -532,7 +532,7 @@ def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna, reque if pa.types.is_string(pa_dtype) or pa.types.is_binary(pa_dtype): # We *might* want to make this behave like the non-pyarrow cases, # but have not yet decided. - request.node.add_marker(xfail_mark) + request.applymarker(xfail_mark) return super().test_reduce_series_boolean(data, all_boolean_reductions, skipna) @@ -560,7 +560,7 @@ def test_reduce_frame(self, data, all_numeric_reductions, skipna, request): if op_name == "skew": if data.dtype._is_numeric: mark = pytest.mark.xfail(reason="skew not implemented") - request.node.add_marker(mark) + request.applymarker(mark) return super().test_reduce_frame(data, all_numeric_reductions, skipna) @pytest.mark.parametrize("typ", ["int64", "uint64", "float64"]) @@ -592,7 +592,7 @@ def test_in_numeric_groupby(self, data_for_grouping): def test_construct_from_string_own_name(self, dtype, request): pa_dtype = dtype.pyarrow_dtype if pa.types.is_decimal(pa_dtype): - request.node.add_marker( + request.applymarker( pytest.mark.xfail( raises=NotImplementedError, reason=f"pyarrow.type_for_alias cannot infer {pa_dtype}", @@ -616,7 +616,7 @@ def test_is_dtype_from_name(self, dtype, request): assert not type(dtype).is_dtype(dtype.name) else: if pa.types.is_decimal(pa_dtype): - request.node.add_marker( + request.applymarker( pytest.mark.xfail( raises=NotImplementedError, reason=f"pyarrow.type_for_alias cannot infer {pa_dtype}", @@ -638,7 +638,7 @@ def test_get_common_dtype(self, dtype, request): or pa.types.is_binary(pa_dtype) or pa.types.is_decimal(pa_dtype) ): - request.node.add_marker( + request.applymarker( pytest.mark.xfail( reason=( f"{pa_dtype} does not have associated numpy " @@ -690,21 +690,21 @@ def test_setitem_preserves_views(self, data): def test_EA_types(self, engine, data, dtype_backend, request): pa_dtype = data.dtype.pyarrow_dtype if pa.types.is_decimal(pa_dtype): - request.node.add_marker( + request.applymarker( pytest.mark.xfail( raises=NotImplementedError, reason=f"Parameterized types {pa_dtype} not supported.", ) ) elif pa.types.is_timestamp(pa_dtype) and pa_dtype.unit in ("us", "ns"): - request.node.add_marker( + request.applymarker( pytest.mark.xfail( raises=ValueError, reason="https://github.com/pandas-dev/pandas/issues/49767", ) ) elif pa.types.is_binary(pa_dtype): - request.node.add_marker( + request.applymarker( 
pytest.mark.xfail(reason="CSV parsers don't correctly handle binary") ) df = pd.DataFrame({"with_dtype": pd.Series(data, dtype=str(data.dtype))}) @@ -725,7 +725,7 @@ def test_EA_types(self, engine, data, dtype_backend, request): def test_invert(self, data, request): pa_dtype = data.dtype.pyarrow_dtype if not (pa.types.is_boolean(pa_dtype) or pa.types.is_integer(pa_dtype)): - request.node.add_marker( + request.applymarker( pytest.mark.xfail( raises=pa.ArrowNotImplementedError, reason=f"pyarrow.compute.invert does support {pa_dtype}", @@ -737,7 +737,7 @@ def test_invert(self, data, request): def test_diff(self, data, periods, request): pa_dtype = data.dtype.pyarrow_dtype if pa.types.is_unsigned_integer(pa_dtype) and periods == 1: - request.node.add_marker( + request.applymarker( pytest.mark.xfail( raises=pa.ArrowInvalid, reason=( @@ -756,7 +756,7 @@ def test_value_counts_returns_pyarrow_int64(self, data): def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting, request): pa_dtype = data_for_sorting.dtype.pyarrow_dtype if pa.types.is_decimal(pa_dtype) and pa_version_under7p0: - request.node.add_marker( + request.applymarker( pytest.mark.xfail( reason=f"No pyarrow kernel for {pa_dtype}", raises=pa.ArrowNotImplementedError, @@ -782,7 +782,7 @@ def test_argreduce_series( ): pa_dtype = data_missing_for_sorting.dtype.pyarrow_dtype if pa.types.is_decimal(pa_dtype) and pa_version_under7p0 and skipna: - request.node.add_marker( + request.applymarker( pytest.mark.xfail( reason=f"No pyarrow kernel for {pa_dtype}", raises=pa.ArrowNotImplementedError, @@ -1036,7 +1036,7 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request) mark = self._get_arith_xfail_marker(all_arithmetic_operators, pa_dtype) if mark is not None: - request.node.add_marker(mark) + request.applymarker(mark) super().test_arith_series_with_scalar(data, all_arithmetic_operators) @@ -1050,7 +1050,7 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): mark = self._get_arith_xfail_marker(all_arithmetic_operators, pa_dtype) if mark is not None: - request.node.add_marker(mark) + request.applymarker(mark) super().test_arith_frame_with_scalar(data, all_arithmetic_operators) @@ -1066,7 +1066,7 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators, request): and pa.types.is_unsigned_integer(pa_dtype) and not pa_version_under7p0 ): - request.node.add_marker( + request.applymarker( pytest.mark.xfail( raises=pa.ArrowInvalid, reason=( @@ -1078,7 +1078,7 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators, request): mark = self._get_arith_xfail_marker(all_arithmetic_operators, pa_dtype) if mark is not None: - request.node.add_marker(mark) + request.applymarker(mark) op_name = all_arithmetic_operators ser = pd.Series(data) @@ -1092,7 +1092,7 @@ def test_add_series_with_extension_array(self, data, request): pa_dtype = data.dtype.pyarrow_dtype if pa_dtype.equals("int8"): - request.node.add_marker( + request.applymarker( pytest.mark.xfail( raises=pa.ArrowInvalid, reason=f"raises on overflow for {pa_dtype}", @@ -1368,7 +1368,7 @@ def test_quantile(data, interpolation, quantile, request): elif pa.types.is_temporal(data._pa_array.type): pass else: - request.node.add_marker( + request.applymarker( pytest.mark.xfail( raises=pa.ArrowNotImplementedError, reason=f"quantile not supported by pyarrow for {pa_dtype}", @@ -2830,7 +2830,7 @@ def test_infer_dtype_pyarrow_dtype(data, request): reason="in infer_dtype pd.NA is not ignored in these cases " "even 
with skipna=True in the list(data) check below" ) - request.node.add_marker(mark) + request.applymarker(mark) assert res == lib.infer_dtype(list(data), skipna=True) @@ -2881,7 +2881,7 @@ def test_arithmetic_temporal(pa_type, request): raises=pa.ArrowNotImplementedError, reason="Function 'subtract_checked' has no kernel matching input types", ) - request.node.add_marker(mark) + request.applymarker(mark) arr = ArrowExtensionArray(pa.array([1, 2, 3], type=pa_type)) unit = pa_type.unit diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 82b6c54bc3106..5cde5df4bc007 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -148,7 +148,7 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): # frame & scalar op_name = all_arithmetic_operators if op_name == "__rmod__": - request.node.add_marker( + request.applymarker( pytest.mark.xfail( reason="rmod never called when string is first argument" ) @@ -158,7 +158,7 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request): op_name = all_arithmetic_operators if op_name == "__rmod__": - request.node.add_marker( + request.applymarker( pytest.mark.xfail( reason="rmod never called when string is first argument" ) diff --git a/pandas/tests/extension/test_masked.py b/pandas/tests/extension/test_masked.py index d27e9b8b9e983..bd12bcced1448 100644 --- a/pandas/tests/extension/test_masked.py +++ b/pandas/tests/extension/test_masked.py @@ -234,7 +234,7 @@ def test_divmod_series_array(self, data, data_for_twos, request): "floordiv but not for divmod. This matches what we do for " "non-masked bool dtype." 
) - request.node.add_marker(mark) + request.applymarker(mark) super().test_divmod_series_array(data, data_for_twos) def test_combine_le(self, data_repeated): diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 542e938d1a40a..04b25fd0566da 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -169,7 +169,7 @@ def skip_numpy_object(dtype, request): """ if dtype == "object": mark = pytest.mark.xfail(reason="Fails for object dtype") - request.node.add_marker(mark) + request.applymarker(mark) skip_nested = pytest.mark.usefixtures("skip_numpy_object") @@ -198,7 +198,7 @@ def test_series_constructor_scalar_with_index(self, data, dtype): class TestDtype(BaseNumPyTests, base.BaseDtypeTests): def test_check_dtype(self, data, request): if data.dtype.numpy_dtype == "object": - request.node.add_marker( + request.applymarker( pytest.mark.xfail( reason=f"NumpyExtensionArray expectedly clashes with a " f"NumPy name: {data.dtype.numpy_dtype}" @@ -261,7 +261,7 @@ def test_diff(self, data, periods): def test_insert(self, data, request): if data.dtype.numpy_dtype == object: mark = pytest.mark.xfail(reason="Dimension mismatch in np.concatenate") - request.node.add_marker(mark) + request.applymarker(mark) super().test_insert(data) @@ -289,7 +289,7 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators, request): opname = all_arithmetic_operators if data.dtype.numpy_dtype == object and opname not in ["__add__", "__radd__"]: mark = pytest.mark.xfail(reason="Fails for object dtype") - request.node.add_marker(mark) + request.applymarker(mark) super().test_arith_series_with_array(data, all_arithmetic_operators) @skip_nested diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index f56dea3f43de7..003f50e8e23a2 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -214,7 +214,7 @@ def test_fillna_limit_backfill(self, data_missing): def test_fillna_no_op_returns_copy(self, data, request): if np.isnan(data.fill_value): - request.node.add_marker( + request.applymarker( pytest.mark.xfail(reason="returns array with different fill value") ) super().test_fillna_no_op_returns_copy(data) @@ -392,7 +392,7 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): "rmod", ]: mark = pytest.mark.xfail(reason="result dtype.fill_value mismatch") - request.node.add_marker(mark) + request.applymarker(mark) super().test_arith_frame_with_scalar(data, all_arithmetic_operators) diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index 67aa07dd83764..bb12d7e202e09 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -54,7 +54,7 @@ def test_interpolate_inplace(self, frame_or_series, using_array_manager, request # GH#44749 if using_array_manager and frame_or_series is DataFrame: mark = pytest.mark.xfail(reason=".values-based in-place check is invalid") - request.node.add_marker(mark) + request.applymarker(mark) obj = frame_or_series([1, np.nan, 2]) orig = obj.values diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py index 0bdf9a0e5c007..1196f8cd3886a 100644 --- a/pandas/tests/frame/methods/test_nlargest.py +++ b/pandas/tests/frame/methods/test_nlargest.py @@ -169,7 +169,7 @@ def test_nlargest_n_duplicate_index(self, df_duplicates, n, order, request): if 
Version(np.__version__) >= Version("1.25") and (
             (order == ["a"] and n in (1, 2, 3, 4)) or (order == ["a", "b"]) and n == 5
         ):
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason=(
                         "pandas default unstable sorting of duplicates"
diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py
index 61b253b24a7ec..4bfe364e5eafc 100644
--- a/pandas/tests/frame/methods/test_quantile.py
+++ b/pandas/tests/frame/methods/test_quantile.py
@@ -63,7 +63,7 @@ def test_quantile(
             tm.assert_series_equal(result, expected)
         else:
             tm.assert_index_equal(result.index, expected.index)
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     using_array_manager, reason="Name set incorrectly for arraymanager"
                 )
@@ -83,7 +83,7 @@ def test_quantile(
             tm.assert_series_equal(result, expected)
         else:
             tm.assert_index_equal(result.index, expected.index)
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     using_array_manager, reason="Name set incorrectly for arraymanager"
                 )
@@ -107,9 +107,7 @@ def test_non_numeric_exclusion(self, interp_method, request, using_array_manager
         if interpolation == "nearest":
             xp = (xp + 0.5).astype(np.int64)
         if method == "table" and using_array_manager:
-            request.node.add_marker(
-                pytest.mark.xfail(reason="Axis name incorrectly set.")
-            )
+            request.applymarker(pytest.mark.xfail(reason="Axis name incorrectly set."))
         tm.assert_series_equal(rs, xp)

     def test_axis(self, interp_method, request, using_array_manager):
@@ -121,9 +119,7 @@ def test_axis(self, interp_method, request, using_array_manager):
         if interpolation == "nearest":
             expected = expected.astype(np.int64)
         if method == "table" and using_array_manager:
-            request.node.add_marker(
-                pytest.mark.xfail(reason="Axis name incorrectly set.")
-            )
+            request.applymarker(pytest.mark.xfail(reason="Axis name incorrectly set."))
         tm.assert_series_equal(result, expected)

         result = df.quantile(
@@ -151,9 +147,7 @@ def test_axis_numeric_only_true(self, interp_method, request, using_array_manage
         if interpolation == "nearest":
             expected = expected.astype(np.int64)
         if method == "table" and using_array_manager:
-            request.node.add_marker(
-                pytest.mark.xfail(reason="Axis name incorrectly set.")
-            )
+            request.applymarker(pytest.mark.xfail(reason="Axis name incorrectly set."))
         tm.assert_series_equal(result, expected)

     def test_quantile_date_range(self, interp_method, request, using_array_manager):
@@ -170,9 +164,7 @@ def test_quantile_date_range(self, interp_method, request, using_array_manager):
             ["2016-01-02 00:00:00"], name=0.5, dtype="datetime64[ns, US/Pacific]"
         )
         if method == "table" and using_array_manager:
-            request.node.add_marker(
-                pytest.mark.xfail(reason="Axis name incorrectly set.")
-            )
+            request.applymarker(pytest.mark.xfail(reason="Axis name incorrectly set."))
         tm.assert_series_equal(result, expected)
@@ -194,9 +186,7 @@ def test_quantile_axis_mixed(self, interp_method, request, using_array_manager):
         if interpolation == "nearest":
             expected -= 0.5
         if method == "table" and using_array_manager:
-            request.node.add_marker(
-                pytest.mark.xfail(reason="Axis name incorrectly set.")
-            )
+            request.applymarker(pytest.mark.xfail(reason="Axis name incorrectly set."))
         tm.assert_series_equal(result, expected)

         # must raise
@@ -208,9 +198,7 @@ def test_quantile_axis_parameter(self, interp_method, request, using_array_manag
         # GH 9543/9544
         interpolation, method = interp_method
         if method == "table" and using_array_manager:
-            request.node.add_marker(
-                pytest.mark.xfail(reason="Axis name incorrectly set.")
-            )
+            request.applymarker(pytest.mark.xfail(reason="Axis name incorrectly set."))

         df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3])
         result = df.quantile(0.5, axis=0, interpolation=interpolation, method=method)
@@ -336,9 +324,7 @@ def test_quantile_multi(self, interp_method, request, using_array_manager):
         if interpolation == "nearest":
             expected = expected.astype(np.int64)
         if method == "table" and using_array_manager:
-            request.node.add_marker(
-                pytest.mark.xfail(reason="Axis name incorrectly set.")
-            )
+            request.applymarker(pytest.mark.xfail(reason="Axis name incorrectly set."))
         tm.assert_frame_equal(result, expected)

     def test_quantile_multi_axis_1(self, interp_method, request, using_array_manager):
@@ -353,9 +339,7 @@ def test_quantile_multi_axis_1(self, interp_method, request, using_array_manager
         if interpolation == "nearest":
             expected = expected.astype(np.int64)
         if method == "table" and using_array_manager:
-            request.node.add_marker(
-                pytest.mark.xfail(reason="Axis name incorrectly set.")
-            )
+            request.applymarker(pytest.mark.xfail(reason="Axis name incorrectly set."))
         tm.assert_frame_equal(result, expected)

     def test_quantile_multi_empty(self, interp_method):
@@ -458,9 +442,7 @@ def test_quantile_invalid(self, invalid, datetime_frame, interp_method):
     def test_quantile_box(self, interp_method, request, using_array_manager):
         interpolation, method = interp_method
         if method == "table" and using_array_manager:
-            request.node.add_marker(
-                pytest.mark.xfail(reason="Axis name incorrectly set.")
-            )
+            request.applymarker(pytest.mark.xfail(reason="Axis name incorrectly set."))
         df = DataFrame(
             {
                 "A": [
@@ -591,9 +573,7 @@ def test_quantile_box_nat(self):
     def test_quantile_nan(self, interp_method, request, using_array_manager):
         interpolation, method = interp_method
         if method == "table" and using_array_manager:
-            request.node.add_marker(
-                pytest.mark.xfail(reason="Axis name incorrectly set.")
-            )
+            request.applymarker(pytest.mark.xfail(reason="Axis name incorrectly set."))
         # GH 14357 - float block where some cols have missing values
         df = DataFrame({"a": np.arange(1, 6.0), "b": np.arange(1, 6.0)})
         df.iloc[-1, 1] = np.nan
@@ -640,9 +620,7 @@ def test_quantile_nat(self, interp_method, request, using_array_manager):
         interpolation, method = interp_method
         if method == "table" and using_array_manager:
-            request.node.add_marker(
-                pytest.mark.xfail(reason="Axis name incorrectly set.")
-            )
+            request.applymarker(pytest.mark.xfail(reason="Axis name incorrectly set."))
         # full NaT column
         df = DataFrame({"a": [pd.NaT, pd.NaT, pd.NaT]})
diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py
index bd7d882f6d94a..f2f02058a534e 100644
--- a/pandas/tests/frame/methods/test_sort_values.py
+++ b/pandas/tests/frame/methods/test_sort_values.py
@@ -863,7 +863,7 @@ def test_sort_index_level_and_column_label(
             Version(np.__version__) >= Version("1.25")
             and request.node.callspec.id == "df_idx0-inner-True"
         ):
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason=(
                         "pandas default unstable sorting of duplicates"
@@ -907,7 +907,7 @@ def test_sort_column_level_and_index_label(
         result = df_idx.T.sort_values(by=sort_names, ascending=ascending, axis=1)

         if Version(np.__version__) >= Version("1.25"):
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason=(
                         "pandas default unstable sorting of duplicates"
diff --git a/pandas/tests/frame/methods/test_to_dict_of_blocks.py b/pandas/tests/frame/methods/test_to_dict_of_blocks.py
index 906e74230a762..9d90111be6075 100644
--- a/pandas/tests/frame/methods/test_to_dict_of_blocks.py
+++ b/pandas/tests/frame/methods/test_to_dict_of_blocks.py
@@ -52,7 +52,7 @@ def test_no_copy_blocks(self, float_frame, using_copy_on_write):

 def test_to_dict_of_blocks_item_cache(request, using_copy_on_write):
     if using_copy_on_write:
-        request.node.add_marker(pytest.mark.xfail(reason="CoW - not yet implemented"))
+        request.applymarker(pytest.mark.xfail(reason="CoW - not yet implemented"))
     # Calling to_dict_of_blocks should not poison item_cache
     df = DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]})
     df["c"] = NumpyExtensionArray(np.array([1, 2, None, 3], dtype=object))
diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
index 09a5cda4b3458..1d1a4dbe83a9c 100644
--- a/pandas/tests/frame/test_arithmetic.py
+++ b/pandas/tests/frame/test_arithmetic.py
@@ -508,7 +508,7 @@ def test_floordiv_axis0_numexpr_path(self, opname, request):
             and opname == "pow"
             and "python" in request.node.callspec.id
         ):
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(reason="https://github.com/pydata/numexpr/issues/454")
             )
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 3d8053703e906..4307cfc1e1d4e 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -3171,7 +3171,7 @@ def test_from_out_of_bounds_ns_datetime(
                 "non-nano, but DatetimeArray._from_sequence has not",
                 strict=True,
             )
-            request.node.add_marker(mark)
+            request.applymarker(mark)
         scalar = datetime(9999, 1, 1)
         exp_dtype = "M8[us]"  # pydatetime objects default to this reso
@@ -3207,7 +3207,7 @@ def test_from_out_of_bounds_ns_timedelta(
                 "to non-nano, but TimedeltaArray._from_sequence has not",
                 strict=True,
             )
-            request.node.add_marker(mark)
+            request.applymarker(mark)
         scalar = datetime(9999, 1, 1) - datetime(1970, 1, 1)
         exp_dtype = "m8[us]"  # smallest reso that fits
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index a17dc4a789fe3..b42f2148f90d5 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -729,7 +729,7 @@ def test_std_datetime64_with_nat(
             mark = pytest.mark.xfail(
                 reason="GH#51446: Incorrect type inference on NaT in reduction result"
             )
-            request.node.add_marker(mark)
+            request.applymarker(mark)
         df = DataFrame({"a": to_datetime(values)})
         result = df.std(skipna=skipna)
         if not skipna or all(value is pd.NaT for value in values):
@@ -1594,7 +1594,7 @@ def test_reductions_skipna_none_raises(
         self, request, frame_or_series, all_reductions
     ):
         if all_reductions == "count":
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(reason="Count does not accept skipna")
             )
         obj = frame_or_series([1, 2, 3])
@@ -1822,7 +1822,7 @@ def test_sum_timedelta64_skipna_false(using_array_manager, request):
         mark = pytest.mark.xfail(
             reason="Incorrect type inference on NaT in reduction result"
         )
-        request.node.add_marker(mark)
+        request.applymarker(mark)

    arr = np.arange(8).astype(np.int64).view("m8[s]").reshape(4, 2)
    arr[-1, -1] = "Nat"
diff --git a/pandas/tests/frame/test_ufunc.py b/pandas/tests/frame/test_ufunc.py
index 305c0f8bba8ce..88c62da2b0a73 100644
--- a/pandas/tests/frame/test_ufunc.py
+++ b/pandas/tests/frame/test_ufunc.py
@@ -31,7 +31,7 @@ def test_unary_unary(dtype):

 def test_unary_binary(request, dtype):
     # unary input, binary output
     if is_extension_array_dtype(dtype) or isinstance(dtype, dict):
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(
                 reason="Extension / mixed with multiple outputs not implemented."
             )
@@ -106,7 +106,7 @@ def test_binary_input_aligns_columns(request, dtype_a, dtype_b):
         or is_extension_array_dtype(dtype_b)
         or isinstance(dtype_b, dict)
     ):
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(
                 reason="Extension / mixed with multiple inputs not implemented."
             )
@@ -135,7 +135,7 @@ def test_binary_input_aligns_columns(request, dtype_a, dtype_b):
 @pytest.mark.parametrize("dtype", dtypes)
 def test_binary_input_aligns_index(request, dtype):
     if is_extension_array_dtype(dtype) or isinstance(dtype, dict):
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(
                 reason="Extension / mixed with multiple inputs not implemented."
             )
diff --git a/pandas/tests/generic/test_duplicate_labels.py b/pandas/tests/generic/test_duplicate_labels.py
index a81e013290b64..cb21ac6b83ee9 100644
--- a/pandas/tests/generic/test_duplicate_labels.py
+++ b/pandas/tests/generic/test_duplicate_labels.py
@@ -92,7 +92,7 @@ def test_preserve_getitem(self):

     def test_ndframe_getitem_caching_issue(self, request, using_copy_on_write):
         if not using_copy_on_write:
-            request.node.add_marker(pytest.mark.xfail(reason="Unclear behavior."))
+            request.applymarker(pytest.mark.xfail(reason="Unclear behavior."))
         # NDFrame.__getitem__ will cache the first df['A']. May need to
         # invalidate that cache? Update the cached entries?
         df = pd.DataFrame({"A": [0]}).set_flags(allows_duplicate_labels=False)
diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py
index 0f7ae998a4b2b..68746b9e9a803 100644
--- a/pandas/tests/generic/test_finalize.py
+++ b/pandas/tests/generic/test_finalize.py
@@ -490,7 +490,7 @@ def test_binops(request, args, annotate, all_binary_operators):
     if not (isinstance(left, int) or isinstance(right, int)) and annotate != "both":
         if not all_binary_operators.__name__.startswith("r"):
             if annotate == "right" and isinstance(left, type(right)):
-                request.node.add_marker(
+                request.applymarker(
                     pytest.mark.xfail(
                         reason=f"{all_binary_operators} doesn't work when right has "
                         f"attrs and both are {type(left)}"
                     )
                 )
             if not isinstance(left, type(right)):
                 if annotate == "left" and isinstance(left, pd.Series):
-                    request.node.add_marker(
+                    request.applymarker(
                         pytest.mark.xfail(
                             reason=f"{all_binary_operators} doesn't work when the "
                             "objects are different Series has attrs"
                         )
                     )
                 elif annotate == "right" and isinstance(right, pd.Series):
-                    request.node.add_marker(
+                    request.applymarker(
                         pytest.mark.xfail(
                             reason=f"{all_binary_operators} doesn't work when the "
                             "objects are different Series has attrs"
                         )
                     )
         else:
             if annotate == "left" and isinstance(left, type(right)):
-                request.node.add_marker(
+                request.applymarker(
                     pytest.mark.xfail(
                         reason=f"{all_binary_operators} doesn't work when left has "
                         f"attrs and both are {type(left)}"
                     )
                 )
             if not isinstance(left, type(right)):
                 if annotate == "right" and isinstance(right, pd.Series):
-                    request.node.add_marker(
+                    request.applymarker(
                         pytest.mark.xfail(
                             reason=f"{all_binary_operators} doesn't work when the "
                             "objects are different Series has attrs"
                         )
                     )
                 elif annotate == "left" and isinstance(left, pd.Series):
-                    request.node.add_marker(
+                    request.applymarker(
                         pytest.mark.xfail(
                             reason=f"{all_binary_operators} doesn't work when the "
                             "objects are different Series has attrs"
diff --git a/pandas/tests/groupby/methods/test_quantile.py b/pandas/tests/groupby/methods/test_quantile.py
index 4e7c09b70feb0..fcb9701e9881b 100644
--- a/pandas/tests/groupby/methods/test_quantile.py
+++ b/pandas/tests/groupby/methods/test_quantile.py
@@ -45,7 +45,7 @@ def test_quantile(interpolation, a_vals, b_vals, q, request):
         and isinstance(b_vals, list)
         and b_vals == [4, 3, 2, 1]
     ):
-        request.node.add_marker(
+        request.applymarker(
            pytest.mark.xfail(
                reason="Unclear numpy expectation for nearest "
                "result with equidistant data"
diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py
index 45a33d3b70f71..c1ee107715b71 100644
--- a/pandas/tests/groupby/methods/test_value_counts.py
+++ b/pandas/tests/groupby/methods/test_value_counts.py
@@ -249,7 +249,7 @@ def test_bad_subset(education_df):
 def test_basic(education_df, request):
     # gh43564
     if Version(np.__version__) >= Version("1.25"):
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(
                 reason=(
                     "pandas default unstable sorting of duplicates"
@@ -306,7 +306,7 @@ def test_against_frame_and_seriesgroupby(
     # - apply with :meth:`~DataFrame.value_counts`
     # - `~SeriesGroupBy.value_counts`
     if Version(np.__version__) >= Version("1.25") and frame and sort and normalize:
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(
                 reason=(
                     "pandas default unstable sorting of duplicates"
@@ -482,7 +482,7 @@ def test_dropna_combinations(
     nulls_df, group_dropna, count_dropna, expected_rows, expected_values, request
 ):
     if Version(np.__version__) >= Version("1.25") and not group_dropna:
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(
                 reason=(
                     "pandas default unstable sorting of duplicates"
@@ -586,7 +586,7 @@ def test_categorical_single_grouper_with_only_observed_categories(
     # Test single categorical grouper with only observed grouping categories
     # when non-groupers are also categorical
     if Version(np.__version__) >= Version("1.25"):
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(
                 reason=(
                     "pandas default unstable sorting of duplicates"
@@ -695,7 +695,7 @@ def test_categorical_single_grouper_observed_true(
     # GH#46357

     if Version(np.__version__) >= Version("1.25"):
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(
                 reason=(
                     "pandas default unstable sorting of duplicates"
@@ -776,7 +776,7 @@ def test_categorical_single_grouper_observed_false(
     # GH#46357

     if Version(np.__version__) >= Version("1.25"):
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(
                 reason=(
                     "pandas default unstable sorting of duplicates"
@@ -929,7 +929,7 @@ def test_categorical_non_groupers(
     # regardless of `observed`

     if Version(np.__version__) >= Version("1.25"):
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(
                 reason=(
                     "pandas default unstable sorting of duplicates"
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 11291bb89b604..939dd176ae90e 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -1443,7 +1443,7 @@ def test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nans(
         mark = pytest.mark.xfail(
             reason="TODO: implemented SeriesGroupBy.corrwith. See GH 32293"
         )
-        request.node.add_marker(mark)
+        request.applymarker(mark)

     df = DataFrame(
         {
@@ -1912,7 +1912,7 @@ def test_category_order_reducer(
     # GH#48749
     if reduction_func == "corrwith" and not as_index:
         msg = "GH#49950 - corrwith with as_index=False may not have grouping column"
-        request.node.add_marker(pytest.mark.xfail(reason=msg))
+        request.applymarker(pytest.mark.xfail(reason=msg))
     elif index_kind != "range" and not as_index:
         pytest.skip(reason="Result doesn't have categories, nothing to test")
     df = DataFrame(
@@ -2123,7 +2123,7 @@ def test_agg_list(request, as_index, observed, reduction_func, test_series, keys
         pytest.skip("corrwith not implemented for SeriesGroupBy")
     elif reduction_func == "corrwith":
         msg = "GH#32293: attempts to call SeriesGroupBy.corrwith"
-        request.node.add_marker(pytest.mark.xfail(reason=msg))
+        request.applymarker(pytest.mark.xfail(reason=msg))
     elif (
         reduction_func == "nunique"
         and not test_series
@@ -2132,7 +2132,7 @@ def test_agg_list(request, as_index, observed, reduction_func, test_series, keys
         and not as_index
     ):
         msg = "GH#52848 - raises a ValueError"
-        request.node.add_marker(pytest.mark.xfail(reason=msg))
+        request.applymarker(pytest.mark.xfail(reason=msg))

     df = DataFrame({"a1": [0, 0, 1], "a2": [2, 3, 3], "b": [4, 5, 6]})
     df = df.astype({"a1": "category", "a2": "category"})
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 4876267c72f12..b840443aab347 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -418,7 +418,7 @@ def test_axis1_numeric_only(request, groupby_func, numeric_only):
         pytest.skip("idxmax and idx_min tested in test_idxmin_idxmax_axis1")
     if groupby_func in ("corrwith", "skew"):
         msg = "GH#47723 groupby.corrwith and skew do not correctly implement axis=1"
-        request.node.add_marker(pytest.mark.xfail(reason=msg))
+        request.applymarker(pytest.mark.xfail(reason=msg))

     df = DataFrame(
         np.random.default_rng(2).standard_normal((10, 4)), columns=["A", "B", "C", "D"]
@@ -822,7 +822,7 @@ def test_duplicate_columns(request, groupby_func, as_index):
     # GH#50806
     if groupby_func == "corrwith":
         msg = "GH#50845 - corrwith fails when there are duplicate columns"
-        request.node.add_marker(pytest.mark.xfail(reason=msg))
+        request.applymarker(pytest.mark.xfail(reason=msg))
     df = DataFrame([[1, 3, 6], [1, 4, 7], [2, 5, 8]], columns=list("abb"))
     args = get_groupby_method_args(groupby_func, df)
     gb = df.groupby("a", as_index=as_index)
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index 8065aa63dff81..ab3920d18374b 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -591,7 +591,7 @@ def test_categorical_transformers(
     # GH#36327
     if transformation_func == "fillna":
         msg = "GH#49651 fillna may incorrectly reorders results when dropna=False"
-        request.node.add_marker(pytest.mark.xfail(reason=msg, strict=False))
+        request.applymarker(pytest.mark.xfail(reason=msg, strict=False))

     values = np.append(np.random.default_rng(2).choice([1, 2, None], size=19), None)
     df = pd.DataFrame(
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
index 4a493ef3fd52c..add3c94dcd36a 100644
--- a/pandas/tests/groupby/transform/test_transform.py
+++ b/pandas/tests/groupby/transform/test_transform.py
@@ -203,7 +203,7 @@ def test_transform_axis_1_reducer(request, reduction_func):
         "nth",
     ):
         marker = pytest.mark.xfail(reason="transform incorrectly fails - GH#45986")
-        request.node.add_marker(marker)
+        request.applymarker(marker)
     df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}, index=["x", "y"])

     msg = "DataFrame.groupby with axis=1 is deprecated"
@@ -1455,7 +1455,7 @@ def test_null_group_str_reducer(request, dropna, reduction_func):
     # GH 17093
     if reduction_func == "corrwith":
         msg = "incorrectly raises"
-        request.node.add_marker(pytest.mark.xfail(reason=msg))
+        request.applymarker(pytest.mark.xfail(reason=msg))

     index = [1, 2, 3, 4]  # test transform preserves non-standard index
     df = DataFrame({"A": [1, 1, np.nan, np.nan], "B": [1, 2, 2, 3]}, index=index)
diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py
index 7eea05c753b8a..c58d55ad6371b 100644
--- a/pandas/tests/indexes/datetimes/test_ops.py
+++ b/pandas/tests/indexes/datetimes/test_ops.py
@@ -34,7 +34,7 @@ class TestDatetimeIndexOps:
     def test_resolution(self, request, tz_naive_fixture, freq, expected):
         tz = tz_naive_fixture
         if freq == "Y" and not IS64 and isinstance(tz, tzlocal):
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(reason="OverflowError inside tzlocal past 2038")
             )
diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py
index 6245a129afedc..412a59d15307d 100644
--- a/pandas/tests/indexes/test_common.py
+++ b/pandas/tests/indexes/test_common.py
@@ -258,7 +258,7 @@ def test_searchsorted_monotonic(self, index_flat, request):
                 reason="IntervalIndex.searchsorted does not support Interval arg",
                 raises=NotImplementedError,
             )
-            request.node.add_marker(mark)
+            request.applymarker(mark)

         # nothing to test if the index is empty
         if index.empty:
@@ -459,7 +459,7 @@ def test_sort_values_with_missing(index_with_missing, na_position, request):

     # sort non-missing and place missing according to na_position
     if isinstance(index_with_missing, CategoricalIndex):
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(
                 reason="missing value sorting order not well-defined", strict=False
             )
diff --git a/pandas/tests/indexes/test_index_new.py b/pandas/tests/indexes/test_index_new.py
index d35c35661051a..b6de73266dc34 100644
--- a/pandas/tests/indexes/test_index_new.py
+++ b/pandas/tests/indexes/test_index_new.py
@@ -146,7 +146,7 @@ def test_constructor_infer_nat_dt_like(
         if nulls_fixture is NA:
             expected = Index([NA, NaT])
             mark = pytest.mark.xfail(reason="Broken with np.NaT ctor; see GH 31884")
-            request.node.add_marker(mark)
+            request.applymarker(mark)
             # GH#35942 numpy will emit a DeprecationWarning within the
             # assert_index_equal calls. Since we can't do anything
             # about it until GH#31884 is fixed, we suppress that warning.
diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py
index d6304774b87c4..1dfeb4f2c6b5b 100644
--- a/pandas/tests/indexes/test_setops.py
+++ b/pandas/tests/indexes/test_setops.py
@@ -64,7 +64,7 @@ def test_union_different_types(index_flat, index_flat2, request):
         mark = pytest.mark.xfail(
             reason="GH#44000 True==1", raises=ValueError, strict=False
         )
-        request.node.add_marker(mark)
+        request.applymarker(mark)

     common_dtype = find_common_type([idx1.dtype, idx2.dtype])

@@ -89,7 +89,7 @@ def test_union_different_types(index_flat, index_flat2, request):
             raises=AssertionError,
             strict=False,
         )
-        request.node.add_marker(mark)
+        request.applymarker(mark)

     any_uint64 = np.uint64 in (idx1.dtype, idx2.dtype)
     idx1_signed = is_signed_integer_dtype(idx1.dtype)
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index 70eada188f3c8..6836e9a7c390e 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -1124,7 +1124,7 @@ def test_loc_copy_vs_view(self, request, using_copy_on_write):

         if not using_copy_on_write:
             mark = pytest.mark.xfail(reason="accidental fix reverted - GH37497")
-            request.node.add_marker(mark)
+            request.applymarker(mark)
         x = DataFrame(zip(range(3), range(3)), columns=["a", "b"])
         y = x.copy()
diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
index 8dd9f96a05a90..c5bf935b0d54d 100644
--- a/pandas/tests/io/excel/test_readers.py
+++ b/pandas/tests/io/excel/test_readers.py
@@ -197,7 +197,7 @@ def test_usecols_int(self, read_ext):

     def test_usecols_list(self, request, engine, read_ext, df_ref):
         if engine == "pyxlsb":
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
                 )
@@ -221,7 +221,7 @@ def test_usecols_list(self, request, engine, read_ext, df_ref):

     def test_usecols_str(self, request, engine, read_ext, df_ref):
         if engine == "pyxlsb":
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
                 )
@@ -279,7 +279,7 @@ def test_usecols_diff_positional_int_columns_order(
         self, request, engine, read_ext, usecols, df_ref
     ):
         if engine == "pyxlsb":
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
                 )
@@ -301,7 +301,7 @@ def test_usecols_diff_positional_str_columns_order(self, read_ext, usecols, df_r

     def test_read_excel_without_slicing(self, request, engine, read_ext, df_ref):
         if engine == "pyxlsb":
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
                 )
@@ -313,7 +313,7 @@ def test_read_excel_without_slicing(self, request, engine, read_ext, df_ref):

     def test_usecols_excel_range_str(self, request, engine, read_ext, df_ref):
         if engine == "pyxlsb":
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
                 )
@@ -401,7 +401,7 @@ def test_excel_stop_iterator(self, read_ext):

     def test_excel_cell_error_na(self, request, engine, read_ext):
         if engine == "pyxlsb":
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
                 )
@@ -409,7 +409,7 @@ def test_excel_cell_error_na(self, request, engine, read_ext):

         # https://github.com/tafia/calamine/issues/355
         if engine == "calamine" and read_ext == ".ods":
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(reason="Calamine can't extract error from ods files")
             )
@@ -419,7 +419,7 @@ def test_excel_cell_error_na(self, request, engine, read_ext):

     def test_excel_table(self, request, engine, read_ext, df_ref):
         if engine == "pyxlsb":
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
                 )
@@ -440,7 +440,7 @@ def test_excel_table(self, request, engine, read_ext, df_ref):

     def test_reader_special_dtypes(self, request, engine, read_ext):
         if engine == "pyxlsb":
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
                 )
@@ -778,7 +778,7 @@ def test_exception_message_includes_sheet_name(self, read_ext):

     def test_date_conversion_overflow(self, request, engine, read_ext):
         # GH 10001 : pandas.ExcelFile ignore parse_dates=False
         if engine == "pyxlsb":
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
                 )
@@ -794,13 +794,13 @@ def test_date_conversion_overflow(self, request, engine, read_ext):
         )

         if engine == "openpyxl":
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(reason="Maybe not supported by openpyxl")
             )

         if engine is None and read_ext in (".xlsx", ".xlsm"):
             # GH 35029
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(reason="Defaults to openpyxl, maybe not supported")
             )
@@ -809,7 +809,7 @@ def test_date_conversion_overflow(self, request, engine, read_ext):

     def test_sheet_name(self, request, read_ext, engine, df_ref):
         if engine == "pyxlsb":
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
                 )
@@ -975,7 +975,7 @@ def test_close_from_py_localpath(self, read_ext):

     def test_reader_seconds(self, request, engine, read_ext):
         if engine == "pyxlsb":
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
                 )
@@ -983,7 +983,7 @@ def test_reader_seconds(self, request, engine, read_ext):

         # GH 55045
         if engine == "calamine" and read_ext == ".ods":
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason="ODS file contains bad datetime (seconds as text)"
                 )
@@ -1017,7 +1017,7 @@ def test_reader_seconds(self, request, engine, read_ext):

     def test_read_excel_multiindex(self, request, engine, read_ext):
         # see gh-4679
         if engine == "pyxlsb":
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
                 )
@@ -1025,9 +1025,7 @@ def test_read_excel_multiindex(self, request, engine, read_ext):

         # https://github.com/tafia/calamine/issues/354
         if engine == "calamine" and read_ext == ".ods":
-            request.node.add_marker(
-                pytest.mark.xfail(reason="Last test fails in calamine")
-            )
+            request.applymarker(pytest.mark.xfail(reason="Last test fails in calamine"))

         mi = MultiIndex.from_product([["foo", "bar"], ["a", "b"]])
         mi_file = "testmultiindex" + read_ext
@@ -1118,7 +1116,7 @@ def test_read_excel_multiindex_blank_after_name(
     ):
         # GH34673
         if engine == "pyxlsb":
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb (GH4679"
                 )
@@ -1241,7 +1239,7 @@ def test_read_excel_bool_header_arg(self, read_ext):

     def test_read_excel_skiprows(self, request, engine, read_ext):
         # GH 4903
         if engine == "pyxlsb":
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
                 )
@@ -1296,7 +1294,7 @@ def test_read_excel_skiprows(self, request, engine, read_ext):

     def test_read_excel_skiprows_callable_not_in(self, request, engine, read_ext):
         # GH 4903
         if engine == "pyxlsb":
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
                 )
@@ -1426,7 +1424,7 @@ def test_ignore_chartsheets_by_str(self, request, engine, read_ext):
         if read_ext == ".ods":
             pytest.skip("chartsheets do not exist in the ODF format")
         if engine == "pyxlsb":
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason="pyxlsb can't distinguish chartsheets from worksheets"
                 )
@@ -1439,7 +1437,7 @@ def test_ignore_chartsheets_by_int(self, request, engine, read_ext):
         if read_ext == ".ods":
             pytest.skip("chartsheets do not exist in the ODF format")
         if engine == "pyxlsb":
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason="pyxlsb can't distinguish chartsheets from worksheets"
                 )
@@ -1568,7 +1566,7 @@ def test_excel_passes_na_filter(self, read_ext, na_filter):

     def test_excel_table_sheet_by_index(self, request, engine, read_ext, df_ref):
         if engine == "pyxlsb":
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
                 )
@@ -1597,7 +1595,7 @@ def test_excel_table_sheet_by_index(self, request, engine, read_ext, df_ref):

     def test_sheet_name(self, request, engine, read_ext, df_ref):
         if engine == "pyxlsb":
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
                 )
@@ -1689,7 +1687,7 @@ def test_header_with_index_col(self, filename):

     def test_read_datetime_multiindex(self, request, engine, read_ext):
         # GH 34748
         if engine == "pyxlsb":
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
                 )
@@ -1720,7 +1718,7 @@ def test_ignore_chartsheets(self, request, engine, read_ext):
         if read_ext == ".ods":
             pytest.skip("chartsheets do not exist in the ODF format")
         if engine == "pyxlsb":
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason="pyxlsb can't distinguish chartsheets from worksheets"
                 )
diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py
index 18af18ade85f4..946c621ae2b6e 100644
--- a/pandas/tests/io/excel/test_writers.py
+++ b/pandas/tests/io/excel/test_writers.py
@@ -217,7 +217,7 @@ def test_excel_multindex_roundtrip(
                 reason="Column index name cannot be serialized unless "
                 "it's a MultiIndex"
             )
-            request.node.add_marker(mark)
+            request.applymarker(mark)

         # Empty name case current read in as
         # unnamed levels, not Nones.
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 2767078674632..7312facc44c26 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -242,7 +242,7 @@ def test_roundtrip_categorical(
     ):
         # TODO: create a better frame to test with and improve coverage
         if orient in ("index", "columns"):
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason=f"Can't have duplicate index values for orient '{orient}')"
                 )
@@ -1893,7 +1893,7 @@ def test_json_pandas_nulls(self, nulls_fixture, request):
         # GH 31615
         if isinstance(nulls_fixture, Decimal):
             mark = pytest.mark.xfail(reason="not implemented")
-            request.node.add_marker(mark)
+            request.applymarker(mark)

         result = DataFrame([[nulls_fixture]]).to_json()
         assert result == '{"0":{"0":null}}'
diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py
index d7baba87bba31..124d6890886a8 100644
--- a/pandas/tests/io/json/test_readlines.py
+++ b/pandas/tests/io/json/test_readlines.py
@@ -43,7 +43,7 @@ def test_read_datetime(request, engine):
     if engine == "pyarrow":
         # GH 48893
         reason = "Pyarrow only supports a file path as an input and line delimited json"
-        request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError))
+        request.applymarker(pytest.mark.xfail(reason=reason, raises=ValueError))

     df = DataFrame(
         [([1, 2], ["2020-03-05", "2020-04-08T09:58:49+00:00"], "hector")],
@@ -121,7 +121,7 @@ def test_readjson_chunks(request, lines_json_df, chunksize, engine):
             "Pyarrow only supports a file path as an input and line delimited json"
             "and doesn't support chunksize parameter."
         )
-        request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError))
+        request.applymarker(pytest.mark.xfail(reason=reason, raises=ValueError))

     unchunked = read_json(StringIO(lines_json_df), lines=True)
     with read_json(
@@ -148,7 +148,7 @@ def test_readjson_chunks_series(request, engine):
             "Pyarrow only supports a file path as an input and line delimited json"
             "and doesn't support chunksize parameter."
         )
-        request.node.add_marker(pytest.mark.xfail(reason=reason))
+        request.applymarker(pytest.mark.xfail(reason=reason))

     # Test reading line-format JSON to Series with chunksize param
     s = pd.Series({"A": 1, "B": 2})
@@ -172,7 +172,7 @@ def test_readjson_each_chunk(request, lines_json_df, engine):
             "Pyarrow only supports a file path as an input and line delimited json"
             "and doesn't support chunksize parameter."
         )
-        request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError))
+        request.applymarker(pytest.mark.xfail(reason=reason, raises=ValueError))

     # Other tests check that the final result of read_json(chunksize=True)
     # is correct. This checks the intermediate chunks.
@@ -191,7 +191,7 @@ def test_readjson_chunks_from_file(request, engine):
             "Pyarrow only supports a file path as an input and line delimited json"
             "and doesn't support chunksize parameter."
         )
-        request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError))
+        request.applymarker(pytest.mark.xfail(reason=reason, raises=ValueError))

     with tm.ensure_clean("test.json") as path:
         df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
@@ -274,7 +274,7 @@ def test_readjson_unicode(request, monkeypatch, engine):
             "Pyarrow only supports a file path as an input and line delimited json"
             "and doesn't support chunksize parameter."
         )
-        request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError))
+        request.applymarker(pytest.mark.xfail(reason=reason, raises=ValueError))

     with tm.ensure_clean("test.json") as path:
         monkeypatch.setattr("locale.getpreferredencoding", lambda do_setlocale: "cp949")
@@ -309,7 +309,7 @@ def test_readjson_nrows_chunks(request, nrows, chunksize, engine):
             "Pyarrow only supports a file path as an input and line delimited json"
             "and doesn't support chunksize parameter."
         )
-        request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError))
+        request.applymarker(pytest.mark.xfail(reason=reason, raises=ValueError))

     jsonl = """{"a": 1, "b": 2}
 {"a": 3, "b": 4}
@@ -351,7 +351,7 @@ def test_readjson_lines_chunks_fileurl(request, datapath, engine):
             "Pyarrow only supports a file path as an input and line delimited json"
             "and doesn't support chunksize parameter."
         )
-        request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError))
+        request.applymarker(pytest.mark.xfail(reason=reason, raises=ValueError))

     df_list_expected = [
         DataFrame([[1, 2]], columns=["a", "b"], index=[0]),
diff --git a/pandas/tests/io/parser/common/test_float.py b/pandas/tests/io/parser/common/test_float.py
index 2ca98de914f9e..8ec372420a0f0 100644
--- a/pandas/tests/io/parser/common/test_float.py
+++ b/pandas/tests/io/parser/common/test_float.py
@@ -55,7 +55,7 @@ def test_too_many_exponent_digits(all_parsers_all_precisions, exp, request):
     if precision == "round_trip":
         if exp == 999999999999999999 and is_platform_linux():
             mark = pytest.mark.xfail(reason="GH38794, on Linux gives object result")
-            request.node.add_marker(mark)
+            request.applymarker(mark)

         value = np.inf if exp > 0 else 0.0
         expected = DataFrame({"data": [value]})
diff --git a/pandas/tests/io/parser/common/test_read_errors.py b/pandas/tests/io/parser/common/test_read_errors.py
index 4e82dca83e2d0..ff1af6e2dc81b 100644
--- a/pandas/tests/io/parser/common/test_read_errors.py
+++ b/pandas/tests/io/parser/common/test_read_errors.py
@@ -210,7 +210,7 @@ def test_null_byte_char(request, all_parsers):

     if parser.engine == "c" or (parser.engine == "python" and PY311):
         if parser.engine == "python" and PY311:
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason="In Python 3.11, this is read as an empty character not null"
                 )
@@ -230,7 +230,7 @@ def test_open_file(request, all_parsers):
     # GH 39024
     parser = all_parsers
     if parser.engine == "c":
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(
                 reason=f"{parser.engine} engine does not support sep=None "
                 f"with delim_whitespace=False"
diff --git a/pandas/tests/io/parser/conftest.py b/pandas/tests/io/parser/conftest.py
index 3ab40ff846cb6..591defdde7df9 100644
--- a/pandas/tests/io/parser/conftest.py
+++ b/pandas/tests/io/parser/conftest.py
@@ -278,7 +278,7 @@ def pyarrow_xfail(request):
         return
     if parser.engine == "pyarrow":
         mark = pytest.mark.xfail(reason="pyarrow doesn't support this.")
-        request.node.add_marker(mark)
+        request.applymarker(mark)


 @pytest.fixture
diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
index 97a32ad79a67c..8fb25fe3ee47e 100644
--- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
+++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
@@ -234,7 +234,7 @@ def decimal_number_check(request, parser, numeric_decimal, thousands, float_prec
     # GH#31920
     value = numeric_decimal[0]
     if thousands is None and value in ("1_,", "1_234,56", "1_234,56e0"):
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(reason=f"thousands={thousands} and sep is in {value}")
         )
     df = parser.read_csv(
diff --git a/pandas/tests/io/parser/test_comment.py b/pandas/tests/io/parser/test_comment.py
index 9a14e67c154b6..5b738446ea441 100644
--- a/pandas/tests/io/parser/test_comment.py
+++ b/pandas/tests/io/parser/test_comment.py
@@ -45,7 +45,7 @@ def test_line_comment(all_parsers, read_kwargs, request):
             mark = pytest.mark.xfail(
                 reason="Custom terminator not supported with Python engine"
             )
-            request.node.add_marker(mark)
+            request.applymarker(mark)

         data = data.replace("\n", read_kwargs.get("lineterminator"))

@@ -146,7 +146,7 @@ def test_comment_char_in_default_value(all_parsers, request):
     if all_parsers.engine == "c":
         reason = "see gh-34002: works on the python engine but not the c engine"
         # NA value containing comment char is interpreted as comment
-        request.node.add_marker(pytest.mark.xfail(reason=reason, raises=AssertionError))
+        request.applymarker(pytest.mark.xfail(reason=reason, raises=AssertionError))
     parser = all_parsers

     data = (
diff --git a/pandas/tests/io/parser/test_compression.py b/pandas/tests/io/parser/test_compression.py
index d150b52258d47..1c67ec7c3066c 100644
--- a/pandas/tests/io/parser/test_compression.py
+++ b/pandas/tests/io/parser/test_compression.py
@@ -105,7 +105,7 @@ def test_compression(
     filename = filename if filename is None else filename.format(ext=ext)

     if filename and buffer:
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(
                 reason="Cannot deduce compression from buffer of compressed data."
             )
diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py
index 3873bf31c1ed4..c09ab7898c67c 100644
--- a/pandas/tests/io/parser/test_encoding.py
+++ b/pandas/tests/io/parser/test_encoding.py
@@ -127,9 +127,7 @@ def _encode_data_with_bom(_data):
         and kwargs.get("skip_blank_lines", True)
     ):
         # Manually xfail, since we don't have mechanism to xfail specific version
-        request.node.add_marker(
-            pytest.mark.xfail(reason="Pyarrow can't read blank lines")
-        )
+        request.applymarker(pytest.mark.xfail(reason="Pyarrow can't read blank lines"))

     result = parser.read_csv(_encode_data_with_bom(data), encoding=utf8, **kwargs)
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py
index a7ded00e758b7..8213ea006614f 100644
--- a/pandas/tests/io/parser/test_index_col.py
+++ b/pandas/tests/io/parser/test_index_col.py
@@ -341,7 +341,7 @@ def test_specify_dtype_for_index_col(all_parsers, dtype, val, request):
     data = "a,b\n01,2"
     parser = all_parsers
     if dtype == object and parser.engine == "pyarrow":
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(reason="Cannot disable type-inference for pyarrow engine")
         )
     result = parser.read_csv(StringIO(data), index_col="a", dtype={"a": dtype})
diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py
index 9f7840588f89e..bd08dd3a0c5d2 100644
--- a/pandas/tests/io/parser/test_parse_dates.py
+++ b/pandas/tests/io/parser/test_parse_dates.py
@@ -157,7 +157,7 @@ def test_multiple_date_col_custom(all_parsers, keep_date_col, request):
         mark = pytest.mark.xfail(
             reason="pyarrow doesn't support disabling auto-inference on column numbers."
         )
-        request.node.add_marker(mark)
+        request.applymarker(mark)

     def date_parser(*date_cols):
         """
@@ -326,7 +326,7 @@ def test_multiple_date_col(all_parsers, keep_date_col, request):
         mark = pytest.mark.xfail(
             reason="pyarrow doesn't support disabling auto-inference on column numbers."
         )
-        request.node.add_marker(mark)
+        request.applymarker(mark)

     kwds = {
         "header": None,
@@ -1836,7 +1836,7 @@ def test_hypothesis_delimited_date(
     request, date_format, dayfirst, delimiter, test_datetime
 ):
     if date_format == "%m %Y" and delimiter == ".":
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(
                 reason="parse_datetime_string cannot reliably tell whether "
                 "e.g. %m.%Y is a float or a date"
diff --git a/pandas/tests/io/parser/test_skiprows.py b/pandas/tests/io/parser/test_skiprows.py
index c58e27aacfa00..4b509edc36925 100644
--- a/pandas/tests/io/parser/test_skiprows.py
+++ b/pandas/tests/io/parser/test_skiprows.py
@@ -204,7 +204,7 @@ def test_skiprows_lineterminator(all_parsers, lineterminator, request):

     if parser.engine == "python" and lineterminator == "\r":
         mark = pytest.mark.xfail(reason="'CR' not respect with the Python parser yet")
-        request.node.add_marker(mark)
+        request.applymarker(mark)

     data = data.replace("\n", lineterminator)
     result = parser.read_csv(
diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py
index b489c09e917af..f201f6c394566 100644
--- a/pandas/tests/io/parser/test_unsupported.py
+++ b/pandas/tests/io/parser/test_unsupported.py
@@ -190,7 +190,7 @@ def test_invalid_file_inputs(request, all_parsers):
     # GH#45957
     parser = all_parsers
     if parser.engine == "python":
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(reason=f"{parser.engine} engine supports lists.")
         )
diff --git a/pandas/tests/io/pytables/test_round_trip.py b/pandas/tests/io/pytables/test_round_trip.py
index 48983cbb5ec28..af24a5cf7e0ab 100644
--- a/pandas/tests/io/pytables/test_round_trip.py
+++ b/pandas/tests/io/pytables/test_round_trip.py
@@ -331,7 +331,7 @@ def test_timeseries_preepoch(setup_path, request):
         _check_roundtrip(ts, tm.assert_series_equal, path=setup_path)
     except OverflowError:
         if is_platform_windows():
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail("known failure on some windows platforms")
             )
         raise
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index b043f9fab23ae..1538275e6af73 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -453,7 +453,7 @@ def test_read_filters(self, engine, tmp_path):
     def test_write_index(self, engine, using_copy_on_write, request):
         check_names = engine != "fastparquet"
         if using_copy_on_write and engine == "fastparquet":
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(reason="fastparquet write into index")
             )
@@ -626,7 +626,7 @@ def test_dtype_backend(self, engine, request):
             mark = pytest.mark.xfail(
                 reason="Fastparquet nullable dtype support is disabled"
             )
-            request.node.add_marker(mark)
+            request.applymarker(mark)

         table = pyarrow.table(
             {
@@ -988,7 +988,7 @@ def test_timezone_aware_index(self, request, pa, timezone_aware_date_list):
             not pa_version_under7p0
             and timezone_aware_date_list.tzinfo != datetime.timezone.utc
         ):
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason="temporary skip this test until it is properly resolved: "
                     "https://github.com/pandas-dev/pandas/issues/37286"
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index 11f95ff104767..63546b44e92be 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -817,7 +817,7 @@ def sample(pd_table, conn, keys, data_iter):
 def test_default_type_conversion(conn, request):
     conn_name = conn
     if conn_name == "sqlite_buildin_iris":
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(
                 reason="sqlite_buildin connection does not implement read_sql_table"
             )
@@ -1155,7 +1155,7 @@ def test_read_sql_iris_named_parameter(conn, request, sql_strings, flavor):
 @pytest.mark.parametrize("conn", all_connectable_iris)
 def test_read_sql_iris_no_parameter_with_percent(conn, request, sql_strings, flavor):
     if "mysql" in conn or "postgresql" in conn:
-        request.node.add_marker(pytest.mark.xfail(reason="broken test"))
+        request.applymarker(pytest.mark.xfail(reason="broken test"))

     conn_name = conn
     conn = request.getfixturevalue(conn)
@@ -1399,7 +1399,7 @@ def test_api_custom_dateparsing_error(
     conn_name = conn
     conn = request.getfixturevalue(conn)
     if text == "types" and conn_name == "sqlite_buildin_iris":
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(reason="failing combination of arguments")
         )
@@ -1497,7 +1497,7 @@ def test_api_to_sql_index_label(conn, request, index_name, index_label, expected
 def test_api_to_sql_index_label_multiindex(conn, request):
     conn_name = conn
     if "mysql" in conn_name:
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(
                 reason="MySQL can fail using TEXT without length as key", strict=False
             )
@@ -1802,7 +1802,7 @@ def test_read_table_columns(conn, request, test_frame1):
     # test columns argument in read_table
     conn_name = conn
     if conn_name == "sqlite_buildin":
-        request.node.add_marker(pytest.mark.xfail(reason="Not Implemented"))
+        request.applymarker(pytest.mark.xfail(reason="Not Implemented"))

     conn = request.getfixturevalue(conn)
     sql.to_sql(test_frame1, "test_frame", conn)
@@ -1818,7 +1818,7 @@ def test_read_table_index_col(conn, request, test_frame1):
     # test columns argument in read_table
     conn_name = conn
     if conn_name == "sqlite_buildin":
-        request.node.add_marker(pytest.mark.xfail(reason="Not Implemented"))
+        request.applymarker(pytest.mark.xfail(reason="Not Implemented"))

     conn = request.getfixturevalue(conn)
     sql.to_sql(test_frame1, "test_frame", conn)
@@ -1839,7 +1839,7 @@ def test_read_table_index_col(conn, request, test_frame1):
 @pytest.mark.parametrize("conn", all_connectable_iris)
 def test_read_sql_delegate(conn, request):
     if conn == "sqlite_buildin_iris":
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(
                 reason="sqlite_buildin connection does not implement read_sql_table"
             )
@@ -1884,7 +1884,7 @@ def test_not_reflect_all_tables(sqlite_conn):
 def test_warning_case_insensitive_table_name(conn, request, test_frame1):
     conn_name = conn
     if conn_name == "sqlite_buildin":
-        request.node.add_marker(pytest.mark.xfail(reason="Does not raise warning"))
+        request.applymarker(pytest.mark.xfail(reason="Does not raise warning"))

     conn = request.getfixturevalue(conn)
     # see gh-7815
@@ -2034,7 +2034,7 @@ def test_column_with_percentage(conn, request):
     # GH 37157
     conn_name = conn
     if conn_name == "sqlite_buildin":
-        request.node.add_marker(pytest.mark.xfail(reason="Not Implemented"))
+        request.applymarker(pytest.mark.xfail(reason="Not Implemented"))

     conn = request.getfixturevalue(conn)
     df = DataFrame({"A": [0, 1, 2], "%_variation": [3, 4, 5]})
@@ -2226,7 +2226,7 @@ def test_sqlalchemy_default_type_conversion(conn, request):
     if conn_name == "sqlite_str":
         pytest.skip("types tables not created in sqlite_str fixture")
     elif "mysql" in conn_name or "sqlite" in conn_name:
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(reason="boolean dtype not inferred properly")
         )
@@ -2260,7 +2260,7 @@ def test_default_date_load(conn, request):
     if conn_name == "sqlite_str":
         pytest.skip("types tables not created in sqlite_str fixture")
     elif "sqlite" in conn_name:
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(reason="sqlite does not read date properly")
         )
@@ -2310,7 +2310,7 @@ def check(col):
     conn = request.getfixturevalue(conn)
     df = read_sql_query("select * from types", conn)
     if not hasattr(df, "DateColWithTz"):
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(reason="no column with datetime with time zone")
         )

@@ -2322,7 +2322,7 @@ def check(col):
     df = read_sql_query("select * from types", conn, parse_dates=["DateColWithTz"])
     if not hasattr(df, "DateColWithTz"):
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(reason="no column with datetime with time zone")
         )
     col = df.DateColWithTz
@@ -2689,7 +2689,7 @@ def test_get_schema_create_table(conn, request, test_frame3):
     # TINYINT (which read_sql_table returns as an int and causes a dtype
     # mismatch)
     if conn == "sqlite_str":
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(reason="test does not support sqlite_str fixture")
         )
@@ -2901,7 +2901,7 @@ def test_to_sql_with_negative_npinf(conn, request, input):

         if Version(pymysql.__version__) < Version("1.0.3") and "infe0" in df.columns:
             mark = pytest.mark.xfail(reason="GH 36465")
-            request.node.add_marker(mark)
+            request.applymarker(mark)

         msg = "inf cannot be used with MySQL"
         with pytest.raises(ValueError, match=msg):
@@ -2953,7 +2953,7 @@ class Temporary(Base):
 @pytest.mark.parametrize("conn", all_connectable)
 def test_invalid_engine(conn, request, test_frame1):
     if conn == "sqlite_buildin":
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(reason="SQLiteDatabase does not raise for bad engine")
         )
@@ -3078,7 +3078,7 @@ def test_read_sql_dtype_backend_table(
     dtype_backend_expected,
 ):
     if "sqlite" in conn:
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(
                 reason=(
                     "SQLite actually returns proper boolean values via "
diff --git a/pandas/tests/reshape/concat/test_append_common.py b/pandas/tests/reshape/concat/test_append_common.py
index df5ca2f27c15d..a87042180df8f 100644
--- a/pandas/tests/reshape/concat/test_append_common.py
+++ b/pandas/tests/reshape/concat/test_append_common.py
@@ -197,11 +197,11 @@ def test_concatlike_dtypes_coercion(self, item, item2, request):
             # index doesn't because bool is object dtype
             exp_series_dtype = typ2
             mark = pytest.mark.xfail(reason="GH#39187 casting to object")
-            request.node.add_marker(mark)
+            request.applymarker(mark)
         elif typ2 == "bool" and typ1 in ("int64", "float64"):
             exp_series_dtype = typ1
             mark = pytest.mark.xfail(reason="GH#39187 casting to object")
-            request.node.add_marker(mark)
+            request.applymarker(mark)
         elif typ1 in {"datetime64[ns, US/Eastern]", "timedelta64[ns]"} or typ2 in {
             "datetime64[ns, US/Eastern]",
             "timedelta64[ns]",
diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py
index b65b34f748260..5eddb4b83f44c 100644
--- a/pandas/tests/scalar/timestamp/test_constructors.py
+++ b/pandas/tests/scalar/timestamp/test_constructors.py
@@ -353,7 +353,7 @@ def test_constructor_positional_keyword_mixed_with_tzinfo(self, kwd, request):
         if kwd != "nanosecond":
             # nanosecond is keyword-only as of 2.0, others are not
             mark = pytest.mark.xfail(reason="GH#45307")
-            request.node.add_marker(mark)
+            request.applymarker(mark)

         kwargs = {kwd: 4}
         ts = Timestamp(2020, 12, 31, tzinfo=timezone.utc, **kwargs)
diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py
index 03fc6cba2902a..faa3978038dd5 100644
--- a/pandas/tests/series/methods/test_astype.py
+++ b/pandas/tests/series/methods/test_astype.py
@@ -170,7 +170,7 @@ def test_astype_generic_timestamp_no_frequency(self, dtype, request):

         if np.dtype(dtype).name not in ["timedelta64", "datetime64"]:
             mark = pytest.mark.xfail(reason="GH#33890 Is assigned ns unit")
-            request.node.add_marker(mark)
+            request.applymarker(mark)

         msg = (
             rf"The '{dtype.__name__}' dtype has no unit\. "
@@ -485,7 +485,7 @@ def test_astype_string_to_extension_dtype_roundtrip(
             mark = pytest.mark.xfail(
                 reason="TODO StringArray.astype() with missing values #GH40566"
             )
-            request.node.add_marker(mark)
+            request.applymarker(mark)
         # GH-40351
         ser = Series(data, dtype=dtype)
diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py
index f8bbd4c25a4c0..426885bf41a1f 100644
--- a/pandas/tests/series/methods/test_interpolate.py
+++ b/pandas/tests/series/methods/test_interpolate.py
@@ -831,7 +831,7 @@ def test_interpolate_timedelta_index(self, request, interp_methods_ind):
         method, kwargs = interp_methods_ind

        if method in {"cubic", "zero"}:
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason=f"{method} interpolation is not supported for TimedeltaIndex"
                 )
diff --git a/pandas/tests/series/methods/test_map.py b/pandas/tests/series/methods/test_map.py
index ae6c62e95f696..6b357281c831b 100644
--- a/pandas/tests/series/methods/test_map.py
+++ b/pandas/tests/series/methods/test_map.py
@@ -204,7 +204,7 @@ def test_map(datetime_series):

 def test_map_empty(request, index):
     if isinstance(index, MultiIndex):
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(
                 reason="Initializing a Series from a MultiIndex is not supported"
             )
diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py
index e9eb906a9cf10..656f6736f03ee 100644
--- a/pandas/tests/series/test_arithmetic.py
+++ b/pandas/tests/series/test_arithmetic.py
@@ -881,7 +881,7 @@ def test_none_comparison(request, series_with_simple_index):
     series = series_with_simple_index

     if len(series) < 1:
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(reason="Test doesn't make sense on empty data")
         )
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index 4f9050be100ca..dc5158cb5813c 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -1362,7 +1362,7 @@ def test_constructor_dict_extension(self, ea_scalar_and_dtype, request):
                 reason="Construction from dict goes through "
                 "maybe_convert_objects which casts to nano"
             )
-            request.node.add_marker(mark)
+            request.applymarker(mark)
         d = {"a": ea_scalar}
         result = Series(d, index=["a"])
         expected = Series(ea_scalar, index=["a"], dtype=ea_dtype)
@@ -1688,7 +1688,7 @@ def test_constructor_generic_timestamp_no_frequency(self, dtype, request):

         if np.dtype(dtype).name not in ["timedelta64", "datetime64"]:
             mark = pytest.mark.xfail(reason="GH#33890 Is assigned ns unit")
-            request.node.add_marker(mark)
+            request.applymarker(mark)

         with pytest.raises(ValueError, match=msg):
             Series([], dtype=dtype)
diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py
index 698c727f1beb8..a75e77a5e2714 100644
--- a/pandas/tests/series/test_ufunc.py
+++ b/pandas/tests/series/test_ufunc.py
@@ -274,7 +274,7 @@ def test_multiply(self, values_for_np_reduce, box_with_array, request):

         if isinstance(values, pd.core.arrays.SparseArray):
             mark = pytest.mark.xfail(reason="SparseArray has no 'prod'")
-            request.node.add_marker(mark)
+            request.applymarker(mark)

         if values.dtype.kind in "iuf":
             result = np.multiply.reduce(obj)
diff --git a/pandas/tests/strings/test_api.py b/pandas/tests/strings/test_api.py
index c439a5f006922..0d2f220e70c56 100644
--- a/pandas/tests/strings/test_api.py
+++ b/pandas/tests/strings/test_api.py
@@ -92,7 +92,7 @@ def test_api_per_method(

     if reason is not None:
         mark = pytest.mark.xfail(raises=raises, reason=reason)
-        request.node.add_marker(mark)
+        request.applymarker(mark)

     t = box(values, dtype=dtype)  # explicit dtype to avoid casting
     method = getattr(t.str, method_name)
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index b0406dbfa3469..aefaba1aed058 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -1831,7 +1831,7 @@ def test_to_datetime_month_or_year_unit_int(self, cache, unit, item, request):

         # TODO: this should also work
         if isinstance(item, float):
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     reason=f"{type(item).__name__} in np.array should work"
                 )
diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py
index 1d969e648b752..da8e2fe9abc16 100644
--- a/pandas/tests/tools/test_to_numeric.py
+++ b/pandas/tests/tools/test_to_numeric.py
@@ -396,7 +396,7 @@ def test_period(request, transform_assert_equal):
     inp = transform(idx)

     if not isinstance(inp, Index):
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(reason="Missing PeriodDtype support in to_numeric")
         )
     result = to_numeric(inp)
diff --git a/pandas/tests/tseries/offsets/test_common.py b/pandas/tests/tseries/offsets/test_common.py
index 1b90b94d8a9da..5b80b8b1c4ab4 100644
--- a/pandas/tests/tseries/offsets/test_common.py
+++ b/pandas/tests/tseries/offsets/test_common.py
@@ -142,7 +142,7 @@ def test_apply_out_of_range(request, tz_naive_fixture, _offset):
     if isinstance(tz, tzlocal) and not IS64 and _offset is not DateOffset:
         # If we hit OutOfBoundsDatetime on non-64 bit machines
         # we'll drop out of the try clause before the next test
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(reason="OverflowError inside tzlocal past 2038")
         )
     elif (
@@ -150,7 +150,7 @@ def test_apply_out_of_range(request, tz_naive_fixture, _offset):
         and is_platform_windows()
         and _offset in (QuarterEnd, BQuarterBegin, BQuarterEnd)
     ):
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(reason="After GH#49737 t.tzinfo is None on CI")
         )
     assert str(t.tzinfo) == str(result.tzinfo)
diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py
index 5678dd1fb511e..7cefd93851b0e 100644
--- a/pandas/tests/tseries/offsets/test_offsets.py
+++ b/pandas/tests/tseries/offsets/test_offsets.py
@@ -602,7 +602,7 @@ def test_mul(self):
     @pytest.mark.parametrize("kwd", sorted(liboffsets._relativedelta_kwds))
     def test_constructor(self, kwd, request):
         if kwd == "millisecond":
-            request.node.add_marker(
+            request.applymarker(
                 pytest.mark.xfail(
                     raises=NotImplementedError,
                     reason="Constructing DateOffset object with `millisecond` is not "
@@ -916,7 +916,7 @@ def test_month_offset_name(month_classes):
 @pytest.mark.parametrize("kwd", sorted(liboffsets._relativedelta_kwds))
 def test_valid_relativedelta_kwargs(kwd, request):
     if kwd == "millisecond":
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(
                 raises=NotImplementedError,
                 reason="Constructing DateOffset object with `millisecond` is not "
diff --git a/pandas/tests/window/moments/test_moments_consistency_expanding.py b/pandas/tests/window/moments/test_moments_consistency_expanding.py
index dafc60a057c0f..7d2fa1ad5d211 100644
--- a/pandas/tests/window/moments/test_moments_consistency_expanding.py
+++ b/pandas/tests/window/moments/test_moments_consistency_expanding.py
@@ -19,7 +19,7 @@ def test_expanding_apply_consistency_sum_nans(request, all_data, min_periods, f)
     if not no_nans(all_data) and not (
         all_na(all_data) and not all_data.empty and min_periods > 0
     ):
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(reason="np.sum has different behavior with NaNs")
         )
     expanding_f_result = all_data.expanding(min_periods=min_periods).sum()
diff --git a/pandas/tests/window/moments/test_moments_consistency_rolling.py b/pandas/tests/window/moments/test_moments_consistency_rolling.py
index 62bfc66b124f3..be22338c00cb2 100644
--- a/pandas/tests/window/moments/test_moments_consistency_rolling.py
+++ b/pandas/tests/window/moments/test_moments_consistency_rolling.py
@@ -29,7 +29,7 @@ def test_rolling_apply_consistency_sum(
     if not no_nans(all_data) and not (
         all_na(all_data) and not all_data.empty and min_periods > 0
     ):
-        request.node.add_marker(
+        request.applymarker(
             pytest.mark.xfail(reason="np.sum has different behavior with NaNs")
         )
     rolling_f_result = all_data.rolling(
diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py
index 9be0c3edaa998..3292b701c18d7 100644
--- a/pandas/util/_test_decorators.py
+++ b/pandas/util/_test_decorators.py
@@ -229,7 +229,7 @@ def async_mark():

 def mark_array_manager_not_yet_implemented(request) -> None:
     mark = pytest.mark.xfail(reason="Not yet implemented for ArrayManager")
-    request.node.add_marker(mark)
+    request.applymarker(mark)


 skip_array_manager_not_yet_implemented = pytest.mark.xfail(