From 13395f0c151ce151a5963f78202309786a53095b Mon Sep 17 00:00:00 2001
From: Devin Petersohn <devin.petersohn@gmail.com>
Date: Thu, 2 Jun 2022 14:25:10 -0500
Subject: [PATCH 01/13] FIX-#4522: Correct multiindex metadata with groupby

Signed-off-by: Devin Petersohn <devin.petersohn@gmail.com>
---
 modin/core/dataframe/algebra/groupby.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/modin/core/dataframe/algebra/groupby.py b/modin/core/dataframe/algebra/groupby.py
index 78bb5dbc9dd..e8fef51f6f1 100644
--- a/modin/core/dataframe/algebra/groupby.py
+++ b/modin/core/dataframe/algebra/groupby.py
@@ -133,6 +133,12 @@ def map(
                     axis=1,
                 )
                 other = list(other.columns)
+                # GH#4522: Vile as this may be, it is necessary to avoid the case where we are
+                # grouping by columns that were recently added to the data via
+                # `from_labels`. The internal dataframe doesn't know what to do when
+                # the label matches a column name.
+                if len(df.columns.intersection(df.index.names)) > 0:
+                    df = df.reset_index(drop=True)
             by_part = other
         else:
             by_part = by

From 8040915c29bc19b9c60fc3619fbb99b9f7f46d0b Mon Sep 17 00:00:00 2001
From: Devin Petersohn <devin.petersohn@gmail.com>
Date: Thu, 2 Jun 2022 14:32:49 -0500
Subject: [PATCH 02/13] Add test

Signed-off-by: Devin Petersohn <devin.petersohn@gmail.com>
---
 modin/pandas/test/test_groupby.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
index 6f885199d95..1965d59b098 100644
--- a/modin/pandas/test/test_groupby.py
+++ b/modin/pandas/test/test_groupby.py
@@ -2003,3 +2003,25 @@ def test_sum_with_level():
     }
     modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)
     eval_general(modin_df, pandas_df, lambda df: df.set_index("C").groupby("C").sum())
+
+
+def test_reset_index_groupby():
+    frame_data = np.random.randint(97, 198, size=(2**6, 2**4))
+    pandas_df = pandas.DataFrame(
+        frame_data,
+        index=pandas.MultiIndex.from_tuples(
+            [(i // 4, i // 2, i) for i in range(2**6)]
+        ),
+    ).add_prefix("col")
+    pandas_df.index.names = [f"index_{i}" for i in range(len(pandas_df.index.names))]
+    # Convert every other column to string
+    for col in pandas_df.iloc[
+        :, [i for i in range(len(pandas_df.columns)) if i % 2 == 0]
+    ]:
+        pandas_df[col] = [str(chr(i)) for i in pandas_df[col]]
+    modin_df = from_pandas(pandas_df)
+    eval_general(
+        modin_df,
+        pandas_df,
+        lambda df: df.reset_index().groupby(["index_0", "index_1"]).count(),
+    )

From 6e8466e647ed9c3aca3aff81f727a461fc866fa6 Mon Sep 17 00:00:00 2001
From: Devin Petersohn <devin.petersohn@gmail.com>
Date: Thu, 2 Jun 2022 14:33:54 -0500
Subject: [PATCH 03/13] Add release note

Signed-off-by: Devin Petersohn <devin.petersohn@gmail.com>
---
 docs/release_notes/release_notes-0.15.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/release_notes/release_notes-0.15.0.rst b/docs/release_notes/release_notes-0.15.0.rst
index 45c9b023b9a..ce22e5dfd1f 100644
--- a/docs/release_notes/release_notes-0.15.0.rst
+++ b/docs/release_notes/release_notes-0.15.0.rst
@@ -27,6 +27,7 @@ Key Features and Updates
   * FIX-#4481: Allow clipping with a Modin Series of bounds (#4486)  
   * FIX-#4504: Support na_action in applymap (#4505)
   * FIX-#4503: Stop the memory logging thread after session exit (#4515)
+  * FIX-#4522: Correct multiindex metadata with groupby (#4523)
 * Performance enhancements
   * FEAT-#4320: Add connectorx as an alternative engine for read_sql (#4346)
   * PERF-#4493: Use partition size caches more in Modin dataframe (#4495)

From 95f551001e11bd908f40c02f0e83b0337788ed42 Mon Sep 17 00:00:00 2001
From: Devin Petersohn <devin.petersohn@gmail.com>
Date: Fri, 3 Jun 2022 08:50:38 -0500
Subject: [PATCH 04/13] Fix error checking for groupby

Signed-off-by: Devin Petersohn <devin.petersohn@gmail.com>
---
 modin/pandas/dataframe.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 6d3cc6812b4..85ec58d6cdc 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -415,7 +415,6 @@ def groupby(
             )
         else:
             squeeze = False
-
         axis = self._get_axis_number(axis)
         idx_name = None
         # Drop here indicates whether or not to drop the data column before doing the
@@ -448,6 +447,14 @@ def groupby(
             idx_name = by.name
             by = by._query_compiler
         elif is_list_like(by):
+            for k in by:
+                if k in self.index.names and k in self.axes[axis]:
+                    level_name, index_name = "an index", "a column"
+                    if axis == 1:
+                        level_name, index_name = index_name, level_name
+                    raise ValueError(
+                        f"{k} is both {level_name} level and {index_name} label, which is ambiguous."
+                    )
             # fastpath for multi column groupby
             if axis == 0 and all(
                 (

From d2e8c5f6802d35484f9856753d91e6d484e4f52a Mon Sep 17 00:00:00 2001
From: Devin Petersohn <devin.petersohn@gmail.com>
Date: Tue, 7 Jun 2022 11:18:33 -0500
Subject: [PATCH 05/13] Fix error checking

Signed-off-by: Devin Petersohn <devin.petersohn@gmail.com>
---
 modin/pandas/dataframe.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 85ec58d6cdc..7d2c05410ac 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -423,7 +423,16 @@ def groupby(
         # strings is passed in, the data used for the groupby is dropped before the
         # groupby takes place.
         drop = False
-
+        # Check that there is no ambiguity in the parameter we were given.
+        _by_check = by if is_list_like(by) else [by]
+        for k in _by_check:
+            if k in self.index.names and k in self.axes[axis]:
+                level_name, index_name = "an index", "a column"
+                if axis == 1:
+                    level_name, index_name = index_name, level_name
+                raise ValueError(
+                    f"{k} is both {level_name} level and {index_name} label, which is ambiguous."
+                )
         if (
             not isinstance(by, (pandas.Series, Series))
             and is_list_like(by)
@@ -447,14 +456,6 @@ def groupby(
             idx_name = by.name
             by = by._query_compiler
         elif is_list_like(by):
-            for k in by:
-                if k in self.index.names and k in self.axes[axis]:
-                    level_name, index_name = "an index", "a column"
-                    if axis == 1:
-                        level_name, index_name = index_name, level_name
-                    raise ValueError(
-                        f"{k} is both {level_name} level and {index_name} label, which is ambiguous."
-                    )
             # fastpath for multi column groupby
             if axis == 0 and all(
                 (

From a5bb81e5c337391a5e9ec838e3aaf6180343ae45 Mon Sep 17 00:00:00 2001
From: Rehan Durrani <rehan@ponder.io>
Date: Tue, 7 Jun 2022 11:32:04 -0700
Subject: [PATCH 06/13] Add test for when by is in intersection of index names
 and columns

Signed-off-by: Rehan Durrani <rehan@ponder.io>
---
 modin/core/dataframe/algebra/groupby.py |  2 ++
 modin/pandas/test/test_groupby.py       | 47 ++++++++++++++++++++++++-
 2 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/modin/core/dataframe/algebra/groupby.py b/modin/core/dataframe/algebra/groupby.py
index e8fef51f6f1..c2b7e38ad37 100644
--- a/modin/core/dataframe/algebra/groupby.py
+++ b/modin/core/dataframe/algebra/groupby.py
@@ -137,6 +137,8 @@ def map(
                 # grouping by columns that were recently added to the data via
                 # `from_labels`. The internal dataframe doesn't know what to do when
                 # the label matches a column name.
+                # We ensure that the columns, index, and by don't intersect in the API level,
+                # so if we hit this if statement, we know its a result of a deferred re-index.
                 if len(df.columns.intersection(df.index.names)) > 0:
                     df = df.reset_index(drop=True)
             by_part = other
diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
index 1965d59b098..997a953e22b 100644
--- a/modin/pandas/test/test_groupby.py
+++ b/modin/pandas/test/test_groupby.py
@@ -1566,7 +1566,7 @@ def test_agg_exceptions(operation):
         },
     ],
 )
-def test_to_pandas_convertion(kwargs):
+def test_to_pandas_conversion(kwargs):
     data = {"a": [1, 2], "b": [3, 4], "c": [5, 6]}
     by = ["a", "b"]
 
@@ -2025,3 +2025,48 @@ def test_reset_index_groupby():
         pandas_df,
         lambda df: df.reset_index().groupby(["index_0", "index_1"]).count(),
     )
+
+def test_by_in_index_and_columns():
+    pandas_df = pandas.DataFrame(
+        [[1, 2, 3]], index=pd.Index([0], name="a"), columns=['a', 'b', 'c']
+    )
+    modin_df = from_pandas(pandas_df)
+    eval_general(
+        modin_df,
+        pandas_df,
+        lambda df: df.groupby(by='a').count(),
+        raising_exceptions=True,
+        check_exception_type=True,
+    )
+    eval_general(
+        modin_df,
+        pandas_df,
+        lambda df: df.groupby(by=['a', 'b']).count(),
+        raising_exceptions=True,
+        check_exception_type=True,
+    )
+    pandas_df = pandas.DataFrame(
+        [[1, 2, 3]], index=pd.Index([(0, 1)], names=["a", 'b']), columns=['a', 'b', 'c']
+    )
+    modin_df = from_pandas(pandas_df)
+    eval_general(
+        modin_df,
+        pandas_df,
+        lambda df: df.groupby(by='a').count(),
+        raising_exceptions=True,
+        check_exception_type=True,
+    )
+    eval_general(
+        modin_df,
+        pandas_df,
+        lambda df: df.groupby(by=['a', 'c']).count(),
+        raising_exceptions=True,
+        check_exception_type=True,
+    )
+    eval_general(
+        modin_df,
+        pandas_df,
+        lambda df: df.groupby(by=['a', 'b']).count(),
+        raising_exceptions=True,
+        check_exception_type=True,
+    )

From 75c60891ac3ecdd3717cea1f076106056325063c Mon Sep 17 00:00:00 2001
From: Rehan Durrani <rehan@ponder.io>
Date: Tue, 7 Jun 2022 11:41:16 -0700
Subject: [PATCH 07/13] lint

Signed-off-by: Rehan Durrani <rehan@ponder.io>
---
 modin/pandas/test/test_groupby.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
index 997a953e22b..f1b8176553f 100644
--- a/modin/pandas/test/test_groupby.py
+++ b/modin/pandas/test/test_groupby.py
@@ -2026,47 +2026,48 @@ def test_reset_index_groupby():
         lambda df: df.reset_index().groupby(["index_0", "index_1"]).count(),
     )
 
+
 def test_by_in_index_and_columns():
     pandas_df = pandas.DataFrame(
-        [[1, 2, 3]], index=pd.Index([0], name="a"), columns=['a', 'b', 'c']
+        [[1, 2, 3]], index=pd.Index([0], name="a"), columns=["a", "b", "c"]
     )
     modin_df = from_pandas(pandas_df)
     eval_general(
         modin_df,
         pandas_df,
-        lambda df: df.groupby(by='a').count(),
+        lambda df: df.groupby(by="a").count(),
         raising_exceptions=True,
         check_exception_type=True,
     )
     eval_general(
         modin_df,
         pandas_df,
-        lambda df: df.groupby(by=['a', 'b']).count(),
+        lambda df: df.groupby(by=["a", "b"]).count(),
         raising_exceptions=True,
         check_exception_type=True,
     )
     pandas_df = pandas.DataFrame(
-        [[1, 2, 3]], index=pd.Index([(0, 1)], names=["a", 'b']), columns=['a', 'b', 'c']
+        [[1, 2, 3]], index=pd.Index([(0, 1)], names=["a", "b"]), columns=["a", "b", "c"]
     )
     modin_df = from_pandas(pandas_df)
     eval_general(
         modin_df,
         pandas_df,
-        lambda df: df.groupby(by='a').count(),
+        lambda df: df.groupby(by="a").count(),
         raising_exceptions=True,
         check_exception_type=True,
     )
     eval_general(
         modin_df,
         pandas_df,
-        lambda df: df.groupby(by=['a', 'c']).count(),
+        lambda df: df.groupby(by=["a", "c"]).count(),
         raising_exceptions=True,
         check_exception_type=True,
     )
     eval_general(
         modin_df,
         pandas_df,
-        lambda df: df.groupby(by=['a', 'b']).count(),
+        lambda df: df.groupby(by=["a", "b"]).count(),
         raising_exceptions=True,
         check_exception_type=True,
     )

From a547d323bfcf6ae336c1ea5a2740255a5fb2f106 Mon Sep 17 00:00:00 2001
From: Rehan Durrani <rehan@ponder.io>
Date: Tue, 7 Jun 2022 11:45:40 -0700
Subject: [PATCH 08/13] remove unused line

Signed-off-by: Rehan Durrani <rehan@ponder.io>
---
 modin/pandas/dataframe.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 7d2c05410ac..79bbd5147c6 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -415,6 +415,7 @@ def groupby(
             )
         else:
             squeeze = False
+
         axis = self._get_axis_number(axis)
         idx_name = None
         # Drop here indicates whether or not to drop the data column before doing the

From 3ce19eafc0781d96a9110b0e9a3645197e507928 Mon Sep 17 00:00:00 2001
From: Rehan Durrani <rehan@ponder.io>
Date: Tue, 7 Jun 2022 12:15:48 -0700
Subject: [PATCH 09/13] add comments to test

Signed-off-by: Rehan Durrani <rehan@ponder.io>
---
 modin/pandas/test/test_groupby.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
index df6f00f2baf..d7f7445e053 100644
--- a/modin/pandas/test/test_groupby.py
+++ b/modin/pandas/test/test_groupby.py
@@ -2035,6 +2035,14 @@ def test_sum_with_level():
 
 
 def test_reset_index_groupby():
+    # Due to `reset_index` deferring the actual reindexing of partitions,
+    # when we call groupby after a `reset_index` with a `by` column name
+    # that was moved from the index to the columns via `from_labels` the
+    # algebra layer incorrectly thinks that the `by` key is duplicated
+    # across both the columns and labels, and fails, when it should
+    # succeed. We have this test to ensure that that case is correctly
+    # handled, and passes. For more details, checkout
+    # https://github.com/modin-project/modin/issues/4522.
     frame_data = np.random.randint(97, 198, size=(2**6, 2**4))
     pandas_df = pandas.DataFrame(
         frame_data,
@@ -2043,11 +2051,14 @@ def test_reset_index_groupby():
         ),
     ).add_prefix("col")
     pandas_df.index.names = [f"index_{i}" for i in range(len(pandas_df.index.names))]
-    # Convert every other column to string
+    # Convert every even column to string
     for col in pandas_df.iloc[
         :, [i for i in range(len(pandas_df.columns)) if i % 2 == 0]
     ]:
         pandas_df[col] = [str(chr(i)) for i in pandas_df[col]]
+    # The `pandas_df` contains a multi-index with 3 levels, named `index_0`, `index_1`,
+    # and `index_2`, and 16 columns, named `col0` through `col15`. Every even column
+    # has dtype `str`, while odd columns have dtype `int64`.
     modin_df = from_pandas(pandas_df)
     eval_general(
         modin_df,

From 0e4a52101d750717440e987f093c4bc622cb58c3 Mon Sep 17 00:00:00 2001
From: Rehan Durrani <rehan@ponder.io>
Date: Tue, 7 Jun 2022 14:08:51 -0700
Subject: [PATCH 10/13] fix typo and add solution for series + index

Signed-off-by: Rehan Durrani <rehan@ponder.io>
---
 modin/pandas/dataframe.py         | 29 +++++++++++------
 modin/pandas/test/test_groupby.py | 54 +++++++++++++++++++++++++------
 2 files changed, 63 insertions(+), 20 deletions(-)

diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 79bbd5147c6..4fa2ecdfb3b 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -425,17 +425,21 @@ def groupby(
         # groupby takes place.
         drop = False
         # Check that there is no ambiguity in the parameter we were given.
-        _by_check = by if is_list_like(by) else [by]
-        for k in _by_check:
-            if k in self.index.names and k in self.axes[axis]:
-                level_name, index_name = "an index", "a column"
-                if axis == 1:
-                    level_name, index_name = index_name, level_name
-                raise ValueError(
-                    f"{k} is both {level_name} level and {index_name} label, which is ambiguous."
-                )
+        # We don't need to check if `by` is a Series or Index, since those
+        # won't be referencing labels
+        if not isinstance(by, (pandas.Series, Series, pandas.Index)):
+            _by_check = by if is_list_like(by) else [by]
+            for k in _by_check:
+                if not isinstance(k, (Series, pandas.Series, pandas.Index)):
+                    if k in self.index.names and k in self.axes[axis ^ 1]:
+                        level_name, index_name = "an index", "a column"
+                        if axis == 1:
+                            level_name, index_name = index_name, level_name
+                        raise ValueError(
+                            f"{k} is both {level_name} level and {index_name} label, which is ambiguous."
+                        )
         if (
-            not isinstance(by, (pandas.Series, Series))
+            not isinstance(by, (pandas.Series, Series, pandas.Index))
             and is_list_like(by)
             and len(by) == 1
         ):
@@ -452,6 +456,11 @@ def groupby(
                 level, by = by, None
             elif level is None:
                 by = self.__getitem__(by)._query_compiler
+        elif isinstance(by, (pandas.Series, pandas.Index)):
+            if isinstance(by, pandas.Index) and len(by) != len(self.axes[axis]):
+                raise ValueError("Grouper and axis must be same length")
+            idx_name = by.name
+            by = Series(by)._query_compiler
         elif isinstance(by, Series):
             drop = by._parent is self
             idx_name = by.name
diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
index d7f7445e053..8cc518a3ec3 100644
--- a/modin/pandas/test/test_groupby.py
+++ b/modin/pandas/test/test_groupby.py
@@ -2076,15 +2076,16 @@ def test_by_in_index_and_columns():
         modin_df,
         pandas_df,
         lambda df: df.groupby(by="a").count(),
-        raising_exceptions=True,
-        check_exception_type=True,
     )
     eval_general(
         modin_df,
         pandas_df,
         lambda df: df.groupby(by=["a", "b"]).count(),
-        raising_exceptions=True,
-        check_exception_type=True,
+    )
+    eval_general(
+        modin_df,
+        pandas_df,
+        lambda df: df.groupby(by=[df["b"], "a"]).count(),
     )
     pandas_df = pandas.DataFrame(
         [[1, 2, 3]], index=pd.Index([(0, 1)], names=["a", "b"]), columns=["a", "b", "c"]
@@ -2094,20 +2095,53 @@ def test_by_in_index_and_columns():
         modin_df,
         pandas_df,
         lambda df: df.groupby(by="a").count(),
-        raising_exceptions=True,
-        check_exception_type=True,
     )
     eval_general(
         modin_df,
         pandas_df,
         lambda df: df.groupby(by=["a", "c"]).count(),
-        raising_exceptions=True,
-        check_exception_type=True,
     )
     eval_general(
         modin_df,
         pandas_df,
         lambda df: df.groupby(by=["a", "b"]).count(),
-        raising_exceptions=True,
-        check_exception_type=True,
+    )
+
+
+def test_by_series():
+    pandas_df = pandas.DataFrame(
+        [[1, 2, 3]], index=pd.Index([0], name="a"), columns=["a", "b", "c"]
+    )
+    modin_df = from_pandas(pandas_df)
+
+    def make_appropriately_typed_series(df, values=["a"]):
+        """Return a Series from either pandas or modin.pandas depending on type of `df`."""
+        if isinstance(df, pd.DataFrame):
+            return pd.Series(values)
+        return pandas.Series(values)
+
+    eval_general(
+        modin_df,
+        pandas_df,
+        lambda df: df.groupby(by=make_appropriately_typed_series(df)).count(),
+    )
+    eval_general(
+        modin_df,
+        pandas_df,
+        lambda df: df.groupby(
+            by=make_appropriately_typed_series(df, ["a", "b"])
+        ).count(),
+    )
+
+
+def test_by_index():
+    pandas_df = pandas.DataFrame(
+        [[1, 2, 3]], index=pd.Index([0], name="a"), columns=["a", "b", "c"]
+    )
+    modin_df = from_pandas(pandas_df)
+    eval_general(modin_df, pandas_df, lambda df: df.groupby(by=pd.Index(["a"])).count())
+    eval_general(
+        modin_df,
+        pandas_df,
+        lambda df: df.groupby(by=pd.Index(["a", "b"])).count(),
     )

From 639a201ecaa7bc17454cdab178091f82b46dae20 Mon Sep 17 00:00:00 2001
From: Rehan Durrani <rehan@ponder.io>
Date: Tue, 7 Jun 2022 14:12:26 -0700
Subject: [PATCH 11/13] update test

Signed-off-by: Rehan Durrani <rehan@ponder.io>
---
 modin/pandas/test/test_groupby.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
index 8cc518a3ec3..5dc438c79d3 100644
--- a/modin/pandas/test/test_groupby.py
+++ b/modin/pandas/test/test_groupby.py
@@ -2113,6 +2113,16 @@ def test_by_series():
         [[1, 2, 3]], index=pd.Index([0], name="a"), columns=["a", "b", "c"]
     )
     modin_df = from_pandas(pandas_df)
+    eval_general(
+        modin_df,
+        pandas_df,
+        lambda df: df.groupby(by=pandas.Series(["a"])).count(),
+    )
+    eval_general(
+        modin_df,
+        pandas_df,
+        lambda df: df.groupby(by=pandas.Series(["a", "b"])).count(),
+    )
 
     def make_appropriately_typed_series(df, values=["a"]):
         """Return a Series from either pandas or modin.pandas depending on type of `df`."""

From d2c729543537c20db0c302b7e2c19826ae7dadfc Mon Sep 17 00:00:00 2001
From: Rehan Durrani <rehan@ponder.io>
Date: Tue, 7 Jun 2022 18:34:25 -0700
Subject: [PATCH 12/13] Fix value counts to call groupby with appropriate
 arguments

Signed-off-by: Rehan Durrani <rehan@ponder.io>
---
 modin/pandas/base.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 30f43189eac..c220b7291cb 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -3461,7 +3461,9 @@ def value_counts(
         Count unique values in the `BasePandasDataset`.
         """
         if subset is None:
-            subset = self._query_compiler.columns
+            # Need to get column names as array rather than as Index, since `groupby` does not
+            # treat `Index` arguments to `by` as a list of labels.
+            subset = self._query_compiler.columns.values
         counted_values = self.groupby(by=subset, dropna=dropna, observed=True).size()
         if sort:
             counted_values.sort_values(ascending=ascending, inplace=True)

From 55ec3a1e54590366d6313ecca87087d7ae8ac3eb Mon Sep 17 00:00:00 2001
From: Rehan Durrani <rehan@ponder.io>
Date: Tue, 7 Jun 2022 18:41:53 -0700
Subject: [PATCH 13/13] Fix naming

Signed-off-by: Rehan Durrani <rehan@ponder.io>
---
 modin/pandas/dataframe.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 4fa2ecdfb3b..887ac139af3 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -428,8 +428,8 @@ def groupby(
         # We don't need to check if `by` is a Series or Index, since those
         # won't be referencing labels
         if not isinstance(by, (pandas.Series, Series, pandas.Index)):
-            _by_check = by if is_list_like(by) else [by]
-            for k in _by_check:
+            _by_list = by if is_list_like(by) else [by]
+            for k in _by_list:
                 if not isinstance(k, (Series, pandas.Series, pandas.Index)):
                     if k in self.index.names and k in self.axes[axis ^ 1]:
                         level_name, index_name = "an index", "a column"