From 853387804708c314ddd1bc074ccb6df6d92af736 Mon Sep 17 00:00:00 2001
From: Krasen Samardzhiev <krasensam@gmail.com>
Date: Thu, 20 Jun 2024 17:56:14 +0200
Subject: [PATCH] Remove result_index parameter from describe_agg

---
 momepy/functional/_diversity.py           | 23 ++-------
 momepy/functional/tests/test_diversity.py | 57 +++++++++--------------
 2 files changed, 26 insertions(+), 54 deletions(-)

diff --git a/momepy/functional/_diversity.py b/momepy/functional/_diversity.py
index 4a0202c0..2ad8bd8d 100644
--- a/momepy/functional/_diversity.py
+++ b/momepy/functional/_diversity.py
@@ -82,7 +82,6 @@ def _percentile_limited_group_grouper(y, group_index, q=(25, 75)):
 def describe_agg(
     y: NDArray[np.float64] | Series,
     aggregation_key: NDArray[np.float64] | Series,
-    result_index: pd.Index | None = None,
     q: tuple[float, float] | list[float] | None = None,
     statistics: list[str] | None = None,
 ) -> DataFrame:
@@ -96,8 +95,6 @@ def describe_agg(
 
     Notes
     -----
-    The index of ``y`` must match the index along which the ``graph`` is
-    built.
 
     The numba package is used extensively in this function to accelerate the computation
     of statistics. Without numba, these computations may become slow on large data.
@@ -109,10 +106,6 @@ def describe_agg(
     aggregation_key : Series | numpy.array
         The unique ID that specifies the aggregation
         of ``y`` objects to groups.
-    result_index : pd.Index (default None)
-        An index that specifies how to order the results.
-        Use to align the results from the grouping to an external index.
-        If ``None`` the index from the computations is used.
     q : tuple[float, float] | None, optional
         Tuple of percentages for the percentiles to compute. Values must be between 0
         and 100 inclusive. When set, values below and above the percentiles will be
@@ -188,21 +181,11 @@ def describe_agg(
 
     stats = _compute_stats(grouper, to_compute=statistics)
 
-    if result_index is None:
-        result_index = stats.index
-
-    # post processing to have the same behaviour as describe_reached_agg
-    result = pd.DataFrame(
-        np.full((result_index.shape[0], stats.shape[1]), np.nan), index=result_index
-    )
-    result.loc[stats.index.values] = stats.values
-    result.columns = stats.columns
     # fill only counts with zeros, other stats are NA
-    if "count" in result.columns:
-        result.loc[:, "count"] = result.loc[:, "count"].fillna(0)
-    result.index.names = result_index.names
+    if "count" in stats.columns:
+        stats.loc[:, "count"] = stats.loc[:, "count"].fillna(0)
 
-    return result
+    return stats
 
 
 def describe_reached_agg(
diff --git a/momepy/functional/tests/test_diversity.py b/momepy/functional/tests/test_diversity.py
index 5e08999b..8d53b939 100644
--- a/momepy/functional/tests/test_diversity.py
+++ b/momepy/functional/tests/test_diversity.py
@@ -412,14 +412,9 @@ def test_describe_agg(self):
         df = mm.describe_agg(
             self.df_buildings["area"],
             self.df_buildings["nID"],
-            self.df_streets.index,
-        )
-
-        df_noindex = mm.describe_agg(
-            self.df_buildings["area"],
-            self.df_buildings["nID"],
         )
 
+        result_index = self.df_buildings["nID"].value_counts().sort_index()
         # not testing std, there are different implementations:
         # OO momepy uses ddof=0, functional momepy - ddof=1
         expected_area_sum = {
@@ -435,17 +430,14 @@ def test_describe_agg(self):
             "mean": 746.7028417890866,
         }
         expected_area_count = {
-            "min": 0,
+            "min": 1,
             "max": 18,
-            "count": 35,
-            "mean": 4.114285714285714,
+            "count": 22,
+            "mean": 6.545454545454546,
         }
-        assert_result(df["count"], expected_area_count, self.df_streets)
-        assert_result(df["sum"], expected_area_sum, self.df_streets)
-        assert_result(df["mean"], expected_area_mean, self.df_streets)
-
-        assert df_noindex.shape[0] == 22
-        assert_frame_equal(df_noindex, df[df["sum"].notna()], check_names=False)
+        assert_result(df["count"], expected_area_count, result_index, check_names=False)
+        assert_result(df["sum"], expected_area_sum, result_index, check_names=False)
+        assert_result(df["mean"], expected_area_mean, result_index, check_names=False)
 
         filtered_counts = mm.describe_agg(
             self.df_buildings["area"],
@@ -459,12 +451,16 @@ def test_describe_agg(self):
             "count": 22,
             "mean": 4.727272,
         }
-        assert_result(filtered_counts, expected_filtered_area_count, df_noindex)
+        assert_result(
+            filtered_counts,
+            expected_filtered_area_count,
+            result_index,
+            check_names=False,
+        )
 
         df = mm.describe_agg(
             self.df_buildings["fl_area"].values,
             self.df_buildings["nID"],
-            self.df_streets.index,
         )
 
         expected_fl_area_sum = {
@@ -479,15 +475,10 @@ def test_describe_agg(self):
             "count": 22,
             "mean": 3995.8307750062318,
         }
-        expected_fl_area_count = {
-            "min": 0,
-            "max": 18,
-            "count": 35,
-            "mean": 4.114285714285714,
-        }
-        assert_result(df["count"], expected_fl_area_count, self.df_streets)
-        assert_result(df["sum"], expected_fl_area_sum, self.df_streets)
-        assert_result(df["mean"], expected_fl_area_mean, self.df_streets)
+
+        assert_result(df["count"], expected_area_count, result_index)
+        assert_result(df["sum"], expected_fl_area_sum, result_index)
+        assert_result(df["mean"], expected_fl_area_mean, result_index)
 
     @pytest.mark.skipif(
         not PD_210, reason="aggregation is different in previous pandas versions"
@@ -496,7 +487,6 @@ def test_describe_cols(self):
         df = mm.describe_agg(
             self.df_buildings["area"],
             self.df_buildings["nID"],
-            self.df_streets.index,
             statistics=["min", "max"],
         )
         assert list(df.columns) == ["min", "max"]
@@ -538,13 +528,12 @@ def test_describe_reached_agg(self):
     )
     def test_describe_reached_input_equality(self):
         island_result_df = mm.describe_agg(
-            self.df_buildings["area"], self.df_buildings["nID"], self.df_streets.index
+            self.df_buildings["area"], self.df_buildings["nID"]
         )
 
         island_result_ndarray = mm.describe_agg(
             self.df_buildings["area"].values,
             self.df_buildings["nID"].values,
-            self.df_streets.index,
         )
 
         assert np.allclose(
@@ -574,11 +563,10 @@ def test_na_results(self):
         pandas_agg_vals = mm.describe_agg(
             nan_areas,
             self.df_buildings["nID"],
-            self.df_streets.index,
         )
 
         numba_agg_vals = mm.describe_agg(
-            nan_areas, self.df_buildings["nID"], self.df_streets.index, q=(0, 100)
+            nan_areas, self.df_buildings["nID"], q=(0, 100)
         )
 
         assert_frame_equal(pandas_agg_vals, numba_agg_vals)
@@ -849,24 +837,25 @@ def _distance_decay_weights(group):
         not PD_210, reason="aggregation is different in previous pandas versions"
     )
     def test_describe_reached_equality(self):
-        new_df = mm.describe_agg(
-            self.df_buildings["area"], self.df_buildings["nID"], self.df_streets.index
-        )
+        new_df = mm.describe_agg(self.df_buildings["area"], self.df_buildings["nID"])
 
         new_count = new_df["count"]
         old_count = mm.Reached(self.df_streets, self.df_buildings, "nID", "nID").series
+        old_count = old_count[old_count > 0]
         assert_series_equal(new_count, old_count, check_names=False, check_dtype=False)
 
         new_area = new_df["sum"]
         old_area = mm.Reached(
             self.df_streets, self.df_buildings, "nID", "nID", mode="sum"
         ).series
+        old_area = old_area[old_area.notna()]
         assert_series_equal(new_area, old_area, check_names=False, check_dtype=False)
 
         new_area_mean = new_df["mean"]
         old_area_mean = mm.Reached(
             self.df_streets, self.df_buildings, "nID", "nID", mode="mean"
         ).series
+        old_area_mean = old_area_mean[old_area_mean.notna()]
         assert_series_equal(
             new_area_mean, old_area_mean, check_names=False, check_dtype=False
         )