pysal · martinfleis · Jun 20, 2024 · Jun 20, 2024 · Jun 20, 2024
diff --git a/momepy/functional/_diversity.py b/momepy/functional/_diversity.py
@@ -82,7 +82,6 @@ def _percentile_limited_group_grouper(y, group_index, q=(25, 75)):
 def describe_agg(
     y: NDArray[np.float64] | Series,
     aggregation_key: NDArray[np.float64] | Series,
-    result_index: pd.Index | None = None,
     q: tuple[float, float] | list[float] | None = None,
     statistics: list[str] | None = None,
 ) -> DataFrame:
@@ -96,8 +95,6 @@ def describe_agg(
 
     Notes
     -----
-    The index of ``y`` must match the index along which the ``graph`` is
-    built.
 
     The numba package is used extensively in this function to accelerate the computation
     of statistics. Without numba, these computations may become slow on large data.
@@ -109,10 +106,6 @@ def describe_agg(
     aggregation_key : Series | numpy.array
         The unique ID that specifies the aggregation
         of ``y`` objects to groups.
-    result_index : pd.Index (default None)
-        An index that specifies how to order the results.
-        Use to align the results from the grouping to an external index.
-        If ``None`` the index from the computations is used.
     q : tuple[float, float] | None, optional
         Tuple of percentages for the percentiles to compute. Values must be between 0
         and 100 inclusive. When set, values below and above the percentiles will be
@@ -188,21 +181,11 @@ def describe_agg(
 
     stats = _compute_stats(grouper, to_compute=statistics)
 
-    if result_index is None:
-        result_index = stats.index
-
-    # post processing to have the same behaviour as describe_reached_agg
-    result = pd.DataFrame(
-        np.full((result_index.shape[0], stats.shape[1]), np.nan), index=result_index
-    )
-    result.loc[stats.index.values] = stats.values
-    result.columns = stats.columns
     # fill only counts with zeros, other stats are NA
-    if "count" in result.columns:
-        result.loc[:, "count"] = result.loc[:, "count"].fillna(0)
-    result.index.names = result_index.names
+    if "count" in stats.columns:
+        stats.loc[:, "count"] = stats.loc[:, "count"].fillna(0)
 
-    return result
+    return stats
 
 
 def describe_reached_agg(

diff --git a/momepy/functional/tests/test_diversity.py b/momepy/functional/tests/test_diversity.py
@@ -412,14 +412,9 @@ def test_describe_agg(self):
         df = mm.describe_agg(
             self.df_buildings["area"],
             self.df_buildings["nID"],
-            self.df_streets.index,
-        )
-
-        df_noindex = mm.describe_agg(
-            self.df_buildings["area"],
-            self.df_buildings["nID"],
         )
 
+        result_index = self.df_buildings["nID"].value_counts().sort_index()
         # not testing std, there are different implementations:
         # OO momepy uses ddof=0, functional momepy - ddof=1
         expected_area_sum = {
@@ -435,17 +430,14 @@ def test_describe_agg(self):
             "mean": 746.7028417890866,
         }
         expected_area_count = {
-            "min": 0,
+            "min": 1,
             "max": 18,
-            "count": 35,
-            "mean": 4.114285714285714,
+            "count": 22,
+            "mean": 6.545454545454546,
         }
-        assert_result(df["count"], expected_area_count, self.df_streets)
-        assert_result(df["sum"], expected_area_sum, self.df_streets)
-        assert_result(df["mean"], expected_area_mean, self.df_streets)
-
-        assert df_noindex.shape[0] == 22
-        assert_frame_equal(df_noindex, df[df["sum"].notna()], check_names=False)
+        assert_result(df["count"], expected_area_count, result_index, check_names=False)
+        assert_result(df["sum"], expected_area_sum, result_index, check_names=False)
+        assert_result(df["mean"], expected_area_mean, result_index, check_names=False)
 
         filtered_counts = mm.describe_agg(
             self.df_buildings["area"],
@@ -459,12 +451,16 @@ def test_describe_agg(self):
             "count": 22,
             "mean": 4.727272,
         }
-        assert_result(filtered_counts, expected_filtered_area_count, df_noindex)
+        assert_result(
+            filtered_counts,
+            expected_filtered_area_count,
+            result_index,
+            check_names=False,
+        )
 
         df = mm.describe_agg(
             self.df_buildings["fl_area"].values,
             self.df_buildings["nID"],
-            self.df_streets.index,
         )
 
         expected_fl_area_sum = {
@@ -479,15 +475,10 @@ def test_describe_agg(self):
             "count": 22,
             "mean": 3995.8307750062318,
         }
-        expected_fl_area_count = {
-            "min": 0,
-            "max": 18,
-            "count": 35,
-            "mean": 4.114285714285714,
-        }
-        assert_result(df["count"], expected_fl_area_count, self.df_streets)
-        assert_result(df["sum"], expected_fl_area_sum, self.df_streets)
-        assert_result(df["mean"], expected_fl_area_mean, self.df_streets)
+
+        assert_result(df["count"], expected_area_count, result_index)
+        assert_result(df["sum"], expected_fl_area_sum, result_index)
+        assert_result(df["mean"], expected_fl_area_mean, result_index)
 
     @pytest.mark.skipif(
         not PD_210, reason="aggregation is different in previous pandas versions"
@@ -496,7 +487,6 @@ def test_describe_cols(self):
         df = mm.describe_agg(
             self.df_buildings["area"],
             self.df_buildings["nID"],
-            self.df_streets.index,
             statistics=["min", "max"],
         )
         assert list(df.columns) == ["min", "max"]
@@ -538,13 +528,12 @@ def test_describe_reached_agg(self):
     )
     def test_describe_reached_input_equality(self):
         island_result_df = mm.describe_agg(
-            self.df_buildings["area"], self.df_buildings["nID"], self.df_streets.index
+            self.df_buildings["area"], self.df_buildings["nID"]
         )
 
         island_result_ndarray = mm.describe_agg(
             self.df_buildings["area"].values,
             self.df_buildings["nID"].values,
-            self.df_streets.index,
         )
 
         assert np.allclose(
@@ -574,11 +563,10 @@ def test_na_results(self):
         pandas_agg_vals = mm.describe_agg(
             nan_areas,
             self.df_buildings["nID"],
-            self.df_streets.index,
         )
 
         numba_agg_vals = mm.describe_agg(
-            nan_areas, self.df_buildings["nID"], self.df_streets.index, q=(0, 100)
+            nan_areas, self.df_buildings["nID"], q=(0, 100)
         )
 
         assert_frame_equal(pandas_agg_vals, numba_agg_vals)
@@ -849,24 +837,25 @@ def _distance_decay_weights(group):
         not PD_210, reason="aggregation is different in previous pandas versions"
     )
     def test_describe_reached_equality(self):
-        new_df = mm.describe_agg(
-            self.df_buildings["area"], self.df_buildings["nID"], self.df_streets.index
-        )
+        new_df = mm.describe_agg(self.df_buildings["area"], self.df_buildings["nID"])
 
         new_count = new_df["count"]
         old_count = mm.Reached(self.df_streets, self.df_buildings, "nID", "nID").series
+        old_count = old_count[old_count > 0]
         assert_series_equal(new_count, old_count, check_names=False, check_dtype=False)
 
         new_area = new_df["sum"]
         old_area = mm.Reached(
             self.df_streets, self.df_buildings, "nID", "nID", mode="sum"
         ).series
+        old_area = old_area[old_area.notna()]
         assert_series_equal(new_area, old_area, check_names=False, check_dtype=False)
 
         new_area_mean = new_df["mean"]
         old_area_mean = mm.Reached(
             self.df_streets, self.df_buildings, "nID", "nID", mode="mean"
         ).series
+        old_area_mean = old_area_mean[old_area_mean.notna()]
         assert_series_equal(
             new_area_mean, old_area_mean, check_names=False, check_dtype=False
         )

diff --git a/momepy/functional/tests/test_intensity.py b/momepy/functional/tests/test_intensity.py
@@ -85,10 +85,8 @@ def test_node_density(self):
         not PD_210, reason="aggregation is different in previous pandas versions"
     )
     def test_area_ratio(self):
-        ## change to describe_agg when merged
-
         def area_ratio(overlay, covering, agg_key):
-            res = mm.describe_agg(covering, agg_key, overlay.index)
+            res = mm.describe_agg(covering, agg_key)
             return res["sum"] / overlay
 
         car_block = area_ratio(
@@ -103,7 +101,9 @@ def area_ratio(overlay, covering, agg_key):
             "count": 8,
         }
 
-        assert_result(car_block, car_block_expected, self.blocks)
+        assert_result(
+            car_block, car_block_expected, self.blocks, exact=False, check_names=False
+        )
 
         car = area_ratio(
             self.df_tessellation.geometry.area,
@@ -122,8 +122,16 @@ def area_ratio(overlay, covering, agg_key):
             "min": 0.029097983413141276,
             "count": 144,
         }
-        assert_result(car, car_expected, self.df_tessellation)
-        assert_result(car2, car_expected, self.df_tessellation.set_index("uID"))
+        assert_result(
+            car, car_expected, self.df_tessellation, exact=False, check_names=False
+        )
+        assert_result(
+            car2,
+            car_expected,
+            self.df_tessellation.set_index("uID"),
+            exact=False,
+            check_names=False,
+        )
 
         car_sel = area_ratio(
             self.df_tessellation.iloc[10:20]["area"],
@@ -136,7 +144,13 @@ def area_ratio(overlay, covering, agg_key):
             "min": 0.22057633949526625,
             "count": 10,
         }
-        assert_result(car_sel, car_sel_expected, self.df_tessellation.iloc[10:20])
+        assert_result(
+            car_sel,
+            car_sel_expected,
+            self.df_tessellation.iloc[10:20],
+            exact=False,
+            check_names=False,
+        )
 
         far = area_ratio(
             self.df_tessellation.geometry.area,
@@ -149,7 +163,9 @@ def area_ratio(overlay, covering, agg_key):
             "min": 0.26188185071827147,
             "count": 144,
         }
-        assert_result(far, far_expected, self.df_tessellation)
+        assert_result(
+            far, far_expected, self.df_tessellation, exact=False, check_names=False
+        )
 
 
 class TestIntensityEquality:
@@ -189,7 +205,7 @@ def test_courtyards(self):
     )
     def test_area_ratio(self):
         def area_ratio(overlay, covering, agg_key):
-            res = mm.describe_agg(covering, agg_key, overlay.index)
+            res = mm.describe_agg(covering, agg_key)
             return res["sum"] / overlay
 
         self.blocks["area"] = self.blocks.geometry.area
@@ -202,7 +218,11 @@ def area_ratio(overlay, covering, agg_key):
             self.blocks, self.df_buildings, "area", "area", "bID"
         ).series
         assert_series_equal(
-            car_block_new, car_block_old, check_dtype=False, check_names=False
+            car_block_new,
+            car_block_old,
+            check_dtype=False,
+            check_names=False,
+            check_index_type=False,
         )
 
         car_new = area_ratio(
@@ -218,7 +238,13 @@ def area_ratio(overlay, covering, agg_key):
         car_old = mm.AreaRatio(
             self.df_tessellation, self.df_buildings, "area", "area", "uID"
         ).series
-        assert_series_equal(car_new, car_old, check_dtype=False, check_names=False)
+        assert_series_equal(
+            car_new,
+            car_old,
+            check_dtype=False,
+            check_names=False,
+            check_index_type=False,
+        )
         assert_series_equal(
             car_old,
             car2_new.reset_index(drop=True),
@@ -236,7 +262,13 @@ def area_ratio(overlay, covering, agg_key):
             self.df_tessellation.iloc[10:20]["uID"] - 1,
         )
 
-        assert_series_equal(car_sel_new, car_sel, check_dtype=False, check_names=False)
+        assert_series_equal(
+            car_sel_new,
+            car_sel,
+            check_dtype=False,
+            check_index_type=False,
+            check_names=False,
+        )
 
         far_new = area_ratio(
             self.df_tessellation.geometry.area,
@@ -252,7 +284,13 @@ def area_ratio(overlay, covering, agg_key):
             "uID",
         ).series
 
-        assert_series_equal(far_new, far_old, check_dtype=False, check_names=False)
+        assert_series_equal(
+            far_new,
+            far_old,
+            check_index_type=False,
+            check_dtype=False,
+            check_names=False,
+        )
 
     def test_density(self):
         sw = mm.sw_high(k=3, gdf=self.df_tessellation, ids="uID")