From 853387804708c314ddd1bc074ccb6df6d92af736 Mon Sep 17 00:00:00 2001 From: Krasen Samardzhiev Date: Thu, 20 Jun 2024 17:56:14 +0200 Subject: [PATCH] remove result_index --- momepy/functional/_diversity.py | 23 ++------- momepy/functional/tests/test_diversity.py | 57 +++++++++-------------- 2 files changed, 26 insertions(+), 54 deletions(-) diff --git a/momepy/functional/_diversity.py b/momepy/functional/_diversity.py index 4a0202c0..2ad8bd8d 100644 --- a/momepy/functional/_diversity.py +++ b/momepy/functional/_diversity.py @@ -82,7 +82,6 @@ def _percentile_limited_group_grouper(y, group_index, q=(25, 75)): def describe_agg( y: NDArray[np.float64] | Series, aggregation_key: NDArray[np.float64] | Series, - result_index: pd.Index | None = None, q: tuple[float, float] | list[float] | None = None, statistics: list[str] | None = None, ) -> DataFrame: @@ -96,8 +95,6 @@ def describe_agg( Notes ----- - The index of ``y`` must match the index along which the ``graph`` is - built. The numba package is used extensively in this function to accelerate the computation of statistics. Without numba, these computations may become slow on large data. @@ -109,10 +106,6 @@ def describe_agg( aggregation_key : Series | numpy.array The unique ID that specifies the aggregation of ``y`` objects to groups. - result_index : pd.Index (default None) - An index that specifies how to order the results. - Use to align the results from the grouping to an external index. - If ``None`` the index from the computations is used. q : tuple[float, float] | None, optional Tuple of percentages for the percentiles to compute. Values must be between 0 and 100 inclusive. When set, values below and above the percentiles will be @@ -188,21 +181,11 @@ def describe_agg( stats = _compute_stats(grouper, to_compute=statistics) - if result_index is None: - result_index = stats.index - - # post processing to have the same behaviour as describe_reached_agg - result = pd.DataFrame( - np.full((result_index.shape[0], stats.shape[1]), np.nan), index=result_index - ) - result.loc[stats.index.values] = stats.values - result.columns = stats.columns # fill only counts with zeros, other stats are NA - if "count" in result.columns: - result.loc[:, "count"] = result.loc[:, "count"].fillna(0) - result.index.names = result_index.names + if "count" in stats.columns: + stats.loc[:, "count"] = stats.loc[:, "count"].fillna(0) - return result + return stats def describe_reached_agg( diff --git a/momepy/functional/tests/test_diversity.py b/momepy/functional/tests/test_diversity.py index 5e08999b..8d53b939 100644 --- a/momepy/functional/tests/test_diversity.py +++ b/momepy/functional/tests/test_diversity.py @@ -412,14 +412,9 @@ def test_describe_agg(self): df = mm.describe_agg( self.df_buildings["area"], self.df_buildings["nID"], - self.df_streets.index, - ) - - df_noindex = mm.describe_agg( - self.df_buildings["area"], - self.df_buildings["nID"], ) + result_index = self.df_buildings["nID"].value_counts().sort_index() # not testing std, there are different implementations: # OO momepy uses ddof=0, functional momepy - ddof=1 expected_area_sum = { @@ -435,17 +430,14 @@ def test_describe_agg(self): "mean": 746.7028417890866, } expected_area_count = { - "min": 0, + "min": 1, "max": 18, - "count": 35, - "mean": 4.114285714285714, + "count": 22, + "mean": 6.545454545454546, } - assert_result(df["count"], expected_area_count, self.df_streets) - assert_result(df["sum"], expected_area_sum, self.df_streets) - assert_result(df["mean"], expected_area_mean, self.df_streets) - - assert df_noindex.shape[0] == 22 - assert_frame_equal(df_noindex, df[df["sum"].notna()], check_names=False) + assert_result(df["count"], expected_area_count, result_index, check_names=False) + assert_result(df["sum"], expected_area_sum, result_index, check_names=False) + assert_result(df["mean"], expected_area_mean, result_index, check_names=False) filtered_counts = mm.describe_agg( self.df_buildings["area"], @@ -459,12 +451,16 @@ def test_describe_agg(self): "count": 22, "mean": 4.727272, } - assert_result(filtered_counts, expected_filtered_area_count, df_noindex) + assert_result( + filtered_counts, + expected_filtered_area_count, + result_index, + check_names=False, + ) df = mm.describe_agg( self.df_buildings["fl_area"].values, self.df_buildings["nID"], - self.df_streets.index, ) expected_fl_area_sum = { @@ -479,15 +475,10 @@ def test_describe_agg(self): "count": 22, "mean": 3995.8307750062318, } - expected_fl_area_count = { - "min": 0, - "max": 18, - "count": 35, - "mean": 4.114285714285714, - } - assert_result(df["count"], expected_fl_area_count, self.df_streets) - assert_result(df["sum"], expected_fl_area_sum, self.df_streets) - assert_result(df["mean"], expected_fl_area_mean, self.df_streets) + + assert_result(df["count"], expected_area_count, result_index) + assert_result(df["sum"], expected_fl_area_sum, result_index) + assert_result(df["mean"], expected_fl_area_mean, result_index) @pytest.mark.skipif( not PD_210, reason="aggregation is different in previous pandas versions" @@ -496,7 +487,6 @@ def test_describe_cols(self): df = mm.describe_agg( self.df_buildings["area"], self.df_buildings["nID"], - self.df_streets.index, statistics=["min", "max"], ) assert list(df.columns) == ["min", "max"] @@ -538,13 +528,12 @@ def test_describe_reached_agg(self): ) def test_describe_reached_input_equality(self): island_result_df = mm.describe_agg( - self.df_buildings["area"], self.df_buildings["nID"], self.df_streets.index + self.df_buildings["area"], self.df_buildings["nID"] ) island_result_ndarray = mm.describe_agg( self.df_buildings["area"].values, self.df_buildings["nID"].values, - self.df_streets.index, ) assert np.allclose( @@ -574,11 +563,10 @@ def test_na_results(self): pandas_agg_vals = mm.describe_agg( nan_areas, self.df_buildings["nID"], - self.df_streets.index, ) numba_agg_vals = mm.describe_agg( - nan_areas, self.df_buildings["nID"], self.df_streets.index, q=(0, 100) + nan_areas, self.df_buildings["nID"], q=(0, 100) ) assert_frame_equal(pandas_agg_vals, numba_agg_vals) @@ -849,24 +837,25 @@ def _distance_decay_weights(group): not PD_210, reason="aggregation is different in previous pandas versions" ) def test_describe_reached_equality(self): - new_df = mm.describe_agg( - self.df_buildings["area"], self.df_buildings["nID"], self.df_streets.index - ) + new_df = mm.describe_agg(self.df_buildings["area"], self.df_buildings["nID"]) new_count = new_df["count"] old_count = mm.Reached(self.df_streets, self.df_buildings, "nID", "nID").series + old_count = old_count[old_count > 0] assert_series_equal(new_count, old_count, check_names=False, check_dtype=False) new_area = new_df["sum"] old_area = mm.Reached( self.df_streets, self.df_buildings, "nID", "nID", mode="sum" ).series + old_area = old_area[old_area.notna()] assert_series_equal(new_area, old_area, check_names=False, check_dtype=False) new_area_mean = new_df["mean"] old_area_mean = mm.Reached( self.df_streets, self.df_buildings, "nID", "nID", mode="mean" ).series + old_area_mean = old_area_mean[old_area_mean.notna()] assert_series_equal( new_area_mean, old_area_mean, check_names=False, check_dtype=False )