Skip to content

Commit

Permalink
remove result_index
Browse files: browse the repository at this point in the history
  • Loading branch information
u3ks committed Jun 20, 2024
1 parent 9a64a1a commit 8533878
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 54 deletions.
23 changes: 3 additions & 20 deletions momepy/functional/_diversity.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ def _percentile_limited_group_grouper(y, group_index, q=(25, 75)):
def describe_agg(
y: NDArray[np.float64] | Series,
aggregation_key: NDArray[np.float64] | Series,
result_index: pd.Index | None = None,
q: tuple[float, float] | list[float] | None = None,
statistics: list[str] | None = None,
) -> DataFrame:
Expand All @@ -96,8 +95,6 @@ def describe_agg(
Notes
-----
The index of ``y`` must match the index along which the ``graph`` is
built.
The numba package is used extensively in this function to accelerate the computation
of statistics. Without numba, these computations may become slow on large data.
Expand All @@ -109,10 +106,6 @@ def describe_agg(
aggregation_key : Series | numpy.array
The unique ID that specifies the aggregation
of ``y`` objects to groups.
result_index : pd.Index (default None)
An index that specifies how to order the results.
Use to align the results from the grouping to an external index.
If ``None`` the index from the computations is used.
q : tuple[float, float] | None, optional
Tuple of percentages for the percentiles to compute. Values must be between 0
and 100 inclusive. When set, values below and above the percentiles will be
Expand Down Expand Up @@ -188,21 +181,11 @@ def describe_agg(

stats = _compute_stats(grouper, to_compute=statistics)

if result_index is None:
result_index = stats.index

# post processing to have the same behaviour as describe_reached_agg
result = pd.DataFrame(
np.full((result_index.shape[0], stats.shape[1]), np.nan), index=result_index
)
result.loc[stats.index.values] = stats.values
result.columns = stats.columns
# fill only counts with zeros, other stats are NA
if "count" in result.columns:
result.loc[:, "count"] = result.loc[:, "count"].fillna(0)
result.index.names = result_index.names
if "count" in stats.columns:
stats.loc[:, "count"] = stats.loc[:, "count"].fillna(0)

return result
return stats


def describe_reached_agg(
Expand Down
57 changes: 23 additions & 34 deletions momepy/functional/tests/test_diversity.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,14 +412,9 @@ def test_describe_agg(self):
df = mm.describe_agg(
self.df_buildings["area"],
self.df_buildings["nID"],
self.df_streets.index,
)

df_noindex = mm.describe_agg(
self.df_buildings["area"],
self.df_buildings["nID"],
)

result_index = self.df_buildings["nID"].value_counts().sort_index()
# not testing std, there are different implementations:
# OO momepy uses ddof=0, functional momepy - ddof=1
expected_area_sum = {
Expand All @@ -435,17 +430,14 @@ def test_describe_agg(self):
"mean": 746.7028417890866,
}
expected_area_count = {
"min": 0,
"min": 1,
"max": 18,
"count": 35,
"mean": 4.114285714285714,
"count": 22,
"mean": 6.545454545454546,
}
assert_result(df["count"], expected_area_count, self.df_streets)
assert_result(df["sum"], expected_area_sum, self.df_streets)
assert_result(df["mean"], expected_area_mean, self.df_streets)

assert df_noindex.shape[0] == 22
assert_frame_equal(df_noindex, df[df["sum"].notna()], check_names=False)
assert_result(df["count"], expected_area_count, result_index, check_names=False)
assert_result(df["sum"], expected_area_sum, result_index, check_names=False)
assert_result(df["mean"], expected_area_mean, result_index, check_names=False)

filtered_counts = mm.describe_agg(
self.df_buildings["area"],
Expand All @@ -459,12 +451,16 @@ def test_describe_agg(self):
"count": 22,
"mean": 4.727272,
}
assert_result(filtered_counts, expected_filtered_area_count, df_noindex)
assert_result(
filtered_counts,
expected_filtered_area_count,
result_index,
check_names=False,
)

df = mm.describe_agg(
self.df_buildings["fl_area"].values,
self.df_buildings["nID"],
self.df_streets.index,
)

expected_fl_area_sum = {
Expand All @@ -479,15 +475,10 @@ def test_describe_agg(self):
"count": 22,
"mean": 3995.8307750062318,
}
expected_fl_area_count = {
"min": 0,
"max": 18,
"count": 35,
"mean": 4.114285714285714,
}
assert_result(df["count"], expected_fl_area_count, self.df_streets)
assert_result(df["sum"], expected_fl_area_sum, self.df_streets)
assert_result(df["mean"], expected_fl_area_mean, self.df_streets)

assert_result(df["count"], expected_area_count, result_index)
assert_result(df["sum"], expected_fl_area_sum, result_index)
assert_result(df["mean"], expected_fl_area_mean, result_index)

@pytest.mark.skipif(
not PD_210, reason="aggregation is different in previous pandas versions"
Expand All @@ -496,7 +487,6 @@ def test_describe_cols(self):
df = mm.describe_agg(
self.df_buildings["area"],
self.df_buildings["nID"],
self.df_streets.index,
statistics=["min", "max"],
)
assert list(df.columns) == ["min", "max"]
Expand Down Expand Up @@ -538,13 +528,12 @@ def test_describe_reached_agg(self):
)
def test_describe_reached_input_equality(self):
island_result_df = mm.describe_agg(
self.df_buildings["area"], self.df_buildings["nID"], self.df_streets.index
self.df_buildings["area"], self.df_buildings["nID"]
)

island_result_ndarray = mm.describe_agg(
self.df_buildings["area"].values,
self.df_buildings["nID"].values,
self.df_streets.index,
)

assert np.allclose(
Expand Down Expand Up @@ -574,11 +563,10 @@ def test_na_results(self):
pandas_agg_vals = mm.describe_agg(
nan_areas,
self.df_buildings["nID"],
self.df_streets.index,
)

numba_agg_vals = mm.describe_agg(
nan_areas, self.df_buildings["nID"], self.df_streets.index, q=(0, 100)
nan_areas, self.df_buildings["nID"], q=(0, 100)
)

assert_frame_equal(pandas_agg_vals, numba_agg_vals)
Expand Down Expand Up @@ -849,24 +837,25 @@ def _distance_decay_weights(group):
not PD_210, reason="aggregation is different in previous pandas versions"
)
def test_describe_reached_equality(self):
new_df = mm.describe_agg(
self.df_buildings["area"], self.df_buildings["nID"], self.df_streets.index
)
new_df = mm.describe_agg(self.df_buildings["area"], self.df_buildings["nID"])

new_count = new_df["count"]
old_count = mm.Reached(self.df_streets, self.df_buildings, "nID", "nID").series
old_count = old_count[old_count > 0]
assert_series_equal(new_count, old_count, check_names=False, check_dtype=False)

new_area = new_df["sum"]
old_area = mm.Reached(
self.df_streets, self.df_buildings, "nID", "nID", mode="sum"
).series
old_area = old_area[old_area.notna()]
assert_series_equal(new_area, old_area, check_names=False, check_dtype=False)

new_area_mean = new_df["mean"]
old_area_mean = mm.Reached(
self.df_streets, self.df_buildings, "nID", "nID", mode="mean"
).series
old_area_mean = old_area_mean[old_area_mean.notna()]
assert_series_equal(
new_area_mean, old_area_mean, check_names=False, check_dtype=False
)
Expand Down

0 comments on commit 8533878

Please sign in to comment.