Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

REF: remove result_index attribute from describe_agg #626

Merged
merged 2 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 3 additions & 20 deletions momepy/functional/_diversity.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ def _percentile_limited_group_grouper(y, group_index, q=(25, 75)):
def describe_agg(
y: NDArray[np.float64] | Series,
aggregation_key: NDArray[np.float64] | Series,
result_index: pd.Index | None = None,
q: tuple[float, float] | list[float] | None = None,
statistics: list[str] | None = None,
) -> DataFrame:
Expand All @@ -96,8 +95,6 @@ def describe_agg(

Notes
-----
The index of ``y`` must match the index along which the ``graph`` is
built.

The numba package is used extensively in this function to accelerate the computation
of statistics. Without numba, these computations may become slow on large data.
Expand All @@ -109,10 +106,6 @@ def describe_agg(
aggregation_key : Series | numpy.array
The unique ID that specifies the aggregation
of ``y`` objects to groups.
result_index : pd.Index (default None)
An index that specifies how to order the results.
Use to align the results from the grouping to an external index.
If ``None`` the index from the computations is used.
q : tuple[float, float] | None, optional
Tuple of percentages for the percentiles to compute. Values must be between 0
and 100 inclusive. When set, values below and above the percentiles will be
Expand Down Expand Up @@ -188,21 +181,11 @@ def describe_agg(

stats = _compute_stats(grouper, to_compute=statistics)

if result_index is None:
result_index = stats.index

# post processing to have the same behaviour as describe_reached_agg
result = pd.DataFrame(
np.full((result_index.shape[0], stats.shape[1]), np.nan), index=result_index
)
result.loc[stats.index.values] = stats.values
result.columns = stats.columns
# fill only counts with zeros, other stats are NA
if "count" in result.columns:
result.loc[:, "count"] = result.loc[:, "count"].fillna(0)
result.index.names = result_index.names
if "count" in stats.columns:
stats.loc[:, "count"] = stats.loc[:, "count"].fillna(0)

return result
return stats


def describe_reached_agg(
Expand Down
57 changes: 23 additions & 34 deletions momepy/functional/tests/test_diversity.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,14 +412,9 @@ def test_describe_agg(self):
df = mm.describe_agg(
self.df_buildings["area"],
self.df_buildings["nID"],
self.df_streets.index,
)

df_noindex = mm.describe_agg(
self.df_buildings["area"],
self.df_buildings["nID"],
)

result_index = self.df_buildings["nID"].value_counts().sort_index()
# not testing std, there are different implementations:
# OO momepy uses ddof=0, functional momepy - ddof=1
expected_area_sum = {
Expand All @@ -435,17 +430,14 @@ def test_describe_agg(self):
"mean": 746.7028417890866,
}
expected_area_count = {
"min": 0,
"min": 1,
"max": 18,
"count": 35,
"mean": 4.114285714285714,
"count": 22,
"mean": 6.545454545454546,
}
assert_result(df["count"], expected_area_count, self.df_streets)
assert_result(df["sum"], expected_area_sum, self.df_streets)
assert_result(df["mean"], expected_area_mean, self.df_streets)

assert df_noindex.shape[0] == 22
assert_frame_equal(df_noindex, df[df["sum"].notna()], check_names=False)
assert_result(df["count"], expected_area_count, result_index, check_names=False)
assert_result(df["sum"], expected_area_sum, result_index, check_names=False)
assert_result(df["mean"], expected_area_mean, result_index, check_names=False)

filtered_counts = mm.describe_agg(
self.df_buildings["area"],
Expand All @@ -459,12 +451,16 @@ def test_describe_agg(self):
"count": 22,
"mean": 4.727272,
}
assert_result(filtered_counts, expected_filtered_area_count, df_noindex)
assert_result(
filtered_counts,
expected_filtered_area_count,
result_index,
check_names=False,
)

df = mm.describe_agg(
self.df_buildings["fl_area"].values,
self.df_buildings["nID"],
self.df_streets.index,
)

expected_fl_area_sum = {
Expand All @@ -479,15 +475,10 @@ def test_describe_agg(self):
"count": 22,
"mean": 3995.8307750062318,
}
expected_fl_area_count = {
"min": 0,
"max": 18,
"count": 35,
"mean": 4.114285714285714,
}
assert_result(df["count"], expected_fl_area_count, self.df_streets)
assert_result(df["sum"], expected_fl_area_sum, self.df_streets)
assert_result(df["mean"], expected_fl_area_mean, self.df_streets)

assert_result(df["count"], expected_area_count, result_index)
assert_result(df["sum"], expected_fl_area_sum, result_index)
assert_result(df["mean"], expected_fl_area_mean, result_index)

@pytest.mark.skipif(
not PD_210, reason="aggregation is different in previous pandas versions"
Expand All @@ -496,7 +487,6 @@ def test_describe_cols(self):
df = mm.describe_agg(
self.df_buildings["area"],
self.df_buildings["nID"],
self.df_streets.index,
statistics=["min", "max"],
)
assert list(df.columns) == ["min", "max"]
Expand Down Expand Up @@ -538,13 +528,12 @@ def test_describe_reached_agg(self):
)
def test_describe_reached_input_equality(self):
island_result_df = mm.describe_agg(
self.df_buildings["area"], self.df_buildings["nID"], self.df_streets.index
self.df_buildings["area"], self.df_buildings["nID"]
)

island_result_ndarray = mm.describe_agg(
self.df_buildings["area"].values,
self.df_buildings["nID"].values,
self.df_streets.index,
)

assert np.allclose(
Expand Down Expand Up @@ -574,11 +563,10 @@ def test_na_results(self):
pandas_agg_vals = mm.describe_agg(
nan_areas,
self.df_buildings["nID"],
self.df_streets.index,
)

numba_agg_vals = mm.describe_agg(
nan_areas, self.df_buildings["nID"], self.df_streets.index, q=(0, 100)
nan_areas, self.df_buildings["nID"], q=(0, 100)
)

assert_frame_equal(pandas_agg_vals, numba_agg_vals)
Expand Down Expand Up @@ -849,24 +837,25 @@ def _distance_decay_weights(group):
not PD_210, reason="aggregation is different in previous pandas versions"
)
def test_describe_reached_equality(self):
new_df = mm.describe_agg(
self.df_buildings["area"], self.df_buildings["nID"], self.df_streets.index
)
new_df = mm.describe_agg(self.df_buildings["area"], self.df_buildings["nID"])

new_count = new_df["count"]
old_count = mm.Reached(self.df_streets, self.df_buildings, "nID", "nID").series
old_count = old_count[old_count > 0]
assert_series_equal(new_count, old_count, check_names=False, check_dtype=False)

new_area = new_df["sum"]
old_area = mm.Reached(
self.df_streets, self.df_buildings, "nID", "nID", mode="sum"
).series
old_area = old_area[old_area.notna()]
assert_series_equal(new_area, old_area, check_names=False, check_dtype=False)

new_area_mean = new_df["mean"]
old_area_mean = mm.Reached(
self.df_streets, self.df_buildings, "nID", "nID", mode="mean"
).series
old_area_mean = old_area_mean[old_area_mean.notna()]
assert_series_equal(
new_area_mean, old_area_mean, check_names=False, check_dtype=False
)
Expand Down
64 changes: 51 additions & 13 deletions momepy/functional/tests/test_intensity.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,8 @@ def test_node_density(self):
not PD_210, reason="aggregation is different in previous pandas versions"
)
def test_area_ratio(self):
## change to describe_agg when merged

def area_ratio(overlay, covering, agg_key):
res = mm.describe_agg(covering, agg_key, overlay.index)
res = mm.describe_agg(covering, agg_key)
return res["sum"] / overlay

car_block = area_ratio(
Expand All @@ -103,7 +101,9 @@ def area_ratio(overlay, covering, agg_key):
"count": 8,
}

assert_result(car_block, car_block_expected, self.blocks)
assert_result(
car_block, car_block_expected, self.blocks, exact=False, check_names=False
)

car = area_ratio(
self.df_tessellation.geometry.area,
Expand All @@ -122,8 +122,16 @@ def area_ratio(overlay, covering, agg_key):
"min": 0.029097983413141276,
"count": 144,
}
assert_result(car, car_expected, self.df_tessellation)
assert_result(car2, car_expected, self.df_tessellation.set_index("uID"))
assert_result(
car, car_expected, self.df_tessellation, exact=False, check_names=False
)
assert_result(
car2,
car_expected,
self.df_tessellation.set_index("uID"),
exact=False,
check_names=False,
)

car_sel = area_ratio(
self.df_tessellation.iloc[10:20]["area"],
Expand All @@ -136,7 +144,13 @@ def area_ratio(overlay, covering, agg_key):
"min": 0.22057633949526625,
"count": 10,
}
assert_result(car_sel, car_sel_expected, self.df_tessellation.iloc[10:20])
assert_result(
car_sel,
car_sel_expected,
self.df_tessellation.iloc[10:20],
exact=False,
check_names=False,
)

far = area_ratio(
self.df_tessellation.geometry.area,
Expand All @@ -149,7 +163,9 @@ def area_ratio(overlay, covering, agg_key):
"min": 0.26188185071827147,
"count": 144,
}
assert_result(far, far_expected, self.df_tessellation)
assert_result(
far, far_expected, self.df_tessellation, exact=False, check_names=False
)


class TestIntensityEquality:
Expand Down Expand Up @@ -189,7 +205,7 @@ def test_courtyards(self):
)
def test_area_ratio(self):
def area_ratio(overlay, covering, agg_key):
res = mm.describe_agg(covering, agg_key, overlay.index)
res = mm.describe_agg(covering, agg_key)
return res["sum"] / overlay

self.blocks["area"] = self.blocks.geometry.area
Expand All @@ -202,7 +218,11 @@ def area_ratio(overlay, covering, agg_key):
self.blocks, self.df_buildings, "area", "area", "bID"
).series
assert_series_equal(
car_block_new, car_block_old, check_dtype=False, check_names=False
car_block_new,
car_block_old,
check_dtype=False,
check_names=False,
check_index_type=False,
)

car_new = area_ratio(
Expand All @@ -218,7 +238,13 @@ def area_ratio(overlay, covering, agg_key):
car_old = mm.AreaRatio(
self.df_tessellation, self.df_buildings, "area", "area", "uID"
).series
assert_series_equal(car_new, car_old, check_dtype=False, check_names=False)
assert_series_equal(
car_new,
car_old,
check_dtype=False,
check_names=False,
check_index_type=False,
)
assert_series_equal(
car_old,
car2_new.reset_index(drop=True),
Expand All @@ -236,7 +262,13 @@ def area_ratio(overlay, covering, agg_key):
self.df_tessellation.iloc[10:20]["uID"] - 1,
)

assert_series_equal(car_sel_new, car_sel, check_dtype=False, check_names=False)
assert_series_equal(
car_sel_new,
car_sel,
check_dtype=False,
check_index_type=False,
check_names=False,
)

far_new = area_ratio(
self.df_tessellation.geometry.area,
Expand All @@ -252,7 +284,13 @@ def area_ratio(overlay, covering, agg_key):
"uID",
).series

assert_series_equal(far_new, far_old, check_dtype=False, check_names=False)
assert_series_equal(
far_new,
far_old,
check_index_type=False,
check_dtype=False,
check_names=False,
)

def test_density(self):
sw = mm.sw_high(k=3, gdf=self.df_tessellation, ids="uID")
Expand Down
Loading