refactor it all

pysal · Mar 8, 2024 · 46a967e · 46a967e
1 parent 5de36e8
commit 46a967e
Show file tree

Hide file tree

Showing 2 changed files with 75 additions and 62 deletions.
diff --git a/momepy/functional/_distribution.py b/momepy/functional/_distribution.py
@@ -1,14 +1,12 @@
-import warnings
-
 import geopandas as gpd
-import networkx as nx
 import numpy as np
 import pandas as pd
 import shapely
 from geopandas import GeoDataFrame, GeoSeries
 from libpysal.graph import Graph
 from packaging.version import Version
 from pandas import Series
+from scipy import sparse
 
 __all__ = [
     "orientation",
@@ -166,75 +164,76 @@ def neighbor_distance(geometry: GeoDataFrame | GeoSeries, graph: Graph) -> Serie
 
 
 def mean_interbuilding_distance(
-    geometry: GeoDataFrame | GeoSeries, graph: Graph, order: int = 3
+    geometry: GeoDataFrame | GeoSeries,
+    adjacency_graph: Graph,
+    neighborhood_graph: Graph,
 ) -> Series:
     """Calculate the mean distance between adjacent geometries within a set neighborhood
 
-    For each building, this function defines a neighborhood (ego graph) based on the
-    neighbors within a defined ``order`` of contigity along the graph. It then
-    calculates the mean distance between adjacent buildings within this neighborhood.
-    Typically, ``graph`` represents contiguity derived from tessellation cells or plots
-    linked to buildings.
+    For each building, this function takes a neighborhood based on the neighbors within
+    a ``neighborhood_graph`` and calculates the mean distance between adjacent buildings
+    within this neighborhood where adjacency is captured by ``adjacency_graph``.
 
     Notes
     -----
-    The index of ``geometry`` must match the index along which the ``graph`` is
+    The index of ``geometry`` must match the index along which both of the graphs are
     built.
 
     Parameters
     ----------
     geometry : GeoDataFrame | GeoSeries
         A GeoDataFrame or GeoSeries containing geometries to analyse.
-    graph : libpysal.graph.Graph
-        Graph representing spatial relationships between elements.
-    order : int
-        The order of contiguity defining the extent of the neighborhood.
+    adjacency_graph : libpysal.graph.Graph
+        Graph representing the adjacency of geometries. Typically, this is a contiguity
+        graph derived from tessellation cells linked to buildings.
+    neighborhood_graph : libpysal.graph.Graph
+        Graph representing the extent around each geometry within which to calculate
+        the mean interbuilding distance. This can be a distance based graph, KNN graph,
+        higher order contiguity, etc.
 
     Returns
     -------
     Series
     """
     distance = pd.Series(
         shapely.distance(
-            geometry.geometry.loc[graph._adjacency.index.get_level_values(0)].values,
-            geometry.geometry.loc[graph._adjacency.index.get_level_values(1)].values,
+            geometry.geometry.loc[
+                adjacency_graph._adjacency.index.get_level_values(0)
+            ].values,
+            geometry.geometry.loc[
+                adjacency_graph._adjacency.index.get_level_values(1)
+            ].values,
         ),
-        index=graph._adjacency.index,
+        index=adjacency_graph._adjacency.index,
         name="distance",
     )
 
-    nx_graph = nx.from_pandas_edgelist(
-        distance.reset_index(), source="focal", target="neighbor", edge_attr="distance"
+    distance_matrix = (
+        distance.astype("Sparse[float]").sparse.to_coo(sort_labels=True)[0].tocsr()
     )
+    neighborhood_matrix = sparse.coo_matrix(neighborhood_graph.sparse).tocsr()
 
-    results_list = []
-    for uid in geometry.index:
-        try:
-            sub = nx.ego_graph(nx_graph, uid, radius=order)
-            results_list.append(
-                np.mean(
-                    np.array([data["distance"] for _, _, data in sub.edges(data=True)])
-                )
-            )
-        # this may happen if the graph comes from tessellation thad does not fully match
-        except nx.NodeNotFound:
-            warnings.warn(
-                f"Geometry with the index {uid} not found in the graph.",
-                UserWarning,
-                stacklevel=2,
-            )
-            results_list.append(np.nan)
+    mean_distances = np.zeros(distance_matrix.shape[0], dtype=float)
+
+    for i in range(distance_matrix.shape[0]):
+        neighborhood_indices = np.append(neighborhood_matrix[i].indices, i)
+
+        if len(neighborhood_indices) == 0:
+            mean_distances[i] = np.nan
+
+        sub_matrix = distance_matrix[neighborhood_indices][:, neighborhood_indices]
+        mean_distances[i] = sub_matrix.sum() / sub_matrix.nnz
 
     return Series(
-        results_list, index=geometry.index, name="mean_interbuilding_distance"
+        mean_distances, index=geometry.index, name="mean_interbuilding_distance"
     )
-    # 57.4 s ± 1.57 s per loop (mean ± std. dev. of 7 runs, 1 loop each)
-    # 1min 2s ± 3.78 s per loop (mean ± std. dev. of 7 runs, 1 loop each)
+    # 35s new
+    # 57s old
 
 
 def building_adjacency(
-    neighborhood_graph: Graph,
     contiguity_graph: Graph,
+    neighborhood_graph: Graph,
 ) -> Series:
     """Calculate the level of building adjacency.
 
@@ -249,25 +248,22 @@ def building_adjacency(
     -----
     Both graphs must be built on the same index.
 
-    If you want to consider the geometry
-    part of its own neighborhood and include it in calculation, ensure you assign
-    self-weights to the ``contiguity_graph`` using
-    ``contiguity_graph.assign_self_weight()``.
-
     Parameters
     ----------
-    neighborhood_graph : Graph
+    contiguity_graph : libpysal.graph.Graph
+        Graph representing contiguity between geometries, typically a rook contiguity
+        graph derived from buildings.
+    neighborhood_graph : libpysal.graph.Graph
         Graph representing the extent around each geometry within which to calculate
         the level of building adjacency. This can be a distance based graph, KNN graph,
         higher order contiguity, etc.
-    contiguity_graph : Graph
-        Graph representing contiguity between geometries.
 
     Returns
     -------
     Series
     """
     components = contiguity_graph.component_labels
+    neighborhood_graph = neighborhood_graph.assign_self_weight()
 
     grouper = components.loc[
         neighborhood_graph._adjacency.index.get_level_values(1)
@@ -278,4 +274,6 @@ def building_adjacency(
     return result
 
     # old: 251 ms ± 14.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
-    # new: 57.3 ms ± 1.26 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
+    # new: 422 ms ± 12.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
+    #  of which 330 ms is assign_self_weight which will be used in other functions so
+    # I'm wondering if it is worth adding a keyword `self_weighted=True` to skip this.
diff --git a/momepy/functional/tests/test_distribution.py b/momepy/functional/tests/test_distribution.py
@@ -14,6 +14,7 @@ def setup_method(self):
         self.df_streets = gpd.read_file(test_file_path, layer="streets")
         self.graph = Graph.build_knn(self.df_buildings.centroid, k=5)
         self.contiguity = Graph.build_contiguity(self.df_buildings)
+        self.neighborhood_graph = self.graph.higher_order(3, lower_order=True)
 
     def test_orientation(self):
         expected = {
@@ -67,22 +68,24 @@ def test_neighbor_distance(self):
 
     def test_mean_interbuilding_distance(self):
         expected = {
-            "mean": 16.46438739026651,
-            "sum": 2370.871784198377,
-            "min": 12.279734781239485,
-            "max": 25.45874022563638,
+            "mean": 13.018190603684694,
+            "sum": 1874.6194469305958,
+            "min": 6.623582625492466,
+            "max": 22.513464171665948,
         }
-        r = mm.mean_interbuilding_distance(self.df_buildings, self.graph)
+        r = mm.mean_interbuilding_distance(
+            self.df_buildings, self.graph, self.neighborhood_graph
+        )
         assert_result(r, expected, self.df_buildings)
 
     def test_building_adjacency(self):
         expected = {
-            "mean": 0.4402777777777778,
-            "sum": 63.4,
-            "min": 0.2,
-            "max": 1,
+            "mean": 0.3784722222222222,
+            "sum": 54.5,
+            "min": 0.16666666666666666,
+            "max": 0.8333333333333334,
         }
-        r = mm.building_adjacency(self.graph, self.contiguity.assign_self_weight())
+        r = mm.building_adjacency(self.contiguity, self.graph)
         assert_result(r, expected, self.df_buildings, exact=False)
 
 
@@ -92,9 +95,16 @@ def setup_method(self):
         self.df_buildings = gpd.read_file(test_file_path, layer="buildings").set_index(
             "uID"
         )
+        self.df_tessellation = gpd.read_file(
+            test_file_path, layer="tessellation"
+        ).set_index("uID")
         self.graph = Graph.build_knn(self.df_buildings.centroid, k=5)
         self.df_buildings["orientation"] = mm.orientation(self.df_buildings)
         self.contiguity = Graph.build_contiguity(self.df_buildings)
+        self.tessellation_contiguity = Graph.build_contiguity(self.df_tessellation)
+        self.neighborhood_graph = self.tessellation_contiguity.higher_order(
+            3, lower_order=True
+        )
 
     def test_alignment(self):
         new = mm.alignment(self.df_buildings["orientation"], self.graph)
@@ -115,14 +125,19 @@ def test_neighbor_distance(self):
         assert_series_equal(new, old, check_names=False, check_index=False)
 
     def test_mean_interbuilding_distance(self):
-        new = mm.mean_interbuilding_distance(self.df_buildings, self.graph)
+        new = mm.mean_interbuilding_distance(
+            self.df_buildings, self.tessellation_contiguity, self.neighborhood_graph
+        )
         old = mm.MeanInterbuildingDistance(
-            self.df_buildings.reset_index(), self.graph.to_W(), "uID", verbose=False
+            self.df_buildings.reset_index(),
+            self.tessellation_contiguity.to_W(),
+            "uID",
+            verbose=False,
         ).series
         assert_series_equal(new, old, check_names=False, check_index=False)
 
     def test_building_adjacency(self):
-        new = mm.building_adjacency(self.graph.assign_self_weight(), self.contiguity)
+        new = mm.building_adjacency(self.contiguity, self.graph)
         old = mm.BuildingAdjacency(
             self.df_buildings.reset_index(), self.graph.to_W(), "uID", verbose=False
         ).series