From 58869d0ed7fa8b3b65e65c2762cdf898dd63c939 Mon Sep 17 00:00:00 2001 From: Geoff Boeing Date: Wed, 13 Mar 2024 14:20:50 -0700 Subject: [PATCH 01/13] move functionality out of speed and utils_graph modules into better homes --- docs/source/internals-reference.rst | 24 +- docs/source/user-reference.rst | 18 +- osmnx/__init__.py | 12 +- osmnx/_osm_xml.py | 7 +- osmnx/bearing.py | 2 +- osmnx/distance.py | 6 +- osmnx/elevation.py | 4 +- osmnx/graph.py | 3 +- osmnx/io.py | 8 +- osmnx/plot.py | 10 +- osmnx/projection.py | 10 +- osmnx/routing.py | 291 ++++++++++++- osmnx/simplification.py | 8 +- osmnx/speed.py | 263 ------------ osmnx/stats.py | 4 +- osmnx/truncate.py | 77 +++- osmnx/utils_geo.py | 4 +- osmnx/utils_graph.py | 620 ---------------------------- tests/test_osmnx.py | 26 +- 19 files changed, 424 insertions(+), 973 deletions(-) delete mode 100644 osmnx/speed.py delete mode 100644 osmnx/utils_graph.py diff --git a/docs/source/internals-reference.rst b/docs/source/internals-reference.rst index 61a8f5211..bbea80c90 100644 --- a/docs/source/internals-reference.rst +++ b/docs/source/internals-reference.rst @@ -11,6 +11,14 @@ osmnx.bearing module :private-members: :noindex: +osmnx.convert module +--------------------- + +.. automodule:: osmnx.convert + :members: + :private-members: + :noindex: + osmnx.distance module --------------------- @@ -139,14 +147,6 @@ osmnx.simplification module :private-members: :noindex: -osmnx.speed module ------------------- - -.. automodule:: osmnx.speed - :members: - :private-members: - :noindex: - osmnx.stats module ------------------ @@ -179,14 +179,6 @@ osmnx.utils_geo module :private-members: :noindex: -osmnx.utils_graph module ------------------------- - -.. automodule:: osmnx.utils_graph - :members: - :private-members: - :noindex: - osmnx._version module --------------------- diff --git a/docs/source/user-reference.rst b/docs/source/user-reference.rst index 6d16e212f..fd9d343d7 100644 --- a/docs/source/user-reference.rst +++ b/docs/source/user-reference.rst @@ -13,6 +13,12 @@ osmnx.bearing module .. automodule:: osmnx.bearing :members: +osmnx.convert module +-------------------- + +.. automodule:: osmnx.convert + :members: + osmnx.distance module --------------------- @@ -79,12 +85,6 @@ osmnx.simplification module .. automodule:: osmnx.simplification :members: -osmnx.speed module ------------------- - -.. automodule:: osmnx.speed - :members: - osmnx.stats module ------------------ @@ -108,9 +108,3 @@ osmnx.utils_geo module .. automodule:: osmnx.utils_geo :members: - -osmnx.utils_graph module ------------------------- - -.. automodule:: osmnx.utils_graph - :members: diff --git a/osmnx/__init__.py b/osmnx/__init__.py index ccf41e870..502d0082f 100644 --- a/osmnx/__init__.py +++ b/osmnx/__init__.py @@ -9,6 +9,10 @@ # by exposing these functions directly in the package's namespace. 
from .bearing import add_edge_bearings as add_edge_bearings from .bearing import orientation_entropy as orientation_entropy +from .convert import get_digraph as get_digraph +from .convert import get_undirected as get_undirected +from .convert import graph_from_gdfs as graph_from_gdfs +from .convert import graph_to_gdfs as graph_to_gdfs from .distance import nearest_edges as nearest_edges from .distance import nearest_nodes as nearest_nodes from .elevation import add_edge_grades as add_edge_grades @@ -40,17 +44,13 @@ from .plot import plot_orientation as plot_orientation from .projection import project_gdf as project_gdf from .projection import project_graph as project_graph +from .routing import add_edge_speeds as add_edge_speeds +from .routing import add_edge_travel_times as add_edge_travel_times from .routing import k_shortest_paths as k_shortest_paths from .routing import shortest_path as shortest_path from .simplification import consolidate_intersections as consolidate_intersections from .simplification import simplify_graph as simplify_graph -from .speed import add_edge_speeds as add_edge_speeds -from .speed import add_edge_travel_times as add_edge_travel_times from .stats import basic_stats as basic_stats from .utils import citation as citation from .utils import log as log from .utils import ts as ts -from .utils_graph import get_digraph as get_digraph -from .utils_graph import get_undirected as get_undirected -from .utils_graph import graph_from_gdfs as graph_from_gdfs -from .utils_graph import graph_to_gdfs as graph_to_gdfs diff --git a/osmnx/_osm_xml.py b/osmnx/_osm_xml.py index f228996e0..e97e0a871 100644 --- a/osmnx/_osm_xml.py +++ b/osmnx/_osm_xml.py @@ -23,10 +23,11 @@ import networkx as nx import pandas as pd +from . import convert from . import projection from . import settings +from . import truncate from . import utils -from . import utils_graph from ._errors import GraphSimplificationError from ._version import __version__ as osmnx_version @@ -209,7 +210,7 @@ def _save_graph_xml( filepath.parent.mkdir(parents=True, exist_ok=True) # convert graph to node/edge gdfs and create dict of spatial bounds - gdf_nodes, gdf_edges = utils_graph.graph_to_gdfs(G, fill_edge_geometry=False) + gdf_nodes, gdf_edges = convert.graph_to_gdfs(G, fill_edge_geometry=False) coords = [str(round(c, PRECISION)) for c in gdf_nodes.unary_union.bounds] bounds = dict(zip(["minlon", "minlat", "maxlon", "maxlat"], coords)) @@ -414,7 +415,7 @@ def _sort_nodes(G: nx.MultiDiGraph, osmid: int) -> list[int]: # note this is destructive and will be missing in the saved data. G_ = G.copy() G_.remove_edges_from(nx.find_cycle(G_)) - G_ = utils_graph.remove_isolated_nodes(G_) + G_ = truncate.remove_isolated_nodes(G_) ordered_nodes = _sort_nodes(G_, osmid) msg = f"Had to remove a cycle from way {str(osmid)!r} for topological sort" utils.log(msg, level=lg.WARNING) diff --git a/osmnx/bearing.py b/osmnx/bearing.py index d2ebffaf1..dc7fe18ec 100644 --- a/osmnx/bearing.py +++ b/osmnx/bearing.py @@ -226,7 +226,7 @@ def _extract_edge_bearings( msg = ( "`G` is a MultiDiGraph, so edge bearings will be directional (one per " "edge). If you want bidirectional edge bearings (two reciprocal bearings " - "per edge), pass a MultiGraph instead. Use `utils_graph.get_undirected`." + "per edge), pass a MultiGraph instead. Use `convert.get_undirected`." 
) warn(msg, category=UserWarning, stacklevel=2) return bearings_array diff --git a/osmnx/distance.py b/osmnx/distance.py index 3b0848365..8a83300b0 100644 --- a/osmnx/distance.py +++ b/osmnx/distance.py @@ -13,9 +13,9 @@ from shapely.geometry import Point from shapely.strtree import STRtree +from . import convert from . import projection from . import utils -from . import utils_graph # scipy is optional dependency for projected nearest-neighbor search try: @@ -350,7 +350,7 @@ def nearest_nodes( msg = "`X` and `Y` cannot contain nulls." raise ValueError(msg) - nodes = utils_graph.graph_to_gdfs(G, edges=False, node_geometry=False)[["x", "y"]] + nodes = convert.graph_to_gdfs(G, edges=False, node_geometry=False)[["x", "y"]] nn_array: npt.NDArray[np.int64] dist_array: npt.NDArray[np.float64] @@ -502,7 +502,7 @@ def nearest_edges( if np.isnan(X_arr).any() or np.isnan(Y_arr).any(): # pragma: no cover msg = "`X` and `Y` cannot contain nulls." raise ValueError(msg) - geoms = utils_graph.graph_to_gdfs(G, nodes=False)["geometry"] + geoms = convert.graph_to_gdfs(G, nodes=False)["geometry"] ne_array: npt.NDArray[np.object_] # array of tuple[int, int, int] dist_array: npt.NDArray[np.float64] diff --git a/osmnx/elevation.py b/osmnx/elevation.py index c82dfcf02..8f4322c07 100644 --- a/osmnx/elevation.py +++ b/osmnx/elevation.py @@ -16,9 +16,9 @@ import requests from . import _http +from . import convert from . import settings from . import utils -from . import utils_graph from ._errors import InsufficientResponseError if TYPE_CHECKING: @@ -155,7 +155,7 @@ def add_node_elevations_raster( gdal.UseExceptions() gdal.BuildVRT(filepath, filepaths).FlushCache() - nodes = utils_graph.graph_to_gdfs(G, edges=False, node_geometry=False)[["x", "y"]] + nodes = convert.graph_to_gdfs(G, edges=False, node_geometry=False)[["x", "y"]] if cpus == 1: elevs = dict(_query_raster(nodes, filepath, band)) else: diff --git a/osmnx/graph.py b/osmnx/graph.py index f1b5fbe99..9641561c7 100644 --- a/osmnx/graph.py +++ b/osmnx/graph.py @@ -28,7 +28,6 @@ from . import truncate from . import utils from . import utils_geo -from . import utils_graph from ._errors import CacheOnlyInterruptError from ._errors import InsufficientResponseError from ._version import __version__ @@ -622,7 +621,7 @@ def _create_graph( # retain only the largest connected component if retain_all=False if not retain_all: - G = utils_graph.get_largest_component(G) + G = truncate.get_largest_component(G) msg = f"Created graph with {len(G):,} nodes and {len(G.edges):,} edges" utils.log(msg, level=lg.INFO) diff --git a/osmnx/io.py b/osmnx/io.py index fc9dfe7a0..866c06b2c 100644 --- a/osmnx/io.py +++ b/osmnx/io.py @@ -1,4 +1,4 @@ -"""Read/write graphs from/to files on disk.""" +"""File I/O functions to save/load graphs to/from files on disk.""" from __future__ import annotations @@ -14,9 +14,9 @@ from shapely import wkt from . import _osm_xml +from . import convert from . import settings from . import utils -from . 
import utils_graph if TYPE_CHECKING: import geopandas as gpd @@ -58,9 +58,9 @@ def save_graph_geopackage( # convert graph to gdfs and stringify non-numeric columns if directed: - gdf_nodes, gdf_edges = utils_graph.graph_to_gdfs(G) + gdf_nodes, gdf_edges = convert.graph_to_gdfs(G) else: - gdf_nodes, gdf_edges = utils_graph.graph_to_gdfs(utils_graph.get_undirected(G)) + gdf_nodes, gdf_edges = convert.graph_to_gdfs(convert.get_undirected(G)) gdf_nodes = _stringify_nonnumeric_cols(gdf_nodes) gdf_edges = _stringify_nonnumeric_cols(gdf_edges) diff --git a/osmnx/plot.py b/osmnx/plot.py index 472e2a58e..d4b58a514 100644 --- a/osmnx/plot.py +++ b/osmnx/plot.py @@ -16,11 +16,11 @@ import pandas as pd from . import bearing +from . import convert from . import projection from . import settings from . import utils from . import utils_geo -from . import utils_graph if TYPE_CHECKING: import geopandas as gpd @@ -252,12 +252,12 @@ def plot_graph( # noqa: PLR0913 if max_edge_lw > 0: # plot the edges' geometries - gdf_edges = utils_graph.graph_to_gdfs(G, nodes=False)["geometry"] + gdf_edges = convert.graph_to_gdfs(G, nodes=False)["geometry"] ax = gdf_edges.plot(ax=ax, color=edge_color, lw=edge_linewidth, alpha=edge_alpha, zorder=1) if max_node_size > 0: # scatter plot the nodes' x/y coordinates - gdf_nodes = utils_graph.graph_to_gdfs(G, edges=False, node_geometry=False)[["x", "y"]] + gdf_nodes = convert.graph_to_gdfs(G, edges=False, node_geometry=False)[["x", "y"]] ax.scatter( # type: ignore[union-attr] x=gdf_nodes["x"], y=gdf_nodes["y"], @@ -510,7 +510,7 @@ def plot_figure_ground( } # we need an undirected graph to find every edge incident on a node - Gu = utils_graph.get_undirected(G) + Gu = convert.get_undirected(G) # for each edge, get a linewidth according to street type edge_linewidths = [] @@ -553,7 +553,7 @@ def plot_figure_ground( node_sizes: list[float] | float = [node_widths[node] for node in Gu.nodes] # define the view extents of the plotting figure - node_geoms = utils_graph.graph_to_gdfs(Gu, edges=False, node_geometry=True).unary_union + node_geoms = convert.graph_to_gdfs(Gu, edges=False, node_geometry=True).unary_union lonlat_point = node_geoms.centroid.coords[0] latlon_point = tuple(reversed(lonlat_point)) bbox = utils_geo.bbox_from_point(latlon_point, dist=dist, project_utm=False) diff --git a/osmnx/projection.py b/osmnx/projection.py index 187d17401..a5bf34f6f 100644 --- a/osmnx/projection.py +++ b/osmnx/projection.py @@ -8,9 +8,9 @@ import geopandas as gpd +from . import convert from . import settings from . import utils -from . 
import utils_graph if TYPE_CHECKING: import networkx as nx @@ -161,7 +161,7 @@ def project_graph( to_crs = settings.default_crs # STEP 1: PROJECT THE NODES - gdf_nodes = utils_graph.graph_to_gdfs(G, edges=False) + gdf_nodes = convert.graph_to_gdfs(G, edges=False) # project the nodes GeoDataFrame and extract the projected x/y values gdf_nodes_proj = project_gdf(gdf_nodes, to_crs=to_crs) @@ -172,17 +172,17 @@ def project_graph( # STEP 2: PROJECT THE EDGES if G.graph.get("simplified"): # if graph has previously been simplified, project the edge geometries - gdf_edges = utils_graph.graph_to_gdfs(G, nodes=False, fill_edge_geometry=False) + gdf_edges = convert.graph_to_gdfs(G, nodes=False, fill_edge_geometry=False) gdf_edges_proj = project_gdf(gdf_edges, to_crs=to_crs) else: # if not, you don't have to project these edges because the nodes # contain all the spatial data in the graph (unsimplified edges have # no geometry attributes) - gdf_edges_proj = utils_graph.graph_to_gdfs(G, nodes=False, fill_edge_geometry=False) + gdf_edges_proj = convert.graph_to_gdfs(G, nodes=False, fill_edge_geometry=False) # STEP 3: REBUILD GRAPH # turn projected node/edge gdfs into a graph and update its CRS attribute - G_proj = utils_graph.graph_from_gdfs(gdf_nodes_proj, gdf_edges_proj, graph_attrs=G.graph) + G_proj = convert.graph_from_gdfs(gdf_nodes_proj, gdf_edges_proj, graph_attrs=G.graph) G_proj.graph["crs"] = to_crs msg = f"Projected graph with {len(G)} nodes and {len(G.edges)} edges" diff --git a/osmnx/routing.py b/osmnx/routing.py index 1a8921d9a..25764d7db 100644 --- a/osmnx/routing.py +++ b/osmnx/routing.py @@ -1,20 +1,55 @@ -"""Calculate weighted shortest paths between graph nodes.""" +"""Calculate edge speeds, travel times, and weighted shortest paths.""" from __future__ import annotations import itertools import logging as lg import multiprocessing as mp +import re from collections.abc import Iterable from collections.abc import Iterator +from typing import TYPE_CHECKING +from typing import Any +from typing import Callable from typing import overload from warnings import warn import networkx as nx import numpy as np +import pandas as pd +from . import convert from . import utils -from . import utils_graph + +if TYPE_CHECKING: + import geopandas as gpd + + +def route_to_gdf( + G: nx.MultiDiGraph, + route: list[int], + *, + weight: str = "length", +) -> gpd.GeoDataFrame: + """ + Return a GeoDataFrame of the edges in a path, in order. + + Parameters + ---------- + G + Input graph. + route + Node IDs constituting the path. + weight + Attribute value to minimize when choosing between parallel edges. + + Returns + ------- + gdf_edges + """ + pairs = zip(route[:-1], route[1:]) + uvk = ((u, v, min(G[u][v].items(), key=lambda i: i[1][weight])[0]) for u, v in pairs) + return convert.graph_to_gdfs(G.subgraph(route), nodes=False).loc[uvk] # orig/dest int, weight present, cpus present @@ -219,7 +254,7 @@ def k_shortest_paths( """ _verify_edge_attribute(G, weight) paths_gen = nx.shortest_simple_paths( - G=utils_graph.get_digraph(G, weight=weight), + G=convert.get_digraph(G, weight=weight), source=orig, target=dest, weight=weight, @@ -290,3 +325,253 @@ def _verify_edge_attribute(G: nx.MultiDiGraph, attr: str) -> None: except ValueError as e: msg = f"The edge attribute {attr!r} contains non-numeric values." 
raise ValueError(msg) from e + + +def add_edge_speeds( + G: nx.MultiDiGraph, + *, + hwy_speeds: dict[str, float] | None = None, + fallback: float | None = None, + agg: Callable[[Any], Any] = np.mean, +) -> nx.MultiDiGraph: + """ + Add edge speeds (km per hour) to graph as new `speed_kph` edge attributes. + + By default, this imputes free-flow travel speeds for all edges via the + mean `maxspeed` value of the edges of each highway type. For highway types + in the graph that have no `maxspeed` value on any edge, it assigns the + mean of all `maxspeed` values in graph. + + This default mean-imputation can obviously be imprecise, and the user can + override it by passing in `hwy_speeds` and/or `fallback` arguments that + correspond to local speed limit standards. The user can also specify a + different aggregation function (such as the median) to impute missing + values from the observed values. + + If edge `maxspeed` attribute has "mph" in it, value will automatically be + converted from miles per hour to km per hour. Any other speed units should + be manually converted to km per hour prior to running this function, + otherwise there could be unexpected results. If "mph" does not appear in + the edge's maxspeed attribute string, then function assumes kph, per OSM + guidelines: https://wiki.openstreetmap.org/wiki/Map_Features/Units + + Parameters + ---------- + G + Input graph. + hwy_speeds + Dict keys are OSM highway types and values are typical speeds (km per + hour) to assign to edges of that highway type for any edges missing + speed data. Any edges with highway type not in `hwy_speeds` will be + assigned the mean pre-existing speed value of all edges of that + highway type. + fallback + Default speed value (km per hour) to assign to edges whose highway + type did not appear in `hwy_speeds` and had no pre-existing speed + attribute values on any edge. + agg + Aggregation function to impute missing values from observed values. + The default is `numpy.mean`, but you might also consider for example + `numpy.median`, `numpy.nanmedian`, or your own custom function. + + Returns + ------- + G + Graph with `speed_kph` attributes on all edges. 
+ """ + if fallback is None: + fallback = np.nan + + edges = convert.graph_to_gdfs(G, nodes=False, fill_edge_geometry=False) + + # collapse any highway lists (can happen during graph simplification) + # into string values simply by keeping just the first element of the list + edges["highway"] = edges["highway"].map(lambda x: x[0] if isinstance(x, list) else x) + + if "maxspeed" in edges.columns: + # collapse any maxspeed lists (can happen during graph simplification) + # into a single value + edges["maxspeed"] = edges["maxspeed"].apply(_collapse_multiple_maxspeed_values, agg=agg) + + # create speed_kph by cleaning maxspeed strings and converting mph to + # kph if necessary + edges["speed_kph"] = edges["maxspeed"].astype(str).map(_clean_maxspeed).astype(float) + else: + # if no edges in graph had a maxspeed attribute + edges["speed_kph"] = None + + # if user provided hwy_speeds, use them as default values, otherwise + # initialize an empty series to populate with values + hwy_speed_avg = pd.Series(dtype=float) if hwy_speeds is None else pd.Series(hwy_speeds).dropna() + + # for each highway type that caller did not provide in hwy_speeds, impute + # speed of type by taking the mean of the preexisting speed values of that + # highway type + for hwy, group in edges.groupby("highway"): + if hwy not in hwy_speed_avg: + hwy_speed_avg.loc[hwy] = agg(group["speed_kph"]) + + # if any highway types had no preexisting speed values, impute their speed + # with fallback value provided by caller. if fallback=np.nan, impute speed + # as the mean speed of all highway types that did have preexisting values + hwy_speed_avg = hwy_speed_avg.fillna(fallback).fillna(agg(hwy_speed_avg)) + + # for each edge missing speed data, assign it the imputed value for its + # highway type + speed_kph = ( + edges[["highway", "speed_kph"]].set_index("highway").iloc[:, 0].fillna(hwy_speed_avg) + ) + + # all speeds will be null if edges had no preexisting maxspeed data and + # caller did not pass in hwy_speeds or fallback arguments + if pd.isna(speed_kph).all(): + msg = ( + "This graph's edges have no preexisting 'maxspeed' attribute " + "values so you must pass `hwy_speeds` or `fallback` arguments." + ) + raise ValueError(msg) + + # add speed kph attribute to graph edges + edges["speed_kph"] = speed_kph.to_numpy() + nx.set_edge_attributes(G, values=edges["speed_kph"], name="speed_kph") + + return G + + +def add_edge_travel_times(G: nx.MultiDiGraph) -> nx.MultiDiGraph: + """ + Add edge travel time (seconds) to graph as new `travel_time` edge attributes. + + Calculates free-flow travel time along each edge, based on `length` and + `speed_kph` attributes. Note: run `add_edge_speeds` first to generate the + `speed_kph` attribute. All edges must have `length` and `speed_kph` + attributes and all their values must be non-null. + + Parameters + ---------- + G + Input graph. + + Returns + ------- + G + Graph with `travel_time` attributes on all edges. + """ + edges = convert.graph_to_gdfs(G, nodes=False) + + # verify edge length and speed_kph attributes exist + if not ("length" in edges.columns and "speed_kph" in edges.columns): # pragma: no cover + msg = "All edges must have 'length' and 'speed_kph' attributes." + raise KeyError(msg) + + # verify edge length and speed_kph attributes contain no nulls + if pd.isna(edges["length"]).any() or pd.isna(edges["speed_kph"]).any(): # pragma: no cover + msg = "Edge 'length' and 'speed_kph' values must be non-null." 
+ raise ValueError(msg) + + # convert distance meters to km, and speed km per hour to km per second + distance_km = edges["length"] / 1000 + speed_km_sec = edges["speed_kph"] / (60 * 60) + + # calculate edge travel time in seconds + travel_time = distance_km / speed_km_sec + + # add travel time attribute to graph edges + edges["travel_time"] = travel_time.to_numpy() + nx.set_edge_attributes(G, values=edges["travel_time"], name="travel_time") + + return G + + +def _clean_maxspeed( + maxspeed: str | float, + *, + agg: Callable[[Any], Any] = np.mean, + convert_mph: bool = True, +) -> float | None: + """ + Clean a maxspeed string and convert mph to kph if necessary. + + If present, splits maxspeed on "|" (which denotes that the value contains + different speeds per lane) then aggregates the resulting values. Invalid + inputs return None. See https://wiki.openstreetmap.org/wiki/Key:maxspeed + for details on values and formats. + + Parameters + ---------- + maxspeed + An OSM way "maxspeed" attribute value. Null values are expected to be + of type float (`numpy.nan`), and non-null values are strings. + agg + Aggregation function if `maxspeed` contains multiple values (default + is `numpy.mean`). + convert_mph + If True, convert miles per hour to kilometers per hour. + + Returns + ------- + clean_value + Clean value resulting from `agg` function. + """ + MILES_TO_KM = 1.60934 + if not isinstance(maxspeed, str): + return None + + # regex adapted from OSM wiki + pattern = "^([0-9][\\.,0-9]+?)(?:[ ]?(?:km/h|kmh|kph|mph|knots))?$" + values = re.split(r"\|", maxspeed) # creates a list even if it's a single value + try: + clean_values = [] + for value in values: + match = re.match(pattern, value) + clean_value = float(match.group(1).replace(",", ".")) # type: ignore[union-attr] + if convert_mph and "mph" in maxspeed.lower(): + clean_value = clean_value * MILES_TO_KM + clean_values.append(clean_value) + return float(agg(clean_values)) + + except (ValueError, AttributeError): + # if invalid input, return None + return None + + +def _collapse_multiple_maxspeed_values( + value: str | float | list[str | float], + agg: Callable[[Any], Any], +) -> float | str | None: + """ + Collapse a list of maxspeed values to a single value. + + Returns None if a ValueError is encountered. + + Parameters + ---------- + value + An OSM way "maxspeed" attribute value. Null values are expected to be + of type float (`numpy.nan`), and non-null values are strings. + agg + The aggregation function to reduce the list to a single value. + + Returns + ------- + collapsed + If `value` was a string or null, it is just returned directly. + Otherwise, the return is a float representation of the aggregated + value in the list (converted to kph if original value was in mph). + """ + # if this isn't a list, just return it right back to the caller + if not isinstance(value, list): + return value + + # otherwise, if it is a list, process it + try: + # clean each value in list and convert to kph if it is mph then + # return a single aggregated value + values = [_clean_maxspeed(x) for x in value] + collapsed = float(agg(pd.Series(values).dropna())) + if pd.isna(collapsed): + return None + # otherwise + return collapsed # noqa: TRY300 + except ValueError: + return None diff --git a/osmnx/simplification.py b/osmnx/simplification.py index 860712df6..4e71bbf06 100644 --- a/osmnx/simplification.py +++ b/osmnx/simplification.py @@ -13,10 +13,10 @@ from shapely.geometry import Point from shapely.geometry import Polygon +from . import convert from . 
import settings from . import stats from . import utils -from . import utils_graph from ._errors import GraphSimplificationError if TYPE_CHECKING: @@ -544,7 +544,7 @@ def _merge_nodes_geometric(G: nx.MultiDiGraph, tolerance: float) -> gpd.GeoSerie The merged overlapping polygons of the buffered nodes. """ # buffer nodes GeoSeries then get unary union to merge overlaps - merged = utils_graph.graph_to_gdfs(G, edges=False)["geometry"].buffer(tolerance).unary_union + merged = convert.graph_to_gdfs(G, edges=False)["geometry"].buffer(tolerance).unary_union # if only a single node results, make it iterable to convert to GeoSeries merged = MultiPolygon([merged]) if isinstance(merged, Polygon) else merged @@ -603,7 +603,7 @@ def _consolidate_intersections_rebuild_graph( # noqa: C901,PLR0912,PLR0915 # attach each node to its cluster of merged nodes. first get the original # graph's node points then spatial join to give each node the label of # cluster it's within. make cluster labels type string. - node_points = utils_graph.graph_to_gdfs(G, edges=False) + node_points = convert.graph_to_gdfs(G, edges=False) cols = set(node_points.columns).intersection(["geometry", *settings.useful_tags_node]) node_points = node_points[list(cols)] gdf = gpd.sjoin(node_points, node_clusters, how="left", predicate="within") @@ -680,7 +680,7 @@ def _consolidate_intersections_rebuild_graph( # noqa: C901,PLR0912,PLR0915 # STEP 6 # create new edge from cluster to cluster for each edge in original graph - gdf_edges = utils_graph.graph_to_gdfs(G, nodes=False) + gdf_edges = convert.graph_to_gdfs(G, nodes=False) for u, v, k, data in G.edges(keys=True, data=True): u2 = gdf.loc[u, "cluster"] v2 = gdf.loc[v, "cluster"] diff --git a/osmnx/speed.py b/osmnx/speed.py deleted file mode 100644 index f8abd5243..000000000 --- a/osmnx/speed.py +++ /dev/null @@ -1,263 +0,0 @@ -"""Calculate graph edge speeds and travel times.""" - -from __future__ import annotations - -import re -from typing import Any -from typing import Callable - -import networkx as nx -import numpy as np -import pandas as pd - -from . import utils_graph - - -def add_edge_speeds( - G: nx.MultiDiGraph, - *, - hwy_speeds: dict[str, float] | None = None, - fallback: float | None = None, - agg: Callable[[Any], Any] = np.mean, -) -> nx.MultiDiGraph: - """ - Add edge speeds (km per hour) to graph as new `speed_kph` edge attributes. - - By default, this imputes free-flow travel speeds for all edges via the - mean `maxspeed` value of the edges of each highway type. For highway types - in the graph that have no `maxspeed` value on any edge, it assigns the - mean of all `maxspeed` values in graph. - - This default mean-imputation can obviously be imprecise, and the user can - override it by passing in `hwy_speeds` and/or `fallback` arguments that - correspond to local speed limit standards. The user can also specify a - different aggregation function (such as the median) to impute missing - values from the observed values. - - If edge `maxspeed` attribute has "mph" in it, value will automatically be - converted from miles per hour to km per hour. Any other speed units should - be manually converted to km per hour prior to running this function, - otherwise there could be unexpected results. If "mph" does not appear in - the edge's maxspeed attribute string, then function assumes kph, per OSM - guidelines: https://wiki.openstreetmap.org/wiki/Map_Features/Units - - Parameters - ---------- - G - Input graph. 
- hwy_speeds - Dict keys are OSM highway types and values are typical speeds (km per - hour) to assign to edges of that highway type for any edges missing - speed data. Any edges with highway type not in `hwy_speeds` will be - assigned the mean pre-existing speed value of all edges of that - highway type. - fallback - Default speed value (km per hour) to assign to edges whose highway - type did not appear in `hwy_speeds` and had no pre-existing speed - attribute values on any edge. - agg - Aggregation function to impute missing values from observed values. - The default is `numpy.mean`, but you might also consider for example - `numpy.median`, `numpy.nanmedian`, or your own custom function. - - Returns - ------- - G - Graph with `speed_kph` attributes on all edges. - """ - if fallback is None: - fallback = np.nan - - edges = utils_graph.graph_to_gdfs(G, nodes=False, fill_edge_geometry=False) - - # collapse any highway lists (can happen during graph simplification) - # into string values simply by keeping just the first element of the list - edges["highway"] = edges["highway"].map(lambda x: x[0] if isinstance(x, list) else x) - - if "maxspeed" in edges.columns: - # collapse any maxspeed lists (can happen during graph simplification) - # into a single value - edges["maxspeed"] = edges["maxspeed"].apply(_collapse_multiple_maxspeed_values, agg=agg) - - # create speed_kph by cleaning maxspeed strings and converting mph to - # kph if necessary - edges["speed_kph"] = edges["maxspeed"].astype(str).map(_clean_maxspeed).astype(float) - else: - # if no edges in graph had a maxspeed attribute - edges["speed_kph"] = None - - # if user provided hwy_speeds, use them as default values, otherwise - # initialize an empty series to populate with values - hwy_speed_avg = pd.Series(dtype=float) if hwy_speeds is None else pd.Series(hwy_speeds).dropna() - - # for each highway type that caller did not provide in hwy_speeds, impute - # speed of type by taking the mean of the preexisting speed values of that - # highway type - for hwy, group in edges.groupby("highway"): - if hwy not in hwy_speed_avg: - hwy_speed_avg.loc[hwy] = agg(group["speed_kph"]) - - # if any highway types had no preexisting speed values, impute their speed - # with fallback value provided by caller. if fallback=np.nan, impute speed - # as the mean speed of all highway types that did have preexisting values - hwy_speed_avg = hwy_speed_avg.fillna(fallback).fillna(agg(hwy_speed_avg)) - - # for each edge missing speed data, assign it the imputed value for its - # highway type - speed_kph = ( - edges[["highway", "speed_kph"]].set_index("highway").iloc[:, 0].fillna(hwy_speed_avg) - ) - - # all speeds will be null if edges had no preexisting maxspeed data and - # caller did not pass in hwy_speeds or fallback arguments - if pd.isna(speed_kph).all(): - msg = ( - "This graph's edges have no preexisting 'maxspeed' attribute " - "values so you must pass `hwy_speeds` or `fallback` arguments." - ) - raise ValueError(msg) - - # add speed kph attribute to graph edges - edges["speed_kph"] = speed_kph.to_numpy() - nx.set_edge_attributes(G, values=edges["speed_kph"], name="speed_kph") - - return G - - -def add_edge_travel_times(G: nx.MultiDiGraph) -> nx.MultiDiGraph: - """ - Add edge travel time (seconds) to graph as new `travel_time` edge attributes. - - Calculates free-flow travel time along each edge, based on `length` and - `speed_kph` attributes. Note: run `add_edge_speeds` first to generate the - `speed_kph` attribute. 
All edges must have `length` and `speed_kph` - attributes and all their values must be non-null. - - Parameters - ---------- - G - Input graph. - - Returns - ------- - G - Graph with `travel_time` attributes on all edges. - """ - edges = utils_graph.graph_to_gdfs(G, nodes=False) - - # verify edge length and speed_kph attributes exist - if not ("length" in edges.columns and "speed_kph" in edges.columns): # pragma: no cover - msg = "All edges must have 'length' and 'speed_kph' attributes." - raise KeyError(msg) - - # verify edge length and speed_kph attributes contain no nulls - if pd.isna(edges["length"]).any() or pd.isna(edges["speed_kph"]).any(): # pragma: no cover - msg = "Edge 'length' and 'speed_kph' values must be non-null." - raise ValueError(msg) - - # convert distance meters to km, and speed km per hour to km per second - distance_km = edges["length"] / 1000 - speed_km_sec = edges["speed_kph"] / (60 * 60) - - # calculate edge travel time in seconds - travel_time = distance_km / speed_km_sec - - # add travel time attribute to graph edges - edges["travel_time"] = travel_time.to_numpy() - nx.set_edge_attributes(G, values=edges["travel_time"], name="travel_time") - - return G - - -def _clean_maxspeed( - maxspeed: str | float, - *, - agg: Callable[[Any], Any] = np.mean, - convert_mph: bool = True, -) -> float | None: - """ - Clean a maxspeed string and convert mph to kph if necessary. - - If present, splits maxspeed on "|" (which denotes that the value contains - different speeds per lane) then aggregates the resulting values. Invalid - inputs return None. See https://wiki.openstreetmap.org/wiki/Key:maxspeed - for details on values and formats. - - Parameters - ---------- - maxspeed - An OSM way "maxspeed" attribute value. Null values are expected to be - of type float (`numpy.nan`), and non-null values are strings. - agg - Aggregation function if `maxspeed` contains multiple values (default - is `numpy.mean`). - convert_mph - If True, convert miles per hour to kilometers per hour. - - Returns - ------- - clean_value - Clean value resulting from `agg` function. - """ - MILES_TO_KM = 1.60934 - if not isinstance(maxspeed, str): - return None - - # regex adapted from OSM wiki - pattern = "^([0-9][\\.,0-9]+?)(?:[ ]?(?:km/h|kmh|kph|mph|knots))?$" - values = re.split(r"\|", maxspeed) # creates a list even if it's a single value - try: - clean_values = [] - for value in values: - match = re.match(pattern, value) - clean_value = float(match.group(1).replace(",", ".")) # type: ignore[union-attr] - if convert_mph and "mph" in maxspeed.lower(): - clean_value = clean_value * MILES_TO_KM - clean_values.append(clean_value) - return float(agg(clean_values)) - - except (ValueError, AttributeError): - # if invalid input, return None - return None - - -def _collapse_multiple_maxspeed_values( - value: str | float | list[str | float], - agg: Callable[[Any], Any], -) -> float | str | None: - """ - Collapse a list of maxspeed values to a single value. - - Returns None if a ValueError is encountered. - - Parameters - ---------- - value - An OSM way "maxspeed" attribute value. Null values are expected to be - of type float (`numpy.nan`), and non-null values are strings. - agg - The aggregation function to reduce the list to a single value. - - Returns - ------- - collapsed - If `value` was a string or null, it is just returned directly. - Otherwise, the return is a float representation of the aggregated - value in the list (converted to kph if original value was in mph). 
- """ - # if this isn't a list, just return it right back to the caller - if not isinstance(value, list): - return value - - # otherwise, if it is a list, process it - try: - # clean each value in list and convert to kph if it is mph then - # return a single aggregated value - values = [_clean_maxspeed(x) for x in value] - collapsed = float(agg(pd.Series(values).dropna())) - if pd.isna(collapsed): - return None - # otherwise - return collapsed # noqa: TRY300 - except ValueError: - return None diff --git a/osmnx/stats.py b/osmnx/stats.py index ea048b6fe..f2e5f83c0 100644 --- a/osmnx/stats.py +++ b/osmnx/stats.py @@ -22,11 +22,11 @@ import networkx as nx import numpy as np +from . import convert from . import distance from . import projection from . import simplification from . import utils -from . import utils_graph if TYPE_CHECKING: from collections.abc import Iterable @@ -372,7 +372,7 @@ def basic_stats( - `streets_per_node_counts` - see `streets_per_node_counts` function documentation - `streets_per_node_proportions` - see `streets_per_node_proportions` function documentation """ - Gu = utils_graph.get_undirected(G) + Gu = convert.get_undirected(G) stats: dict[str, Any] = {} stats["n"] = len(G.nodes) diff --git a/osmnx/truncate.py b/osmnx/truncate.py index f765a7cf8..b8b59b4cd 100644 --- a/osmnx/truncate.py +++ b/osmnx/truncate.py @@ -7,9 +7,9 @@ import networkx as nx +from . import convert from . import utils from . import utils_geo -from . import utils_graph if TYPE_CHECKING: from shapely.geometry import MultiPolygon @@ -65,8 +65,7 @@ def truncate_graph_dist( # remove any isolated nodes and retain only the largest component (if # retain_all is True) if not retain_all: - G = utils_graph.remove_isolated_nodes(G) - G = utils_graph.get_largest_component(G) + G = get_largest_component(remove_isolated_nodes(G)) msg = f"Truncated graph by {weight}-weighted network distance" utils.log(msg, level=lg.INFO) @@ -142,7 +141,7 @@ def truncate_graph_polygon( utils.log(msg, level=lg.INFO) # first identify all nodes whose point geometries lie within the polygon - gs_nodes = utils_graph.graph_to_gdfs(G, edges=False)["geometry"] + gs_nodes = convert.graph_to_gdfs(G, edges=False)["geometry"] to_keep = utils_geo._intersect_index_quadrats(gs_nodes, polygon) if len(to_keep) == 0: @@ -175,10 +174,74 @@ def truncate_graph_polygon( utils.log(msg, level=lg.INFO) if not retain_all: - # remove any isolated nodes and retain only the largest component - G = utils_graph.remove_isolated_nodes(G) - G = utils_graph.get_largest_component(G) + G = get_largest_component(remove_isolated_nodes(G)) msg = "Truncated graph by polygon" utils.log(msg, level=lg.INFO) return G + + +def remove_isolated_nodes(G: nx.MultiDiGraph) -> nx.MultiDiGraph: + """ + Remove from a graph all nodes that have no incident edges. + + Parameters + ---------- + G + Graph from which to remove isolated nodes. + + Returns + ------- + G + Graph with all isolated nodes removed. + """ + # make a copy to not mutate original graph object caller passed in + G = G.copy() + + # get the set of all isolated nodes, then remove them + isolated_nodes = {node for node, degree in G.degree() if degree < 1} + G.remove_nodes_from(isolated_nodes) + + msg = f"Removed {len(isolated_nodes):,} isolated nodes" + utils.log(msg, level=lg.INFO) + return G + + +def get_largest_component(G: nx.MultiDiGraph, *, strongly: bool = False) -> nx.MultiDiGraph: + """ + Return subgraph of `G`'s largest weakly or strongly connected component. + + Parameters + ---------- + G + Input graph. 
+ strongly + If True, return the largest strongly connected component. Otherwise + return the largest weakly connected component. + + Returns + ------- + G + The largest connected component subgraph of the original graph. + """ + if strongly: + kind = "strongly" + is_connected = nx.is_strongly_connected + connected_components = nx.strongly_connected_components + else: + kind = "weakly" + is_connected = nx.is_weakly_connected + connected_components = nx.weakly_connected_components + + if not is_connected(G): + # get all the connected components in graph then identify the largest + largest_cc = max(connected_components(G), key=len) + n = len(G) + + # induce (frozen) subgraph then unfreeze it by making new MultiDiGraph + G = nx.MultiDiGraph(G.subgraph(largest_cc)) + + msg = f"Got largest {kind} connected component ({len(G):,} of {n:,} total nodes)" + utils.log(msg, level=lg.INFO) + + return G diff --git a/osmnx/utils_geo.py b/osmnx/utils_geo.py index 761c85342..f01919951 100644 --- a/osmnx/utils_geo.py +++ b/osmnx/utils_geo.py @@ -16,10 +16,10 @@ from shapely.geometry import Polygon from shapely.ops import split +from . import convert from . import projection from . import settings from . import utils -from . import utils_graph if TYPE_CHECKING: from collections.abc import Iterator @@ -54,7 +54,7 @@ def sample_points(G: nx.MultiGraph, n: int) -> gpd.GeoSeries: if nx.is_directed(G): # pragma: no cover msg = "`G` should be undirected to avoid oversampling bidirectional edges." warn(msg, category=UserWarning, stacklevel=2) - gdf_edges = utils_graph.graph_to_gdfs(G, nodes=False)[["geometry", "length"]] + gdf_edges = convert.graph_to_gdfs(G, nodes=False)[["geometry", "length"]] weights = gdf_edges["length"] / gdf_edges["length"].sum() idx = np.random.default_rng().choice(gdf_edges.index, size=n, p=weights) lines = gdf_edges.loc[idx, "geometry"] diff --git a/osmnx/utils_graph.py b/osmnx/utils_graph.py deleted file mode 100644 index 53b25e754..000000000 --- a/osmnx/utils_graph.py +++ /dev/null @@ -1,620 +0,0 @@ -"""Graph utility functions.""" - -from __future__ import annotations - -import itertools -import logging as lg -from typing import Any -from typing import Literal -from typing import overload -from warnings import warn - -import geopandas as gpd -import networkx as nx -import pandas as pd -from shapely.geometry import LineString -from shapely.geometry import Point - -from . import utils - - -# nodes and edges are both missing (therefore both default true) -@overload -def graph_to_gdfs( - G: nx.MultiGraph | nx.MultiDiGraph, - *, - node_geometry: bool = True, - fill_edge_geometry: bool = True, -) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]: ... - - -# both present/True -@overload -def graph_to_gdfs( - G: nx.MultiGraph | nx.MultiDiGraph, - *, - nodes: Literal[True], - edges: Literal[True], - node_geometry: bool = True, - fill_edge_geometry: bool = True, -) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]: ... - - -# both present, nodes true, edges false -@overload -def graph_to_gdfs( - G: nx.MultiGraph | nx.MultiDiGraph, - *, - nodes: Literal[True], - edges: Literal[False], - node_geometry: bool = True, - fill_edge_geometry: bool = True, -) -> gpd.GeoDataFrame: ... - - -# both present, nodes false, edges true -@overload -def graph_to_gdfs( - G: nx.MultiGraph | nx.MultiDiGraph, - *, - nodes: Literal[False], - edges: Literal[True], - node_geometry: bool = True, - fill_edge_geometry: bool = True, -) -> gpd.GeoDataFrame: ... 
- - -# nodes missing (therefore default true), edges present/true -@overload -def graph_to_gdfs( - G: nx.MultiGraph | nx.MultiDiGraph, - *, - edges: Literal[True], - node_geometry: bool = True, - fill_edge_geometry: bool = True, -) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]: ... - - -# nodes missing (therefore default true), edges present/false -@overload -def graph_to_gdfs( - G: nx.MultiGraph | nx.MultiDiGraph, - *, - edges: Literal[False], - node_geometry: bool = True, - fill_edge_geometry: bool = True, -) -> gpd.GeoDataFrame: ... - - -# nodes present/true, edges missing (therefore default true) -@overload -def graph_to_gdfs( - G: nx.MultiGraph | nx.MultiDiGraph, - *, - nodes: Literal[True], - edges: bool = True, - node_geometry: bool = True, - fill_edge_geometry: bool = True, -) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]: ... - - -# nodes present/false, edges missing (therefore default true) -@overload -def graph_to_gdfs( - G: nx.MultiGraph | nx.MultiDiGraph, - *, - nodes: Literal[False], - edges: bool = True, - node_geometry: bool = True, - fill_edge_geometry: bool = True, -) -> gpd.GeoDataFrame: ... - - -def graph_to_gdfs( - G: nx.MultiGraph | nx.MultiDiGraph, - *, - nodes: bool = True, - edges: bool = True, - node_geometry: bool = True, - fill_edge_geometry: bool = True, -) -> gpd.GeoDataFrame | tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]: - """ - Convert a MultiGraph or MultiDiGraph to node and/or edge GeoDataFrames. - - This function is the inverse of `graph_from_gdfs`. - - Parameters - ---------- - G - Input graph. - nodes - If True, convert graph nodes to a GeoDataFrame and return it. - edges - If True, convert graph edges to a GeoDataFrame and return it. - node_geometry - If True, create a geometry column from node "x" and "y" attributes. - fill_edge_geometry - If True, fill missing edge geometry fields using endpoint nodes' - coordinates to create a LineString. - - Returns - ------- - gdf_nodes or gdf_edges or (gdf_nodes, gdf_edges) - `gdf_nodes` is indexed by `osmid` and `gdf_edges` is multi-indexed by - `(u, v, key)` following normal MultiGraph/MultiDiGraph structure. - """ - crs = G.graph["crs"] - - if nodes: - if len(G.nodes) == 0: # pragma: no cover - msg = "Graph contains no nodes." - raise ValueError(msg) - - uvk, data = zip(*G.nodes(data=True)) - - if node_geometry: - # convert node x/y attributes to Points for geometry column - node_geoms = (Point(d["x"], d["y"]) for d in data) - gdf_nodes = gpd.GeoDataFrame(data, index=uvk, crs=crs, geometry=list(node_geoms)) - else: - gdf_nodes = gpd.GeoDataFrame(data, index=uvk) - - gdf_nodes.index = gdf_nodes.index.rename("osmid") - msg = "Created nodes GeoDataFrame from graph" - utils.log(msg, level=lg.INFO) - - if edges: - if len(G.edges) == 0: # pragma: no cover - msg = "Graph contains no edges." 
- raise ValueError(msg) - - u, v, k, data = zip(*G.edges(keys=True, data=True)) - - if fill_edge_geometry: - # subroutine to get geometry for every edge: if edge already has - # geometry return it, otherwise create it using the incident nodes - x_lookup = nx.get_node_attributes(G, "x") - y_lookup = nx.get_node_attributes(G, "y") - - def _make_edge_geometry( - u: int, - v: int, - data: dict[str, Any], - x: dict[int, float] = x_lookup, - y: dict[int, float] = y_lookup, - ) -> LineString: - if "geometry" in data: - return data["geometry"] - - # otherwise - return LineString((Point((x[u], y[u])), Point((x[v], y[v])))) - - edge_geoms = map(_make_edge_geometry, u, v, data) - gdf_edges = gpd.GeoDataFrame(data, crs=crs, geometry=list(edge_geoms)) - - else: - gdf_edges = gpd.GeoDataFrame(data) - if "geometry" not in gdf_edges.columns: - # if no edges have a geometry attribute, create null column - gdf_edges = gdf_edges.set_geometry([None] * len(gdf_edges)) - gdf_edges = gdf_edges.set_crs(crs) - - # add u, v, key attributes as index - gdf_edges["u"] = u - gdf_edges["v"] = v - gdf_edges["key"] = k - gdf_edges = gdf_edges.set_index(["u", "v", "key"]) - - msg = "Created edges GeoDataFrame from graph" - utils.log(msg, level=lg.INFO) - - if nodes and edges: - return gdf_nodes, gdf_edges - - if nodes: - return gdf_nodes - - if edges: - return gdf_edges - - # otherwise - msg = "You must request nodes or edges or both." - raise ValueError(msg) - - -def graph_from_gdfs( - gdf_nodes: gpd.GeoDataFrame, - gdf_edges: gpd.GeoDataFrame, - *, - graph_attrs: dict[str, Any] | None = None, -) -> nx.MultiDiGraph: - """ - Convert node and edge GeoDataFrames to a MultiDiGraph. - - This function is the inverse of `graph_to_gdfs` and is designed to work in - conjunction with it. However, you can convert arbitrary node and edge - GeoDataFrames as long as 1) `gdf_nodes` is uniquely indexed by `osmid`, 2) - `gdf_nodes` contains `x` and `y` coordinate columns representing node - geometries, 3) `gdf_edges` is uniquely multi-indexed by `(u, v, key)` - (following normal MultiDiGraph structure). This allows you to load any - node/edge Shapefiles or GeoPackage layers as GeoDataFrames then convert - them to a MultiDiGraph for network analysis. - - Note that any `geometry` attribute on `gdf_nodes` is discarded, since `x` - and `y` provide the necessary node geometry information instead. - - Parameters - ---------- - gdf_nodes - GeoDataFrame of graph nodes uniquely indexed by `osmid`. - gdf_edges - GeoDataFrame of graph edges uniquely multi-indexed by `(u, v, key)`. - graph_attrs - The new `G.graph` attribute dictionary. If None, use `gdf_edges`'s CRS - as the only graph-level attribute (`gdf_edges` must have its `crs` - attribute set). - - Returns - ------- - G - """ - if not ("x" in gdf_nodes.columns and "y" in gdf_nodes.columns): # pragma: no cover - msg = "`gdf_nodes` must contain 'x' and 'y' columns." - raise ValueError(msg) - - if not hasattr(gdf_nodes, "geometry"): - msg = "`gdf_nodes` must have a 'geometry' attribute." - raise ValueError(msg) - - # drop geometry column from gdf_nodes (as we use x and y for geometry - # information), but warn the user if the geometry values differ from the - # coordinates in the x and y columns. this results in a df instead of gdf. - msg = ( - "Discarding the `gdf_nodes` 'geometry' column, though its values " - "differ from the coordinates in the 'x' and 'y' columns." 
- ) - try: - all_x_match = (gdf_nodes.geometry.x == gdf_nodes["x"]).all() - all_y_match = (gdf_nodes.geometry.y == gdf_nodes["y"]).all() - if not (all_x_match and all_y_match): - # warn if x/y coords don't match geometry column - warn(msg, category=UserWarning, stacklevel=2) - except ValueError: # pragma: no cover - # warn if geometry column contains non-point geometry types - warn(msg, category=UserWarning, stacklevel=2) - df_nodes = gdf_nodes.drop(columns=gdf_nodes.geometry.name) - - # create graph and add graph-level attribute dict - if graph_attrs is None: - graph_attrs = {"crs": gdf_edges.crs} - G = nx.MultiDiGraph(**graph_attrs) - - # add edges and their attributes to graph, but filter out null attribute - # values so that edges only get attributes with non-null values - attr_names = gdf_edges.columns.to_list() - for (u, v, k), attr_vals in zip(gdf_edges.index, gdf_edges.to_numpy()): - data_all = zip(attr_names, attr_vals) - data = {name: val for name, val in data_all if isinstance(val, list) or pd.notna(val)} - G.add_edge(u, v, key=k, **data) - - # add any nodes with no incident edges, since they wouldn't be added above - G.add_nodes_from(set(df_nodes.index) - set(G.nodes)) - - # now all nodes are added, so set nodes' attributes - for col in df_nodes.columns: - nx.set_node_attributes(G, name=col, values=df_nodes[col].dropna()) - - msg = "Created graph from node/edge GeoDataFrames" - utils.log(msg, level=lg.INFO) - return G - - -def route_to_gdf( - G: nx.MultiDiGraph, - route: list[int], - *, - weight: str = "length", -) -> gpd.GeoDataFrame: - """ - Return a GeoDataFrame of the edges in a path, in order. - - Parameters - ---------- - G - Input graph. - route - Node IDs constituting the path. - weight - Attribute value to minimize when choosing between parallel edges. - - Returns - ------- - gdf_edges - """ - pairs = zip(route[:-1], route[1:]) - uvk = ((u, v, min(G[u][v].items(), key=lambda i: i[1][weight])[0]) for u, v in pairs) - return graph_to_gdfs(G.subgraph(route), nodes=False).loc[uvk] - - -def remove_isolated_nodes(G: nx.MultiDiGraph) -> nx.MultiDiGraph: - """ - Remove from a graph all nodes that have no incident edges. - - Parameters - ---------- - G - Graph from which to remove isolated nodes. - - Returns - ------- - G - Graph with all isolated nodes removed. - """ - # make a copy to not mutate original graph object caller passed in - G = G.copy() - - # get the set of all isolated nodes, then remove them - isolated_nodes = {node for node, degree in G.degree() if degree < 1} - G.remove_nodes_from(isolated_nodes) - - msg = f"Removed {len(isolated_nodes):,} isolated nodes" - utils.log(msg, level=lg.INFO) - return G - - -def get_largest_component(G: nx.MultiDiGraph, *, strongly: bool = False) -> nx.MultiDiGraph: - """ - Return subgraph of `G`'s largest weakly or strongly connected component. - - Parameters - ---------- - G - Input graph. - strongly - If True, return the largest strongly connected component. Otherwise - return the largest weakly connected component. - - Returns - ------- - G - The largest connected component subgraph of the original graph. 
- """ - if strongly: - kind = "strongly" - is_connected = nx.is_strongly_connected - connected_components = nx.strongly_connected_components - else: - kind = "weakly" - is_connected = nx.is_weakly_connected - connected_components = nx.weakly_connected_components - - if not is_connected(G): - # get all the connected components in graph then identify the largest - largest_cc = max(connected_components(G), key=len) - n = len(G) - - # induce (frozen) subgraph then unfreeze it by making new MultiDiGraph - G = nx.MultiDiGraph(G.subgraph(largest_cc)) - - msg = f"Got largest {kind} connected component ({len(G):,} of {n:,} total nodes)" - utils.log(msg, level=lg.INFO) - - return G - - -def get_digraph(G: nx.MultiDiGraph, *, weight: str = "length") -> nx.DiGraph: - """ - Convert MultiDiGraph to DiGraph. - - Chooses between parallel edges by minimizing `weight` attribute value. See - also `get_undirected` to convert MultiDiGraph to MultiGraph. - - Parameters - ---------- - G - Input graph. - weight - Attribute value to minimize when choosing between parallel edges. - - Returns - ------- - G - """ - # make a copy to not mutate original graph object caller passed in - G = G.copy() - to_remove: list[tuple[int, int, int]] = [] - - # identify all the parallel edges in the MultiDiGraph - parallels = ((u, v) for u, v in G.edges(keys=False) if len(G.get_edge_data(u, v)) > 1) - - # among all sets of parallel edges, remove all except the one with the - # minimum "weight" attribute value - for u, v in set(parallels): - k_min, _ = min(G.get_edge_data(u, v).items(), key=lambda x: x[1][weight]) - to_remove.extend((u, v, k) for k in G[u][v] if k != k_min) - - G.remove_edges_from(to_remove) - msg = "Converted MultiDiGraph to DiGraph" - utils.log(msg, level=lg.INFO) - - return nx.DiGraph(G) - - -def get_undirected(G: nx.MultiDiGraph) -> nx.MultiGraph: - """ - Convert MultiDiGraph to undirected MultiGraph. - - Maintains parallel edges only if their geometries differ. See also - `get_digraph` to convert MultiDiGraph to DiGraph. - - Parameters - ---------- - G - Input graph. - - Returns - ------- - Gu - """ - # make a copy to not mutate original graph object caller passed in - G = G.copy() - - # set from/to nodes before making graph undirected - for u, v, d in G.edges(data=True): - d["from"] = u - d["to"] = v - - # add geometry if missing, to compare parallel edges' geometries - if "geometry" not in d: - point_u = (G.nodes[u]["x"], G.nodes[u]["y"]) - point_v = (G.nodes[v]["x"], G.nodes[v]["y"]) - d["geometry"] = LineString([point_u, point_v]) - - # increment parallel edges' keys so we don't retain only one edge of sets - # of true parallel edges when we convert from MultiDiGraph to MultiGraph - G = _update_edge_keys(G) - - # convert MultiDiGraph to MultiGraph, retaining edges in both directions - # of parallel edges and self-loops for now - Gu = nx.MultiGraph(**G.graph) - Gu.add_nodes_from(G.nodes(data=True)) - Gu.add_edges_from(G.edges(keys=True, data=True)) - - # the previous operation added all directed edges from G as undirected - # edges in Gu. we now have duplicate edges for each bidirectional parallel - # edge or self-loop. so, look through the edges and remove any duplicates. 
- duplicate_edges = set() - for u1, v1, key1, data1 in Gu.edges(keys=True, data=True): - # if we haven't already flagged this edge as a duplicate - if (u1, v1, key1) not in duplicate_edges: - # look at every other edge between u and v, one at a time - for key2 in Gu[u1][v1]: - # don't compare this edge to itself - if key1 != key2: - # compare the first edge's data to the second's - # if they match up, flag the duplicate for removal - data2 = Gu.edges[u1, v1, key2] - if _is_duplicate_edge(data1, data2): - duplicate_edges.add((u1, v1, key2)) - - Gu.remove_edges_from(duplicate_edges) - msg = "Converted MultiDiGraph to undirected MultiGraph" - utils.log(msg, level=lg.INFO) - - return Gu - - -def _is_duplicate_edge(data1: dict[str, Any], data2: dict[str, Any]) -> bool: - """ - Check if two graph edge data dicts have the same `osmid` and `geometry`. - - Parameters - ---------- - data1 - The first edge's attribute data. - data2 - The second edge's attribute data. - - Returns - ------- - is_dupe - """ - is_dupe = False - - # if either edge's osmid contains multiple values (due to simplification) - # compare them as sets to see if they contain the same values - osmid1 = set(data1["osmid"]) if isinstance(data1["osmid"], list) else data1["osmid"] - osmid2 = set(data2["osmid"]) if isinstance(data2["osmid"], list) else data2["osmid"] - - # if they contain the same osmid or set of osmids (due to simplification) - if osmid1 == osmid2: - # if both edges have geometry attributes and they match each other - if ("geometry" in data1) and ("geometry" in data2): - if _is_same_geometry(data1["geometry"], data2["geometry"]): - is_dupe = True - - # if neither edge has a geometry attribute - elif ("geometry" not in data1) and ("geometry" not in data2): - is_dupe = True - - # if one edge has geometry attribute but the other doesn't: not dupes - else: - pass - - return is_dupe - - -def _is_same_geometry(ls1: LineString, ls2: LineString) -> bool: - """ - Determine if two LineString geometries are the same (in either direction). - - Check both the normal and reversed orders of their constituent points. - - Parameters - ---------- - ls1 - The first LineString geometry. - ls2 - The second LineString geometry. - - Returns - ------- - is_same - """ - # extract coordinates from each LineString geometry - geom1 = [tuple(coords) for coords in ls1.xy] - geom2 = [tuple(coords) for coords in ls2.xy] - - # reverse the first LineString's coordinates' direction - geom1_r = [tuple(reversed(coords)) for coords in ls1.xy] - - # if second geometry matches first in either direction, return True - return geom2 in (geom1, geom1_r) - - -def _update_edge_keys(G: nx.MultiDiGraph) -> nx.MultiDiGraph: - """ - Increment key of one edge of parallel edges that differ in geometry. - - For example, two streets from `u` to `v` that bow away from each other as - separate streets, rather than opposite direction edges of a single street. - Increment one of these edge's keys so that they do not match across - `(u, v, k)` or `(v, u, k)` so we can add both to an undirected MultiGraph. - - Parameters - ---------- - G - Input graph. - - Returns - ------- - G - """ - # identify all the edges that are duplicates based on a sorted combination - # of their origin, destination, and key. 
that is, edge uv will match edge vu - # as a duplicate, but only if they have the same key - edges = graph_to_gdfs(G, nodes=False, fill_edge_geometry=False) - edges["uvk"] = ["_".join([*sorted([str(u), str(v)]), str(k)]) for u, v, k in edges.index] - mask = edges["uvk"].duplicated(keep=False) - dupes = edges[mask].dropna(subset=["geometry"]) - - different_streets = [] - groups = dupes[["geometry", "uvk"]].groupby("uvk") - - # for each group of duplicate edges - for _, group in groups: - # for each pair of edges within this group - for geom1, geom2 in itertools.combinations(group["geometry"], 2): - # if they don't have the same geometry, flag them as different - # streets: flag edge uvk, but not edge vuk, otherwise we would - # increment both their keys and they'll still duplicate each other - if not _is_same_geometry(geom1, geom2): - different_streets.append(group.index[0]) - - # for each unique different street, increment its key to make it unique - for u, v, k in set(different_streets): - new_key = max(list(G[u][v]) + list(G[v][u])) + 1 - G.add_edge(u, v, key=new_key, **G.get_edge_data(u, v, k)) - G.remove_edge(u, v, key=k) - - return G diff --git a/tests/test_osmnx.py b/tests/test_osmnx.py index 07c367099..e7b7fdbe0 100644 --- a/tests/test_osmnx.py +++ b/tests/test_osmnx.py @@ -157,7 +157,7 @@ def test_bearings() -> None: bearings = ox.bearing._extract_edge_bearings(G, min_length=0, weight=None) assert list(bearings) == [0.0] # north bearings = ox.bearing._extract_edge_bearings( - ox.utils_graph.get_undirected(G), + ox.convert.get_undirected(G), min_length=0, weight=None, ) @@ -263,20 +263,20 @@ def test_routing() -> None: G = ox.add_edge_travel_times(G) # test value cleaning - assert ox.speed._clean_maxspeed("100,2") == 100.2 - assert ox.speed._clean_maxspeed("100.2") == 100.2 - assert ox.speed._clean_maxspeed("100 km/h") == 100.0 - assert ox.speed._clean_maxspeed("100 mph") == pytest.approx(160.934) - assert ox.speed._clean_maxspeed("60|100") == 80 - assert ox.speed._clean_maxspeed("60|100 mph") == pytest.approx(128.7472) - assert ox.speed._clean_maxspeed("signal") is None - assert ox.speed._clean_maxspeed("100;70") is None + assert ox.routing._clean_maxspeed("100,2") == 100.2 + assert ox.routing._clean_maxspeed("100.2") == 100.2 + assert ox.routing._clean_maxspeed("100 km/h") == 100.0 + assert ox.routing._clean_maxspeed("100 mph") == pytest.approx(160.934) + assert ox.routing._clean_maxspeed("60|100") == 80 + assert ox.routing._clean_maxspeed("60|100 mph") == pytest.approx(128.7472) + assert ox.routing._clean_maxspeed("signal") is None + assert ox.routing._clean_maxspeed("100;70") is None # test collapsing multiple mph values to single kph value - assert ox.speed._collapse_multiple_maxspeed_values(["25 mph", "30 mph"], np.mean) == 44.25685 + assert ox.routing._collapse_multiple_maxspeed_values(["25 mph", "30 mph"], np.mean) == 44.25685 # test collapsing invalid values: should return None - assert ox.speed._collapse_multiple_maxspeed_values(["mph", "kph"], np.mean) is None + assert ox.routing._collapse_multiple_maxspeed_values(["mph", "kph"], np.mean) is None orig_x = np.array([-122.404771]) dest_x = np.array([-122.401429]) @@ -306,7 +306,7 @@ def test_routing() -> None: route5 = ox.shortest_path(G, orig_node, dest_node, weight="travel_time") assert route5 is not None - route_edges = ox.utils_graph.route_to_gdf(G, route5, weight="travel_time") + route_edges = ox.routing.route_to_gdf(G, route5, weight="travel_time") fig, ax = ox.plot_graph_route(G, route5, save=True) @@ -565,7 +565,7 
@@ def test_graph_from() -> None: # truncate graph by bounding box bbox = ox.utils_geo.bbox_from_point(location_point, dist=400) G = ox.truncate.truncate_graph_bbox(G, bbox) - G = ox.utils_graph.get_largest_component(G, strongly=True) + G = ox.truncate.get_largest_component(G, strongly=True) # graph from address G = ox.graph_from_address(address=address, dist=500, dist_type="bbox", network_type="bike") From 604f6dcd7f42ff7571926065036f2510fb04ca0e Mon Sep 17 00:00:00 2001 From: Geoff Boeing Date: Wed, 13 Mar 2024 14:54:17 -0700 Subject: [PATCH 02/13] add covert module --- osmnx/convert.py | 527 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 527 insertions(+) create mode 100644 osmnx/convert.py diff --git a/osmnx/convert.py b/osmnx/convert.py new file mode 100644 index 000000000..a657b27df --- /dev/null +++ b/osmnx/convert.py @@ -0,0 +1,527 @@ +"""Convert NetworkX spatial graphs to/from different data types.""" + +from __future__ import annotations + +import itertools +import logging as lg +from typing import Any +from typing import Literal +from typing import overload +from warnings import warn + +import geopandas as gpd +import networkx as nx +import pandas as pd +from shapely.geometry import LineString +from shapely.geometry import Point + +from . import utils + + +# nodes and edges are both missing (therefore both default true) +@overload +def graph_to_gdfs( + G: nx.MultiGraph | nx.MultiDiGraph, + *, + node_geometry: bool = True, + fill_edge_geometry: bool = True, +) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]: ... + + +# both present/True +@overload +def graph_to_gdfs( + G: nx.MultiGraph | nx.MultiDiGraph, + *, + nodes: Literal[True], + edges: Literal[True], + node_geometry: bool = True, + fill_edge_geometry: bool = True, +) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]: ... + + +# both present, nodes true, edges false +@overload +def graph_to_gdfs( + G: nx.MultiGraph | nx.MultiDiGraph, + *, + nodes: Literal[True], + edges: Literal[False], + node_geometry: bool = True, + fill_edge_geometry: bool = True, +) -> gpd.GeoDataFrame: ... + + +# both present, nodes false, edges true +@overload +def graph_to_gdfs( + G: nx.MultiGraph | nx.MultiDiGraph, + *, + nodes: Literal[False], + edges: Literal[True], + node_geometry: bool = True, + fill_edge_geometry: bool = True, +) -> gpd.GeoDataFrame: ... + + +# nodes missing (therefore default true), edges present/true +@overload +def graph_to_gdfs( + G: nx.MultiGraph | nx.MultiDiGraph, + *, + edges: Literal[True], + node_geometry: bool = True, + fill_edge_geometry: bool = True, +) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]: ... + + +# nodes missing (therefore default true), edges present/false +@overload +def graph_to_gdfs( + G: nx.MultiGraph | nx.MultiDiGraph, + *, + edges: Literal[False], + node_geometry: bool = True, + fill_edge_geometry: bool = True, +) -> gpd.GeoDataFrame: ... + + +# nodes present/true, edges missing (therefore default true) +@overload +def graph_to_gdfs( + G: nx.MultiGraph | nx.MultiDiGraph, + *, + nodes: Literal[True], + edges: bool = True, + node_geometry: bool = True, + fill_edge_geometry: bool = True, +) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]: ... + + +# nodes present/false, edges missing (therefore default true) +@overload +def graph_to_gdfs( + G: nx.MultiGraph | nx.MultiDiGraph, + *, + nodes: Literal[False], + edges: bool = True, + node_geometry: bool = True, + fill_edge_geometry: bool = True, +) -> gpd.GeoDataFrame: ... 
+ + +def graph_to_gdfs( + G: nx.MultiGraph | nx.MultiDiGraph, + *, + nodes: bool = True, + edges: bool = True, + node_geometry: bool = True, + fill_edge_geometry: bool = True, +) -> gpd.GeoDataFrame | tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]: + """ + Convert a MultiGraph or MultiDiGraph to node and/or edge GeoDataFrames. + + This function is the inverse of `graph_from_gdfs`. + + Parameters + ---------- + G + Input graph. + nodes + If True, convert graph nodes to a GeoDataFrame and return it. + edges + If True, convert graph edges to a GeoDataFrame and return it. + node_geometry + If True, create a geometry column from node "x" and "y" attributes. + fill_edge_geometry + If True, fill missing edge geometry fields using endpoint nodes' + coordinates to create a LineString. + + Returns + ------- + gdf_nodes or gdf_edges or (gdf_nodes, gdf_edges) + `gdf_nodes` is indexed by `osmid` and `gdf_edges` is multi-indexed by + `(u, v, key)` following normal MultiGraph/MultiDiGraph structure. + """ + crs = G.graph["crs"] + + if nodes: + if len(G.nodes) == 0: # pragma: no cover + msg = "Graph contains no nodes." + raise ValueError(msg) + + uvk, data = zip(*G.nodes(data=True)) + + if node_geometry: + # convert node x/y attributes to Points for geometry column + node_geoms = (Point(d["x"], d["y"]) for d in data) + gdf_nodes = gpd.GeoDataFrame(data, index=uvk, crs=crs, geometry=list(node_geoms)) + else: + gdf_nodes = gpd.GeoDataFrame(data, index=uvk) + + gdf_nodes.index = gdf_nodes.index.rename("osmid") + msg = "Created nodes GeoDataFrame from graph" + utils.log(msg, level=lg.INFO) + + if edges: + if len(G.edges) == 0: # pragma: no cover + msg = "Graph contains no edges." + raise ValueError(msg) + + u, v, k, data = zip(*G.edges(keys=True, data=True)) + + if fill_edge_geometry: + # subroutine to get geometry for every edge: if edge already has + # geometry return it, otherwise create it using the incident nodes + x_lookup = nx.get_node_attributes(G, "x") + y_lookup = nx.get_node_attributes(G, "y") + + def _make_edge_geometry( + u: int, + v: int, + data: dict[str, Any], + x: dict[int, float] = x_lookup, + y: dict[int, float] = y_lookup, + ) -> LineString: + if "geometry" in data: + return data["geometry"] + + # otherwise + return LineString((Point((x[u], y[u])), Point((x[v], y[v])))) + + edge_geoms = map(_make_edge_geometry, u, v, data) + gdf_edges = gpd.GeoDataFrame(data, crs=crs, geometry=list(edge_geoms)) + + else: + gdf_edges = gpd.GeoDataFrame(data) + if "geometry" not in gdf_edges.columns: + # if no edges have a geometry attribute, create null column + gdf_edges = gdf_edges.set_geometry([None] * len(gdf_edges)) + gdf_edges = gdf_edges.set_crs(crs) + + # add u, v, key attributes as index + gdf_edges["u"] = u + gdf_edges["v"] = v + gdf_edges["key"] = k + gdf_edges = gdf_edges.set_index(["u", "v", "key"]) + + msg = "Created edges GeoDataFrame from graph" + utils.log(msg, level=lg.INFO) + + if nodes and edges: + return gdf_nodes, gdf_edges + + if nodes: + return gdf_nodes + + if edges: + return gdf_edges + + # otherwise + msg = "You must request nodes or edges or both." + raise ValueError(msg) + + +def graph_from_gdfs( + gdf_nodes: gpd.GeoDataFrame, + gdf_edges: gpd.GeoDataFrame, + *, + graph_attrs: dict[str, Any] | None = None, +) -> nx.MultiDiGraph: + """ + Convert node and edge GeoDataFrames to a MultiDiGraph. + + This function is the inverse of `graph_to_gdfs` and is designed to work in + conjunction with it. 
However, you can convert arbitrary node and edge + GeoDataFrames as long as 1) `gdf_nodes` is uniquely indexed by `osmid`, 2) + `gdf_nodes` contains `x` and `y` coordinate columns representing node + geometries, 3) `gdf_edges` is uniquely multi-indexed by `(u, v, key)` + (following normal MultiDiGraph structure). This allows you to load any + node/edge Shapefiles or GeoPackage layers as GeoDataFrames then convert + them to a MultiDiGraph for network analysis. + + Note that any `geometry` attribute on `gdf_nodes` is discarded, since `x` + and `y` provide the necessary node geometry information instead. + + Parameters + ---------- + gdf_nodes + GeoDataFrame of graph nodes uniquely indexed by `osmid`. + gdf_edges + GeoDataFrame of graph edges uniquely multi-indexed by `(u, v, key)`. + graph_attrs + The new `G.graph` attribute dictionary. If None, use `gdf_edges`'s CRS + as the only graph-level attribute (`gdf_edges` must have its `crs` + attribute set). + + Returns + ------- + G + """ + if not ("x" in gdf_nodes.columns and "y" in gdf_nodes.columns): # pragma: no cover + msg = "`gdf_nodes` must contain 'x' and 'y' columns." + raise ValueError(msg) + + if not hasattr(gdf_nodes, "geometry"): + msg = "`gdf_nodes` must have a 'geometry' attribute." + raise ValueError(msg) + + # drop geometry column from gdf_nodes (as we use x and y for geometry + # information), but warn the user if the geometry values differ from the + # coordinates in the x and y columns. this results in a df instead of gdf. + msg = ( + "Discarding the `gdf_nodes` 'geometry' column, though its values " + "differ from the coordinates in the 'x' and 'y' columns." + ) + try: + all_x_match = (gdf_nodes.geometry.x == gdf_nodes["x"]).all() + all_y_match = (gdf_nodes.geometry.y == gdf_nodes["y"]).all() + if not (all_x_match and all_y_match): + # warn if x/y coords don't match geometry column + warn(msg, category=UserWarning, stacklevel=2) + except ValueError: # pragma: no cover + # warn if geometry column contains non-point geometry types + warn(msg, category=UserWarning, stacklevel=2) + df_nodes = gdf_nodes.drop(columns=gdf_nodes.geometry.name) + + # create graph and add graph-level attribute dict + if graph_attrs is None: + graph_attrs = {"crs": gdf_edges.crs} + G = nx.MultiDiGraph(**graph_attrs) + + # add edges and their attributes to graph, but filter out null attribute + # values so that edges only get attributes with non-null values + attr_names = gdf_edges.columns.to_list() + for (u, v, k), attr_vals in zip(gdf_edges.index, gdf_edges.to_numpy()): + data_all = zip(attr_names, attr_vals) + data = {name: val for name, val in data_all if isinstance(val, list) or pd.notna(val)} + G.add_edge(u, v, key=k, **data) + + # add any nodes with no incident edges, since they wouldn't be added above + G.add_nodes_from(set(df_nodes.index) - set(G.nodes)) + + # now all nodes are added, so set nodes' attributes + for col in df_nodes.columns: + nx.set_node_attributes(G, name=col, values=df_nodes[col].dropna()) + + msg = "Created graph from node/edge GeoDataFrames" + utils.log(msg, level=lg.INFO) + return G + + +def get_digraph(G: nx.MultiDiGraph, *, weight: str = "length") -> nx.DiGraph: + """ + Convert MultiDiGraph to DiGraph. + + Chooses between parallel edges by minimizing `weight` attribute value. See + also `get_undirected` to convert MultiDiGraph to MultiGraph. + + Parameters + ---------- + G + Input graph. + weight + Attribute value to minimize when choosing between parallel edges. 
+ + Returns + ------- + G + """ + # make a copy to not mutate original graph object caller passed in + G = G.copy() + to_remove: list[tuple[int, int, int]] = [] + + # identify all the parallel edges in the MultiDiGraph + parallels = ((u, v) for u, v in G.edges(keys=False) if len(G.get_edge_data(u, v)) > 1) + + # among all sets of parallel edges, remove all except the one with the + # minimum "weight" attribute value + for u, v in set(parallels): + k_min, _ = min(G.get_edge_data(u, v).items(), key=lambda x: x[1][weight]) + to_remove.extend((u, v, k) for k in G[u][v] if k != k_min) + + G.remove_edges_from(to_remove) + msg = "Converted MultiDiGraph to DiGraph" + utils.log(msg, level=lg.INFO) + + return nx.DiGraph(G) + + +def get_undirected(G: nx.MultiDiGraph) -> nx.MultiGraph: + """ + Convert MultiDiGraph to undirected MultiGraph. + + Maintains parallel edges only if their geometries differ. See also + `get_digraph` to convert MultiDiGraph to DiGraph. + + Parameters + ---------- + G + Input graph. + + Returns + ------- + Gu + """ + # make a copy to not mutate original graph object caller passed in + G = G.copy() + + # set from/to nodes before making graph undirected + for u, v, d in G.edges(data=True): + d["from"] = u + d["to"] = v + + # add geometry if missing, to compare parallel edges' geometries + if "geometry" not in d: + point_u = (G.nodes[u]["x"], G.nodes[u]["y"]) + point_v = (G.nodes[v]["x"], G.nodes[v]["y"]) + d["geometry"] = LineString([point_u, point_v]) + + # increment parallel edges' keys so we don't retain only one edge of sets + # of true parallel edges when we convert from MultiDiGraph to MultiGraph + G = _update_edge_keys(G) + + # convert MultiDiGraph to MultiGraph, retaining edges in both directions + # of parallel edges and self-loops for now + Gu = nx.MultiGraph(**G.graph) + Gu.add_nodes_from(G.nodes(data=True)) + Gu.add_edges_from(G.edges(keys=True, data=True)) + + # the previous operation added all directed edges from G as undirected + # edges in Gu. we now have duplicate edges for each bidirectional parallel + # edge or self-loop. so, look through the edges and remove any duplicates. + duplicate_edges = set() + for u1, v1, key1, data1 in Gu.edges(keys=True, data=True): + # if we haven't already flagged this edge as a duplicate + if (u1, v1, key1) not in duplicate_edges: + # look at every other edge between u and v, one at a time + for key2 in Gu[u1][v1]: + # don't compare this edge to itself + if key1 != key2: + # compare the first edge's data to the second's + # if they match up, flag the duplicate for removal + data2 = Gu.edges[u1, v1, key2] + if _is_duplicate_edge(data1, data2): + duplicate_edges.add((u1, v1, key2)) + + Gu.remove_edges_from(duplicate_edges) + msg = "Converted MultiDiGraph to undirected MultiGraph" + utils.log(msg, level=lg.INFO) + + return Gu + + +def _is_duplicate_edge(data1: dict[str, Any], data2: dict[str, Any]) -> bool: + """ + Check if two graph edge data dicts have the same `osmid` and `geometry`. + + Parameters + ---------- + data1 + The first edge's attribute data. + data2 + The second edge's attribute data. 
+ + Returns + ------- + is_dupe + """ + is_dupe = False + + # if either edge's osmid contains multiple values (due to simplification) + # compare them as sets to see if they contain the same values + osmid1 = set(data1["osmid"]) if isinstance(data1["osmid"], list) else data1["osmid"] + osmid2 = set(data2["osmid"]) if isinstance(data2["osmid"], list) else data2["osmid"] + + # if they contain the same osmid or set of osmids (due to simplification) + if osmid1 == osmid2: + # if both edges have geometry attributes and they match each other + if ("geometry" in data1) and ("geometry" in data2): + if _is_same_geometry(data1["geometry"], data2["geometry"]): + is_dupe = True + + # if neither edge has a geometry attribute + elif ("geometry" not in data1) and ("geometry" not in data2): + is_dupe = True + + # if one edge has geometry attribute but the other doesn't: not dupes + else: + pass + + return is_dupe + + +def _is_same_geometry(ls1: LineString, ls2: LineString) -> bool: + """ + Determine if two LineString geometries are the same (in either direction). + + Check both the normal and reversed orders of their constituent points. + + Parameters + ---------- + ls1 + The first LineString geometry. + ls2 + The second LineString geometry. + + Returns + ------- + is_same + """ + # extract coordinates from each LineString geometry + geom1 = [tuple(coords) for coords in ls1.xy] + geom2 = [tuple(coords) for coords in ls2.xy] + + # reverse the first LineString's coordinates' direction + geom1_r = [tuple(reversed(coords)) for coords in ls1.xy] + + # if second geometry matches first in either direction, return True + return geom2 in (geom1, geom1_r) + + +def _update_edge_keys(G: nx.MultiDiGraph) -> nx.MultiDiGraph: + """ + Increment key of one edge of parallel edges that differ in geometry. + + For example, two streets from `u` to `v` that bow away from each other as + separate streets, rather than opposite direction edges of a single street. + Increment one of these edge's keys so that they do not match across + `(u, v, k)` or `(v, u, k)` so we can add both to an undirected MultiGraph. + + Parameters + ---------- + G + Input graph. + + Returns + ------- + G + """ + # identify all the edges that are duplicates based on a sorted combination + # of their origin, destination, and key. 
that is, edge uv will match edge vu + # as a duplicate, but only if they have the same key + edges = graph_to_gdfs(G, nodes=False, fill_edge_geometry=False) + edges["uvk"] = ["_".join([*sorted([str(u), str(v)]), str(k)]) for u, v, k in edges.index] + mask = edges["uvk"].duplicated(keep=False) + dupes = edges[mask].dropna(subset=["geometry"]) + + different_streets = [] + groups = dupes[["geometry", "uvk"]].groupby("uvk") + + # for each group of duplicate edges + for _, group in groups: + # for each pair of edges within this group + for geom1, geom2 in itertools.combinations(group["geometry"], 2): + # if they don't have the same geometry, flag them as different + # streets: flag edge uvk, but not edge vuk, otherwise we would + # increment both their keys and they'll still duplicate each other + if not _is_same_geometry(geom1, geom2): + different_streets.append(group.index[0]) + + # for each unique different street, increment its key to make it unique + for u, v, k in set(different_streets): + new_key = max(list(G[u][v]) + list(G[v][u])) + 1 + G.add_edge(u, v, key=new_key, **G.get_edge_data(u, v, k)) + G.remove_edge(u, v, key=k) + + return G From 6de9e961f8e2916341725d0670c027bece70be4b Mon Sep 17 00:00:00 2001 From: Geoff Boeing Date: Wed, 13 Mar 2024 15:54:14 -0700 Subject: [PATCH 03/13] rename get_largest_component, get_undirected, and get_digraph functions --- osmnx/__init__.py | 4 ++-- osmnx/bearing.py | 2 +- osmnx/convert.py | 8 ++++---- osmnx/graph.py | 2 +- osmnx/io.py | 2 +- osmnx/plot.py | 2 +- osmnx/routing.py | 2 +- osmnx/stats.py | 2 +- osmnx/truncate.py | 6 +++--- tests/test_osmnx.py | 8 ++++---- 10 files changed, 19 insertions(+), 19 deletions(-) diff --git a/osmnx/__init__.py b/osmnx/__init__.py index 502d0082f..596d51912 100644 --- a/osmnx/__init__.py +++ b/osmnx/__init__.py @@ -9,10 +9,10 @@ # by exposing these functions directly in the package's namespace. from .bearing import add_edge_bearings as add_edge_bearings from .bearing import orientation_entropy as orientation_entropy -from .convert import get_digraph as get_digraph -from .convert import get_undirected as get_undirected from .convert import graph_from_gdfs as graph_from_gdfs from .convert import graph_to_gdfs as graph_to_gdfs +from .convert import to_digraph as to_digraph +from .convert import to_undirected as to_undirected from .distance import nearest_edges as nearest_edges from .distance import nearest_nodes as nearest_nodes from .elevation import add_edge_grades as add_edge_grades diff --git a/osmnx/bearing.py b/osmnx/bearing.py index dc7fe18ec..a48703819 100644 --- a/osmnx/bearing.py +++ b/osmnx/bearing.py @@ -226,7 +226,7 @@ def _extract_edge_bearings( msg = ( "`G` is a MultiDiGraph, so edge bearings will be directional (one per " "edge). If you want bidirectional edge bearings (two reciprocal bearings " - "per edge), pass a MultiGraph instead. Use `convert.get_undirected`." + "per edge), pass a MultiGraph instead. Use `convert.to_undirected`." ) warn(msg, category=UserWarning, stacklevel=2) return bearings_array diff --git a/osmnx/convert.py b/osmnx/convert.py index a657b27df..fa26f2ff0 100644 --- a/osmnx/convert.py +++ b/osmnx/convert.py @@ -309,12 +309,12 @@ def graph_from_gdfs( return G -def get_digraph(G: nx.MultiDiGraph, *, weight: str = "length") -> nx.DiGraph: +def to_digraph(G: nx.MultiDiGraph, *, weight: str = "length") -> nx.DiGraph: """ Convert MultiDiGraph to DiGraph. Chooses between parallel edges by minimizing `weight` attribute value. 
See - also `get_undirected` to convert MultiDiGraph to MultiGraph. + also `to_undirected` to convert MultiDiGraph to MultiGraph. Parameters ---------- @@ -347,12 +347,12 @@ def get_digraph(G: nx.MultiDiGraph, *, weight: str = "length") -> nx.DiGraph: return nx.DiGraph(G) -def get_undirected(G: nx.MultiDiGraph) -> nx.MultiGraph: +def to_undirected(G: nx.MultiDiGraph) -> nx.MultiGraph: """ Convert MultiDiGraph to undirected MultiGraph. Maintains parallel edges only if their geometries differ. See also - `get_digraph` to convert MultiDiGraph to DiGraph. + `to_digraph` to convert MultiDiGraph to DiGraph. Parameters ---------- diff --git a/osmnx/graph.py b/osmnx/graph.py index 9641561c7..3fc07817a 100644 --- a/osmnx/graph.py +++ b/osmnx/graph.py @@ -621,7 +621,7 @@ def _create_graph( # retain only the largest connected component if retain_all=False if not retain_all: - G = truncate.get_largest_component(G) + G = truncate.largest_component(G) msg = f"Created graph with {len(G):,} nodes and {len(G.edges):,} edges" utils.log(msg, level=lg.INFO) diff --git a/osmnx/io.py b/osmnx/io.py index 866c06b2c..4e37b0f06 100644 --- a/osmnx/io.py +++ b/osmnx/io.py @@ -60,7 +60,7 @@ def save_graph_geopackage( if directed: gdf_nodes, gdf_edges = convert.graph_to_gdfs(G) else: - gdf_nodes, gdf_edges = convert.graph_to_gdfs(convert.get_undirected(G)) + gdf_nodes, gdf_edges = convert.graph_to_gdfs(convert.to_undirected(G)) gdf_nodes = _stringify_nonnumeric_cols(gdf_nodes) gdf_edges = _stringify_nonnumeric_cols(gdf_edges) diff --git a/osmnx/plot.py b/osmnx/plot.py index d4b58a514..965e6eb3f 100644 --- a/osmnx/plot.py +++ b/osmnx/plot.py @@ -510,7 +510,7 @@ def plot_figure_ground( } # we need an undirected graph to find every edge incident on a node - Gu = convert.get_undirected(G) + Gu = convert.to_undirected(G) # for each edge, get a linewidth according to street type edge_linewidths = [] diff --git a/osmnx/routing.py b/osmnx/routing.py index 25764d7db..493ecbb61 100644 --- a/osmnx/routing.py +++ b/osmnx/routing.py @@ -254,7 +254,7 @@ def k_shortest_paths( """ _verify_edge_attribute(G, weight) paths_gen = nx.shortest_simple_paths( - G=convert.get_digraph(G, weight=weight), + G=convert.to_digraph(G, weight=weight), source=orig, target=dest, weight=weight, diff --git a/osmnx/stats.py b/osmnx/stats.py index f2e5f83c0..14115a46c 100644 --- a/osmnx/stats.py +++ b/osmnx/stats.py @@ -372,7 +372,7 @@ def basic_stats( - `streets_per_node_counts` - see `streets_per_node_counts` function documentation - `streets_per_node_proportions` - see `streets_per_node_proportions` function documentation """ - Gu = convert.get_undirected(G) + Gu = convert.to_undirected(G) stats: dict[str, Any] = {} stats["n"] = len(G.nodes) diff --git a/osmnx/truncate.py b/osmnx/truncate.py index b8b59b4cd..0c86732ec 100644 --- a/osmnx/truncate.py +++ b/osmnx/truncate.py @@ -65,7 +65,7 @@ def truncate_graph_dist( # remove any isolated nodes and retain only the largest component (if # retain_all is True) if not retain_all: - G = get_largest_component(remove_isolated_nodes(G)) + G = largest_component(remove_isolated_nodes(G)) msg = f"Truncated graph by {weight}-weighted network distance" utils.log(msg, level=lg.INFO) @@ -174,7 +174,7 @@ def truncate_graph_polygon( utils.log(msg, level=lg.INFO) if not retain_all: - G = get_largest_component(remove_isolated_nodes(G)) + G = largest_component(remove_isolated_nodes(G)) msg = "Truncated graph by polygon" utils.log(msg, level=lg.INFO) @@ -207,7 +207,7 @@ def remove_isolated_nodes(G: nx.MultiDiGraph) -> 
nx.MultiDiGraph: return G -def get_largest_component(G: nx.MultiDiGraph, *, strongly: bool = False) -> nx.MultiDiGraph: +def largest_component(G: nx.MultiDiGraph, *, strongly: bool = False) -> nx.MultiDiGraph: """ Return subgraph of `G`'s largest weakly or strongly connected component. diff --git a/tests/test_osmnx.py b/tests/test_osmnx.py index e7b7fdbe0..75f43c9a9 100644 --- a/tests/test_osmnx.py +++ b/tests/test_osmnx.py @@ -141,7 +141,7 @@ def test_bearings() -> None: G_proj = ox.project_graph(G) # calculate entropy - Gu = ox.get_undirected(G) + Gu = ox.to_undirected(G) entropy = ox.bearing.orientation_entropy(Gu, weight="length") fig, ax = ox.plot.plot_orientation(Gu, area=True, title="Title") fig, ax = ox.plot.plot_orientation(Gu, ax=ax, area=False, title="Title") @@ -157,7 +157,7 @@ def test_bearings() -> None: bearings = ox.bearing._extract_edge_bearings(G, min_length=0, weight=None) assert list(bearings) == [0.0] # north bearings = ox.bearing._extract_edge_bearings( - ox.convert.get_undirected(G), + ox.convert.to_undirected(G), min_length=0, weight=None, ) @@ -371,7 +371,7 @@ def test_nearest() -> None: # get graph and x/y coords to search G = ox.graph_from_point(location_point, dist=500, network_type="drive", simplify=False) Gp = ox.project_graph(G) - points = ox.utils_geo.sample_points(ox.get_undirected(Gp), 5) + points = ox.utils_geo.sample_points(ox.to_undirected(Gp), 5) X = points.x.to_numpy() Y = points.y.to_numpy() @@ -565,7 +565,7 @@ def test_graph_from() -> None: # truncate graph by bounding box bbox = ox.utils_geo.bbox_from_point(location_point, dist=400) G = ox.truncate.truncate_graph_bbox(G, bbox) - G = ox.truncate.get_largest_component(G, strongly=True) + G = ox.truncate.largest_component(G, strongly=True) # graph from address G = ox.graph_from_address(address=address, dist=500, dist_type="bbox", network_type="bike") From a5d915e61b1fb0ad4d7bd5e286a6a73c8515da87 Mon Sep 17 00:00:00 2001 From: Geoff Boeing Date: Wed, 13 Mar 2024 17:20:56 -0700 Subject: [PATCH 04/13] update docstring --- osmnx/convert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/osmnx/convert.py b/osmnx/convert.py index fa26f2ff0..6cde96038 100644 --- a/osmnx/convert.py +++ b/osmnx/convert.py @@ -1,4 +1,4 @@ -"""Convert NetworkX spatial graphs to/from different data types.""" +"""Convert spatial graphs to/from different data types.""" from __future__ import annotations From 67070677d40268ebf29b457452575b4e97a4288d Mon Sep 17 00:00:00 2001 From: Geoff Boeing Date: Wed, 13 Mar 2024 17:31:35 -0700 Subject: [PATCH 05/13] update changelog --- CHANGELOG.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b4d75584c..e79b5dda8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,15 @@ Read the v2 [migration guide](https://github.com/gboeing/osmnx/issues/1123) - deprecate settings module's renamed or obsolete settings (#1138) - deprecate save_graph_xml function's renamed or obsolete parameters (#1138) +- deprecate simplify_graph function's renamed endpoint_attrs argument (#1146) +- deprecate utils_graph.get_digraph function and replace it with covert.to_digraph function (#1146) +- deprecate utils_graph.get_undirected function and replace it with covert.to_undirected function (#1146) +- deprecate utils_graph.graph_to_gdfs function and replace it with covert.graph_to_gdfs function (#1146) +- deprecate utils_graph.graph_from_gdfs function and replace it with covert.graph_from_gdfs function (#1146) +- deprecate utils_graph.remove_isolated_nodes 
function and replace it with truncate.remove_isolated_nodes function (#1146) +- deprecate utils_graph.get_largest_component function and replace it with truncate.largest_component function (#1146) +- deprecate utils_graph.route_to_gdf function and replace it with routing.route_to_gdf function (#1146) +- deprecate speed module and move all of its functionality to the routing module (#1146) ## 1.9.1 (2024-02-01) From e2d649a2ac079ffa0eda1143dd94f79125f97603 Mon Sep 17 00:00:00 2001 From: Geoff Boeing Date: Thu, 14 Mar 2024 13:50:18 -0700 Subject: [PATCH 06/13] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e79b5dda8..23b2e99e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,7 @@ Read the v2 [migration guide](https://github.com/gboeing/osmnx/issues/1123) - deprecate settings module's renamed or obsolete settings (#1138) - deprecate save_graph_xml function's renamed or obsolete parameters (#1138) +- deprecate graph_from_xml tags and polygon function parameters (#1146) - deprecate simplify_graph function's renamed endpoint_attrs argument (#1146) - deprecate utils_graph.get_digraph function and replace it with covert.to_digraph function (#1146) - deprecate utils_graph.get_undirected function and replace it with covert.to_undirected function (#1146) From 2f057acf9fe45fa0266c1d245a08789d5ea129b3 Mon Sep 17 00:00:00 2001 From: Geoff Boeing Date: Thu, 14 Mar 2024 13:53:54 -0700 Subject: [PATCH 07/13] clean up package level namespace --- osmnx/__init__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/osmnx/__init__.py b/osmnx/__init__.py index 596d51912..0b6e82aea 100644 --- a/osmnx/__init__.py +++ b/osmnx/__init__.py @@ -11,8 +11,6 @@ from .bearing import orientation_entropy as orientation_entropy from .convert import graph_from_gdfs as graph_from_gdfs from .convert import graph_to_gdfs as graph_to_gdfs -from .convert import to_digraph as to_digraph -from .convert import to_undirected as to_undirected from .distance import nearest_edges as nearest_edges from .distance import nearest_nodes as nearest_nodes from .elevation import add_edge_grades as add_edge_grades @@ -42,7 +40,6 @@ from .plot import plot_graph_route as plot_graph_route from .plot import plot_graph_routes as plot_graph_routes from .plot import plot_orientation as plot_orientation -from .projection import project_gdf as project_gdf from .projection import project_graph as project_graph from .routing import add_edge_speeds as add_edge_speeds from .routing import add_edge_travel_times as add_edge_travel_times From 633d7087a5689f48fa8338757d2eb271d3db7a7d Mon Sep 17 00:00:00 2001 From: Geoff Boeing Date: Thu, 14 Mar 2024 13:57:56 -0700 Subject: [PATCH 08/13] fix function calls --- tests/test_osmnx.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_osmnx.py b/tests/test_osmnx.py index 75f43c9a9..5f7f025eb 100644 --- a/tests/test_osmnx.py +++ b/tests/test_osmnx.py @@ -88,7 +88,7 @@ def test_geocoder() -> None: city = ox.geocode_to_gdf("R2999176", by_osmid=True) city = ox.geocode_to_gdf(place1, which_result=1) city = ox.geocode_to_gdf(place2) - city_projected = ox.project_gdf(city, to_crs="epsg:3395") + city_projected = ox.projection.project_gdf(city, to_crs="epsg:3395") # test geocoding a bad query: should raise exception with pytest.raises(ox._errors.InsufficientResponseError): @@ -141,7 +141,7 @@ def test_bearings() -> None: G_proj = ox.project_graph(G) # calculate entropy - Gu = ox.to_undirected(G) + Gu = 
ox.convert.to_undirected(G) entropy = ox.bearing.orientation_entropy(Gu, weight="length") fig, ax = ox.plot.plot_orientation(Gu, area=True, title="Title") fig, ax = ox.plot.plot_orientation(Gu, ax=ax, area=False, title="Title") @@ -371,7 +371,7 @@ def test_nearest() -> None: # get graph and x/y coords to search G = ox.graph_from_point(location_point, dist=500, network_type="drive", simplify=False) Gp = ox.project_graph(G) - points = ox.utils_geo.sample_points(ox.to_undirected(Gp), 5) + points = ox.utils_geo.sample_points(ox.convert.to_undirected(Gp), 5) X = points.x.to_numpy() Y = points.y.to_numpy() From d9fecf749c0cb414084936b035e0a7dd1040b63d Mon Sep 17 00:00:00 2001 From: Geoff Boeing Date: Thu, 14 Mar 2024 16:47:18 -0700 Subject: [PATCH 09/13] clean up truncate module --- osmnx/truncate.py | 68 +++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/osmnx/truncate.py b/osmnx/truncate.py index 0c86732ec..591158e03 100644 --- a/osmnx/truncate.py +++ b/osmnx/truncate.py @@ -25,10 +25,10 @@ def truncate_graph_dist( retain_all: bool = False, ) -> nx.MultiDiGraph: """ - Remove every node farther than some network distance from `source_node`. + Remove from a graph every node beyond some network distance from a node. - This function can be slow for large graphs, as it must calculate shortest - path distances between `source_node` and every other graph node. + This function must calculate shortest path distances between `source_node` + and every other graph node, which can be slow on large graphs. Parameters ---------- @@ -62,8 +62,7 @@ def truncate_graph_dist( G = G.copy() G.remove_nodes_from(distant_nodes | unreachable_nodes) - # remove any isolated nodes and retain only the largest component (if - # retain_all is True) + # keep only the largest weakly connected component if retain_all is False if not retain_all: G = largest_component(remove_isolated_nodes(G)) @@ -80,7 +79,7 @@ def truncate_graph_bbox( retain_all: bool = False, ) -> nx.MultiDiGraph: """ - Remove every node in graph that falls outside a bounding box. + Remove from a graph every node that falls outside a bounding box. Parameters ---------- @@ -117,7 +116,7 @@ def truncate_graph_polygon( truncate_by_edge: bool = False, ) -> nx.MultiDiGraph: """ - Remove every node in graph that falls outside a (Multi)Polygon. + Remove from a graph every node that falls outside a (Multi)Polygon. Parameters ---------- @@ -173,6 +172,7 @@ def truncate_graph_polygon( msg = f"Removed {len(nodes_to_remove):,} nodes outside polygon" utils.log(msg, level=lg.INFO) + # keep only the largest weakly connected component if retain_all is False if not retain_all: G = largest_component(remove_isolated_nodes(G)) @@ -181,35 +181,9 @@ def truncate_graph_polygon( return G -def remove_isolated_nodes(G: nx.MultiDiGraph) -> nx.MultiDiGraph: - """ - Remove from a graph all nodes that have no incident edges. - - Parameters - ---------- - G - Graph from which to remove isolated nodes. - - Returns - ------- - G - Graph with all isolated nodes removed. 
- """ - # make a copy to not mutate original graph object caller passed in - G = G.copy() - - # get the set of all isolated nodes, then remove them - isolated_nodes = {node for node, degree in G.degree() if degree < 1} - G.remove_nodes_from(isolated_nodes) - - msg = f"Removed {len(isolated_nodes):,} isolated nodes" - utils.log(msg, level=lg.INFO) - return G - - def largest_component(G: nx.MultiDiGraph, *, strongly: bool = False) -> nx.MultiDiGraph: """ - Return subgraph of `G`'s largest weakly or strongly connected component. + Return `G`'s largest weakly or strongly connected component as a graph. Parameters ---------- @@ -245,3 +219,29 @@ def largest_component(G: nx.MultiDiGraph, *, strongly: bool = False) -> nx.Multi utils.log(msg, level=lg.INFO) return G + + +def remove_isolated_nodes(G: nx.MultiDiGraph) -> nx.MultiDiGraph: + """ + Remove from a graph all 0-degree nodes (i.e., no incident edges). + + Parameters + ---------- + G + Graph from which to remove 0-degree nodes. + + Returns + ------- + G + Graph with all 0-degree nodes removed. + """ + # make a copy to not mutate original graph object caller passed in + G = G.copy() + + # get the set of all 0-degree nodes, then remove them + isolated_nodes = {node for node, degree in G.degree() if degree < 1} + G.remove_nodes_from(isolated_nodes) + + msg = f"Removed {len(isolated_nodes):,} 0-degree nodes" + utils.log(msg, level=lg.INFO) + return G From 7384a3a767b2715aa0ac0aa167581cb89a10bec5 Mon Sep 17 00:00:00 2001 From: Geoff Boeing Date: Thu, 14 Mar 2024 17:17:51 -0700 Subject: [PATCH 10/13] streamline truncate module and how retain_all param works throughout --- osmnx/graph.py | 37 ++++++++++++++++++------------------- osmnx/truncate.py | 24 ++---------------------- 2 files changed, 20 insertions(+), 41 deletions(-) diff --git a/osmnx/graph.py b/osmnx/graph.py index 3fc07817a..c52afbcf8 100644 --- a/osmnx/graph.py +++ b/osmnx/graph.py @@ -456,18 +456,17 @@ def graph_from_polygon( # create buffered graph from the downloaded data bidirectional = network_type in settings.bidirectional_network_types - G_buff = _create_graph(response_jsons, retain_all, bidirectional) + G_buff = _create_graph(response_jsons, bidirectional) # truncate buffered graph to the buffered polygon and retain_all for # now. needed because overpass returns entire ways that also include # nodes outside the poly if the way (that is, a way with a single OSM # ID) has a node inside the poly at some point. 
- G_buff = truncate.truncate_graph_polygon( - G_buff, - poly_buff, - retain_all=True, - truncate_by_edge=truncate_by_edge, - ) + G_buff = truncate.truncate_graph_polygon(G_buff, poly_buff, truncate_by_edge=truncate_by_edge) + + # keep only the largest weakly connected component if retain_all is False + if not retain_all: + G_buff = truncate.largest_component(G_buff, strongly=False) # simplify the graph topology if simplify: @@ -478,12 +477,13 @@ def graph_from_polygon( # intersections along the street that may now only connect 2 street # segments in the network, but in reality also connect to an # intersection just outside the polygon - G = truncate.truncate_graph_polygon( - G_buff, - polygon, - retain_all=retain_all, - truncate_by_edge=truncate_by_edge, - ) + G = truncate.truncate_graph_polygon(G_buff, polygon, truncate_by_edge=truncate_by_edge) + + # keep only the largest weakly connected component if retain_all is False + # we're doing this again in case the last truncate disconnected anything + # on the periphery + if not retain_all: + G = truncate.largest_component(G, strongly=False) # count how many physical streets in buffered graph connect to each # intersection in un-buffered graph, to retain true counts for each @@ -538,7 +538,11 @@ def graph_from_xml( response_jsons = [_osm_xml._overpass_json_from_xml(filepath, encoding)] # create graph using this response JSON - G = _create_graph(response_jsons, retain_all, bidirectional) + G = _create_graph(response_jsons, bidirectional) + + # keep only the largest weakly connected component if retain_all is False + if not retain_all: + G = truncate.largest_component(G, strongly=False) # simplify the graph topology as the last step if simplify: @@ -551,7 +555,6 @@ def graph_from_xml( def _create_graph( response_jsons: Iterable[dict[str, Any]], - retain_all: bool, # noqa: FBT001 bidirectional: bool, # noqa: FBT001 ) -> nx.MultiDiGraph: """ @@ -619,10 +622,6 @@ def _create_graph( G.add_nodes_from(nodes.items()) _add_paths(G, paths.values(), bidirectional) - # retain only the largest connected component if retain_all=False - if not retain_all: - G = truncate.largest_component(G) - msg = f"Created graph with {len(G):,} nodes and {len(G.edges):,} edges" utils.log(msg, level=lg.INFO) diff --git a/osmnx/truncate.py b/osmnx/truncate.py index 591158e03..eeeb8eec4 100644 --- a/osmnx/truncate.py +++ b/osmnx/truncate.py @@ -22,7 +22,6 @@ def truncate_graph_dist( dist: float, *, weight: str = "length", - retain_all: bool = False, ) -> nx.MultiDiGraph: """ Remove from a graph every node beyond some network distance from a node. @@ -42,9 +41,6 @@ def truncate_graph_dist( `source_node`. weight Graph edge attribute to use to measure distance. - retain_all - If True, return the entire graph even if it is not connected. - Otherwise, retain only the largest weakly connected component. Returns ------- @@ -62,10 +58,6 @@ def truncate_graph_dist( G = G.copy() G.remove_nodes_from(distant_nodes | unreachable_nodes) - # keep only the largest weakly connected component if retain_all is False - if not retain_all: - G = largest_component(remove_isolated_nodes(G)) - msg = f"Truncated graph by {weight}-weighted network distance" utils.log(msg, level=lg.INFO) return G @@ -76,7 +68,6 @@ def truncate_graph_bbox( bbox: tuple[float, float, float, float], *, truncate_by_edge: bool = False, - retain_all: bool = False, ) -> nx.MultiDiGraph: """ Remove from a graph every node that falls outside a bounding box. 
@@ -90,9 +81,6 @@ def truncate_graph_bbox( truncate_by_edge If True, retain nodes outside bounding box if at least one of node's neighbors is within the bounding box. - retain_all - If True, return the entire graph even if it is not connected. - Otherwise, retain only the largest weakly connected component. Returns ------- @@ -101,7 +89,7 @@ def truncate_graph_bbox( """ # convert bounding box to a polygon, then truncate polygon = utils_geo.bbox_to_poly(bbox=bbox) - G = truncate_graph_polygon(G, polygon, retain_all=retain_all, truncate_by_edge=truncate_by_edge) + G = truncate_graph_polygon(G, polygon, truncate_by_edge=truncate_by_edge) msg = "Truncated graph by bounding box" utils.log(msg, level=lg.INFO) @@ -112,7 +100,6 @@ def truncate_graph_polygon( G: nx.MultiDiGraph, polygon: Polygon | MultiPolygon, *, - retain_all: bool = False, truncate_by_edge: bool = False, ) -> nx.MultiDiGraph: """ @@ -124,9 +111,6 @@ def truncate_graph_polygon( Input graph. polygon Only retain nodes in graph that lie within this geometry. - retain_all - If True, return the entire graph even if it is not connected. - Otherwise, retain only the largest weakly connected component. truncate_by_edge If True, retain nodes outside boundary polygon if at least one of node's neighbors is within the polygon. @@ -172,10 +156,6 @@ def truncate_graph_polygon( msg = f"Removed {len(nodes_to_remove):,} nodes outside polygon" utils.log(msg, level=lg.INFO) - # keep only the largest weakly connected component if retain_all is False - if not retain_all: - G = largest_component(remove_isolated_nodes(G)) - msg = "Truncated graph by polygon" utils.log(msg, level=lg.INFO) return G @@ -223,7 +203,7 @@ def largest_component(G: nx.MultiDiGraph, *, strongly: bool = False) -> nx.Multi def remove_isolated_nodes(G: nx.MultiDiGraph) -> nx.MultiDiGraph: """ - Remove from a graph all 0-degree nodes (i.e., no incident edges). + Remove from a graph all 0-degree nodes (i.e., with no incident edges). Parameters ---------- From 6da75cc914934aede0f0182f2419c56e0431e690 Mon Sep 17 00:00:00 2001 From: Geoff Boeing Date: Thu, 14 Mar 2024 17:26:20 -0700 Subject: [PATCH 11/13] handle retain_all if graph from point by network dist --- osmnx/graph.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/osmnx/graph.py b/osmnx/graph.py index c52afbcf8..15f35ee8d 100644 --- a/osmnx/graph.py +++ b/osmnx/graph.py @@ -143,10 +143,10 @@ def graph_from_point( Retain only those nodes within this many meters of `center_point`, measuring distance according to `dist_type`. dist_type - {"network", "bbox"} - If "bbox", retain only those nodes within a bounding box of `dist`. If - "network", retain only those nodes within `dist` network distance from - the centermost node. + {"bbox", "network"} + If "bbox", retain only those nodes within a bounding box of `dist` + length/width. If "network", retain only those nodes within `dist` + network distance of the nearest node to `center_point`. network_type {"all_private", "all", "bike", "drive", "drive_service", "walk"} What type of street network to retrieve if `custom_filter` is None. 
@@ -192,11 +192,14 @@ def graph_from_point( ) if dist_type == "network": - # if dist_type is network, find node in graph nearest to center point - # then truncate graph by network dist from it + # find node nearest to center then truncate graph by dist from it node = distance.nearest_nodes(G, X=center_point[1], Y=center_point[0]) G = truncate.truncate_graph_dist(G, node, dist) + # keep only the largest weakly connected component if retain_all is False + if not retain_all: + G = truncate.largest_component(G, strongly=False) + msg = f"graph_from_point returned graph with {len(G):,} nodes and {len(G.edges):,} edges" utils.log(msg, level=lg.INFO) return G From 0da3702eb2bc22a88d8a3a7521c1c8f3ee294ef4 Mon Sep 17 00:00:00 2001 From: Geoff Boeing Date: Thu, 14 Mar 2024 17:33:09 -0700 Subject: [PATCH 12/13] update changelog --- CHANGELOG.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 23b2e99e8..18c012a2b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,15 +5,13 @@ Read the v2 [migration guide](https://github.com/gboeing/osmnx/issues/1123) - add type annotations to all public and private functions throughout package (#1107) -- remove all functionality previously deprecated in v1 (#1113 #1122) +- remove all functionality previously deprecated in v1 (#1113 #1122 #1135 #1148) - drop Python 3.8 support (#1106) - bump minimum required numpy version to 1.21 for typing support (#1133) - improve docstrings throughout package (#1116) - improve logging and warnings throughout package (#1125) - improve error messages throughout package (#1131) - refactor save_graph_xml function and \_osm_xml module for a >5x speed improvement and bug fixes (#1135) -- remove settings module's osm_xml_node_attrs, osm_xml_node_tags, osm_xml_way_attrs, and osm_xml_way_tags settings (#1135) -- remove save_graph_xml function's node_tags, node_attrs, edge_tags, edge_attrs, merge_edges, oneway, api_version, and precision parameters (#1135) - make save_graph_xml function accept only an unsimplified MultiDiGraph as its input data (#1135) - replace save_graph_xml function's edge_tag_aggs tuple parameter with way_tag_aggs dict parameter (#1135) - make consolidate_intersections function retain unique attribute values when consolidating nodes (#1144) @@ -31,6 +29,7 @@ Read the v2 [migration guide](https://github.com/gboeing/osmnx/issues/1123) - add node_attrs_include argument to simplification.simplify_graph function to flexibly relax strictness (#1145) - rename simplification.simplify_graph endpoint_attrs argument to edge_attrs_differ (#1145) - rename truncate.truncate_graph_dist max_dist argument to dist for consistency with rest of package (#1134) +- remove retain_all argument from all truncate module functions (#1148) - rename settings module's default_accept_language, default_referer, and default_user_agent settings (#1129) - rename settings module's memory, nominatim_endpoint, overpass_endpoint, and timeout settings (#1136) - rename osm_xml module to \_osm_xml to make it private, as all its functions are private (#1113) From 550598d0d45c55ad9a322bcba2493bc0511c7202 Mon Sep 17 00:00:00 2001 From: Geoff Boeing Date: Fri, 15 Mar 2024 07:50:44 -0700 Subject: [PATCH 13/13] remove unnecessary truncate in graph from point by network dist --- osmnx/graph.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/osmnx/graph.py b/osmnx/graph.py index 15f35ee8d..994f3da58 100644 --- a/osmnx/graph.py +++ b/osmnx/graph.py @@ -196,10 +196,6 @@ def graph_from_point( node = 
distance.nearest_nodes(G, X=center_point[1], Y=center_point[0]) G = truncate.truncate_graph_dist(G, node, dist) - # keep only the largest weakly connected component if retain_all is False - if not retain_all: - G = truncate.largest_component(G, strongly=False) - msg = f"graph_from_point returned graph with {len(G):,} nodes and {len(G.edges):,} edges" utils.log(msg, level=lg.INFO) return G
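
Taken together, the series is a set of module moves and renames with unchanged behavior. A minimal migration sketch against the new layout follows; the center point, distance, and origin/destination choice are illustrative placeholders, while every function shown comes from the diffs above:

    import osmnx as ox

    # build a small drivable network around an illustrative point
    G = ox.graph_from_point((37.79, -122.41), dist=500, network_type="drive")

    # speed module functionality now lives in routing (still exposed at package level)
    G = ox.add_edge_speeds(G)
    G = ox.add_edge_travel_times(G)

    # utils_graph conversions now live in the convert module
    nodes, edges = ox.graph_to_gdfs(G)              # was utils_graph.graph_to_gdfs
    G2 = ox.graph_from_gdfs(nodes, edges)           # was utils_graph.graph_from_gdfs
    Gu = ox.convert.to_undirected(G)                # was utils_graph.get_undirected
    D = ox.convert.to_digraph(G, weight="length")   # was utils_graph.get_digraph

    # route_to_gdf moved from utils_graph to routing
    orig, dest = list(G)[0], list(G)[-1]
    route = ox.shortest_path(G, orig, dest, weight="travel_time")
    if route is not None:
        route_gdf = ox.routing.route_to_gdf(G, route, weight="travel_time")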
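
As its docstring states, convert.to_undirected keeps parallel edges only when their geometries differ, exactly as the moved utils_graph code did. A small self-contained sketch of that behavior on a toy graph (node coordinates, osmids, and lengths are arbitrary values, not taken from the diffs):

    import networkx as nx
    import osmnx as ox
    from shapely.geometry import LineString

    # toy two-node graph: one straight edge and one bowed edge between the same nodes
    G = nx.MultiDiGraph(crs="epsg:4326")
    G.add_node(1, x=0.0, y=0.0)
    G.add_node(2, x=1.0, y=0.0)
    G.add_edge(1, 2, key=0, osmid=100, length=100)
    G.add_edge(2, 1, key=0, osmid=101, length=120,
               geometry=LineString([(1, 0), (0.5, 0.3), (0, 0)]))

    Gu = ox.convert.to_undirected(G)
    print(Gu.number_of_edges())  # 2: the geometries differ, so both edges survive
    # reciprocal edges of an ordinary two-way street (same osmid and geometry)
    # would instead collapse into a single undirected edge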
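
Because the later patches drop the retain_all parameter from the truncate module, code that truncated graphs directly and relied on retain_all=False now applies that filtering itself. A rough sketch of the equivalent explicit calls (coordinates and distances are illustrative; the 400 m bounding box mirrors the test suite):

    import osmnx as ox

    center = (37.79, -122.41)  # illustrative coordinates
    G = ox.graph_from_point(center, dist=1000, network_type="drive")

    # the retain_all argument is gone from the truncate functions...
    bbox = ox.utils_geo.bbox_from_point(center, dist=400)
    G_t = ox.truncate.truncate_graph_bbox(G, bbox)

    # ...so replicate the old retain_all=False behavior explicitly if desired:
    # drop 0-degree nodes, then keep the largest weakly connected component
    G_t = ox.truncate.largest_component(ox.truncate.remove_isolated_nodes(G_t))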