diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a3696b194..c0cee44dd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -40,7 +40,7 @@ repos: - id: validate-pyproject - repo: https://github.com/astral-sh/ruff-pre-commit - rev: "v0.2.1" + rev: "v0.2.2" hooks: - id: ruff args: [--fix] diff --git a/CHANGELOG.md b/CHANGELOG.md index bee790c52..274079d8f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,11 @@ Read the v2 [migration guide](https://github.com/gboeing/osmnx/issues/1123) - improve docstrings throughout package (#1116) - improve logging and warnings throughout package (#1125) - improve error messages throughout package (#1131) +- refactor save_graph_xml function and \_osm_xml module for a >5x speed improvement and bug fixes (#1135) +- remove settings module's osm_xml_node_attrs, osm_xml_node_tags, osm_xml_way_attrs, and osm_xml_way_tags settings (#1135) +- remove save_graph_xml function's node_tags, node_attrs, edge_tags, edge_attrs, merge_edges, oneway, api_version, and precision parameters (#1135) +- make save_graph_xml function accept only an unsimplified MultiDiGraph as its input data (#1135) +- replace save_graph_xml function's edge_tag_aggs tuple parameter with way_tag_aggs dict parameter (#1135) - make optional function parameters keyword-only throughout package (#1134) - make dist function parameters required rather than optional throughout package (#1134) - make which_result function parameter consistently able to accept a list throughout package (#1113) diff --git a/environments/docker/requirements.txt b/environments/docker/requirements.txt index b26c34c57..bc16ca656 100644 --- a/environments/docker/requirements.txt +++ b/environments/docker/requirements.txt @@ -32,6 +32,7 @@ typeguard types-requests # linting/testing +lxml nbdime nbqa pre-commit diff --git a/osmnx/_osm_xml.py b/osmnx/_osm_xml.py index 447b277c8..26fb5bdab 100644 --- a/osmnx/_osm_xml.py +++ b/osmnx/_osm_xml.py @@ -1,10 +1,13 @@ 
-"""Read/write .osm formatted XML files.""" +""" +Read/write OSM XML files. + +For file format information see https://wiki.openstreetmap.org/wiki/OSM_XML +""" from __future__ import annotations import bz2 import logging as lg -import xml.sax from pathlib import Path from typing import TYPE_CHECKING from typing import Any @@ -14,21 +17,47 @@ from xml.etree.ElementTree import ElementTree from xml.etree.ElementTree import SubElement from xml.etree.ElementTree import parse as etree_parse +from xml.sax import parse as sax_parse +from xml.sax.handler import ContentHandler import networkx as nx -import numpy as np +import pandas as pd from . import settings from . import utils from . import utils_graph -from ._version import __version__ +from ._errors import GraphSimplificationError +from ._version import __version__ as osmnx_version if TYPE_CHECKING: + from xml.sax.xmlreader import AttributesImpl + import geopandas as gpd - import pandas as pd -class _OSMContentHandler(xml.sax.handler.ContentHandler): +# default values for standard "node" and "way" XML subelement attributes +# see: https://wiki.openstreetmap.org/wiki/Elements#Common_attributes +ATTR_DEFAULTS = { + "changeset": "1", + "timestamp": utils.ts(style="iso8601"), + "uid": "1", + "user": "OSMnx", + "version": "1", + "visible": "true", +} + +# default values for standard "osm" root XML element attributes +# current OSM editing API version: https://wiki.openstreetmap.org/wiki/API +ROOT_ATTR_DEFAULTS = { + "attribution": "https://www.openstreetmap.org/copyright", + "copyright": "OpenStreetMap and contributors", + "generator": f"OSMnx {osmnx_version}", + "license": "https://opendatacommons.org/licenses/odbl/1-0/", + "version": "0.6", +} + + +class _OSMContentHandler(ContentHandler): """ SAX content handler for OSM XML. 
@@ -41,22 +70,22 @@ def __init__(self) -> None: # noqa: ANN101 self._element: dict[str, Any] | None = None self.object: dict[str, Any] = {"elements": []} - def startElement(self, name: str, attrs: xml.sax.xmlreader.AttributesImpl) -> None: # noqa: ANN101,N802 + def startElement(self, name: str, attrs: AttributesImpl) -> None: # noqa: ANN101,N802 + # identify node/way/relation attrs to convert from string to numeric + float_attrs = {"lat", "lon"} + int_attrs = {"changeset", "id", "uid", "version"} + if name == "osm": - self.object.update({k: v for k, v in attrs.items() if k in {"version", "generator"}}) + self.object.update({k: v for k, v in attrs.items() if k in ROOT_ATTR_DEFAULTS}) elif name in {"node", "way"}: self._element = dict(type=name, tags={}, nodes=[], **attrs) - self._element.update({k: float(v) for k, v in attrs.items() if k in {"lat", "lon"}}) - self._element.update( - {k: int(v) for k, v in attrs.items() if k in {"id", "uid", "version", "changeset"}}, - ) + self._element.update({k: float(v) for k, v in attrs.items() if k in float_attrs}) + self._element.update({k: int(v) for k, v in attrs.items() if k in int_attrs}) elif name == "relation": self._element = dict(type=name, tags={}, members=[], **attrs) - self._element.update( - {k: int(v) for k, v in attrs.items() if k in {"id", "uid", "version", "changeset"}}, - ) + self._element.update({k: int(v) for k, v in attrs.items() if k in int_attrs}) elif name == "tag": self._element["tags"].update({attrs["k"]: attrs["v"]}) # type: ignore[index] @@ -74,9 +103,9 @@ def endElement(self, name: str) -> None: # noqa: ANN101,N802 self.object["elements"].append(self._element) -def _overpass_json_from_file(filepath: str | Path, encoding: str) -> dict[str, Any]: +def _overpass_json_from_xml(filepath: str | Path, encoding: str) -> dict[str, Any]: """ - Read OSM XML from file and return Overpass-like JSON. + Read OSM XML data from file and return Overpass-like JSON. 
Parameters ---------- @@ -115,408 +144,264 @@ def _opener(filepath: Path, encoding: str) -> TextIO: # parse the XML to Overpass-like JSON with _opener(Path(filepath), encoding) as f: handler = _OSMContentHandler() - xml.sax.parse(f, handler) # noqa: S317 + sax_parse(f, handler) # noqa: S317 return handler.object -def _save_graph_xml( # noqa: PLR0913 - data: nx.MultiDiGraph | tuple[gpd.GeoDataFrame, gpd.GeoDataFrame], +def _save_graph_xml( + G: nx.MultiDiGraph, filepath: str | Path | None, - node_tags: list[str], - node_attrs: list[str], - edge_tags: list[str], - edge_attrs: list[str], - oneway: bool, # noqa: FBT001 - merge_edges: bool, # noqa: FBT001 - edge_tag_aggs: list[tuple[str, str]] | None, - api_version: str, - precision: int, + way_tag_aggs: dict[str, Any] | None, + encoding: str = "utf-8", ) -> None: """ - Save graph to disk as an OSM-formatted XML .osm file. + Save graph to disk as an OSM XML file. Parameters ---------- - data - Either a MultiDiGraph or (gdf_nodes, gdf_edges) tuple. + G + Unsimplified graph to save as an OSM XML file. filepath - Path to the .osm file including extension. If None, use default + Path to the saved file including extension. If None, use default `settings.data_folder/graph.osm`. - node_tags - OSM node tags to include in output OSM XML. - node_attrs - OSM node attributes to include in output OSM XML. - edge_tags - OSM way tags to include in output OSM XML. - edge_attrs - OSM way attributes to include in output OSM XML. - oneway - The default oneway value used to fill this tag where missing. - merge_edges - If True, merge graph edges such that each OSM way has one entry and - one entry only in the OSM XML. Otherwise, every OSM way will have a - separate entry for each node pair it contains. - edge_tag_aggs - Useful only if `merge_edges` is True, this argument allows the user to - specify edge attributes to aggregate such that the merged OSM way - entry tags accurately represent the sum total of their component edge - attributes. 
For example, if the user wants the OSM way to have a - "length" attribute, the user must specify - `edge_tag_aggs=[('length', 'sum')]` in order to tell this function to - aggregate the lengths of the individual component edges. Otherwise, - the length attribute will simply reflect the length of the first edge - associated with the way. - api_version - OpenStreetMap API version to save in the XML file header. - precision - Number of decimal places to round latitude and longitude values. + way_tag_aggs + Keys are OSM way tag keys and values are aggregation functions + (anything accepted as an argument by pandas.agg). Allows user to + aggregate graph edge attribute values into single OSM way values. If + None, or if some tag's key does not exist in the dict, the way + attribute will be assigned the value of the first edge of the way. + encoding + The character encoding of the saved OSM XML file. Returns ------- None """ - # default filepath if none was provided - filepath = Path(settings.data_folder) / "graph.osm" if filepath is None else Path(filepath) + # default "oneway" value used to fill this tag where missing + ONEWAY = False - # if save folder does not already exist, create it - filepath.parent.mkdir(parents=True, exist_ok=True) + # round lat/lon coordinates to 7 decimals (approx 5 to 10 mm resolution) + PRECISION = 7 + # warn user if ox.settings.all_oneway is not currently True (but maybe it + # was when they created the graph) if not settings.all_oneway: - msg = ( - "For the `save_graph_xml` function to behave properly, the graph " - "must have been created with `ox.settings.all_oneway=True`." - ) + msg = "Make sure graph was created with `ox.settings.all_oneway=True` to save as OSM XML." 
warn(msg, category=UserWarning, stacklevel=2) - if isinstance(data, nx.MultiDiGraph): - gdf_nodes, gdf_edges = utils_graph.graph_to_gdfs( - data, - node_geometry=False, - fill_edge_geometry=False, - ) - elif isinstance(data, tuple): - gdf_nodes, gdf_edges = data - else: - msg = "`data` must be a MultiDiGraph or a tuple of node/edge GeoDataFrames." - raise TypeError(msg) - - # rename columns per osm specification - gdf_nodes = gdf_nodes.rename(columns={"x": "lon", "y": "lat"}) - gdf_nodes["lon"] = gdf_nodes["lon"].round(precision) - gdf_nodes["lat"] = gdf_nodes["lat"].round(precision) - gdf_nodes = gdf_nodes.reset_index().rename(columns={"osmid": "id"}) - if "id" in gdf_edges.columns: - gdf_edges = gdf_edges[[col for col in gdf_edges if col != "id"]] - if "uniqueid" in gdf_edges.columns: - gdf_edges = gdf_edges.rename(columns={"uniqueid": "id"}) - else: - gdf_edges = gdf_edges.reset_index().reset_index().rename(columns={"index": "id"}) - - # add default values for required attributes - for table in (gdf_nodes, gdf_edges): - table["uid"] = "1" - table["user"] = "OSMnx" - table["version"] = "1" - table["changeset"] = "1" - table["timestamp"] = utils.ts(template="{:%Y-%m-%dT%H:%M:%SZ}") - - # string replacement to meet OSM XML spec + # raise error if graph has been simplified + if G.graph.get("simplified", False): + msg = "Graph must be unsimplified to save as OSM XML." 
+ raise GraphSimplificationError(msg) + + # set default filepath if None was provided + filepath = Path(settings.data_folder) / "graph.osm" if filepath is None else Path(filepath) + filepath.parent.mkdir(parents=True, exist_ok=True) + + # convert graph to node/edge gdfs and create dict of spatial bounds + gdf_nodes, gdf_edges = utils_graph.graph_to_gdfs(G, fill_edge_geometry=False) + coords = [str(round(c, PRECISION)) for c in gdf_nodes.unary_union.bounds] + bounds = dict(zip(["minlon", "minlat", "maxlon", "maxlat"], coords)) + + # add default values (if missing) for standard attrs + for gdf in (gdf_nodes, gdf_edges): + for col, value in ATTR_DEFAULTS.items(): + if col not in gdf.columns: + gdf[col] = value + else: + gdf[col] = gdf[col].fillna(value) + + # transform nodes gdf to meet OSM XML spec + # 1) reset index (osmid) then rename osmid, x, and y columns + # 2) round lat/lon coordinates + # 3) drop unnecessary geometry column + gdf_nodes = gdf_nodes.reset_index().rename(columns={"osmid": "id", "x": "lon", "y": "lat"}) + gdf_nodes[["lon", "lat"]] = gdf_nodes[["lon", "lat"]].round(PRECISION) + gdf_nodes = gdf_nodes.drop(columns=["geometry"]) + + # transform edges gdf to meet OSM XML spec + # 1) fill and convert oneway bools to strings + # 2) rename osmid column (but keep (u, v, k) index for processing) + # 3) drop unnecessary geometry column if "oneway" in gdf_edges.columns: - gdf_edges["oneway"] = gdf_edges["oneway"].fillna(oneway).replace({True: "yes", False: "no"}) - - # initialize XML tree with an OSM root element then append nodes/edges - root = Element("osm", attrib={"version": api_version, "generator": f"OSMnx {__version__}"}) - root = _append_nodes_xml_tree(root, gdf_nodes, node_attrs, node_tags) - root = _append_edges_xml_tree( - root, - gdf_edges, - edge_attrs, - edge_tags, - edge_tag_aggs, - merge_edges, - ) + gdf_edges["oneway"] = gdf_edges["oneway"].fillna(ONEWAY).replace({True: "yes", False: "no"}) + gdf_edges = gdf_edges.rename(columns={"osmid": 
"id"}).drop(columns=["geometry"]) + + # create parent XML element then add bounds, nodes, ways as subelements + element = Element("osm", attrib=ROOT_ATTR_DEFAULTS) + _ = SubElement(element, "bounds", attrib=bounds) + _add_nodes_xml(element, gdf_nodes) + _add_ways_xml(element, gdf_edges, way_tag_aggs) # write to disk - ElementTree(root).write(filepath, encoding="utf-8", xml_declaration=True) - msg = f"Saved graph as .osm file at {filepath!r}" + ElementTree(element).write(filepath, encoding=encoding, xml_declaration=True) + msg = f"Saved graph as OSM XML file at {str(filepath)!r}" utils.log(msg, level=lg.INFO) -def _append_nodes_xml_tree( - root: Element, +def _add_nodes_xml( + parent: Element, gdf_nodes: gpd.GeoDataFrame, - node_attrs: list[str], - node_tags: list[str], -) -> Element: +) -> None: """ - Append nodes to an XML tree. + Add graph nodes as subelements of an XML parent element. Parameters ---------- - root - The XML tree. + parent + The XML parent element. gdf_nodes A GeoDataFrame of graph nodes. - node_attrs - OSM way attributes to include in output OSM XML. - node_tags - OSM way tags to include in output OSM XML. Returns ------- - root - The XML tree with nodes appended. 
+ None """ - for _, row in gdf_nodes.iterrows(): - row_str = row.dropna().astype(str) - node = SubElement(root, "node", attrib=row_str[node_attrs].to_dict()) + node_tags = set(settings.useful_tags_node) + node_attrs = {"id", "lat", "lon"}.union(ATTR_DEFAULTS) - for tag in node_tags: - if tag in row_str: - SubElement(node, "tag", attrib={"k": tag, "v": row_str[tag]}) - return root + # add each node attrs dict as a SubElement of parent + for node in gdf_nodes.to_dict(orient="records"): + attrs = {k: str(node[k]) for k in node_attrs if pd.notna(node[k])} + node_element = SubElement(parent, "node", attrib=attrs) + # add each node tag dict as its own SubElement of the node SubElement + tags = ({"k": k, "v": str(node[k])} for k in node_tags & node.keys() if pd.notna(node[k])) + for tag in tags: + _ = SubElement(node_element, "tag", attrib=tag) -def _create_way_for_each_edge( - root: Element, + +def _add_ways_xml( + parent: Element, gdf_edges: gpd.GeoDataFrame, - edge_attrs: list[str], - edge_tags: list[str], + way_tag_aggs: dict[str, Any] | None, ) -> None: """ - Append a new way to an empty XML tree graph for each edge in way. - - This will generate separate OSM ways for each network edge, even if the - edges are all part of the same original OSM way. As such, each way will be - composed of two nodes, and there will be many ways with the same OSM ID. - This does not conform to the OSM XML schema standard, but the data will - still comprise a valid network and will be readable by most OSM tools. + Add graph edges (grouped as ways) as subelements of an XML parent element. Parameters ---------- - root - An empty XML tree. + parent + The XML parent element. gdf_edges - A GeoDataFrame of graph edges. - edge_attrs - OSM way attributes to include in output OSM XML. - edge_tags - OSM way tags to include in output OSM XML. + A GeoDataFrame of graph edges with OSM way "id" column for grouping + edges into ways. 
+ way_tag_aggs + Keys are OSM way tag keys and values are aggregation functions + (anything accepted as an argument by pandas.agg). Allows user to + aggregate graph edge attribute values into single OSM way values. If + None, or if some tag's key does not exist in the dict, the way + attribute will be assigned the value of the first edge of the way. Returns ------- None """ - for _, row in gdf_edges.iterrows(): - row_str = row.dropna().astype(str) - edge = SubElement(root, "way", attrib=row_str[edge_attrs].to_dict()) - SubElement(edge, "nd", attrib={"ref": row_str["u"]}) - SubElement(edge, "nd", attrib={"ref": row_str["v"]}) - for tag in edge_tags: - if tag in row_str: - SubElement(edge, "tag", attrib={"k": tag, "v": row_str[tag]}) - - -def _append_merged_edge_attrs( - xml_edge: Element, - sample_edge: dict[str, Any], - all_edges_df: pd.DataFrame, - edge_tags: list[str], - edge_tag_aggs: list[tuple[str, str]] | None, -) -> None: + way_tags = set(settings.useful_tags_way) + way_attrs = list({"id"}.union(ATTR_DEFAULTS)) + + for osmid, way in gdf_edges.groupby("id"): + # STEP 1: add the way and its attrs as a "way" subelement of the + # parent element + attrs = way[way_attrs].iloc[0].astype(str).to_dict() + way_element = SubElement(parent, "way", attrib=attrs) + + # STEP 2: add the way's edges' node IDs as "nd" subelements of the + # "way" subelement. if way contains more than 1 edge, sort the nodes + # topologically, otherwise just add node "u" then "v" from index. + if len(way) == 1: + nodes = way.index[0][:2] + else: + nodes = _sort_nodes(nx.MultiDiGraph(way.index.to_list()), osmid) + for node in nodes: + _ = SubElement(way_element, "nd", attrib={"ref": str(node)}) + + # STEP 3: add way's edges' tags as "tag" subelements of the "way" + # subelement. if an agg function was provided for a tag, apply it to + # the values of the edges in the way. if no agg function was provided + # for a tag, just use the value from first edge in way. 
+ for tag in way_tags.intersection(way.columns): + if way_tag_aggs is not None and tag in way_tag_aggs: + value = way[tag].agg(way_tag_aggs[tag]) + else: + value = way[tag].iloc[0] + if pd.notna(value): + _ = SubElement(way_element, "tag", attrib={"k": tag, "v": str(value)}) + + +def _sort_nodes(G: nx.MultiDiGraph, osmid: int) -> list[int]: """ - Extract edge attributes and append to XML edge. + Topologically sort the nodes of an OSM way. Parameters ---------- - xml_edge - XML representation of an output graph edge. - sample_edge - Dict of sample row from the the dataframe of way edges. - all_edges_df - A DataFrame with one row for each edge in an OSM way. - edge_tags - OSM way tags to include in output OSM XML. - edge_tag_aggs - Useful only if `merge_edges` is True, this argument allows the user to - specify edge attributes to aggregate such that the merged OSM way - entry tags accurately represent the sum total of their component edge - attributes. For example, if the user wants the OSM way to have a - "length" attribute, the user must specify - `edge_tag_aggs=[('length', 'sum')]` in order to tell this function to - aggregate the lengths of the individual component edges. Otherwise, - the length attribute will simply reflect the length of the first edge - associated with the way. + G + The graph representing the OSM way. + osmid + The OSM way ID. Returns ------- - None + ordered_nodes + The way's node IDs in topologically sorted order. 
""" - if edge_tag_aggs is None: - for tag in edge_tags: - if tag in sample_edge: - SubElement(xml_edge, "tag", attrib={"k": tag, "v": sample_edge[tag]}) - else: - for tag in edge_tags: - if (tag in sample_edge) and (tag not in (t for t, agg in edge_tag_aggs)): - SubElement(xml_edge, "tag", attrib={"k": tag, "v": sample_edge[tag]}) - - for tag, agg in edge_tag_aggs: - if tag in all_edges_df.columns: - SubElement( - xml_edge, - "tag", - attrib={ - "k": tag, - "v": str(all_edges_df[tag].aggregate(agg)), - }, - ) - - -def _append_nodes_as_edge_attrs( - xml_edge: Element, - sample_edge: dict[str, Any], - all_edges_df: pd.DataFrame, -) -> None: - """ - Extract list of ordered nodes and append as attributes of XML edge. - - Parameters - ---------- - xml_edge - XML representation of an output graph edge. - sample_edge - Sample row from the the DataFrame of way edges. - all_edges_df: pandas.DataFrame - A DataFrame with one row for each edge in an OSM way. + try: + ordered_nodes = list(nx.topological_sort(G)) + + except nx.NetworkXUnfeasible: + # if it couldn't topologically sort the nodes, the way probably + # contains a cycle. try removing an edge to break the cycle. 
first, + # look for multiple edges emanating from the same source node + insert_before = True + edges = [ + edge + for source in [node for node, degree in G.out_degree() if degree > 1] + for edge in G.out_edges(source, keys=True) + ] + + # if none found, then look for multiple edges pointing at the same + # target node instead + if len(edges) == 0: + insert_before = False + edges = [ + edge + for target in [node for node, degree in G.in_degree() if degree > 1] + for edge in G.in_edges(target, keys=True) + ] + + # if still none, then take the first edge of the way: the entire + # way could just be a cycle in which each node appears once + if len(edges) == 0: + edges = [next(iter(G.edges))] + + # remove one edge at a time and, if the graph remains connected, exit + # the loop and check if we are able to topologically sort the nodes + for edge in edges: + G_ = G.copy() + G_.remove_edge(*edge) + if nx.is_weakly_connected(G_): + break - Returns - ------- - None - """ - if len(all_edges_df) == 1: - SubElement(xml_edge, "nd", attrib={"ref": sample_edge["u"]}) - SubElement(xml_edge, "nd", attrib={"ref": sample_edge["v"]}) - else: - # topological sort - all_edges_df = all_edges_df.reset_index() try: - ordered_nodes = _get_unique_nodes_ordered_from_way(all_edges_df) - except nx.NetworkXUnfeasible: - first_node = all_edges_df.iloc[0]["u"] - ordered_nodes = _get_unique_nodes_ordered_from_way(all_edges_df.iloc[1:]) - ordered_nodes = [first_node, *ordered_nodes] - for node in ordered_nodes: - SubElement(xml_edge, "nd", attrib={"ref": str(node)}) + ordered_nodes = list(nx.topological_sort(G_)) + # re-insert (before or after its neighbor as needed) the duplicate + # source or target node from the edge we removed + dupe_node = edge[0] if insert_before else edge[1] + neighbor = edge[1] if insert_before else edge[0] + position = ordered_nodes.index(neighbor) + position = position if insert_before else position + 1 + ordered_nodes.insert(position, dupe_node) -def 
_append_edges_xml_tree( - root: Element, - gdf_edges: gpd.GeoDataFrame, - edge_attrs: list[str], - edge_tags: list[str], - edge_tag_aggs: list[tuple[str, str]] | None, - merge_edges: bool, # noqa: FBT001 -) -> Element: - """ - Append edges to an XML tree. - - Parameters - ---------- - root - An XML tree. - gdf_edges - A GeoDataFrame of graph edges. - edge_attrs - OSM way attributes to include in output OSM XML. - edge_tags - OSM way tags to include in output OSM XML. - edge_tag_aggs - Useful only if `merge_edges` is True, this argument allows the user to - specify edge attributes to aggregate such that the merged OSM way - entry tags accurately represent the sum total of their component edge - attributes. For example, if the user wants the OSM way to have a - "length" attribute, the user must specify - `edge_tag_aggs=[('length', 'sum')]` in order to tell this function to - aggregate the lengths of the individual component edges. Otherwise, - the length attribute will simply reflect the length of the first edge - associated with the way. - merge_edges - If True, merge graph edges such that each OSM way has one entry and - one entry only in the OSM XML. Otherwise, every OSM way will have a - separate entry for each node pair it contains. - - Returns - ------- - root - XML tree with edges appended. 
- """ - gdf_edges = gdf_edges.reset_index() - if merge_edges: - for _, all_way_edges in gdf_edges.groupby("id"): - first = all_way_edges.iloc[0].dropna().astype(str) - edge = SubElement(root, "way", attrib=first[edge_attrs].dropna().to_dict()) - _append_nodes_as_edge_attrs( - xml_edge=edge, - sample_edge=first.to_dict(), - all_edges_df=all_way_edges, - ) - _append_merged_edge_attrs( - xml_edge=edge, - sample_edge=first.to_dict(), - edge_tags=edge_tags, - edge_tag_aggs=edge_tag_aggs, - all_edges_df=all_way_edges, - ) - - else: - _create_way_for_each_edge( - root=root, - gdf_edges=gdf_edges, - edge_attrs=edge_attrs, - edge_tags=edge_tags, - ) - - return root - - -def _get_unique_nodes_ordered_from_way(df_way_edges: pd.DataFrame) -> list[Any]: - """ - Recover original node order from edges associated with a single OSM way. - - Parameters - ---------- - df_way_edges - Dataframe containing columns 'u' and 'v' corresponding to origin and - destination nodes. - - Returns - ------- - unique_ordered_nodes - An ordered list of unique node IDs. If the edges do not all connect - (e.g. [(1, 2), (2,3), (10, 11), (11, 12), (12, 13)]), then this method - will return only those nodes associated with the largest component of - connected edges, even if subsequent connected chunks are contain more - total nodes. This ensures a proper topological representation of nodes - in the XML way records because if there are unconnected components, - the sorting algorithm cannot recover their original order. We would - not likely ever encounter this kind of disconnected structure of nodes - within a given way, but it is not explicitly forbidden in the OSM XML - design schema. 
- """ - G = nx.MultiDiGraph() - all_nodes = list(df_way_edges["u"].to_numpy()) + list(df_way_edges["v"].to_numpy()) - - G.add_nodes_from(all_nodes) - G.add_edges_from(df_way_edges[["u", "v"]].to_numpy()) - - # copy nodes into new graph - H = utils_graph.get_largest_component(G, strongly=False) - unique_ordered_nodes = list(nx.topological_sort(H)) - num_unique_nodes = len(np.unique(all_nodes)) - - if len(unique_ordered_nodes) < num_unique_nodes: - msg = f"Recovered order for {len(unique_ordered_nodes)} of {num_unique_nodes} nodes" - utils.log(msg, level=lg.INFO) - - return unique_ordered_nodes + except nx.NetworkXUnfeasible: + # if it failed again, this way probably contains multiple cycles, + # so remove a cycle then try to sort the nodes again, recursively. + # note this is destructive and will be missing in the saved data. + G_ = G.copy() + G_.remove_edges_from(nx.find_cycle(G_)) + G_ = utils_graph.remove_isolated_nodes(G_) + ordered_nodes = _sort_nodes(G_, osmid) + msg = f"Had to remove a cycle from way {str(osmid)!r} for topological sort" + utils.log(msg, level=lg.WARNING) + + return ordered_nodes diff --git a/osmnx/_overpass.py b/osmnx/_overpass.py index 56898bdd9..9b8951c2d 100644 --- a/osmnx/_overpass.py +++ b/osmnx/_overpass.py @@ -31,6 +31,9 @@ def _get_network_filter(network_type: str) -> str: """ Create a filter to query Overpass for the specified network type. + The filter searches for every OSM way with a "highway" tag, excluding + certain ways that are incompatible with the network type. + Parameters ---------- network_type @@ -39,7 +42,7 @@ def _get_network_filter(network_type: str) -> str: Returns ------- - overpass_filter + way_filter The Overpass query filter. """ # define built-in queries to send to the API. 
specifying way["highway"] @@ -106,12 +109,12 @@ def _get_network_filter(network_type: str) -> str: ) if network_type in filters: - overpass_filter = filters[network_type] + way_filter = filters[network_type] else: # pragma: no cover msg = f"Unrecognized network_type {network_type!r}." raise ValueError(msg) - return overpass_filter + return way_filter def _get_overpass_pause( @@ -342,7 +345,7 @@ def _download_overpass_network( """ # create a filter to exclude certain kinds of ways based on the requested # network_type, if provided, otherwise use custom_filter - osm_filter = custom_filter if custom_filter is not None else _get_network_filter(network_type) + way_filter = custom_filter if custom_filter is not None else _get_network_filter(network_type) # create overpass settings string overpass_settings = _make_overpass_settings() @@ -355,7 +358,7 @@ def _download_overpass_network( # pass exterior coordinates of each polygon in list to API, one at a time # the '>' makes it recurse so we get ways and the ways' nodes. for polygon_coord_str in polygon_coord_strs: - query_str = f"{overpass_settings};(way{osm_filter}(poly:{polygon_coord_str!r});>;);out;" + query_str = f"{overpass_settings};(way{way_filter}(poly:{polygon_coord_str!r});>;);out;" yield _overpass_request(OrderedDict(data=query_str)) diff --git a/osmnx/features.py b/osmnx/features.py index 58265a036..eb5a1c542 100644 --- a/osmnx/features.py +++ b/osmnx/features.py @@ -1,5 +1,5 @@ """ -Download OpenStreetMap geospatial features' geometries and attributes. +Download and create GeoDataFrames from OpenStreetMap geospatial features. Retrieve points of interest, building footprints, transit lines/stops, or any other map features from OSM, including their geometries and attribute data, @@ -91,13 +91,13 @@ def features_from_bbox( tags: dict[str, bool | str | list[str]], ) -> gpd.GeoDataFrame: """ - Create a GeoDataFrame of OSM features within a N, S, E, W bounding box. 
+ Download OSM features within a lat-lon bounding box. You can use the `settings` module to retrieve a snapshot of historical OSM data as of a certain date, or to configure the Overpass server timeout, - memory allocation, and other custom settings. - - For more details, see: https://wiki.openstreetmap.org/wiki/Map_features + memory allocation, and other custom settings. This function searches for + features using tags. For more details, see: + https://wiki.openstreetmap.org/wiki/Map_features Parameters ---------- @@ -132,13 +132,13 @@ def features_from_point( dist: float, ) -> gpd.GeoDataFrame: """ - Create GeoDataFrame of OSM features within some distance N, S, E, W of a point. + Download OSM features within some distance of a lat-lon point. You can use the `settings` module to retrieve a snapshot of historical OSM data as of a certain date, or to configure the Overpass server timeout, - memory allocation, and other custom settings. - - For more details, see: https://wiki.openstreetmap.org/wiki/Map_features + memory allocation, and other custom settings. This function searches for + features using tags. For more details, see: + https://wiki.openstreetmap.org/wiki/Map_features Parameters ---------- @@ -177,13 +177,13 @@ def features_from_address( dist: float, ) -> gpd.GeoDataFrame: """ - Create GeoDataFrame of OSM features within some distance N, S, E, W of address. + Download OSM features within some distance of an address. You can use the `settings` module to retrieve a snapshot of historical OSM data as of a certain date, or to configure the Overpass server timeout, - memory allocation, and other custom settings. - - For more details, see: https://wiki.openstreetmap.org/wiki/Map_features + memory allocation, and other custom settings. This function searches for + features using tags. 
For more details, see: + https://wiki.openstreetmap.org/wiki/Map_features Parameters ---------- @@ -221,7 +221,7 @@ def features_from_place( which_result: int | None | list[int | None] = None, ) -> gpd.GeoDataFrame: """ - Create GeoDataFrame of OSM features within boundaries of some place(s). + Download OSM features within the boundaries of some place(s). The query must be geocodable and OSM must have polygon boundaries for the geocode result. If OSM does not have a polygon for this place, you can @@ -238,9 +238,9 @@ def features_from_place( You can use the `settings` module to retrieve a snapshot of historical OSM data as of a certain date, or to configure the Overpass server timeout, - memory allocation, and other custom settings. - - For more details, see: https://wiki.openstreetmap.org/wiki/Map_features + memory allocation, and other custom settings. This function searches for + features using tags. For more details, see: + https://wiki.openstreetmap.org/wiki/Map_features Parameters ---------- @@ -281,13 +281,13 @@ def features_from_polygon( tags: dict[str, bool | str | list[str]], ) -> gpd.GeoDataFrame: """ - Create GeoDataFrame of OSM features within boundaries of a (multi)polygon. + Download OSM features within the boundaries of a (Multi)Polygon. You can use the `settings` module to retrieve a snapshot of historical OSM data as of a certain date, or to configure the Overpass server timeout, - memory allocation, and other custom settings. - - For more details, see: https://wiki.openstreetmap.org/wiki/Map_features + memory allocation, and other custom settings. This function searches for + features using tags. For more details, see: + https://wiki.openstreetmap.org/wiki/Map_features Parameters ---------- @@ -338,14 +338,14 @@ def features_from_xml( encoding: str = "utf-8", ) -> gpd.GeoDataFrame: """ - Create a GeoDataFrame of OSM features from an OSM-formatted XML file. + Create a GeoDataFrame of OSM features from data in an OSM XML file. 
- Because this function creates a GeoDataFrame of features from an - OSM-formatted XML file that has already been downloaded (i.e., no query is - made to the Overpass API) the polygon and tags arguments are not required. - If they are not passed, this will return features for all of the tagged - elements in the file. If they are passed, they will be used to filter the - final GeoDataFrame. + Because this function creates a GeoDataFrame of features from an OSM XML + file that has already been downloaded (i.e., no query is made to the + Overpass API) the polygon and tags arguments are not required. If they are + not passed, this will return features for all of the tagged elements in + the file. If they are passed, they will be used to filter the final + GeoDataFrame. Parameters ---------- @@ -373,7 +373,7 @@ def features_from_xml( gdf """ # transmogrify OSM XML file to JSON then create GeoDataFrame from it - response_jsons = [_osm_xml._overpass_json_from_file(filepath, encoding)] + response_jsons = [_osm_xml._overpass_json_from_xml(filepath, encoding)] return _create_gdf(response_jsons, polygon, tags) diff --git a/osmnx/graph.py b/osmnx/graph.py index 44eb9a8e4..f1b5fbe99 100644 --- a/osmnx/graph.py +++ b/osmnx/graph.py @@ -1,9 +1,6 @@ """ Download and create graphs from OpenStreetMap data. -This module uses filters to query the Overpass API: you can either specify a -built-in network type or provide your own custom filter with Overpass QL. - Refer to the Getting Started guide for usage limitations. """ @@ -51,11 +48,17 @@ def graph_from_bbox( custom_filter: str | None = None, ) -> nx.MultiDiGraph: """ - Download and create a graph within some bounding box. + Download and create a graph within a lat-lon bounding box. + + This function uses filters to query the Overpass API: you can either + specify a pre-defined `network_type` or provide your own `custom_filter` + with Overpass QL. 
- You can use the `settings` module to retrieve a snapshot of historical OSM - data as of a certain date, or to configure the Overpass server timeout, - memory allocation, and other custom settings. + Use the `settings` module's `useful_tags_node` and `useful_tags_way` + settings to configure which OSM node/way tags are added as graph node/edge + attributes. You can also use the `settings` module to retrieve a snapshot + of historical OSM data as of a certain date, or to configure the Overpass + server timeout, memory allocation, and other custom settings. Parameters ---------- @@ -119,11 +122,17 @@ def graph_from_point( custom_filter: str | None = None, ) -> nx.MultiDiGraph: """ - Download and create a graph within some distance of a (lat, lon) point. + Download and create a graph within some distance of a lat-lon point. - You can use the `settings` module to retrieve a snapshot of historical OSM - data as of a certain date, or to configure the Overpass server timeout, - memory allocation, and other custom settings. + This function uses filters to query the Overpass API: you can either + specify a pre-defined `network_type` or provide your own `custom_filter` + with Overpass QL. + + Use the `settings` module's `useful_tags_node` and `useful_tags_way` + settings to configure which OSM node/way tags are added as graph node/edge + attributes. You can also use the `settings` module to retrieve a snapshot + of historical OSM data as of a certain date, or to configure the Overpass + server timeout, memory allocation, and other custom settings. Parameters ---------- @@ -208,9 +217,15 @@ def graph_from_address( """ Download and create a graph within some distance of an address. - You can use the `settings` module to retrieve a snapshot of historical OSM - data as of a certain date, or to configure the Overpass server timeout, - memory allocation, and other custom settings. 
+ This function uses filters to query the Overpass API: you can either + specify a pre-defined `network_type` or provide your own `custom_filter` + with Overpass QL. + + Use the `settings` module's `useful_tags_node` and `useful_tags_way` + settings to configure which OSM node/way tags are added as graph node/edge + attributes. You can also use the `settings` module to retrieve a snapshot + of historical OSM data as of a certain date, or to configure the Overpass + server timeout, memory allocation, and other custom settings. Parameters ---------- @@ -287,19 +302,26 @@ def graph_from_place( The query must be geocodable and OSM must have polygon boundaries for the geocode result. If OSM does not have a polygon for this place, you can - instead get its street network using the graph_from_address function, + instead get its street network using the `graph_from_address` function, which geocodes the place name to a point and gets the network within some distance of that point. If OSM does have polygon boundaries for this place but you're not finding it, try to vary the query string, pass in a structured query dict, or vary - the which_result argument to use a different geocode result. If you know + the `which_result` argument to use a different geocode result. If you know the OSM ID of the place, you can retrieve its boundary polygon using the - geocode_to_gdf function, then pass it to the graph_from_polygon function. + `geocode_to_gdf` function, then pass it to the `graph_from_polygon` + function. - You can use the `settings` module to retrieve a snapshot of historical OSM - data as of a certain date, or to configure the Overpass server timeout, - memory allocation, and other custom settings. + This function uses filters to query the Overpass API: you can either + specify a pre-defined `network_type` or provide your own `custom_filter` + with Overpass QL. 
+ + Use the `settings` module's `useful_tags_node` and `useful_tags_way` + settings to configure which OSM node/way tags are added as graph node/edge + attributes. You can also use the `settings` module to retrieve a snapshot + of historical OSM data as of a certain date, or to configure the Overpass + server timeout, memory allocation, and other custom settings. Parameters ---------- @@ -367,11 +389,17 @@ def graph_from_polygon( custom_filter: str | None = None, ) -> nx.MultiDiGraph: """ - Download and create a graph within the boundaries of a (multi)polygon. + Download and create a graph within the boundaries of a (Multi)Polygon. + + This function uses filters to query the Overpass API: you can either + specify a pre-defined `network_type` or provide your own `custom_filter` + with Overpass QL. - You can use the `settings` module to retrieve a snapshot of historical OSM - data as of a certain date, or to configure the Overpass server timeout, - memory allocation, and other custom settings. + Use the `settings` module's `useful_tags_node` and `useful_tags_way` + settings to configure which OSM node/way tags are added as graph node/edge + attributes. You can also use the `settings` module to retrieve a snapshot + of historical OSM data as of a certain date, or to configure the Overpass + server timeout, memory allocation, and other custom settings. Parameters ---------- @@ -478,13 +506,17 @@ def graph_from_xml( encoding: str = "utf-8", ) -> nx.MultiDiGraph: """ - Create a graph from data in an OSM-formatted XML file. + Create a graph from data in an OSM XML file. Do not load an XML file previously generated by OSMnx: this use case is not supported and may not behave as expected. To save/load graphs to/from disk for later use in OSMnx, use the `io.save_graphml` and `io.load_graphml` functions instead. + Use the `settings` module's `useful_tags_node` and `useful_tags_way` + settings to configure which OSM node/way tags are added as graph node/edge + attributes. 
+ Parameters ---------- filepath @@ -497,14 +529,14 @@ def graph_from_xml( If True, return the entire graph even if it is not connected. If False, retain only the largest weakly connected component. encoding - The XML file's character encoding. + The OSM XML file's character encoding. Returns ------- G """ # transmogrify file of OSM XML data into JSON - response_jsons = [_osm_xml._overpass_json_from_file(filepath, encoding)] + response_jsons = [_osm_xml._overpass_json_from_xml(filepath, encoding)] # create graph using this response JSON G = _create_graph(response_jsons, retain_all, bidirectional) diff --git a/osmnx/io.py b/osmnx/io.py index 12f391969..f00942c1c 100644 --- a/osmnx/io.py +++ b/osmnx/io.py @@ -1,4 +1,4 @@ -"""Serialize graphs to/from files on disk.""" +"""Read/write graphs from/to files on disk.""" from __future__ import annotations @@ -245,106 +245,55 @@ def load_graphml( return G -def save_graph_xml( # noqa: PLR0913 - data: nx.MultiDiGraph | tuple[gpd.GeoDataFrame, gpd.GeoDataFrame], +def save_graph_xml( + G: nx.MultiDiGraph, *, filepath: str | Path | None = None, - node_tags: list[str] = settings.osm_xml_node_tags, - node_attrs: list[str] = settings.osm_xml_node_attrs, - edge_tags: list[str] = settings.osm_xml_way_tags, - edge_attrs: list[str] = settings.osm_xml_way_attrs, - oneway: bool = False, - merge_edges: bool = True, - edge_tag_aggs: list[tuple[str, str]] | None = None, - api_version: str = "0.6", - precision: int = 6, + way_tag_aggs: dict[str, Any] | None = None, + encoding: str = "utf-8", ) -> None: """ - Save graph to disk as an OSM-formatted XML .osm file. + Save graph to disk as an OSM XML file. - This function exists only to allow serialization to the .osm file format + This function exists only to allow serialization to the OSM XML format for applications that require it, and has constraints to conform to that. - As such, this function has a limited use case which does not include - saving/loading graphs for subsequent OSMnx analysis. 
To save/load graphs - to/from disk for later use in OSMnx, use the `io.save_graphml` and - `io.load_graphml` functions instead. To load a graph from a .osm file that - you have downloaded or generated elsewhere, use the `graph.graph_from_xml` + As such, it has a limited use case which does not include saving/loading + graphs for subsequent OSMnx analysis. To save/load graphs to/from disk for + later use in OSMnx, use the `io.save_graphml` and `io.load_graphml` + functions instead. To load a graph from an OSM XML file that you have + downloaded or generated elsewhere, use the `graph.graph_from_xml` function. - Note: for large networks this function can take a long time to run. Before - using this function, make sure you configured OSMnx as described in the - example below when you created the graph. - - Example - ------- - >>> import osmnx as ox - >>> utn = ox.settings.useful_tags_node - >>> oxna = ox.settings.osm_xml_node_attrs - >>> oxnt = ox.settings.osm_xml_node_tags - >>> utw = ox.settings.useful_tags_way - >>> oxwa = ox.settings.osm_xml_way_attrs - >>> oxwt = ox.settings.osm_xml_way_tags - >>> utn = list(set(utn + oxna + oxnt)) - >>> utw = list(set(utw + oxwa + oxwt)) - >>> ox.settings.all_oneway = True - >>> ox.settings.useful_tags_node = utn - >>> ox.settings.useful_tags_way = utw - >>> G = ox.graph_from_place('Piedmont, CA, USA', network_type='drive') - >>> ox.save_graph_xml(G, filepath='./data/graph.osm') + Use the `settings` module's `useful_tags_node` and `useful_tags_way` + settings to configure which tags your graph is created and saved with. + This function merges graph edges such that each OSM way has one entry in + the XML output, with the way's nodes topologically sorted. `G` must be + unsimplified to save as OSM XML: otherwise, one edge could comprise + multiple OSM ways, making it impossible to properly group edges by way. + `G` should also have been created with `ox.settings.all_oneway=True` for + this function to behave properly. 
Parameters ---------- - data - Either a MultiDiGraph or (gdf_nodes, gdf_edges) tuple. + G + Unsimplified graph to save as an OSM XML file. filepath - Path to the .osm file including extension. If None, use default + Path to the saved file including extension. If None, use default `settings.data_folder/graph.osm`. - node_tags - OSM node tags to include in output OSM XML. - node_attrs - OSM node attributes to include in output OSM XML. - edge_tags - OSM way tags to include in output OSM XML. - edge_attrs - OSM way attributes to include in output OSM XML. - oneway - The default oneway value used to fill this tag where missing. - merge_edges - If True, merge graph edges such that each OSM way has one entry and - one entry only in the OSM XML. Otherwise, every OSM way will have a - separate entry for each node pair it contains. - edge_tag_aggs - Useful only if `merge_edges` is True, this argument allows the user to - specify edge attributes to aggregate such that the merged OSM way - entry tags accurately represent the sum total of their component edge - attributes. For example, if the user wants the OSM way to have a - "length" attribute, the user must specify - `edge_tag_aggs=[('length', 'sum')]` in order to tell this function to - aggregate the lengths of the individual component edges. Otherwise, - the length attribute will simply reflect the length of the first edge - associated with the way. - api_version - OpenStreetMap API version to save in the XML file header. - precision - Number of decimal places to round latitude and longitude values. + way_tag_aggs + Keys are OSM way tag keys and values are aggregation functions + (anything accepted as an argument by pandas.agg). Allows user to + aggregate graph edge attribute values into single OSM way values. If + None, or if some tag's key does not exist in the dict, the way + attribute will be assigned the value of the first edge of the way. + encoding + The character encoding of the saved OSM XML file. 
Returns ------- None """ - _osm_xml._save_graph_xml( - data, - filepath, - node_tags, - node_attrs, - edge_tags, - edge_attrs, - oneway, - merge_edges, - edge_tag_aggs, - api_version, - precision, - ) + _osm_xml._save_graph_xml(G, filepath, way_tag_aggs, encoding) def _convert_graph_attr_types(G: nx.MultiDiGraph, dtypes: dict[str, Any]) -> nx.MultiDiGraph: diff --git a/osmnx/settings.py b/osmnx/settings.py index 92f9e8638..2a02ae215 100644 --- a/osmnx/settings.py +++ b/osmnx/settings.py @@ -2,34 +2,34 @@ Global settings that can be configured by the user. all_oneway : bool - Only use if specifically saving to .osm XML file with the `save_graph_xml` - function. If True, forces all ways to be loaded as oneway ways, preserving - the original order of nodes stored in the OSM way XML. This also retains - original OSM string values for oneway attribute values, rather than - converting them to a True/False bool. Default is `False`. + Only use if subsequently saving graph to an OSM XML file via the + `save_graph_xml` function. If True, forces all ways to be added as one-way + ways, preserving the original order of the nodes in the OSM way. This also + retains the original OSM way's oneway tag's string value as edge attribute + values, rather than converting them to True/False bool values. Default is + `False`. bidirectional_network_types : list[str] Network types for which a fully bidirectional graph will be created. Default is `["walk"]`. cache_folder : str | Path - Path to folder in which to save/load HTTP response cache, if the - `use_cache` setting equals `True`. Default is `"./cache"`. + Path to folder to save/load HTTP response cache files, if the `use_cache` + setting is `True`. Default is `"./cache"`. cache_only_mode : bool If True, download network data from Overpass then raise a `CacheOnlyModeInterrupt` error for user to catch. 
This prevents graph - building from taking place and instead just saves OSM response data to + building from taking place and instead just saves Overpass response to cache. Useful for sequentially caching lots of raw data (as you can only query Overpass one request at a time) then using the local cache to quickly build many graphs simultaneously with multiprocessing. Default is `False`. data_folder : str | Path - Path to folder in which to save/load graph files by default. Default is - `"./data"`. + Path to folder to save/load graph files by default. Default is `"./data"`. default_access : str - Filter for the OSM "access" key. Default is `'["access"!~"private"]'`. + Filter for the OSM "access" tag. Default is `'["access"!~"private"]'`. Note that also filtering out "access=no" ways prevents including transit-only bridges (e.g., Tilikum Crossing) from appearing in drivable road network (e.g., `'["access"!~"private|no"]'`). However, some drivable - tollroads have "access=no" plus a "access:conditional" key to clarify when + tollroads have "access=no" plus a "access:conditional" tag to clarify when it is accessible, so we can't filter out all "access=no" ways by default. Best to be permissive here then remove complicated combinations of tags programatically after the full graph is downloaded and constructed. @@ -86,19 +86,6 @@ nominatim_key : str | None Your Nominatim API key, if you are using an API instance that requires one. Default is `None`. -osm_xml_node_attrs : list[str] - Node attributes for saving .osm XML files with `save_graph_xml` function. - Default is `["id", "timestamp", "uid", "user", "version", "changeset", - "lat", "lon"]`. -osm_xml_node_tags : list[str] - Node tags for saving .osm XML files with `save_graph_xml` function. - Default is `["highway"]`. -osm_xml_way_attrs : list[str] - Edge attributes for saving .osm XML files with `save_graph_xml` function. - Default is `["id", "timestamp", "uid", "user", "version", "changeset"]`. 
-osm_xml_way_tags : list[str] - Edge tags for for saving .osm XML files with `save_graph_xml` function. - Default is `["highway", "lanes", "maxspeed", "name", "oneway"]`. overpass_endpoint : str The base API url to use for Overpass queries. Default is `"https://overpass-api.de/api"`. @@ -123,8 +110,8 @@ The timeout interval in seconds for HTTP requests, and (when applicable) for API to use while running the query. Default is `180`. use_cache : bool - If True, cache HTTP responses locally instead of calling API repeatedly - for the same request. Default is `True`. + If True, cache HTTP responses locally in `cache_folder` instead of calling + API repeatedly for the same request. Default is `True`. useful_tags_node : list[str] OSM "node" tags to add as graph node attributes, when present in the data retrieved from OSM. Default is `["ref", "highway"]`. @@ -168,40 +155,27 @@ memory: int | None = None nominatim_endpoint: str = "https://nominatim.openstreetmap.org/" nominatim_key: str | None = None -osm_xml_node_attrs: list[str] = [ - "id", - "timestamp", - "uid", - "user", - "version", - "changeset", - "lat", - "lon", -] -osm_xml_node_tags: list[str] = ["highway"] -osm_xml_way_attrs: list[str] = ["id", "timestamp", "uid", "user", "version", "changeset"] -osm_xml_way_tags: list[str] = ["highway", "lanes", "maxspeed", "name", "oneway"] overpass_endpoint: str = "https://overpass-api.de/api" overpass_rate_limit: bool = True overpass_settings: str = "[out:json][timeout:{timeout}]{maxsize}" requests_kwargs: dict[str, Any] = {} timeout: float = 180 use_cache: bool = True -useful_tags_node: list[str] = ["ref", "highway"] +useful_tags_node: list[str] = ["highway", "ref"] useful_tags_way: list[str] = [ + "access", + "area", "bridge", - "tunnel", - "oneway", - "lanes", - "ref", - "name", + "est_width", "highway", + "junction", + "landuse", + "lanes", "maxspeed", + "name", + "oneway", + "ref", "service", - "access", - "area", - "landuse", + "tunnel", "width", - "est_width", - 
"junction", ] diff --git a/osmnx/utils.py b/osmnx/utils.py index f9b517116..2073f1931 100644 --- a/osmnx/utils.py +++ b/osmnx/utils.py @@ -74,7 +74,7 @@ def ts(style: str = "datetime", template: str | None = None) -> str: Parameters ---------- style - {"datetime", "date", "time"} + {"datetime", "iso8601", "date", "time"} Format the timestamp with this built-in style. template If not None, format the timestamp with this format string instead of @@ -87,6 +87,8 @@ def ts(style: str = "datetime", template: str | None = None) -> str: if template is None: if style == "datetime": template = "{:%Y-%m-%d %H:%M:%S}" + elif style == "iso8601": + template = "{:%Y-%m-%dT%H:%M:%SZ}" elif style == "date": template = "{:%Y-%m-%d}" elif style == "time": diff --git a/osmnx/utils_geo.py b/osmnx/utils_geo.py index 28f349dde..6487aad13 100644 --- a/osmnx/utils_geo.py +++ b/osmnx/utils_geo.py @@ -97,12 +97,15 @@ def _consolidate_subdivide_geometry(geometry: Polygon | MultiPolygon) -> MultiPo Consolidate a geometry into a convex hull, then subdivide it into smaller sub-polygons if its area exceeds max size (in geometry's units). Configure - the max size via `max_query_area_size` in the `settings` module. + the max size via the `settings` module's `max_query_area_size`. Geometries + with areas much larger than `max_query_area_size` may take a long time to + process. When the geometry has a very large area relative to its vertex count, the resulting MultiPolygon's boundary may differ somewhat from the input, due to the way long straight lines are projected. You can interpolate - additional vertices along your input geometry's exterior to mitigate this. + additional vertices along your input geometry's exterior to mitigate this + if necessary. 
Parameters ---------- diff --git a/pyproject.toml b/pyproject.toml index 047bf549b..c966f3690 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,42 +3,42 @@ build-backend = "hatchling.build" requires = ["hatchling"] [project] -authors = [{name = "Geoff Boeing", email = "boeing@usc.edu"}] +authors = [{ name = "Geoff Boeing", email = "boeing@usc.edu" }] classifiers = [ - "Development Status :: 5 - Production/Stable", - "Intended Audience :: Developers", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Topic :: Scientific/Engineering :: GIS", - "Topic :: Scientific/Engineering :: Information Analysis", - "Topic :: Scientific/Engineering :: Mathematics", - "Topic :: Scientific/Engineering :: Physics", - "Topic :: Scientific/Engineering :: Visualization", + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Scientific/Engineering :: GIS", + "Topic :: Scientific/Engineering :: Information Analysis", + "Topic :: Scientific/Engineering :: Mathematics", + "Topic :: Scientific/Engineering :: Physics", + "Topic :: Scientific/Engineering :: Visualization", ] dependencies = [ - "geopandas>=0.12", - "networkx>=2.5", - "numpy>=1.21", - 
"pandas>=1.1", - "requests>=2.27", - "shapely>=2.0", + "geopandas>=0.12", + "networkx>=2.5", + "numpy>=1.21", + "pandas>=1.1", + "requests>=2.27", + "shapely>=2.0", ] description = "Download, model, analyze, and visualize street networks and other geospatial features from OpenStreetMap" dynamic = ["version"] keywords = ["GIS", "Networks", "OpenStreetMap", "Routing"] -license = {text = "MIT License"} -maintainers = [{name = "OSMnx contributors"}] +license = { text = "MIT License" } +maintainers = [{ name = "OSMnx contributors" }] name = "osmnx" readme = "README.md" -requires-python = ">=3.9" # match classifiers above and ruff/mypy versions below +requires-python = ">=3.9" # match classifiers above and ruff/mypy versions below [project.optional-dependencies] entropy = ["scipy>=1.5"] @@ -85,7 +85,7 @@ force-single-line = true max-complexity = 14 [tool.ruff.lint.pycodestyle] -max-line-length = 110 # line length + 10% since it isn't a hard upper bound +max-line-length = 110 # line length + 10% since it isn't a hard upper bound [tool.ruff.lint.pydocstyle] convention = "numpy" diff --git a/tests/environments/env-ci.yml b/tests/environments/env-ci.yml index c8f9365ca..e147cbc9e 100644 --- a/tests/environments/env-ci.yml +++ b/tests/environments/env-ci.yml @@ -21,6 +21,7 @@ dependencies: - scipy # testing + - lxml - pre-commit - pytest - pytest-cov diff --git a/tests/environments/env-test-minimal.yml b/tests/environments/env-test-minimal.yml index fa4ea40ea..7e011189a 100644 --- a/tests/environments/env-test-minimal.yml +++ b/tests/environments/env-test-minimal.yml @@ -24,6 +24,7 @@ dependencies: - scipy=1.5 # testing + - lxml - pre-commit - pytest - pytest-cov diff --git a/tests/input_data/osm_schema.xsd b/tests/input_data/osm_schema.xsd new file mode 100644 index 000000000..167fefc5f --- /dev/null +++ b/tests/input_data/osm_schema.xsd @@ -0,0 +1,93 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/test_osmnx.py b/tests/test_osmnx.py index d3163f71a..da50e74cf 100644 --- a/tests/test_osmnx.py +++ b/tests/test_osmnx.py @@ -15,13 +15,13 @@ import tempfile from collections import OrderedDict from pathlib import Path -from xml.etree import ElementTree import geopandas as gpd import networkx as nx import numpy as np import pandas as pd import pytest +from lxml import etree from requests.exceptions import ConnectionError from shapely import wkt from shapely.geometry import Point @@ -52,17 +52,18 @@ def test_logging() -> None: """Test the logger.""" - ox.log("test a fake default message") - ox.log("test a fake debug", level=lg.DEBUG) - ox.log("test a fake info", level=lg.INFO) - ox.log("test a fake warning", level=lg.WARNING) - ox.log("test a fake error", level=lg.ERROR) + ox.utils.log("test a fake default message") + ox.utils.log("test a fake debug", level=lg.DEBUG) + ox.utils.log("test a fake info", level=lg.INFO) + ox.utils.log("test a fake warning", level=lg.WARNING) + ox.utils.log("test a fake error", level=lg.ERROR) - ox.citation(style="apa") - ox.citation(style="bibtex") - ox.citation(style="ieee") - ox.ts(style="date") - ox.ts(style="time") + ox.utils.citation(style="apa") + ox.utils.citation(style="bibtex") + ox.utils.citation(style="ieee") + ox.utils.ts(style="iso8601") + ox.utils.ts(style="date") + ox.utils.ts(style="time") def test_exceptions() -> None: @@ -164,36 +165,29 @@ def test_osm_xml() -> None: Path.unlink(Path(temp_filename)) # test .osm xml saving - G = ox.graph_from_point(location_point, dist=500, network_type="drive") + G = ox.graph_from_point(location_point, dist=500, network_type="drive", simplify=False) fp = Path(ox.settings.data_folder) / "graph.osm" - ox.save_graph_xml(G, merge_edges=False, filepath=fp) # issues UserWarning - G = ox.graph_from_xml(fp) # issues UserWarning - - # test osm xml output merge edges - default_all_oneway = ox.settings.all_oneway - 
ox.settings.all_oneway = True - ox.io.save_graph_xml(G, merge_edges=True, edge_tag_aggs=[("length", "sum")], precision=5) + ox.io.save_graph_xml(G, filepath=fp, way_tag_aggs={"lanes": "sum"}) - # test osm xml output from gdfs - nodes, edges = ox.graph_to_gdfs(G) - ox.io.save_graph_xml((nodes, edges)) - - # test ordered nodes from way - df_uv = pd.DataFrame({"u": [54, 2, 5, 3, 10, 19, 20], "v": [76, 3, 8, 10, 5, 20, 15]}) - ordered_nodes = ox._osm_xml._get_unique_nodes_ordered_from_way(df_uv) - assert ordered_nodes == [2, 3, 10, 5, 8] + # validate saved XML against XSD schema + xsd_filepath = "./tests/input_data/osm_schema.xsd" + parser = etree.XMLParser(schema=etree.XMLSchema(file=xsd_filepath)) + _ = etree.parse(fp, parser=parser) # noqa: S320 # test roundabout handling + default_all_oneway = ox.settings.all_oneway + ox.settings.all_oneway = True default_overpass_settings = ox.settings.overpass_settings ox.settings.overpass_settings += '[date:"2023-04-01T00:00:00Z"]' point = (39.0290346, -84.4696884) + G = ox.graph_from_point(point, dist=500, dist_type="bbox", network_type="drive", simplify=True) + with pytest.raises(ox._errors.GraphSimplificationError): + ox.io.save_graph_xml(G) G = ox.graph_from_point(point, dist=500, dist_type="bbox", network_type="drive", simplify=False) - gdf_edges = ox.graph_to_gdfs(G, nodes=False) - gdf_way = gdf_edges[gdf_edges["osmid"] == 570883705] # roundabout - first = gdf_way.iloc[0].dropna().astype(str) - root = ElementTree.Element("osm", attrib={"version": "0.6", "generator": "OSMnx"}) - edge = ElementTree.SubElement(root, "way") - ox._osm_xml._append_nodes_as_edge_attrs(edge, first.to_dict(), gdf_way) + nx.set_node_attributes(G, 0, name="uid") + ox.io.save_graph_xml(G) + _ = etree.parse(fp, parser=parser) # noqa: S320 + G = ox.graph_from_xml(fp) # issues UserWarning # restore settings ox.settings.overpass_settings = default_overpass_settings