diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ccbfaa4c..9ef1e5c8f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## Unreleased - formally support Python 3.12 +- fix Windows-specific character encoding issues when reading XML files ## 1.7.1 (2023-10-29) diff --git a/osmnx/features.py b/osmnx/features.py index 4440fbc00..d47808fd9 100644 --- a/osmnx/features.py +++ b/osmnx/features.py @@ -336,7 +336,7 @@ def features_from_polygon(polygon, tags): return _create_gdf(response_jsons, polygon, tags) -def features_from_xml(filepath, polygon=None, tags=None): +def features_from_xml(filepath, polygon=None, tags=None, encoding="utf-8"): """ Create a GeoDataFrame of OSM features in an OSM-formatted XML file. @@ -367,13 +367,15 @@ def features_from_xml(filepath, polygon=None, tags=None): the area. `tags = {'amenity':True, 'landuse':['retail','commercial'], 'highway':'bus_stop'}` would return all amenities, landuse=retail, landuse=commercial, and highway=bus_stop. + encoding : string + the XML file's character encoding Returns ------- gdf : geopandas.GeoDataFrame """ # transmogrify file of OSM XML data into JSON - response_jsons = [osm_xml._overpass_json_from_file(filepath)] + response_jsons = [osm_xml._overpass_json_from_file(filepath, encoding)] # create GeoDataFrame using this response JSON return _create_gdf(response_jsons, polygon=polygon, tags=tags) diff --git a/osmnx/graph.py b/osmnx/graph.py index c85d4e27d..e7241a774 100644 --- a/osmnx/graph.py +++ b/osmnx/graph.py @@ -530,7 +530,9 @@ def graph_from_polygon( return G -def graph_from_xml(filepath, bidirectional=False, simplify=True, retain_all=False): +def graph_from_xml( + filepath, bidirectional=False, simplify=True, retain_all=False, encoding="utf-8" +): """ Create a graph from data in a .osm formatted XML file. @@ -550,13 +552,15 @@ def graph_from_xml(filepath, bidirectional=False, simplify=True, retain_all=Fals retain_all : bool if True, return the entire graph even if it is not connected. otherwise, retain only the largest weakly connected component. + encoding : string + the XML file's character encoding Returns ------- G : networkx.MultiDiGraph """ # transmogrify file of OSM XML data into JSON - response_jsons = [osm_xml._overpass_json_from_file(filepath)] + response_jsons = [osm_xml._overpass_json_from_file(filepath, encoding)] # create graph using this response JSON G = _create_graph(response_jsons, bidirectional=bidirectional, retain_all=retain_all) diff --git a/osmnx/osm_xml.py b/osmnx/osm_xml.py index b51338f82..7a0b2e28e 100644 --- a/osmnx/osm_xml.py +++ b/osmnx/osm_xml.py @@ -61,7 +61,7 @@ def endElement(self, name): self.object["elements"].append(self._element) -def _overpass_json_from_file(filepath): +def _overpass_json_from_file(filepath, encoding): """ Read OSM XML from file and return Overpass-like JSON. @@ -69,6 +69,8 @@ def _overpass_json_from_file(filepath): ---------- filepath : string or pathlib.Path path to file containing OSM XML data + encoding : string + the XML file's character encoding Returns ------- @@ -76,15 +78,15 @@ def _overpass_json_from_file(filepath): """ # open the XML file, handling bz2 or regular XML - def _opener(filepath): + def _opener(filepath, encoding): if filepath.suffix == ".bz2": - return bz2.BZ2File(filepath) + return bz2.open(filepath, mode="rt", encoding=encoding) # otherwise just open it if it's not bz2 - return filepath.open() + return filepath.open(encoding=encoding) # warn if this XML file was generated by OSMnx itself - with _opener(Path(filepath)) as f: + with _opener(Path(filepath), encoding) as f: root_attrs = ET.parse(f).getroot().attrib if "generator" in root_attrs and "OSMnx" in root_attrs["generator"]: warn( @@ -97,7 +99,7 @@ def _opener(filepath): ) # parse the XML to Overpass-like JSON - with _opener(Path(filepath)) as f: + with _opener(Path(filepath), encoding) as f: handler = _OSMContentHandler() xml.sax.parse(f, handler) return handler.object @@ -187,7 +189,7 @@ def _save_graph_xml( precision=6, ): """ - Save graph to disk as an OSM-formatted XML .osm file. + Save graph to disk as an OSM-formatted UTF-8 encoded XML .osm file. Parameters ----------