Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

expose encoding with default utf-8 for reading xml files #1084

Merged
merged 2 commits into from
Nov 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## Unreleased

- formally support Python 3.12
- fix Windows-specific character encoding issues when reading XML files by exposing an `encoding` parameter (default "utf-8") on `graph_from_xml` and `features_from_xml`

## 1.7.1 (2023-10-29)

Expand Down
6 changes: 4 additions & 2 deletions osmnx/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ def features_from_polygon(polygon, tags):
return _create_gdf(response_jsons, polygon, tags)


def features_from_xml(filepath, polygon=None, tags=None):
def features_from_xml(filepath, polygon=None, tags=None, encoding="utf-8"):
"""
Create a GeoDataFrame of OSM features in an OSM-formatted XML file.

Expand Down Expand Up @@ -367,13 +367,15 @@ def features_from_xml(filepath, polygon=None, tags=None):
the area. `tags = {'amenity':True, 'landuse':['retail','commercial'],
'highway':'bus_stop'}` would return all amenities, landuse=retail,
landuse=commercial, and highway=bus_stop.
encoding : string
the XML file's character encoding

Returns
-------
gdf : geopandas.GeoDataFrame
"""
# transmogrify file of OSM XML data into JSON
response_jsons = [osm_xml._overpass_json_from_file(filepath)]
response_jsons = [osm_xml._overpass_json_from_file(filepath, encoding)]

# create GeoDataFrame using this response JSON
return _create_gdf(response_jsons, polygon=polygon, tags=tags)
Expand Down
8 changes: 6 additions & 2 deletions osmnx/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -530,7 +530,9 @@ def graph_from_polygon(
return G


def graph_from_xml(filepath, bidirectional=False, simplify=True, retain_all=False):
def graph_from_xml(
filepath, bidirectional=False, simplify=True, retain_all=False, encoding="utf-8"
):
"""
Create a graph from data in a .osm formatted XML file.

Expand All @@ -550,13 +552,15 @@ def graph_from_xml(filepath, bidirectional=False, simplify=True, retain_all=Fals
retain_all : bool
if True, return the entire graph even if it is not connected.
otherwise, retain only the largest weakly connected component.
encoding : string
the XML file's character encoding

Returns
-------
G : networkx.MultiDiGraph
"""
# transmogrify file of OSM XML data into JSON
response_jsons = [osm_xml._overpass_json_from_file(filepath)]
response_jsons = [osm_xml._overpass_json_from_file(filepath, encoding)]

# create graph using this response JSON
G = _create_graph(response_jsons, bidirectional=bidirectional, retain_all=retain_all)
Expand Down
16 changes: 9 additions & 7 deletions osmnx/osm_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,30 +61,32 @@ def endElement(self, name):
self.object["elements"].append(self._element)


def _overpass_json_from_file(filepath):
def _overpass_json_from_file(filepath, encoding):
"""
Read OSM XML from file and return Overpass-like JSON.

Parameters
----------
filepath : string or pathlib.Path
path to file containing OSM XML data
encoding : string
the XML file's character encoding

Returns
-------
Overpass-like JSON (the `object` attribute of the `_OSMContentHandler` that parsed the file)
"""

# open the XML file, handling bz2 or regular XML
def _opener(filepath):
def _opener(filepath, encoding):
if filepath.suffix == ".bz2":
return bz2.BZ2File(filepath)
return bz2.open(filepath, mode="rt", encoding=encoding)

# otherwise just open it if it's not bz2
return filepath.open()
return filepath.open(encoding=encoding)

# warn if this XML file was generated by OSMnx itself
with _opener(Path(filepath)) as f:
with _opener(Path(filepath), encoding) as f:
root_attrs = ET.parse(f).getroot().attrib
if "generator" in root_attrs and "OSMnx" in root_attrs["generator"]:
warn(
Expand All @@ -97,7 +99,7 @@ def _opener(filepath):
)

# parse the XML to Overpass-like JSON
with _opener(Path(filepath)) as f:
with _opener(Path(filepath), encoding) as f:
handler = _OSMContentHandler()
xml.sax.parse(f, handler)
return handler.object
Expand Down Expand Up @@ -187,7 +189,7 @@ def _save_graph_xml(
precision=6,
):
"""
Save graph to disk as an OSM-formatted XML .osm file.
Save graph to disk as an OSM-formatted UTF-8 encoded XML .osm file.

Parameters
----------
Expand Down