Skip to content

Commit

Permalink
expose encoding argument with default utf-8 for reading xml files
Browse files (browse the repository at this point in the history)
  • Loading branch information
gboeing committed Nov 18, 2023
1 parent ef5d465 commit 64d0fcd
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 11 deletions.
6 changes: 4 additions & 2 deletions osmnx/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ def features_from_polygon(polygon, tags):
return _create_gdf(response_jsons, polygon, tags)


def features_from_xml(filepath, polygon=None, tags=None):
def features_from_xml(filepath, polygon=None, tags=None, encoding="utf-8"):
"""
Create a GeoDataFrame of OSM features in an OSM-formatted XML file.
Expand Down Expand Up @@ -367,13 +367,15 @@ def features_from_xml(filepath, polygon=None, tags=None):
the area. `tags = {'amenity':True, 'landuse':['retail','commercial'],
'highway':'bus_stop'}` would return all amenities, landuse=retail,
landuse=commercial, and highway=bus_stop.
encoding : string
the XML file's character encoding
Returns
-------
gdf : geopandas.GeoDataFrame
"""
# transmogrify file of OSM XML data into JSON
response_jsons = [osm_xml._overpass_json_from_file(filepath)]
response_jsons = [osm_xml._overpass_json_from_file(filepath, encoding)]

# create GeoDataFrame using this response JSON
return _create_gdf(response_jsons, polygon=polygon, tags=tags)
Expand Down
8 changes: 6 additions & 2 deletions osmnx/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -530,7 +530,9 @@ def graph_from_polygon(
return G


def graph_from_xml(filepath, bidirectional=False, simplify=True, retain_all=False):
def graph_from_xml(
filepath, bidirectional=False, simplify=True, retain_all=False, encoding="utf-8"
):
"""
Create a graph from data in a .osm formatted XML file.
Expand All @@ -550,13 +552,15 @@ def graph_from_xml(filepath, bidirectional=False, simplify=True, retain_all=Fals
retain_all : bool
if True, return the entire graph even if it is not connected.
otherwise, retain only the largest weakly connected component.
encoding : string
the XML file's character encoding
Returns
-------
G : networkx.MultiDiGraph
"""
# transmogrify file of OSM XML data into JSON
response_jsons = [osm_xml._overpass_json_from_file(filepath)]
response_jsons = [osm_xml._overpass_json_from_file(filepath, encoding)]

# create graph using this response JSON
G = _create_graph(response_jsons, bidirectional=bidirectional, retain_all=retain_all)
Expand Down
16 changes: 9 additions & 7 deletions osmnx/osm_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,30 +61,32 @@ def endElement(self, name):
self.object["elements"].append(self._element)


def _overpass_json_from_file(filepath):
def _overpass_json_from_file(filepath, encoding):
"""
Read OSM XML from file and return Overpass-like JSON.
Parameters
----------
filepath : string or pathlib.Path
path to file containing OSM XML data
encoding : string
the XML file's character encoding
Returns
-------
OSMContentHandler object
"""

# open the XML file, handling bz2 or regular XML
def _opener(filepath):
def _opener(filepath, encoding):
if filepath.suffix == ".bz2":
return bz2.BZ2File(filepath)
return bz2.open(filepath, mode="rt", encoding=encoding)

# otherwise just open it if it's not bz2
return filepath.open()
return filepath.open(encoding=encoding)

# warn if this XML file was generated by OSMnx itself
with _opener(Path(filepath)) as f:
with _opener(Path(filepath), encoding) as f:
root_attrs = ET.parse(f).getroot().attrib
if "generator" in root_attrs and "OSMnx" in root_attrs["generator"]:
warn(
Expand All @@ -97,7 +99,7 @@ def _opener(filepath):
)

# parse the XML to Overpass-like JSON
with _opener(Path(filepath)) as f:
with _opener(Path(filepath), encoding) as f:
handler = _OSMContentHandler()
xml.sax.parse(f, handler)
return handler.object
Expand Down Expand Up @@ -187,7 +189,7 @@ def _save_graph_xml(
precision=6,
):
"""
Save graph to disk as an OSM-formatted XML .osm file.
Save graph to disk as an OSM-formatted UTF-8 encoded XML .osm file.
Parameters
----------
Expand Down

0 comments on commit 64d0fcd

Please sign in to comment.