Skip to content

Commit

Permalink
Merge pull request #54 from ropensci/encodingUTF8
Browse files Browse the repository at this point in the history
Set encoding to UTF-8 for tags and user names in returned data.frames
  • Loading branch information
jmaspons authored Jul 25, 2024
2 parents 3b6d998 + 4affa55 commit 790e351
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 9 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: osmapiR
Title: 'OpenStreetMap' API
Version: 0.1.0.9017
Version: 0.1.0.9018
Authors@R: c(
person("Joan", "Maspons", , "[email protected]", role = c("aut", "cre", "cph"),
comment = c(ORCID = "0000-0003-2286-8727")),
Expand Down
9 changes: 5 additions & 4 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@
* Fix changesets' bbox in `st_as_sf.osmapi_changesets()` ([84f16e7a](https://github.com/ropensci/osmapiR/commit/84f16e7adda087ab707cc2644c79ff1590cf307e)).
* Implement NA bboxes in `st_as_sf.osmapi_changesets()` ([7ea4f5d7](https://github.com/ropensci/osmapiR/commit/7ea4f5d7f412ef8cf7691741b836cf45ddeb61f2)).
* Remove dontrun in examples that don't require authentication (#47).
* Improve performance when parsing gpx data to data.frame (#48)
* Parse TrackPointExtension data from gpx if available (#49)
* Tweaks in DESCRIPTION and CITATION files by @Maelle (#50, #51)
* Sort OSM objects in `osm_get_objects(..., full_objects = TRUE)` and optimize (#52)
* Improve performance when parsing gpx data to data.frame (#48).
* Parse TrackPointExtension data from gpx if available (#49).
* Tweaks in DESCRIPTION and CITATION files by @Maelle (#50, #51).
* Sort OSM objects in `osm_get_objects(..., full_objects = TRUE)` and optimize (#52).
* Set encoding to UTF-8 for tags and user names in returned data.frames (#54).


# osmapiR 0.1.0
Expand Down
1 change: 1 addition & 0 deletions R/osmapi_user_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,7 @@ osm_get_preferences_user <- function(key, format = c("R", "xml", "json")) {

if (!missing(key)) {
out <- httr2::resp_body_string(resp)
out <- enc2utf8(out)
} else if (format %in% c("R", "xml")) {
out <- httr2::resp_body_xml(resp)
if (format == "R") {
Expand Down
30 changes: 28 additions & 2 deletions R/xml_to_R.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ tags_xml2mat_wide <- function(xml_nodeset) {
tagV <- vapply(tag, function(x) x, FUN.VALUE = character(2))
m[i, tagV[1, ]] <- tagV[2, ]
}
m <- enc2utf8(m)

return(m)
}
Expand All @@ -23,6 +24,8 @@ tags_xml2list_df <- function(xml_nodeset) {
)),
names = c("key", "value")
)
tags_df$key <- enc2utf8(tags_df$key)
tags_df$value <- enc2utf8(tags_df$value)

class(tags_df) <- c("tags_df", "data.frame")

Expand All @@ -35,7 +38,8 @@ tags_xml2list_df <- function(xml_nodeset) {

## Changesets ----

# osm_download_changeset() in osmChange xml format. Not related
# For osm_get_changesets() & osm_query_changesets().
# osm_download_changeset() uses the osmChange xml format and is not related.

changeset_xml2DF <- function(xml, tags_in_columns = FALSE) {
changesets <- xml2::xml_children(xml)
Expand Down Expand Up @@ -67,6 +71,7 @@ changeset_xml2DF <- function(xml, tags_in_columns = FALSE) {
out$changes_count <- as.integer(out$changes_count)
out$created_at <- as.POSIXct(out$created_at, format = "%Y-%m-%dT%H:%M:%OS", tz = "GMT")
out$closed_at <- as.POSIXct(out$closed_at, format = "%Y-%m-%dT%H:%M:%OS", tz = "GMT")
out$user <- enc2utf8(out$user)

discussion <- xml2::xml_child(changesets, "discussion")

Expand All @@ -79,6 +84,8 @@ changeset_xml2DF <- function(xml, tags_in_columns = FALSE) {
comment_text <- xml2::xml_text(xml2::xml_child(x, "text"))
dis <- data.frame(comment_attrs, comment_text)
dis$date <- as.POSIXct(dis$date, format = "%Y-%m-%dT%H:%M:%OS", tz = "GMT")
dis$user <- enc2utf8(dis$user)
dis$comment_text <- enc2utf8(dis$comment_text)

class(dis) <- c("changeset_comments", "data.frame")

Expand Down Expand Up @@ -202,6 +209,7 @@ object_xml2DF <- function(xml, tags_in_columns = FALSE) {
out$visible <- ifelse(out$visible == "true", TRUE, FALSE)
out$version <- as.integer(out$version)
out$timestamp <- as.POSIXct(out$timestamp, format = "%Y-%m-%dT%H:%M:%OS", tz = "GMT")
out$user <- enc2utf8(out$user)

members <- vector("list", length = length(objects))
members[object_type == "way"] <- lapply(objects[object_type == "way"], function(x) {
Expand Down Expand Up @@ -270,11 +278,17 @@ gpx_meta_xml2DF <- function(xml) {

gpx_attrs <- do.call(rbind, xml2::xml_attrs(gpx_files))
description <- xml2::xml_text(xml2::xml_child(gpx_files, "description"))
tags <- lapply(xml2::xml_find_all(gpx_files, ".//tag", flatten = FALSE), xml2::xml_text)
tags <- lapply(xml2::xml_find_all(gpx_files, ".//tag", flatten = FALSE), function(x) {
x <- xml2::xml_text(x)
enc2utf8(x)
})

out <- data.frame(gpx_attrs, description)
out$timestamp <- as.POSIXct(out$timestamp, format = "%Y-%m-%dT%H:%M:%OS", tz = "GMT")
out$pending <- ifelse(out$pending == "true", TRUE, FALSE)
out$name <- enc2utf8(out$name)
out$user <- enc2utf8(out$user)
out$description <- enc2utf8(out$description)

out$tags <- tags

Expand Down Expand Up @@ -380,6 +394,7 @@ trk_xml2df <- function(trk) {

details <- xml2::xml_find_all(trk, "./*[not(name() = 'trkseg')]") # no trkseg nodes
trk_details <- stats::setNames(xml2::xml_text(details), nm = xml2::xml_name(details))
trk_details <- enc2utf8(trk_details)
if (length(trk_details)) {
names(trk_details) <- paste0("track_", names(trk_details))
}
Expand Down Expand Up @@ -476,6 +491,9 @@ user_details_xml2DF <- function(xml) {

out$account_created <- as.POSIXct(out$account_created, format = "%Y-%m-%dT%H:%M:%OS", tz = "GMT")

out$display_name <- enc2utf8(out$display_name)
out$description <- enc2utf8(out$description)

return(out)
}

Expand Down Expand Up @@ -515,6 +533,9 @@ logged_user_details_xml2list <- function(xml) {
)
)

out$user["display_name"] <- enc2utf8(out$user["display_name"])
out$description <- enc2utf8(out$description)

return(out)
}

Expand All @@ -529,6 +550,8 @@ user_preferences_xml2DF <- function(xml) {
as.data.frame(do.call(rbind, xml2::xml_attrs(preference))),
names = c("key", "value")
)
out$key <- enc2utf8(out$key)
out$value <- enc2utf8(out$value)

return(out)
}
Expand Down Expand Up @@ -566,6 +589,9 @@ note_xml2DF <- function(xml) {

comm <- data.frame(date, uid, user, user_url, action, text, html)
comm$date <- as.POSIXct(comm$date, format = "%Y-%m-%d %H:%M:%OS", tz = "GMT")
comm$user <- enc2utf8(comm$user)
comm$text <- enc2utf8(comm$text)
comm$html <- enc2utf8(comm$html)

class(comm) <- c("note_comments", "data.frame")

Expand Down
4 changes: 2 additions & 2 deletions codemeta.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"codeRepository": "https://github.com/ropensci/osmapiR",
"issueTracker": "https://github.com/ropensci/osmapiR/issues",
"license": "https://spdx.org/licenses/GPL-3.0",
"version": "0.1.0.9017",
"version": "0.1.0.9018",
"programmingLanguage": {
"@type": "ComputerLanguage",
"name": "R",
Expand Down Expand Up @@ -180,7 +180,7 @@
"name": "osmapiR: OpenStreetMap API",
"identifier": "10.32614/CRAN.package.osmapiR",
"url": "https://docs.ropensci.org/osmapiR/",
"description": "R package version 0.1.0.9017 \nhttps://github.com/ropensci/osmapiR",
"description": "R package version 0.1.0.9018 \nhttps://github.com/ropensci/osmapiR",
"@id": "https://doi.org/10.32614/CRAN.package.osmapiR",
"sameAs": "https://doi.org/10.32614/CRAN.package.osmapiR"
}
Expand Down
1 change: 1 addition & 0 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,7 @@ userName
usethis
usr
usrs
utf
uvAy
vals
vapply
Expand Down

0 comments on commit 790e351

Please sign in to comment.