From 1d906b815b214a37989705b598101c74fdd3650e Mon Sep 17 00:00:00 2001 From: Rich Bielby Date: Tue, 28 Jan 2025 17:06:51 +0000 Subject: [PATCH] Added search functionality to get_publications() (#67) * Added search functionality to get_publications() * Updated news.md with development updates * Added no rows returned warning function * Added test for warning_no_rows * Added clean up to remove spaces from search string in get_publications * Updates to get publications tests * Update tests/testthat/test-get_publication.R Co-authored-by: Cam Race <52536248+cjrace@users.noreply.github.com> * Adjusting publication search tests based on PR comments * Redoing a bit I accidentally undid on the publication search testing --------- Co-authored-by: Cam Race <52536248+cjrace@users.noreply.github.com> --- NEWS.md | 5 +++ R/api_url.R | 10 +++++- R/get_publications.R | 31 +++++++++++++++++- R/http_request_error.R | 12 ++++--- R/warning_max_pages.R | 3 +- R/warning_no_rows.R | 23 ++++++++++++++ man/api_url.Rd | 4 +++ man/get_publications.Rd | 5 +++ man/warning_max_pages.Rd | 3 ++ man/warning_no_rows.Rd | 24 ++++++++++++++ tests/testthat/test-get_publication.R | 46 ++++++++++++++++++++++++--- tests/testthat/test-query_dataset.R | 5 ++- tests/testthat/test-warning_no_rows.R | 6 ++++ 13 files changed, 163 insertions(+), 14 deletions(-) create mode 100644 R/warning_no_rows.R create mode 100644 man/warning_no_rows.Rd create mode 100644 tests/testthat/test-warning_no_rows.R diff --git a/NEWS.md b/NEWS.md index 892d143..f62638d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # eesyapi (development version) +* Added `search` parameter to `get_publications()` to allow filtering on publication title text. +* Cleaned up behaviour of `query_dataset()` to only use geographies as a parameter, rather than +geographies, locations and geographic_levels. +* Moved all background functions to be internal. + # eesyapi 0.4.0 * Optimisation to parsing of JSON responses in `query_dataset()` diff --git a/R/api_url.R b/R/api_url.R index 7ac7065..6a79c96 100644 --- a/R/api_url.R +++ b/R/api_url.R @@ -14,6 +14,8 @@ #' #' @param endpoint Name of endpoint, can be "get-publications", "get-data-catalogue", #' "get-summary", "get-meta", "get-csv", "get-data" or "post-data" +#' @param search String for filtering the publication list for publication titles and summaries +#' containing the search string provided (strings separated by spaces are combined with OR logic). #' @param publication_id ID of the publication to be connected to. This is required if the #' endpoint is "get-data-catalogue" #' @param dataset_id ID of data set to be connected to. This is required if the endpoint is one @@ -50,6 +52,7 @@ #' ) api_url <- function( endpoint = "get-publications", + search = NULL, publication_id = NULL, dataset_id = NULL, indicators = NULL, @@ -124,7 +127,12 @@ api_url <- function( url <- paste0( endpoint_base_version, "publications?", - api_url_pages(page_size = page_size, page = page) + api_url_pages(page_size = page_size, page = page), + ifelse( + !is.null(search), + paste0("&search=", search), + "" + ) ) } else if (endpoint == "get-data-catalogue") { url <- paste0( diff --git a/R/get_publications.R b/R/get_publications.R index daf8fdc..c36db4d 100644 --- a/R/get_publications.R +++ b/R/get_publications.R @@ -7,17 +7,43 @@ #' #' @examples #' get_publications() +#' get_publications(search = "attendance") get_publications <- function( ees_environment = NULL, api_version = NULL, + search = NULL, page_size = 40, page = NULL, verbose = FALSE) { validate_page_size(page_size) + if (!is.null(search)) { + # Replacing any spaces with dashes, this will combine separate strings as an OR query. + search <- stringr::str_replace_all(search, " ", "-") + search_substr <- search |> + strsplit("-") |> + magrittr::extract2(1) + search_substr_lengths <- search_substr |> + stringr::str_length() + # Check for any of the search strings being shorten than 3 characters. + if (all(search_substr_lengths < 3)) { + stop( + "Individual search string(s) must be 3 characters or longer: \"", + paste0(search_substr[search_substr_lengths < 3], collapse = "\", \""), + "\"." + ) + } else if (any(search_substr_lengths < 3)) { + warning( + "The API search will ignore any search strings less than 3 chars in length: \"", + paste0(search_substr[search_substr_lengths < 3], collapse = "\", \""), + "\"." + ) + } + } response <- httr::GET( api_url( ees_environment = ees_environment, api_version = api_version, + search = search, page_size = page_size, page = page, verbose = verbose @@ -33,6 +59,7 @@ get_publications <- function( api_url( ees_environment = ees_environment, api_version = api_version, + search = search, page_size = page_size, page = page, verbose = verbose @@ -45,6 +72,8 @@ get_publications <- function( } } } - response |> warning_max_pages() + response |> + warning_max_pages() |> + warning_no_rows() return(response$results) } diff --git a/R/http_request_error.R b/R/http_request_error.R index 4b4895e..a1cd923 100644 --- a/R/http_request_error.R +++ b/R/http_request_error.R @@ -57,11 +57,13 @@ http_request_error <- function( error_message, ifelse( "items" %in% names(error_detail), - paste0("\n Error items: ",error_detail |> - dplyr::pull("items") |> - unlist() |> - paste0(collapse = ", ") - ), + paste0( + "\n Error items: ", + error_detail |> + dplyr::pull("items") |> + unlist() |> + paste0(collapse = ", ") + ), "" ), ifelse( diff --git a/R/warning_max_pages.R b/R/warning_max_pages.R index 9ab7bd5..9a9793b 100644 --- a/R/warning_max_pages.R +++ b/R/warning_max_pages.R @@ -2,7 +2,7 @@ #' #' @param api_result Output from an API get query #' -#' @return NULL +#' @return Original input (api_result) unchanged #' #' @keywords internal #' @@ -23,4 +23,5 @@ warning_max_pages <- function(api_result) { ) ) } + return(api_result) } diff --git a/R/warning_no_rows.R b/R/warning_no_rows.R new file mode 100644 index 0000000..182214d --- /dev/null +++ b/R/warning_no_rows.R @@ -0,0 +1,23 @@ +#' Warn on zero rows returned +#' +#' @param api_result Output from an API get query +#' +#' @return Original input (api_result) unchanged +#' +#' @keywords internal +#' +#' @examples +#' response_page <- httr::GET(api_url("get-publications", search = "bob")) |> +#' httr::content("text") |> +#' jsonlite::fromJSON() |> +#' eesyapi:::warning_no_rows() +warning_no_rows <- function(api_result) { + if (api_result$paging$totalResults == 0) { + warning( + paste0( + "Your query returned zero rows." + ) + ) + } + return(api_result) +} diff --git a/man/api_url.Rd b/man/api_url.Rd index 9cf347f..d723d86 100644 --- a/man/api_url.Rd +++ b/man/api_url.Rd @@ -6,6 +6,7 @@ \usage{ api_url( endpoint = "get-publications", + search = NULL, publication_id = NULL, dataset_id = NULL, indicators = NULL, @@ -25,6 +26,9 @@ api_url( \item{endpoint}{Name of endpoint, can be "get-publications", "get-data-catalogue", "get-summary", "get-meta", "get-csv", "get-data" or "post-data"} +\item{search}{String for filtering the publication list for publication titles and summaries +containing the search string provided (strings separated by spaces are combined with OR logic).} + \item{publication_id}{ID of the publication to be connected to. This is required if the endpoint is "get-data-catalogue"} diff --git a/man/get_publications.Rd b/man/get_publications.Rd index 78e6e47..fc36816 100644 --- a/man/get_publications.Rd +++ b/man/get_publications.Rd @@ -7,6 +7,7 @@ get_publications( ees_environment = NULL, api_version = NULL, + search = NULL, page_size = 40, page = NULL, verbose = FALSE @@ -17,6 +18,9 @@ get_publications( \item{api_version}{EES API version} +\item{search}{String for filtering the publication list for publication titles and summaries +containing the search string provided (strings separated by spaces are combined with OR logic).} + \item{page_size}{Number of results to return in a single query} \item{page}{Page number of query results to return} @@ -31,4 +35,5 @@ Get publications } \examples{ get_publications() +get_publications(search = "attendance") } diff --git a/man/warning_max_pages.Rd b/man/warning_max_pages.Rd index c293d74..612a01e 100644 --- a/man/warning_max_pages.Rd +++ b/man/warning_max_pages.Rd @@ -9,6 +9,9 @@ warning_max_pages(api_result) \arguments{ \item{api_result}{Output from an API get query} } +\value{ +Original input (api_result) unchanged +} \description{ Overshot maximum pages in results } diff --git a/man/warning_no_rows.Rd b/man/warning_no_rows.Rd new file mode 100644 index 0000000..25ec5dd --- /dev/null +++ b/man/warning_no_rows.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/warning_no_rows.R +\name{warning_no_rows} +\alias{warning_no_rows} +\title{Warn on zero rows returned} +\usage{ +warning_no_rows(api_result) +} +\arguments{ +\item{api_result}{Output from an API get query} +} +\value{ +Original input (api_result) unchanged +} +\description{ +Warn on zero rows returned +} +\examples{ +response_page <- httr::GET(api_url("get-publications", search = "bob")) |> + httr::content("text") |> + jsonlite::fromJSON() |> + eesyapi:::warning_no_rows() +} +\keyword{internal} diff --git a/tests/testthat/test-get_publication.R b/tests/testthat/test-get_publication.R index 4b49fc7..bb0549c 100644 --- a/tests/testthat/test-get_publication.R +++ b/tests/testthat/test-get_publication.R @@ -1,10 +1,8 @@ # Check the get_publication_catalogue() function returns the expected data -# WARNING: This depends on live data, so may fail due to real-life changes. -# If that's the case, take a new snapshot by running seed_tests() test_that("Retrieve publication list", { - expect_equal( - get_publications(), - readRDS("testdata/example_publication_catalogue.rds") + expect_gt( + get_publications() |> nrow(), + 0 ) }) @@ -17,3 +15,41 @@ test_that("Retrieve data set list for publication", { readRDS("testdata/example_publication_datasets.rds") ) }) + +test_that("Search doesn't return anything it shouldn't", { + expect_equal( + get_publications(search = "attendance") |> + dplyr::filter( + !grepl("attendance", title, ignore.case = TRUE), + !grepl("attendance", summary, ignore.case = TRUE) + ) |> + nrow(), + 0 + ) +}) + + +test_that("Search doesn't return anything it shouldn't", { + result <- get_publications(search = "attendance") + expect_equal( + result |> + dplyr::mutate(title_summary = paste(title, summary)) |> + dplyr::filter( + grepl("attendance", title_summary, ignore.case = TRUE) + ) |> + nrow(), + nrow(result) + ) +}) + +test_that("Search throws an error if all search terms are less than 3 characters", { + expect_error( + get_publications(search = "AP") + ) +}) + +test_that("Search throws a warning if any search term is less than 3 characters", { + expect_warning( + get_publications(search = "api d") + ) +}) diff --git a/tests/testthat/test-query_dataset.R b/tests/testthat/test-query_dataset.R index ad91b75..496b548 100644 --- a/tests/testthat/test-query_dataset.R +++ b/tests/testthat/test-query_dataset.R @@ -157,7 +157,10 @@ test_that("Test filter-combinations POST dataset query", { test_that("Indicators not found in data set", { expect_error( query_dataset(example_id(), indicators = c("uywet", "uywed")), - "\nHTTP connection error: 400\nOne or more indicators could not be found.\n Error items: uywet, uywed" + paste0( + "\nHTTP connection error: 400\nOne or more indicators could not be found.", + "\n Error items: uywet, uywed" + ) ) }) diff --git a/tests/testthat/test-warning_no_rows.R b/tests/testthat/test-warning_no_rows.R new file mode 100644 index 0000000..f7715a7 --- /dev/null +++ b/tests/testthat/test-warning_no_rows.R @@ -0,0 +1,6 @@ +test_that("Get publications returns warning when no rows returned", { + expect_warning( + x <- get_publications(search = "z3x4c5v6b7"), + "Your query returned zero rows." + ) +})