From 38eb8e2100af335d45d59a9afec53f7afa2a5515 Mon Sep 17 00:00:00 2001 From: Cam Race <52536248+cjrace@users.noreply.github.com> Date: Fri, 10 Jan 2025 15:44:15 +0000 Subject: [PATCH] Add API character limit warnings (#146) * initial working * finish off adding api character warnings * recomment debugging file and touch others to try to reset line endings * touch files to reset line endings * touch the main tests file again after restart * add a git attributes file for the repo * update gitattributes to ignore the www folder * renormalize files I accidentally lf-ed --- .gitattributes | 6 + R/mainTests.r | 196 +++++++++++++++++- manifest.json | 47 ++++- server.R | 2 +- .../example_files-002.json | 2 +- .../testthat/mainTests/filter_item_length.csv | 4 + .../mainTests/filter_item_length.meta.csv | 4 + .../mainTests/location_code_length.csv | 4 + .../mainTests/location_code_length.meta.csv | 3 + .../mainTests/location_name_length.csv | 4 + .../mainTests/location_name_length.meta.csv | 3 + .../mainTests/variable_label_length.csv | 4 + .../mainTests/variable_label_length.meta.csv | 4 + .../mainTests/variable_name_length.csv | 4 + .../mainTests/variable_name_length.meta.csv | 4 + tests/testthat/test-function-edge_cases.R | 2 + tests/testthat/test-function-mainTests.R | 20 ++ 17 files changed, 302 insertions(+), 11 deletions(-) create mode 100644 .gitattributes create mode 100644 tests/testthat/mainTests/filter_item_length.csv create mode 100644 tests/testthat/mainTests/filter_item_length.meta.csv create mode 100644 tests/testthat/mainTests/location_code_length.csv create mode 100644 tests/testthat/mainTests/location_code_length.meta.csv create mode 100644 tests/testthat/mainTests/location_name_length.csv create mode 100644 tests/testthat/mainTests/location_name_length.meta.csv create mode 100644 tests/testthat/mainTests/variable_label_length.csv create mode 100644 tests/testthat/mainTests/variable_label_length.meta.csv create mode 100644 tests/testthat/mainTests/variable_name_length.csv create mode 100644 tests/testthat/mainTests/variable_name_length.meta.csv diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..e00dfc0 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,6 @@ +# Set default behaviour incase anyone doesn't have core.autocrlf set +* text eol=crlf + +# Mark all files in the www folder as binary to avoid messing with non text files +www/* -text +tests/testthat/test-data/passes_everything.xlsx -text \ No newline at end of file diff --git a/R/mainTests.r b/R/mainTests.r index 9a043db..c2f7b1f 100644 --- a/R/mainTests.r +++ b/R/mainTests.r @@ -1,6 +1,5 @@ # mainTests ------------------------------------- # main tests functions - mainTests <- function(data_character, meta_character, datafile, metafile) { as_tibble(t(rbind( cbind( @@ -62,7 +61,14 @@ mainTests <- function(data_character, meta_character, datafile, metafile) { ethnicity_values(datafile), # active test ethnicity_characteristic_group(datafile), # active test ethnicity_characteristic_values(datafile), # active test - indicators_smushed(metafile) # active test + indicators_smushed(metafile), # active test + + # API specific tests, though could be standard for everyone at some point + variable_name_length(metafile), # active test + variable_label_length(metafile), # active test + filter_item_length(datafile, metafile), # active test + location_name_length(datafile), # active test + location_code_length(datafile) # active test ), "stage" = "mainTests", "test" = c(activeTests$`R/mainTests.r`) @@ -2801,3 +2807,189 @@ indicators_smushed <- function(meta) { return(output) } + +#' Validate length of filters and indicators +#' +#' @param meta +variable_name_length <- function(meta) { + lengths_table <- data.table( + "variable_name" = meta$col_name, + "length" = nchar(meta$col_name) + ) + + names_too_long <- lengths_table[length > 50, variable_name] + + if (length(names_too_long) == 0) { + output <- list( + "message" = "All variable names are 50 characters or fewer.", + "result" = "PASS" + ) + } else { + if (length(names_too_long) == 1) { + output <- list( + "message" = paste0("The following variable name is over 50 characters, this will need shortening before this data can be published through the API: '", paste(names_too_long, collapse = "', '"), "'."), + "result" = "ADVISORY" + ) + } else { + output <- list( + "message" = paste0("The following variable names are over 50 characters, these will need shortening before this data can be published through the API: '", paste0(names_too_long, collapse = "', '"), "'."), + "result" = "ADVISORY" + ) + } + } + + return(output) +} + +#' Validate length of filter and indicator labels +#' +#' @param meta +variable_label_length <- function(meta) { + lengths_table <- data.table( + "variable_label" = meta$label, + "length" = unlist(lapply(meta$label, nchar), use.names = FALSE) + ) + + labels_too_long <- lengths_table[length > 80, variable_label] + + if (length(labels_too_long) == 0) { + output <- list( + "message" = "All variable labels are 80 characters or fewer.", + "result" = "PASS" + ) + } else { + if (length(labels_too_long) == 1) { + output <- list( + "message" = paste0("The following variable label is over 80 characters, this will need shortening before this data can be published through the API: '", paste(labels_too_long, collapse = "', '"), "'."), + "result" = "ADVISORY" + ) + } else { + output <- list( + "message" = paste0("The following variable labels are over 80 characters, these will need shortening before this data can be published through the API: '", paste0(labels_too_long, collapse = "', '"), "'."), + "result" = "ADVISORY" + ) + } + } + + return(output) +} + +#' Validate length of filter items +#' +#' @param data +#' @param meta +filter_item_length <- function(data, meta) { + filters <- meta[col_type == "Filter"]$col_name + + filter_items <- data |> + select(all_of(filters)) |> + unlist(use.names = FALSE) + + lengths_table <- data.frame( + "filter_item" = filter_items, + "length" = unlist(lapply(filter_items, nchar), use.names = FALSE) + ) + + lengths_too_long <- lengths_table[lengths_table$length > 120, "filter_item"] + + if (length(lengths_too_long) == 0) { + output <- list( + "message" = "All filter items are 120 characters or fewer.", + "result" = "PASS" + ) + } else { + if (length(lengths_too_long) == 1) { + output <- list( + "message" = paste0("The following filter item is over 120 characters, this will need shortening before this data can be published through the API: '", paste(lengths_too_long, collapse = "', '"), "'."), + "result" = "ADVISORY" + ) + } else { + output <- list( + "message" = paste0("The following filter items are over 120 characters, these will need shortening before this data can be published through the API: '", paste0(lengths_too_long, collapse = "', '"), "'."), + "result" = "ADVISORY" + ) + } + } +} + + +#' Validate length of location names +#' +#' @param data +location_name_length <- function(data) { + location_names <- data |> + select(any_of(geography_dataframe$name_field)) |> + unlist(use.names = FALSE) |> + purrr::discard(~ is.na(.) | . == "" | . == "NA") + + lengths_table <- data.frame( + "location_name" = location_names, + "length" = unlist(lapply(location_names, nchar), use.names = FALSE) + ) + + lengths_too_long <- lengths_table[lengths_table$length > 120, "location_name"] + + if (length(lengths_too_long) == 0) { + output <- list( + "message" = "All location names are 120 characters or fewer.", + "result" = "PASS" + ) + } else { + if (length(lengths_too_long) == 1) { + output <- list( + "message" = paste0("The following location name is over 120 characters, this will need shortening before this data can be published through the API: '", paste(lengths_too_long, collapse = "', '"), "'."), + "result" = "ADVISORY" + ) + } else { + output <- list( + "message" = paste0("The following location names are over 120 characters, these will need shortening before this data can be published through the API: '", paste0(lengths_too_long, collapse = "', '"), "'."), + "result" = "ADVISORY" + ) + } + } + + return(output) +} + + +#' Validate length of location codes +#' +#' @param data +location_code_length <- function(data) { + location_code_cols <- c(geography_dataframe$code_field, geography_dataframe$code_field_secondary) |> + purrr::discard(~ is.na(.) | . == "") + + location_codes <- data |> + select(any_of(location_code_cols)) |> + unlist(use.names = FALSE) |> + purrr::discard(~ is.na(.) | . == "" | . == "NA") + + + lengths_table <- data.frame( + "location_code" = location_codes, + "length" = unlist(lapply(location_codes, nchar), use.names = FALSE) + ) + + lengths_too_long <- lengths_table[lengths_table$length > 30, "location_code"] + + if (length(lengths_too_long) == 0) { + output <- list( + "message" = "All location codes are 30 characters or fewer.", + "result" = "PASS" + ) + } else { + if (length(lengths_too_long) == 1) { + output <- list( + "message" = paste0("The following location code is over 30 characters, this will need shortening before this data can be published through the API: '", paste(lengths_too_long, collapse = "', '"), "'."), + "result" = "ADVISORY" + ) + } else { + output <- list( + "message" = paste0("The following location codes are over 30 characters, these will need shortening before this data can be published through the API: '", paste0(lengths_too_long, collapse = "', '"), "'."), + "result" = "ADVISORY" + ) + } + } + + return(output) +} diff --git a/manifest.json b/manifest.json index a4fbc33..5a6b25b 100644 --- a/manifest.json +++ b/manifest.json @@ -4220,6 +4220,9 @@ } }, "files": { + ".gitattributes": { + "checksum": "c3177b8a6fff470ce0b0378a0dc4c68e" + }, ".github/PULL_REQUEST_TEMPLATE.md": { "checksum": "cbe2be338cd6586dc070e6a77e8e69ab" }, @@ -4233,7 +4236,7 @@ "checksum": "e8d96d5ffd16a6867fd6d7458bd2f905" }, ".Rprofile": { - "checksum": "27de70b9670311feeabe917efcc4b0a5" + "checksum": "1247aefe829eb8e208797eeed763d57f" }, "azure-pipelines-dev.yml": { "checksum": "fc2ef843ff5024bcba25ff84eec05f4c" @@ -4317,7 +4320,7 @@ "checksum": "b9db57c3ebf85dabb5b9e96687206282" }, "R/mainTests.r": { - "checksum": "c1b707b9db41701430b9ed0740e76044" + "checksum": "8f05516284acd631bf9e58dc6b589931" }, "R/manual_scripts/debugging.R": { "checksum": "e56763afdf3659c7c47cfe0e3262e8cf" @@ -4350,7 +4353,7 @@ "checksum": "4fdd4e1b952cd929d2ba64685af21c88" }, "server.R": { - "checksum": "76d87396cd2c1b083696fcac91980a6d" + "checksum": "23fc15ec19183749d64db4406219fbc4" }, "tests/testthat.R": { "checksum": "12e9b09de5275bdc13bde6ae5a82f8b1" @@ -4359,7 +4362,7 @@ "checksum": "3824e397e0b583a9367e306830fccd72" }, "tests/testthat/_snaps/UI-01_example_files/example_files-002.json": { - "checksum": "86cfc414c609ab64a79c1cb7bbdd235b" + "checksum": "e79a0ba571bfbbe17386b6e9cadaa93d" }, "tests/testthat/_snaps/UI-02_edge_cases/edge_cases-001.json": { "checksum": "75228173459ba6f210f07e7ce363ab5e" @@ -4601,6 +4604,12 @@ "tests/testthat/mainTests/filter_hint.meta.csv": { "checksum": "97346a84a5185b12c5a12e9a6ea7b30d" }, + "tests/testthat/mainTests/filter_item_length.csv": { + "checksum": "5acc5c8fe056940ebfe20d8dc15cfc4c" + }, + "tests/testthat/mainTests/filter_item_length.meta.csv": { + "checksum": "beb8bedd0c4f3ef9c5d0997405b41c56" + }, "tests/testthat/mainTests/geographic_catch.csv": { "checksum": "ff410d317b0ab3a637f6a1e7750141e3" }, @@ -4709,6 +4718,18 @@ "tests/testthat/mainTests/lep_combinations.meta.csv": { "checksum": "8ce884f6f73324bd3aacc70567cfcb64" }, + "tests/testthat/mainTests/location_code_length.csv": { + "checksum": "1c441e486859e833f6a75e4ca4fcffec" + }, + "tests/testthat/mainTests/location_code_length.meta.csv": { + "checksum": "c1a7a9a5c1d0dc12fd7569b9238b6207" + }, + "tests/testthat/mainTests/location_name_length.csv": { + "checksum": "f74bf87c35c3667c43675ee7b9df823f" + }, + "tests/testthat/mainTests/location_name_length.meta.csv": { + "checksum": "c1a7a9a5c1d0dc12fd7569b9238b6207" + }, "tests/testthat/mainTests/lsip_combinations.csv": { "checksum": "9f77ca00f1bdb7855a87600e8581a9bf" }, @@ -4853,6 +4874,18 @@ "tests/testthat/mainTests/variable_characteristic.meta.csv": { "checksum": "df80ff86dbbb685cbcb52c64abf418d1" }, + "tests/testthat/mainTests/variable_label_length.csv": { + "checksum": "4fd02e14a65b21b0b41a60bb337dc335" + }, + "tests/testthat/mainTests/variable_label_length.meta.csv": { + "checksum": "2aeeb092aadf1aa12f9dd8470cc45cbd" + }, + "tests/testthat/mainTests/variable_name_length.csv": { + "checksum": "2c811e8c4808248041ffb50ede344854" + }, + "tests/testthat/mainTests/variable_name_length.meta.csv": { + "checksum": "04463d66a3fd9a109ef0867e9e0342e8" + }, "tests/testthat/mainTests/variable_snake_case.csv": { "checksum": "98488233ca477f8445d0b193f1d2b212" }, @@ -5244,7 +5277,7 @@ "checksum": "41a423a4bc68e5ba6c20a11f06092471" }, "tests/testthat/test-function-edge_cases.R": { - "checksum": "967f63745faee40e71b3f5c74a2006e1" + "checksum": "4d43ddb1ccbad14c3cdd3c30ecc9bef4" }, "tests/testthat/test-function-fileValidation.R": { "checksum": "302dd04b308471644c1a6574c3844c81" @@ -5253,7 +5286,7 @@ "checksum": "0ad4c6ef88604b628207156991a92349" }, "tests/testthat/test-function-mainTests.R": { - "checksum": "def2be36fe97859696445a59afbb0725" + "checksum": "cdad605b53d891ce1c2a8494e868fd00" }, "tests/testthat/test-function-preCheck1.R": { "checksum": "ecc3826341427734e1a8f097c1e1e6f8" @@ -5277,7 +5310,7 @@ "checksum": "88baeb581e4bd597d2e9834ea0c3a7c8" }, "www/acalat_theme.css": { - "checksum": "e926d2ff3a5799c06d16a5e3c70b47aa" + "checksum": "8e7009445352b2fd4b44667eac6c48a3" }, "www/builder-duck.PNG": { "checksum": "7574642f76936b645e9ce137ec6a99e9" diff --git a/server.R b/server.R index 2e934b5..1af1fec 100644 --- a/server.R +++ b/server.R @@ -1160,7 +1160,7 @@ server <- function(input, output, session) { symbol_expected <- data.frame(Symbol = gss_symbols) suppress_count <- symbol_expected %>% - left_join(suppress_count) %>% + left_join(suppress_count, by = join_by(Symbol)) %>% mutate_all(~ replace(., is.na(.), 0)) output$suppressed_cell_count_table <- DT::renderDT({ diff --git a/tests/testthat/_snaps/UI-01_example_files/example_files-002.json b/tests/testthat/_snaps/UI-01_example_files/example_files-002.json index ae29d34..308436a 100644 --- a/tests/testthat/_snaps/UI-01_example_files/example_files-002.json +++ b/tests/testthat/_snaps/UI-01_example_files/example_files-002.json @@ -219,7 +219,7 @@ "failed": 1, "ignored": 17, "info": 0, - "passed": 66, + "passed": 71, "progress_message": "Made it to the full screening checks but failed" } } diff --git a/tests/testthat/mainTests/filter_item_length.csv b/tests/testthat/mainTests/filter_item_length.csv new file mode 100644 index 0000000..f7173b3 --- /dev/null +++ b/tests/testthat/mainTests/filter_item_length.csv @@ -0,0 +1,4 @@ +time_period,time_identifier,geographic_level,country_code,country_name,colour,num_schools,enrolments +2020,Calendar year,National,E92000001,England,All colours,20121,6835059 +2020,Calendar year,National,E92000001,England,Yellow,16739,3885774 +2020,Calendar year,National,E92000001,England,Orange with a really really really really really really really really really really really really really really really really really really really really really really really really really really really long item name,13357,936489 diff --git a/tests/testthat/mainTests/filter_item_length.meta.csv b/tests/testthat/mainTests/filter_item_length.meta.csv new file mode 100644 index 0000000..e281ed2 --- /dev/null +++ b/tests/testthat/mainTests/filter_item_length.meta.csv @@ -0,0 +1,4 @@ +col_name,col_type,label,indicator_grouping,indicator_unit,indicator_dp,filter_hint,filter_grouping_column +colour,Filter,Colour,,,,, +num_schools,Indicator,Number of schools,,,,, +enrolments,Indicator,Enrolments,,,,, diff --git a/tests/testthat/mainTests/location_code_length.csv b/tests/testthat/mainTests/location_code_length.csv new file mode 100644 index 0000000..fa75421 --- /dev/null +++ b/tests/testthat/mainTests/location_code_length.csv @@ -0,0 +1,4 @@ +time_period,time_identifier,geographic_level,country_code,country_name,opportunity_area_name,opportunity_area_code,num_schools,enrolments +2020,Calendar year,Opportunity area,E92000001,England,An opportunity area,1995080819950808199508081995080819950808199508081995080819950808,20121,6835059 +2020,Calendar year,Opportunity area,E92000001,England,Lealholm,08081995,16739,3885774 +2020,Calendar year,Opportunity area,E92000001,England,Appleton-le-street,081995081995080819950808199508081995080819950808199508081995080819950808,17849,936489 diff --git a/tests/testthat/mainTests/location_code_length.meta.csv b/tests/testthat/mainTests/location_code_length.meta.csv new file mode 100644 index 0000000..53abaf1 --- /dev/null +++ b/tests/testthat/mainTests/location_code_length.meta.csv @@ -0,0 +1,3 @@ +col_name,col_type,label,indicator_grouping,indicator_unit,indicator_dp,filter_hint,filter_grouping_column +num_schools,Indicator,Number of schools,,,,, +enrolments,Indicator,Enrolments,,,,, diff --git a/tests/testthat/mainTests/location_name_length.csv b/tests/testthat/mainTests/location_name_length.csv new file mode 100644 index 0000000..120651a --- /dev/null +++ b/tests/testthat/mainTests/location_name_length.csv @@ -0,0 +1,4 @@ +time_period,time_identifier,geographic_level,country_code,country_name,opportunity_area_name,opportunity_area_code,num_schools,enrolments +2020,Calendar year,Opportunity area,E92000001,England,An opportunity area with a really really really really really really really really really really really really really really really really long name,19950808,20121,6835059 +2020,Calendar year,Opportunity area,E92000001,England,Lealholm,08081995,16739,3885774 +2020,Calendar year,Opportunity area,E92000001,England,Appleton-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street,08199508,17849,936489 diff --git a/tests/testthat/mainTests/location_name_length.meta.csv b/tests/testthat/mainTests/location_name_length.meta.csv new file mode 100644 index 0000000..53abaf1 --- /dev/null +++ b/tests/testthat/mainTests/location_name_length.meta.csv @@ -0,0 +1,3 @@ +col_name,col_type,label,indicator_grouping,indicator_unit,indicator_dp,filter_hint,filter_grouping_column +num_schools,Indicator,Number of schools,,,,, +enrolments,Indicator,Enrolments,,,,, diff --git a/tests/testthat/mainTests/variable_label_length.csv b/tests/testthat/mainTests/variable_label_length.csv new file mode 100644 index 0000000..76b2cca --- /dev/null +++ b/tests/testthat/mainTests/variable_label_length.csv @@ -0,0 +1,4 @@ +time_period,time_identifier,geographic_level,country_code,country_name,colour,num_schools,enrolments +2020,Calendar year,National,E92000001,England,All colours,20121,6835059 +2020,Calendar year,National,E92000001,England,Yellow,16739,3885774 +2020,Calendar year,National,E92000001,England,Orange,13357,936489 diff --git a/tests/testthat/mainTests/variable_label_length.meta.csv b/tests/testthat/mainTests/variable_label_length.meta.csv new file mode 100644 index 0000000..8a1814f --- /dev/null +++ b/tests/testthat/mainTests/variable_label_length.meta.csv @@ -0,0 +1,4 @@ +col_name,col_type,label,indicator_grouping,indicator_unit,indicator_dp,filter_hint,filter_grouping_column +colour,Filter,Colour,,,,, +num_schools,Indicator,Number of schools,,,,, +enrolments,Indicator,Enrolments with a really really really really really really really really really really really really really really really really really really really really long name,,,,, diff --git a/tests/testthat/mainTests/variable_name_length.csv b/tests/testthat/mainTests/variable_name_length.csv new file mode 100644 index 0000000..a381fd0 --- /dev/null +++ b/tests/testthat/mainTests/variable_name_length.csv @@ -0,0 +1,4 @@ +time_period,time_identifier,geographic_level,country_code,country_name,colour,num_schools,enrolments_with_a_really_really_really_really_really_really_really_really_really_really_really_really_really_really_long_name +2020,Calendar year,National,E92000001,England,All colours,20121,6835059 +2020,Calendar year,National,E92000001,England,Yellow,16739,3885774 +2020,Calendar year,National,E92000001,England,Orange,13357,936489 diff --git a/tests/testthat/mainTests/variable_name_length.meta.csv b/tests/testthat/mainTests/variable_name_length.meta.csv new file mode 100644 index 0000000..5d919ba --- /dev/null +++ b/tests/testthat/mainTests/variable_name_length.meta.csv @@ -0,0 +1,4 @@ +col_name,col_type,label,indicator_grouping,indicator_unit,indicator_dp,filter_hint,filter_grouping_column +colour,Filter,Colour,,,,, +num_schools,Indicator,Number of schools,,,,, +enrolments_with_a_really_really_really_really_really_really_really_really_really_really_really_really_really_really_long_name,Indicator,Enrolments,,,,, diff --git a/tests/testthat/test-function-edge_cases.R b/tests/testthat/test-function-edge_cases.R index 6074d60..524d18d 100644 --- a/tests/testthat/test-function-edge_cases.R +++ b/tests/testthat/test-function-edge_cases.R @@ -206,10 +206,12 @@ test_that("blank_meta_label_notNA_still_fails", { }) test_that("Can handle incorrect provider cols", { + # TODO: Currently being skipped, not sure why expect_no_error(screeningOutput <- testOther("../../tests/testthat/otherData/provider_col_incorrect.csv")) }) test_that("Can handle missing region_name", { + # TODO: Currently being skipped, not sure why expect_no_error(screeningOutput <- testOther("../../tests/testthat/otherData/missing_region_name.csv")) }) diff --git a/tests/testthat/test-function-mainTests.R b/tests/testthat/test-function-mainTests.R index 332841b..188265b 100644 --- a/tests/testthat/test-function-mainTests.R +++ b/tests/testthat/test-function-mainTests.R @@ -431,3 +431,23 @@ test_that("ethnicity_characteristic_values", { test_that("indicators_smushed", { expect_equal(testIndividualTest(pathStart, "indicators_smushed"), "FAIL") }) + +test_that("variable_name_length", { + expect_equal(testIndividualTest(pathStart, "variable_name_length"), "ADVISORY") +}) + +test_that("variable_label_length", { + expect_equal(testIndividualTest(pathStart, "variable_label_length"), "ADVISORY") +}) + +test_that("filter_item_length", { + expect_equal(testIndividualTest(pathStart, "filter_item_length"), "ADVISORY") +}) + +test_that("location_name_length", { + expect_equal(testIndividualTest(pathStart, "location_name_length"), "ADVISORY") +}) + +test_that("location_code_length", { + expect_equal(testIndividualTest(pathStart, "location_code_length"), "ADVISORY") +})