Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add API character limit warnings #146

Merged
merged 8 commits into from
Jan 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Set default behaviour incase anyone doesn't have core.autocrlf set
* text eol=crlf

# Mark all files in the www folder as binary to avoid messing with non text files
www/* -text
tests/testthat/test-data/passes_everything.xlsx -text
196 changes: 194 additions & 2 deletions R/mainTests.r
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# mainTests -------------------------------------
# main tests functions

mainTests <- function(data_character, meta_character, datafile, metafile) {
as_tibble(t(rbind(
cbind(
Expand Down Expand Up @@ -62,7 +61,14 @@ mainTests <- function(data_character, meta_character, datafile, metafile) {
ethnicity_values(datafile), # active test
ethnicity_characteristic_group(datafile), # active test
ethnicity_characteristic_values(datafile), # active test
indicators_smushed(metafile) # active test
indicators_smushed(metafile), # active test

# API specific tests, though could be standard for everyone at some point
variable_name_length(metafile), # active test
variable_label_length(metafile), # active test
filter_item_length(datafile, metafile), # active test
location_name_length(datafile), # active test
location_code_length(datafile) # active test
),
"stage" = "mainTests",
"test" = c(activeTests$`R/mainTests.r`)
Expand Down Expand Up @@ -2801,3 +2807,189 @@ indicators_smushed <- function(meta) {

return(output)
}

#' Validate length of filters and indicators
#'
#' @param meta
variable_name_length <- function(meta) {
lengths_table <- data.table(
"variable_name" = meta$col_name,
"length" = nchar(meta$col_name)
)

names_too_long <- lengths_table[length > 50, variable_name]

if (length(names_too_long) == 0) {
output <- list(
"message" = "All variable names are 50 characters or fewer.",
"result" = "PASS"
)
} else {
if (length(names_too_long) == 1) {
output <- list(
"message" = paste0("The following variable name is over 50 characters, this will need shortening before this data can be published through the API: '", paste(names_too_long, collapse = "', '"), "'."),
"result" = "ADVISORY"
)
} else {
output <- list(
"message" = paste0("The following variable names are over 50 characters, these will need shortening before this data can be published through the API: '", paste0(names_too_long, collapse = "', '"), "'."),
"result" = "ADVISORY"
)
}
}

return(output)
}

#' Validate length of filter and indicator labels
#'
#' @param meta
variable_label_length <- function(meta) {
lengths_table <- data.table(
"variable_label" = meta$label,
"length" = unlist(lapply(meta$label, nchar), use.names = FALSE)
)

labels_too_long <- lengths_table[length > 80, variable_label]

if (length(labels_too_long) == 0) {
output <- list(
"message" = "All variable labels are 80 characters or fewer.",
"result" = "PASS"
)
} else {
if (length(labels_too_long) == 1) {
output <- list(
"message" = paste0("The following variable label is over 80 characters, this will need shortening before this data can be published through the API: '", paste(labels_too_long, collapse = "', '"), "'."),
"result" = "ADVISORY"
)
} else {
output <- list(
"message" = paste0("The following variable labels are over 80 characters, these will need shortening before this data can be published through the API: '", paste0(labels_too_long, collapse = "', '"), "'."),
"result" = "ADVISORY"
)
}
}

return(output)
}

#' Validate length of filter items
#'
#' @param data
#' @param meta
filter_item_length <- function(data, meta) {
filters <- meta[col_type == "Filter"]$col_name

filter_items <- data |>
select(all_of(filters)) |>
unlist(use.names = FALSE)

lengths_table <- data.frame(
"filter_item" = filter_items,
"length" = unlist(lapply(filter_items, nchar), use.names = FALSE)
)

lengths_too_long <- lengths_table[lengths_table$length > 120, "filter_item"]

if (length(lengths_too_long) == 0) {
output <- list(
"message" = "All filter items are 120 characters or fewer.",
"result" = "PASS"
)
} else {
if (length(lengths_too_long) == 1) {
output <- list(
"message" = paste0("The following filter item is over 120 characters, this will need shortening before this data can be published through the API: '", paste(lengths_too_long, collapse = "', '"), "'."),
"result" = "ADVISORY"
)
} else {
output <- list(
"message" = paste0("The following filter items are over 120 characters, these will need shortening before this data can be published through the API: '", paste0(lengths_too_long, collapse = "', '"), "'."),
"result" = "ADVISORY"
)
}
}
}


#' Validate length of location names
#'
#' @param data
location_name_length <- function(data) {
location_names <- data |>
select(any_of(geography_dataframe$name_field)) |>
unlist(use.names = FALSE) |>
purrr::discard(~ is.na(.) | . == "" | . == "NA")

lengths_table <- data.frame(
"location_name" = location_names,
"length" = unlist(lapply(location_names, nchar), use.names = FALSE)
)

lengths_too_long <- lengths_table[lengths_table$length > 120, "location_name"]

if (length(lengths_too_long) == 0) {
output <- list(
"message" = "All location names are 120 characters or fewer.",
"result" = "PASS"
)
} else {
if (length(lengths_too_long) == 1) {
output <- list(
"message" = paste0("The following location name is over 120 characters, this will need shortening before this data can be published through the API: '", paste(lengths_too_long, collapse = "', '"), "'."),
"result" = "ADVISORY"
)
} else {
output <- list(
"message" = paste0("The following location names are over 120 characters, these will need shortening before this data can be published through the API: '", paste0(lengths_too_long, collapse = "', '"), "'."),
"result" = "ADVISORY"
)
}
}

return(output)
}


#' Validate length of location codes
#'
#' @param data
location_code_length <- function(data) {
location_code_cols <- c(geography_dataframe$code_field, geography_dataframe$code_field_secondary) |>
purrr::discard(~ is.na(.) | . == "")

location_codes <- data |>
select(any_of(location_code_cols)) |>
unlist(use.names = FALSE) |>
purrr::discard(~ is.na(.) | . == "" | . == "NA")


lengths_table <- data.frame(
"location_code" = location_codes,
"length" = unlist(lapply(location_codes, nchar), use.names = FALSE)
)

lengths_too_long <- lengths_table[lengths_table$length > 30, "location_code"]

if (length(lengths_too_long) == 0) {
output <- list(
"message" = "All location codes are 30 characters or fewer.",
"result" = "PASS"
)
} else {
if (length(lengths_too_long) == 1) {
output <- list(
"message" = paste0("The following location code is over 30 characters, this will need shortening before this data can be published through the API: '", paste(lengths_too_long, collapse = "', '"), "'."),
"result" = "ADVISORY"
)
} else {
output <- list(
"message" = paste0("The following location codes are over 30 characters, these will need shortening before this data can be published through the API: '", paste0(lengths_too_long, collapse = "', '"), "'."),
"result" = "ADVISORY"
)
}
}

return(output)
}
47 changes: 40 additions & 7 deletions manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -4220,6 +4220,9 @@
}
},
"files": {
".gitattributes": {
"checksum": "c3177b8a6fff470ce0b0378a0dc4c68e"
},
".github/PULL_REQUEST_TEMPLATE.md": {
"checksum": "cbe2be338cd6586dc070e6a77e8e69ab"
},
Expand All @@ -4233,7 +4236,7 @@
"checksum": "e8d96d5ffd16a6867fd6d7458bd2f905"
},
".Rprofile": {
"checksum": "27de70b9670311feeabe917efcc4b0a5"
"checksum": "1247aefe829eb8e208797eeed763d57f"
},
"azure-pipelines-dev.yml": {
"checksum": "fc2ef843ff5024bcba25ff84eec05f4c"
Expand Down Expand Up @@ -4317,7 +4320,7 @@
"checksum": "b9db57c3ebf85dabb5b9e96687206282"
},
"R/mainTests.r": {
"checksum": "c1b707b9db41701430b9ed0740e76044"
"checksum": "8f05516284acd631bf9e58dc6b589931"
},
"R/manual_scripts/debugging.R": {
"checksum": "e56763afdf3659c7c47cfe0e3262e8cf"
Expand Down Expand Up @@ -4350,7 +4353,7 @@
"checksum": "4fdd4e1b952cd929d2ba64685af21c88"
},
"server.R": {
"checksum": "76d87396cd2c1b083696fcac91980a6d"
"checksum": "23fc15ec19183749d64db4406219fbc4"
},
"tests/testthat.R": {
"checksum": "12e9b09de5275bdc13bde6ae5a82f8b1"
Expand All @@ -4359,7 +4362,7 @@
"checksum": "3824e397e0b583a9367e306830fccd72"
},
"tests/testthat/_snaps/UI-01_example_files/example_files-002.json": {
"checksum": "86cfc414c609ab64a79c1cb7bbdd235b"
"checksum": "e79a0ba571bfbbe17386b6e9cadaa93d"
},
"tests/testthat/_snaps/UI-02_edge_cases/edge_cases-001.json": {
"checksum": "75228173459ba6f210f07e7ce363ab5e"
Expand Down Expand Up @@ -4601,6 +4604,12 @@
"tests/testthat/mainTests/filter_hint.meta.csv": {
"checksum": "97346a84a5185b12c5a12e9a6ea7b30d"
},
"tests/testthat/mainTests/filter_item_length.csv": {
"checksum": "5acc5c8fe056940ebfe20d8dc15cfc4c"
},
"tests/testthat/mainTests/filter_item_length.meta.csv": {
"checksum": "beb8bedd0c4f3ef9c5d0997405b41c56"
},
"tests/testthat/mainTests/geographic_catch.csv": {
"checksum": "ff410d317b0ab3a637f6a1e7750141e3"
},
Expand Down Expand Up @@ -4709,6 +4718,18 @@
"tests/testthat/mainTests/lep_combinations.meta.csv": {
"checksum": "8ce884f6f73324bd3aacc70567cfcb64"
},
"tests/testthat/mainTests/location_code_length.csv": {
"checksum": "1c441e486859e833f6a75e4ca4fcffec"
},
"tests/testthat/mainTests/location_code_length.meta.csv": {
"checksum": "c1a7a9a5c1d0dc12fd7569b9238b6207"
},
"tests/testthat/mainTests/location_name_length.csv": {
"checksum": "f74bf87c35c3667c43675ee7b9df823f"
},
"tests/testthat/mainTests/location_name_length.meta.csv": {
"checksum": "c1a7a9a5c1d0dc12fd7569b9238b6207"
},
"tests/testthat/mainTests/lsip_combinations.csv": {
"checksum": "9f77ca00f1bdb7855a87600e8581a9bf"
},
Expand Down Expand Up @@ -4853,6 +4874,18 @@
"tests/testthat/mainTests/variable_characteristic.meta.csv": {
"checksum": "df80ff86dbbb685cbcb52c64abf418d1"
},
"tests/testthat/mainTests/variable_label_length.csv": {
"checksum": "4fd02e14a65b21b0b41a60bb337dc335"
},
"tests/testthat/mainTests/variable_label_length.meta.csv": {
"checksum": "2aeeb092aadf1aa12f9dd8470cc45cbd"
},
"tests/testthat/mainTests/variable_name_length.csv": {
"checksum": "2c811e8c4808248041ffb50ede344854"
},
"tests/testthat/mainTests/variable_name_length.meta.csv": {
"checksum": "04463d66a3fd9a109ef0867e9e0342e8"
},
"tests/testthat/mainTests/variable_snake_case.csv": {
"checksum": "98488233ca477f8445d0b193f1d2b212"
},
Expand Down Expand Up @@ -5244,7 +5277,7 @@
"checksum": "41a423a4bc68e5ba6c20a11f06092471"
},
"tests/testthat/test-function-edge_cases.R": {
"checksum": "967f63745faee40e71b3f5c74a2006e1"
"checksum": "4d43ddb1ccbad14c3cdd3c30ecc9bef4"
},
"tests/testthat/test-function-fileValidation.R": {
"checksum": "302dd04b308471644c1a6574c3844c81"
Expand All @@ -5253,7 +5286,7 @@
"checksum": "0ad4c6ef88604b628207156991a92349"
},
"tests/testthat/test-function-mainTests.R": {
"checksum": "def2be36fe97859696445a59afbb0725"
"checksum": "cdad605b53d891ce1c2a8494e868fd00"
},
"tests/testthat/test-function-preCheck1.R": {
"checksum": "ecc3826341427734e1a8f097c1e1e6f8"
Expand All @@ -5277,7 +5310,7 @@
"checksum": "88baeb581e4bd597d2e9834ea0c3a7c8"
},
"www/acalat_theme.css": {
"checksum": "e926d2ff3a5799c06d16a5e3c70b47aa"
"checksum": "8e7009445352b2fd4b44667eac6c48a3"
},
"www/builder-duck.PNG": {
"checksum": "7574642f76936b645e9ce137ec6a99e9"
Expand Down
2 changes: 1 addition & 1 deletion server.R
Original file line number Diff line number Diff line change
Expand Up @@ -1160,7 +1160,7 @@ server <- function(input, output, session) {
symbol_expected <- data.frame(Symbol = gss_symbols)

suppress_count <- symbol_expected %>%
left_join(suppress_count) %>%
left_join(suppress_count, by = join_by(Symbol)) %>%
mutate_all(~ replace(., is.na(.), 0))

output$suppressed_cell_count_table <- DT::renderDT({
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@
"failed": 1,
"ignored": 17,
"info": 0,
"passed": 66,
"passed": 71,
"progress_message": "Made it to the full screening checks but failed"
}
}
4 changes: 4 additions & 0 deletions tests/testthat/mainTests/filter_item_length.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
time_period,time_identifier,geographic_level,country_code,country_name,colour,num_schools,enrolments
2020,Calendar year,National,E92000001,England,All colours,20121,6835059
2020,Calendar year,National,E92000001,England,Yellow,16739,3885774
2020,Calendar year,National,E92000001,England,Orange with a really really really really really really really really really really really really really really really really really really really really really really really really really really really long item name,13357,936489
4 changes: 4 additions & 0 deletions tests/testthat/mainTests/filter_item_length.meta.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
col_name,col_type,label,indicator_grouping,indicator_unit,indicator_dp,filter_hint,filter_grouping_column
colour,Filter,Colour,,,,,
num_schools,Indicator,Number of schools,,,,,
enrolments,Indicator,Enrolments,,,,,
4 changes: 4 additions & 0 deletions tests/testthat/mainTests/location_code_length.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
time_period,time_identifier,geographic_level,country_code,country_name,opportunity_area_name,opportunity_area_code,num_schools,enrolments
2020,Calendar year,Opportunity area,E92000001,England,An opportunity area,1995080819950808199508081995080819950808199508081995080819950808,20121,6835059
2020,Calendar year,Opportunity area,E92000001,England,Lealholm,08081995,16739,3885774
2020,Calendar year,Opportunity area,E92000001,England,Appleton-le-street,081995081995080819950808199508081995080819950808199508081995080819950808,17849,936489
3 changes: 3 additions & 0 deletions tests/testthat/mainTests/location_code_length.meta.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
col_name,col_type,label,indicator_grouping,indicator_unit,indicator_dp,filter_hint,filter_grouping_column
num_schools,Indicator,Number of schools,,,,,
enrolments,Indicator,Enrolments,,,,,
4 changes: 4 additions & 0 deletions tests/testthat/mainTests/location_name_length.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
time_period,time_identifier,geographic_level,country_code,country_name,opportunity_area_name,opportunity_area_code,num_schools,enrolments
2020,Calendar year,Opportunity area,E92000001,England,An opportunity area with a really really really really really really really really really really really really really really really really long name,19950808,20121,6835059
2020,Calendar year,Opportunity area,E92000001,England,Lealholm,08081995,16739,3885774
2020,Calendar year,Opportunity area,E92000001,England,Appleton-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street,08199508,17849,936489
3 changes: 3 additions & 0 deletions tests/testthat/mainTests/location_name_length.meta.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
col_name,col_type,label,indicator_grouping,indicator_unit,indicator_dp,filter_hint,filter_grouping_column
num_schools,Indicator,Number of schools,,,,,
enrolments,Indicator,Enrolments,,,,,
4 changes: 4 additions & 0 deletions tests/testthat/mainTests/variable_label_length.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
time_period,time_identifier,geographic_level,country_code,country_name,colour,num_schools,enrolments
2020,Calendar year,National,E92000001,England,All colours,20121,6835059
2020,Calendar year,National,E92000001,England,Yellow,16739,3885774
2020,Calendar year,National,E92000001,England,Orange,13357,936489
Loading
Loading