Commit

Add API character limit warnings (#146)
* initial working

* finish off adding api character warnings

* recomment debugging file and touch others to try to reset line endings

* touch files to reset line endings

* touch the main tests file again after restart

* add a git attributes file for the repo

* update gitattributes to ignore the www folder

* renormalize files I accidentally lf-ed
cjrace authored Jan 10, 2025
1 parent 8bf62c6 commit 38eb8e2
Showing 17 changed files with 302 additions and 11 deletions.
6 changes: 6 additions & 0 deletions .gitattributes
@@ -0,0 +1,6 @@
# Set default behaviour in case anyone doesn't have core.autocrlf set
* text eol=crlf

# Mark all files in the www folder as binary to avoid messing with non-text files
www/* -text
tests/testthat/test-data/passes_everything.xlsx -text
196 changes: 194 additions & 2 deletions R/mainTests.r
@@ -1,6 +1,5 @@
# mainTests -------------------------------------
# main tests functions

mainTests <- function(data_character, meta_character, datafile, metafile) {
as_tibble(t(rbind(
cbind(
@@ -62,7 +61,14 @@ mainTests <- function(data_character, meta_character, datafile, metafile) {
ethnicity_values(datafile), # active test
ethnicity_characteristic_group(datafile), # active test
ethnicity_characteristic_values(datafile), # active test
indicators_smushed(metafile) # active test
indicators_smushed(metafile), # active test

# API-specific tests, though these could become standard for everyone at some point
variable_name_length(metafile), # active test
variable_label_length(metafile), # active test
filter_item_length(datafile, metafile), # active test
location_name_length(datafile), # active test
location_code_length(datafile) # active test
),
"stage" = "mainTests",
"test" = c(activeTests$`R/mainTests.r`)
@@ -2801,3 +2807,189 @@ indicators_smushed <- function(meta) {

return(output)
}
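Each of the new API checks below returns the same shape as the existing active tests: a named list with a message and a result, which mainTests() binds into its results table. A purely illustrative sketch of that contract (not part of this commit):

# Illustrative only: the shape each API length check hands back to mainTests().
example_result <- list(
"message" = "All variable names are 50 characters or fewer.",
"result" = "PASS" # "ADVISORY" when one or more values exceed the limit
)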

#' Validate length of filter and indicator names
#'
#' @param meta metadata file for the data file being screened
variable_name_length <- function(meta) {
lengths_table <- data.table(
"variable_name" = meta$col_name,
"length" = nchar(meta$col_name)
)

names_too_long <- lengths_table[length > 50, variable_name]

if (length(names_too_long) == 0) {
output <- list(
"message" = "All variable names are 50 characters or fewer.",
"result" = "PASS"
)
} else {
if (length(names_too_long) == 1) {
output <- list(
"message" = paste0("The following variable name is over 50 characters, this will need shortening before this data can be published through the API: '", paste(names_too_long, collapse = "', '"), "'."),
"result" = "ADVISORY"
)
} else {
output <- list(
"message" = paste0("The following variable names are over 50 characters, these will need shortening before this data can be published through the API: '", paste0(names_too_long, collapse = "', '"), "'."),
"result" = "ADVISORY"
)
}
}

return(output)
}
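As a quick illustration of variable_name_length() in isolation, here is a minimal sketch (not part of this commit); it assumes the screener's functions and the data.table package are loaded, and the metadata below is invented for the example:

library(data.table)

example_meta <- data.table(
col_name = c("colour", "a_really_long_variable_name_that_runs_well_past_the_fifty_character_limit")
)

variable_name_length(example_meta)
# Expected: an ADVISORY result flagging the second name, which is over 50 characters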

#' Validate length of filter and indicator labels
#'
#' @param meta metadata file for the data file being screened
variable_label_length <- function(meta) {
lengths_table <- data.table(
"variable_label" = meta$label,
"length" = unlist(lapply(meta$label, nchar), use.names = FALSE)
)

labels_too_long <- lengths_table[length > 80, variable_label]

if (length(labels_too_long) == 0) {
output <- list(
"message" = "All variable labels are 80 characters or fewer.",
"result" = "PASS"
)
} else {
if (length(labels_too_long) == 1) {
output <- list(
"message" = paste0("The following variable label is over 80 characters, this will need shortening before this data can be published through the API: '", paste(labels_too_long, collapse = "', '"), "'."),
"result" = "ADVISORY"
)
} else {
output <- list(
"message" = paste0("The following variable labels are over 80 characters, these will need shortening before this data can be published through the API: '", paste0(labels_too_long, collapse = "', '"), "'."),
"result" = "ADVISORY"
)
}
}

return(output)
}

#' Validate length of filter items
#'
#' @param data data file being screened
#' @param meta metadata file for the data file being screened
filter_item_length <- function(data, meta) {
filters <- meta[col_type == "Filter"]$col_name

filter_items <- data |>
select(all_of(filters)) |>
unlist(use.names = FALSE)

lengths_table <- data.frame(
"filter_item" = filter_items,
"length" = unlist(lapply(filter_items, nchar), use.names = FALSE)
)

lengths_too_long <- lengths_table[lengths_table$length > 120, "filter_item"]

if (length(lengths_too_long) == 0) {
output <- list(
"message" = "All filter items are 120 characters or fewer.",
"result" = "PASS"
)
} else {
if (length(lengths_too_long) == 1) {
output <- list(
"message" = paste0("The following filter item is over 120 characters, this will need shortening before this data can be published through the API: '", paste(lengths_too_long, collapse = "', '"), "'."),
"result" = "ADVISORY"
)
} else {
output <- list(
"message" = paste0("The following filter items are over 120 characters, these will need shortening before this data can be published through the API: '", paste0(lengths_too_long, collapse = "', '"), "'."),
"result" = "ADVISORY"
)
}
}

return(output)
}
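A minimal usage sketch for filter_item_length() (not part of this commit), assuming the screener's functions plus data.table and dplyr are loaded; the data and metadata below are invented, loosely mirroring the new filter_item_length test files:

library(data.table)
library(dplyr)

example_meta <- data.table(
col_name = c("colour", "num_schools"),
col_type = c("Filter", "Indicator")
)

example_data <- data.frame(
colour = c("Yellow", strrep("Orange ", 20)), # second item is 140 characters
num_schools = c(16739, 13357)
)

filter_item_length(example_data, example_meta)
# Expected: an ADVISORY result flagging the 140-character filter item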


#' Validate length of location names
#'
#' @param data data file being screened
location_name_length <- function(data) {
location_names <- data |>
select(any_of(geography_dataframe$name_field)) |>
unlist(use.names = FALSE) |>
purrr::discard(~ is.na(.) | . == "" | . == "NA")

lengths_table <- data.frame(
"location_name" = location_names,
"length" = unlist(lapply(location_names, nchar), use.names = FALSE)
)

lengths_too_long <- lengths_table[lengths_table$length > 120, "location_name"]

if (length(lengths_too_long) == 0) {
output <- list(
"message" = "All location names are 120 characters or fewer.",
"result" = "PASS"
)
} else {
if (length(lengths_too_long) == 1) {
output <- list(
"message" = paste0("The following location name is over 120 characters, this will need shortening before this data can be published through the API: '", paste(lengths_too_long, collapse = "', '"), "'."),
"result" = "ADVISORY"
)
} else {
output <- list(
"message" = paste0("The following location names are over 120 characters, these will need shortening before this data can be published through the API: '", paste0(lengths_too_long, collapse = "', '"), "'."),
"result" = "ADVISORY"
)
}
}

return(output)
}
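A minimal usage sketch for location_name_length() (not part of this commit); it assumes the screener's functions and their dependencies (dplyr, purrr) are loaded along with the geography_dataframe lookup, and that country_name and opportunity_area_name are among the lookup's name fields, as the new test files imply:

example_data <- data.frame(
country_name = c("England", "England"),
opportunity_area_name = c(
"Lealholm",
strrep("An opportunity area with a very long name ", 3) # 126 characters
)
)

location_name_length(example_data)
# Expected: an ADVISORY result flagging the 126-character opportunity area name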


#' Validate length of location codes
#'
#' @param data data file being screened
location_code_length <- function(data) {
location_code_cols <- c(geography_dataframe$code_field, geography_dataframe$code_field_secondary) |>
purrr::discard(~ is.na(.) | . == "")

location_codes <- data |>
select(any_of(location_code_cols)) |>
unlist(use.names = FALSE) |>
purrr::discard(~ is.na(.) | . == "" | . == "NA")


lengths_table <- data.frame(
"location_code" = location_codes,
"length" = unlist(lapply(location_codes, nchar), use.names = FALSE)
)

lengths_too_long <- lengths_table[lengths_table$length > 30, "location_code"]

if (length(lengths_too_long) == 0) {
output <- list(
"message" = "All location codes are 30 characters or fewer.",
"result" = "PASS"
)
} else {
if (length(lengths_too_long) == 1) {
output <- list(
"message" = paste0("The following location code is over 30 characters, this will need shortening before this data can be published through the API: '", paste(lengths_too_long, collapse = "', '"), "'."),
"result" = "ADVISORY"
)
} else {
output <- list(
"message" = paste0("The following location codes are over 30 characters, these will need shortening before this data can be published through the API: '", paste0(lengths_too_long, collapse = "', '"), "'."),
"result" = "ADVISORY"
)
}
}

return(output)
}
47 changes: 40 additions & 7 deletions manifest.json
@@ -4220,6 +4220,9 @@
}
},
"files": {
".gitattributes": {
"checksum": "c3177b8a6fff470ce0b0378a0dc4c68e"
},
".github/PULL_REQUEST_TEMPLATE.md": {
"checksum": "cbe2be338cd6586dc070e6a77e8e69ab"
},
@@ -4233,7 +4236,7 @@
"checksum": "e8d96d5ffd16a6867fd6d7458bd2f905"
},
".Rprofile": {
"checksum": "27de70b9670311feeabe917efcc4b0a5"
"checksum": "1247aefe829eb8e208797eeed763d57f"
},
"azure-pipelines-dev.yml": {
"checksum": "fc2ef843ff5024bcba25ff84eec05f4c"
@@ -4317,7 +4320,7 @@
"checksum": "b9db57c3ebf85dabb5b9e96687206282"
},
"R/mainTests.r": {
"checksum": "c1b707b9db41701430b9ed0740e76044"
"checksum": "8f05516284acd631bf9e58dc6b589931"
},
"R/manual_scripts/debugging.R": {
"checksum": "e56763afdf3659c7c47cfe0e3262e8cf"
@@ -4350,7 +4353,7 @@
"checksum": "4fdd4e1b952cd929d2ba64685af21c88"
},
"server.R": {
"checksum": "76d87396cd2c1b083696fcac91980a6d"
"checksum": "23fc15ec19183749d64db4406219fbc4"
},
"tests/testthat.R": {
"checksum": "12e9b09de5275bdc13bde6ae5a82f8b1"
@@ -4359,7 +4362,7 @@
"checksum": "3824e397e0b583a9367e306830fccd72"
},
"tests/testthat/_snaps/UI-01_example_files/example_files-002.json": {
"checksum": "86cfc414c609ab64a79c1cb7bbdd235b"
"checksum": "e79a0ba571bfbbe17386b6e9cadaa93d"
},
"tests/testthat/_snaps/UI-02_edge_cases/edge_cases-001.json": {
"checksum": "75228173459ba6f210f07e7ce363ab5e"
@@ -4601,6 +4604,12 @@
"tests/testthat/mainTests/filter_hint.meta.csv": {
"checksum": "97346a84a5185b12c5a12e9a6ea7b30d"
},
"tests/testthat/mainTests/filter_item_length.csv": {
"checksum": "5acc5c8fe056940ebfe20d8dc15cfc4c"
},
"tests/testthat/mainTests/filter_item_length.meta.csv": {
"checksum": "beb8bedd0c4f3ef9c5d0997405b41c56"
},
"tests/testthat/mainTests/geographic_catch.csv": {
"checksum": "ff410d317b0ab3a637f6a1e7750141e3"
},
@@ -4709,6 +4718,18 @@
"tests/testthat/mainTests/lep_combinations.meta.csv": {
"checksum": "8ce884f6f73324bd3aacc70567cfcb64"
},
"tests/testthat/mainTests/location_code_length.csv": {
"checksum": "1c441e486859e833f6a75e4ca4fcffec"
},
"tests/testthat/mainTests/location_code_length.meta.csv": {
"checksum": "c1a7a9a5c1d0dc12fd7569b9238b6207"
},
"tests/testthat/mainTests/location_name_length.csv": {
"checksum": "f74bf87c35c3667c43675ee7b9df823f"
},
"tests/testthat/mainTests/location_name_length.meta.csv": {
"checksum": "c1a7a9a5c1d0dc12fd7569b9238b6207"
},
"tests/testthat/mainTests/lsip_combinations.csv": {
"checksum": "9f77ca00f1bdb7855a87600e8581a9bf"
},
@@ -4853,6 +4874,18 @@
"tests/testthat/mainTests/variable_characteristic.meta.csv": {
"checksum": "df80ff86dbbb685cbcb52c64abf418d1"
},
"tests/testthat/mainTests/variable_label_length.csv": {
"checksum": "4fd02e14a65b21b0b41a60bb337dc335"
},
"tests/testthat/mainTests/variable_label_length.meta.csv": {
"checksum": "2aeeb092aadf1aa12f9dd8470cc45cbd"
},
"tests/testthat/mainTests/variable_name_length.csv": {
"checksum": "2c811e8c4808248041ffb50ede344854"
},
"tests/testthat/mainTests/variable_name_length.meta.csv": {
"checksum": "04463d66a3fd9a109ef0867e9e0342e8"
},
"tests/testthat/mainTests/variable_snake_case.csv": {
"checksum": "98488233ca477f8445d0b193f1d2b212"
},
@@ -5244,7 +5277,7 @@
"checksum": "41a423a4bc68e5ba6c20a11f06092471"
},
"tests/testthat/test-function-edge_cases.R": {
"checksum": "967f63745faee40e71b3f5c74a2006e1"
"checksum": "4d43ddb1ccbad14c3cdd3c30ecc9bef4"
},
"tests/testthat/test-function-fileValidation.R": {
"checksum": "302dd04b308471644c1a6574c3844c81"
@@ -5253,7 +5286,7 @@
"checksum": "0ad4c6ef88604b628207156991a92349"
},
"tests/testthat/test-function-mainTests.R": {
"checksum": "def2be36fe97859696445a59afbb0725"
"checksum": "cdad605b53d891ce1c2a8494e868fd00"
},
"tests/testthat/test-function-preCheck1.R": {
"checksum": "ecc3826341427734e1a8f097c1e1e6f8"
@@ -5277,7 +5310,7 @@
"checksum": "88baeb581e4bd597d2e9834ea0c3a7c8"
},
"www/acalat_theme.css": {
"checksum": "e926d2ff3a5799c06d16a5e3c70b47aa"
"checksum": "8e7009445352b2fd4b44667eac6c48a3"
},
"www/builder-duck.PNG": {
"checksum": "7574642f76936b645e9ce137ec6a99e9"
2 changes: 1 addition & 1 deletion server.R
@@ -1160,7 +1160,7 @@ server <- function(input, output, session) {
symbol_expected <- data.frame(Symbol = gss_symbols)

suppress_count <- symbol_expected %>%
left_join(suppress_count) %>%
left_join(suppress_count, by = join_by(Symbol)) %>%
mutate_all(~ replace(., is.na(.), 0))

output$suppressed_cell_count_table <- DT::renderDT({
@@ -219,7 +219,7 @@
"failed": 1,
"ignored": 17,
"info": 0,
"passed": 66,
"passed": 71,
"progress_message": "Made it to the full screening checks but failed"
}
}
4 changes: 4 additions & 0 deletions tests/testthat/mainTests/filter_item_length.csv
@@ -0,0 +1,4 @@
time_period,time_identifier,geographic_level,country_code,country_name,colour,num_schools,enrolments
2020,Calendar year,National,E92000001,England,All colours,20121,6835059
2020,Calendar year,National,E92000001,England,Yellow,16739,3885774
2020,Calendar year,National,E92000001,England,Orange with a really really really really really really really really really really really really really really really really really really really really really really really really really really really long item name,13357,936489
4 changes: 4 additions & 0 deletions tests/testthat/mainTests/filter_item_length.meta.csv
@@ -0,0 +1,4 @@
col_name,col_type,label,indicator_grouping,indicator_unit,indicator_dp,filter_hint,filter_grouping_column
colour,Filter,Colour,,,,,
num_schools,Indicator,Number of schools,,,,,
enrolments,Indicator,Enrolments,,,,,
4 changes: 4 additions & 0 deletions tests/testthat/mainTests/location_code_length.csv
@@ -0,0 +1,4 @@
time_period,time_identifier,geographic_level,country_code,country_name,opportunity_area_name,opportunity_area_code,num_schools,enrolments
2020,Calendar year,Opportunity area,E92000001,England,An opportunity area,1995080819950808199508081995080819950808199508081995080819950808,20121,6835059
2020,Calendar year,Opportunity area,E92000001,England,Lealholm,08081995,16739,3885774
2020,Calendar year,Opportunity area,E92000001,England,Appleton-le-street,081995081995080819950808199508081995080819950808199508081995080819950808,17849,936489
3 changes: 3 additions & 0 deletions tests/testthat/mainTests/location_code_length.meta.csv
@@ -0,0 +1,3 @@
col_name,col_type,label,indicator_grouping,indicator_unit,indicator_dp,filter_hint,filter_grouping_column
num_schools,Indicator,Number of schools,,,,,
enrolments,Indicator,Enrolments,,,,,
4 changes: 4 additions & 0 deletions tests/testthat/mainTests/location_name_length.csv
@@ -0,0 +1,4 @@
time_period,time_identifier,geographic_level,country_code,country_name,opportunity_area_name,opportunity_area_code,num_schools,enrolments
2020,Calendar year,Opportunity area,E92000001,England,An opportunity area with a really really really really really really really really really really really really really really really really long name,19950808,20121,6835059
2020,Calendar year,Opportunity area,E92000001,England,Lealholm,08081995,16739,3885774
2020,Calendar year,Opportunity area,E92000001,England,Appleton-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street,08199508,17849,936489
3 changes: 3 additions & 0 deletions tests/testthat/mainTests/location_name_length.meta.csv
@@ -0,0 +1,3 @@
col_name,col_type,label,indicator_grouping,indicator_unit,indicator_dp,filter_hint,filter_grouping_column
num_schools,Indicator,Number of schools,,,,,
enrolments,Indicator,Enrolments,,,,,
4 changes: 4 additions & 0 deletions tests/testthat/mainTests/variable_label_length.csv
@@ -0,0 +1,4 @@
time_period,time_identifier,geographic_level,country_code,country_name,colour,num_schools,enrolments
2020,Calendar year,National,E92000001,England,All colours,20121,6835059
2020,Calendar year,National,E92000001,England,Yellow,16739,3885774
2020,Calendar year,National,E92000001,England,Orange,13357,936489
