dfe-analytical-services · cjrace · Jan 10, 2025 · Jan 9, 2025 · Jan 9, 2025 · Jan 10, 2025
diff --git a/.gitattributes b/.gitattributes
@@ -0,0 +1,6 @@
+# Set default behaviour incase anyone doesn't have core.autocrlf set
+* text eol=crlf
+
+# Mark all files in the www folder as binary to avoid messing with non text files
+www/* -text
+tests/testthat/test-data/passes_everything.xlsx -text
diff --git a/R/mainTests.r b/R/mainTests.r
@@ -1,6 +1,5 @@
 # mainTests -------------------------------------
 # main tests functions
-
 mainTests <- function(data_character, meta_character, datafile, metafile) {
   as_tibble(t(rbind(
     cbind(
@@ -62,7 +61,14 @@ mainTests <- function(data_character, meta_character, datafile, metafile) {
       ethnicity_values(datafile), # active test
       ethnicity_characteristic_group(datafile), # active test
       ethnicity_characteristic_values(datafile), # active test
-      indicators_smushed(metafile) # active test
+      indicators_smushed(metafile), # active test
+
+      # API specific tests, though could be standard for everyone at some point
+      variable_name_length(metafile), # active test
+      variable_label_length(metafile), # active test
+      filter_item_length(datafile, metafile), # active test
+      location_name_length(datafile), # active test
+      location_code_length(datafile) # active test
     ),
     "stage" = "mainTests",
     "test" = c(activeTests$`R/mainTests.r`)
@@ -2801,3 +2807,189 @@ indicators_smushed <- function(meta) {
 
   return(output)
 }
+
+#' Validate length of filters and indicators
+#'
+#' @param meta
+variable_name_length <- function(meta) {
+  lengths_table <- data.table(
+    "variable_name" = meta$col_name,
+    "length" = nchar(meta$col_name)
+  )
+
+  names_too_long <- lengths_table[length > 50, variable_name]
+
+  if (length(names_too_long) == 0) {
+    output <- list(
+      "message" = "All variable names are 50 characters or fewer.",
+      "result" = "PASS"
+    )
+  } else {
+    if (length(names_too_long) == 1) {
+      output <- list(
+        "message" = paste0("The following variable name is over 50 characters, this will need shortening before this data can be published through the API: '", paste(names_too_long, collapse = "', '"), "'."),
+        "result" = "ADVISORY"
+      )
+    } else {
+      output <- list(
+        "message" = paste0("The following variable names are over 50 characters, these will need shortening before this data can be published through the API: '", paste0(names_too_long, collapse = "', '"), "'."),
+        "result" = "ADVISORY"
+      )
+    }
+  }
+
+  return(output)
+}
+
+#' Validate length of filter and indicator labels
+#'
+#' @param meta
+variable_label_length <- function(meta) {
+  lengths_table <- data.table(
+    "variable_label" = meta$label,
+    "length" = unlist(lapply(meta$label, nchar), use.names = FALSE)
+  )
+
+  labels_too_long <- lengths_table[length > 80, variable_label]
+
+  if (length(labels_too_long) == 0) {
+    output <- list(
+      "message" = "All variable labels are 80 characters or fewer.",
+      "result" = "PASS"
+    )
+  } else {
+    if (length(labels_too_long) == 1) {
+      output <- list(
+        "message" = paste0("The following variable label is over 80 characters, this will need shortening before this data can be published through the API: '", paste(labels_too_long, collapse = "', '"), "'."),
+        "result" = "ADVISORY"
+      )
+    } else {
+      output <- list(
+        "message" = paste0("The following variable labels are over 80 characters, these will need shortening before this data can be published through the API: '", paste0(labels_too_long, collapse = "', '"), "'."),
+        "result" = "ADVISORY"
+      )
+    }
+  }
+
+  return(output)
+}
+
+#' Validate length of filter items
+#'
+#' @param data
+#' @param meta
+filter_item_length <- function(data, meta) {
+  filters <- meta[col_type == "Filter"]$col_name
+
+  filter_items <- data |>
+    select(all_of(filters)) |>
+    unlist(use.names = FALSE)
+
+  lengths_table <- data.frame(
+    "filter_item" = filter_items,
+    "length" = unlist(lapply(filter_items, nchar), use.names = FALSE)
+  )
+
+  lengths_too_long <- lengths_table[lengths_table$length > 120, "filter_item"]
+
+  if (length(lengths_too_long) == 0) {
+    output <- list(
+      "message" = "All filter items are 120 characters or fewer.",
+      "result" = "PASS"
+    )
+  } else {
+    if (length(lengths_too_long) == 1) {
+      output <- list(
+        "message" = paste0("The following filter item is over 120 characters, this will need shortening before this data can be published through the API: '", paste(lengths_too_long, collapse = "', '"), "'."),
+        "result" = "ADVISORY"
+      )
+    } else {
+      output <- list(
+        "message" = paste0("The following filter items are over 120 characters, these will need shortening before this data can be published through the API: '", paste0(lengths_too_long, collapse = "', '"), "'."),
+        "result" = "ADVISORY"
+      )
+    }
+  }
+}
+
+
+#' Validate length of location names
+#'
+#' @param data
+location_name_length <- function(data) {
+  location_names <- data |>
+    select(any_of(geography_dataframe$name_field)) |>
+    unlist(use.names = FALSE) |>
+    purrr::discard(~ is.na(.) | . == "" | . == "NA")
+
+  lengths_table <- data.frame(
+    "location_name" = location_names,
+    "length" = unlist(lapply(location_names, nchar), use.names = FALSE)
+  )
+
+  lengths_too_long <- lengths_table[lengths_table$length > 120, "location_name"]
+
+  if (length(lengths_too_long) == 0) {
+    output <- list(
+      "message" = "All location names are 120 characters or fewer.",
+      "result" = "PASS"
+    )
+  } else {
+    if (length(lengths_too_long) == 1) {
+      output <- list(
+        "message" = paste0("The following location name is over 120 characters, this will need shortening before this data can be published through the API: '", paste(lengths_too_long, collapse = "', '"), "'."),
+        "result" = "ADVISORY"
+      )
+    } else {
+      output <- list(
+        "message" = paste0("The following location names are over 120 characters, these will need shortening before this data can be published through the API: '", paste0(lengths_too_long, collapse = "', '"), "'."),
+        "result" = "ADVISORY"
+      )
+    }
+  }
+
+  return(output)
+}
+
+
+#' Validate length of location codes
+#'
+#' @param data
+location_code_length <- function(data) {
+  location_code_cols <- c(geography_dataframe$code_field, geography_dataframe$code_field_secondary) |>
+    purrr::discard(~ is.na(.) | . == "")
+
+  location_codes <- data |>
+    select(any_of(location_code_cols)) |>
+    unlist(use.names = FALSE) |>
+    purrr::discard(~ is.na(.) | . == "" | . == "NA")
+
+
+  lengths_table <- data.frame(
+    "location_code" = location_codes,
+    "length" = unlist(lapply(location_codes, nchar), use.names = FALSE)
+  )
+
+  lengths_too_long <- lengths_table[lengths_table$length > 30, "location_code"]
+
+  if (length(lengths_too_long) == 0) {
+    output <- list(
+      "message" = "All location codes are 30 characters or fewer.",
+      "result" = "PASS"
+    )
+  } else {
+    if (length(lengths_too_long) == 1) {
+      output <- list(
+        "message" = paste0("The following location code is over 30 characters, this will need shortening before this data can be published through the API: '", paste(lengths_too_long, collapse = "', '"), "'."),
+        "result" = "ADVISORY"
+      )
+    } else {
+      output <- list(
+        "message" = paste0("The following location codes are over 30 characters, these will need shortening before this data can be published through the API: '", paste0(lengths_too_long, collapse = "', '"), "'."),
+        "result" = "ADVISORY"
+      )
+    }
+  }
+
+  return(output)
+}
diff --git a/manifest.json b/manifest.json
@@ -4220,6 +4220,9 @@
     }
   },
   "files": {
+    ".gitattributes": {
+      "checksum": "c3177b8a6fff470ce0b0378a0dc4c68e"
+    },
     ".github/PULL_REQUEST_TEMPLATE.md": {
       "checksum": "cbe2be338cd6586dc070e6a77e8e69ab"
     },
@@ -4233,7 +4236,7 @@
       "checksum": "e8d96d5ffd16a6867fd6d7458bd2f905"
     },
     ".Rprofile": {
-      "checksum": "27de70b9670311feeabe917efcc4b0a5"
+      "checksum": "1247aefe829eb8e208797eeed763d57f"
     },
     "azure-pipelines-dev.yml": {
       "checksum": "fc2ef843ff5024bcba25ff84eec05f4c"
@@ -4317,7 +4320,7 @@
       "checksum": "b9db57c3ebf85dabb5b9e96687206282"
     },
     "R/mainTests.r": {
-      "checksum": "c1b707b9db41701430b9ed0740e76044"
+      "checksum": "8f05516284acd631bf9e58dc6b589931"
     },
     "R/manual_scripts/debugging.R": {
       "checksum": "e56763afdf3659c7c47cfe0e3262e8cf"
@@ -4350,7 +4353,7 @@
       "checksum": "4fdd4e1b952cd929d2ba64685af21c88"
     },
     "server.R": {
-      "checksum": "76d87396cd2c1b083696fcac91980a6d"
+      "checksum": "23fc15ec19183749d64db4406219fbc4"
     },
     "tests/testthat.R": {
       "checksum": "12e9b09de5275bdc13bde6ae5a82f8b1"
@@ -4359,7 +4362,7 @@
       "checksum": "3824e397e0b583a9367e306830fccd72"
     },
     "tests/testthat/_snaps/UI-01_example_files/example_files-002.json": {
-      "checksum": "86cfc414c609ab64a79c1cb7bbdd235b"
+      "checksum": "e79a0ba571bfbbe17386b6e9cadaa93d"
     },
     "tests/testthat/_snaps/UI-02_edge_cases/edge_cases-001.json": {
       "checksum": "75228173459ba6f210f07e7ce363ab5e"
@@ -4601,6 +4604,12 @@
     "tests/testthat/mainTests/filter_hint.meta.csv": {
       "checksum": "97346a84a5185b12c5a12e9a6ea7b30d"
     },
+    "tests/testthat/mainTests/filter_item_length.csv": {
+      "checksum": "5acc5c8fe056940ebfe20d8dc15cfc4c"
+    },
+    "tests/testthat/mainTests/filter_item_length.meta.csv": {
+      "checksum": "beb8bedd0c4f3ef9c5d0997405b41c56"
+    },
     "tests/testthat/mainTests/geographic_catch.csv": {
       "checksum": "ff410d317b0ab3a637f6a1e7750141e3"
     },
@@ -4709,6 +4718,18 @@
     "tests/testthat/mainTests/lep_combinations.meta.csv": {
       "checksum": "8ce884f6f73324bd3aacc70567cfcb64"
     },
+    "tests/testthat/mainTests/location_code_length.csv": {
+      "checksum": "1c441e486859e833f6a75e4ca4fcffec"
+    },
+    "tests/testthat/mainTests/location_code_length.meta.csv": {
+      "checksum": "c1a7a9a5c1d0dc12fd7569b9238b6207"
+    },
+    "tests/testthat/mainTests/location_name_length.csv": {
+      "checksum": "f74bf87c35c3667c43675ee7b9df823f"
+    },
+    "tests/testthat/mainTests/location_name_length.meta.csv": {
+      "checksum": "c1a7a9a5c1d0dc12fd7569b9238b6207"
+    },
     "tests/testthat/mainTests/lsip_combinations.csv": {
       "checksum": "9f77ca00f1bdb7855a87600e8581a9bf"
     },
@@ -4853,6 +4874,18 @@
     "tests/testthat/mainTests/variable_characteristic.meta.csv": {
       "checksum": "df80ff86dbbb685cbcb52c64abf418d1"
     },
+    "tests/testthat/mainTests/variable_label_length.csv": {
+      "checksum": "4fd02e14a65b21b0b41a60bb337dc335"
+    },
+    "tests/testthat/mainTests/variable_label_length.meta.csv": {
+      "checksum": "2aeeb092aadf1aa12f9dd8470cc45cbd"
+    },
+    "tests/testthat/mainTests/variable_name_length.csv": {
+      "checksum": "2c811e8c4808248041ffb50ede344854"
+    },
+    "tests/testthat/mainTests/variable_name_length.meta.csv": {
+      "checksum": "04463d66a3fd9a109ef0867e9e0342e8"
+    },
     "tests/testthat/mainTests/variable_snake_case.csv": {
       "checksum": "98488233ca477f8445d0b193f1d2b212"
     },
@@ -5244,7 +5277,7 @@
       "checksum": "41a423a4bc68e5ba6c20a11f06092471"
     },
     "tests/testthat/test-function-edge_cases.R": {
-      "checksum": "967f63745faee40e71b3f5c74a2006e1"
+      "checksum": "4d43ddb1ccbad14c3cdd3c30ecc9bef4"
     },
     "tests/testthat/test-function-fileValidation.R": {
       "checksum": "302dd04b308471644c1a6574c3844c81"
@@ -5253,7 +5286,7 @@
       "checksum": "0ad4c6ef88604b628207156991a92349"
     },
     "tests/testthat/test-function-mainTests.R": {
-      "checksum": "def2be36fe97859696445a59afbb0725"
+      "checksum": "cdad605b53d891ce1c2a8494e868fd00"
     },
     "tests/testthat/test-function-preCheck1.R": {
       "checksum": "ecc3826341427734e1a8f097c1e1e6f8"
@@ -5277,7 +5310,7 @@
       "checksum": "88baeb581e4bd597d2e9834ea0c3a7c8"
     },
     "www/acalat_theme.css": {
-      "checksum": "e926d2ff3a5799c06d16a5e3c70b47aa"
+      "checksum": "8e7009445352b2fd4b44667eac6c48a3"
     },
     "www/builder-duck.PNG": {
       "checksum": "7574642f76936b645e9ce137ec6a99e9"

diff --git a/server.R b/server.R
@@ -1160,7 +1160,7 @@ server <- function(input, output, session) {
             symbol_expected <- data.frame(Symbol = gss_symbols)
 
             suppress_count <- symbol_expected %>%
-              left_join(suppress_count) %>%
+              left_join(suppress_count, by = join_by(Symbol)) %>%
               mutate_all(~ replace(., is.na(.), 0))
 
             output$suppressed_cell_count_table <- DT::renderDT({

diff --git a/tests/testthat/_snaps/UI-01_example_files/example_files-002.json b/tests/testthat/_snaps/UI-01_example_files/example_files-002.json
@@ -219,7 +219,7 @@
     "failed": 1,
     "ignored": 17,
     "info": 0,
-    "passed": 66,
+    "passed": 71,
     "progress_message": "Made it to the full screening checks but failed"
   }
 }
diff --git a/tests/testthat/mainTests/filter_item_length.csv b/tests/testthat/mainTests/filter_item_length.csv
@@ -0,0 +1,4 @@
+time_period,time_identifier,geographic_level,country_code,country_name,colour,num_schools,enrolments
+2020,Calendar year,National,E92000001,England,All colours,20121,6835059
+2020,Calendar year,National,E92000001,England,Yellow,16739,3885774
+2020,Calendar year,National,E92000001,England,Orange with a really really really really really really really really really really really really really really really really really really really really really really really really really really really long item name,13357,936489
diff --git a/tests/testthat/mainTests/filter_item_length.meta.csv b/tests/testthat/mainTests/filter_item_length.meta.csv
@@ -0,0 +1,4 @@
+col_name,col_type,label,indicator_grouping,indicator_unit,indicator_dp,filter_hint,filter_grouping_column
+colour,Filter,Colour,,,,,
+num_schools,Indicator,Number of schools,,,,,
+enrolments,Indicator,Enrolments,,,,,
diff --git a/tests/testthat/mainTests/location_code_length.csv b/tests/testthat/mainTests/location_code_length.csv
@@ -0,0 +1,4 @@
+time_period,time_identifier,geographic_level,country_code,country_name,opportunity_area_name,opportunity_area_code,num_schools,enrolments
+2020,Calendar year,Opportunity area,E92000001,England,An opportunity area,1995080819950808199508081995080819950808199508081995080819950808,20121,6835059
+2020,Calendar year,Opportunity area,E92000001,England,Lealholm,08081995,16739,3885774
+2020,Calendar year,Opportunity area,E92000001,England,Appleton-le-street,081995081995080819950808199508081995080819950808199508081995080819950808,17849,936489
diff --git a/tests/testthat/mainTests/location_code_length.meta.csv b/tests/testthat/mainTests/location_code_length.meta.csv
@@ -0,0 +1,3 @@
+col_name,col_type,label,indicator_grouping,indicator_unit,indicator_dp,filter_hint,filter_grouping_column
+num_schools,Indicator,Number of schools,,,,,
+enrolments,Indicator,Enrolments,,,,,
diff --git a/tests/testthat/mainTests/location_name_length.csv b/tests/testthat/mainTests/location_name_length.csv
@@ -0,0 +1,4 @@
+time_period,time_identifier,geographic_level,country_code,country_name,opportunity_area_name,opportunity_area_code,num_schools,enrolments
+2020,Calendar year,Opportunity area,E92000001,England,An opportunity area with a really really really really really really really really really really really really really really really really long name,19950808,20121,6835059
+2020,Calendar year,Opportunity area,E92000001,England,Lealholm,08081995,16739,3885774
+2020,Calendar year,Opportunity area,E92000001,England,Appleton-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street-le-street,08199508,17849,936489
diff --git a/tests/testthat/mainTests/location_name_length.meta.csv b/tests/testthat/mainTests/location_name_length.meta.csv
@@ -0,0 +1,3 @@
+col_name,col_type,label,indicator_grouping,indicator_unit,indicator_dp,filter_hint,filter_grouping_column
+num_schools,Indicator,Number of schools,,,,,
+enrolments,Indicator,Enrolments,,,,,
diff --git a/tests/testthat/mainTests/variable_label_length.csv b/tests/testthat/mainTests/variable_label_length.csv
@@ -0,0 +1,4 @@
+time_period,time_identifier,geographic_level,country_code,country_name,colour,num_schools,enrolments
+2020,Calendar year,National,E92000001,England,All colours,20121,6835059
+2020,Calendar year,National,E92000001,England,Yellow,16739,3885774
+2020,Calendar year,National,E92000001,England,Orange,13357,936489