Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding in checks for suspected wide data style column names #102

Merged
merged 12 commits into from
Dec 5, 2023
Merged
45 changes: 44 additions & 1 deletion R/mainTests.r
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ mainTests <- function(data_character, meta_character, datafile, metafile) {
ethnicity_headers(metafile), # active test
ethnicity_values(datafile), # active test
ethnicity_characteristic_group(datafile), # active test
ethnicity_characteristic_values(datafile) # active test
ethnicity_characteristic_values(datafile), # active test
indicators_smushed(metafile) # active test
),
"stage" = "mainTests",
"test" = c(activeTests$`R/mainTests.r`)
Expand Down Expand Up @@ -2721,3 +2722,45 @@ ethnicity_characteristic_values <- function(data) {
}
return(output)
}


# Indicators smushed
#
# @description This test checks the meta data file for any indicators that
# appear to be 'smushed', i.e. where a filter value has been folded into the
# indicator name (wide data). To do this, it flags any row with a col_type of
# "Indicator" whose col_name contains one of a short list of common filter
# entries (e.g. male, female, white, asian, black, etc). Matching is a
# case-insensitive substring match, so "male" will also match "female".
#
# @param meta The meta data table; must contain `col_name` and `col_type`
# columns
#
# @return list(message, result) - result is "FAIL" (with the offending
# col_names listed in message) when any indicator matches, otherwise "PASS"
indicators_smushed <- function(meta) {
  common_filter_substrings <- c(
    "male", "female",
    "white", "asian", "black", "chinese", "indian", "pakistani"
  )

  # Collapse into a single alternation so one grepl() call covers every entry
  filter_pattern <- paste(common_filter_substrings, collapse = "|")

  indicator_names <- meta %>%
    filter(
      col_type == "Indicator",
      grepl(filter_pattern, col_name, ignore.case = TRUE)
    ) %>%
    pull(col_name)

  if (length(indicator_names) > 0) {
    output <- list(
      "message" = paste0(
        "The following indicators appear to not conform to tidy data principles: ",
        paste(indicator_names, collapse = ", "),
        ". We recommend pivoting your data longer and adding a filter to contain characteristic choices."
      ),
      "result" = "FAIL"
    )
  } else {
    output <- list(
      "message" = "No indicators found containing typical filter entries.",
      "result" = "PASS"
    )
  }

  return(output)
}
2 changes: 1 addition & 1 deletion tests/shinytest/UI_tests-expected/002.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/shinytest/UI_tests-expected/003.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/shinytest/UI_tests-expected/004.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/shinytest/UI_tests-expected/007.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/shinytest/UI_tests-expected/009.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/shinytest/UI_tests-expected/024.json

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions tests/testthat/mainTests/indicators_smushed.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
time_period,time_identifier,geographic_level,country_code,country_name,region_code,region_name,pupil_count,pupil_count_sex_male,pupil_count_sex_female,pupil_count_white,pupil_count_asian,pupil_count_black,pupil_count_mixed,pupil_count_other
201718,Academic year,National,E92000001,England,,,7909,3776,4133,5315,1111,1156,145,182
201718,Academic year,Regional,E92000001,England,E12000001,North East,975,432,543,741,65,117,20,32
201718,Academic year,Regional,E92000001,England,E12000003,Yorkshire and The Humber,465,234,231,326,80,28,9,22
201718,Academic year,Regional,E92000001,England,E12000004,East Midlands,666,234,432,532,62,64,2,6
201718,Academic year,Regional,E92000001,England,E12000006,East of England,1086,543,543,977,44,32,22,11
201718,Academic year,Regional,E92000001,England,E12000008,South East,1188,534,654,713,300,95,24,56
201718,Academic year,Regional,E92000001,England,E13000001,Inner London,1519,765,754,820,260,364,30,45
201718,Academic year,Regional,E92000001,England,E13000002,Outer London,2010,1034,976,1206,300,456,38,10
9 changes: 9 additions & 0 deletions tests/testthat/mainTests/indicators_smushed.meta.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
col_name,col_type,label,indicator_grouping,indicator_unit,indicator_dp,filter_hint,filter_grouping_column
pupil_count,Indicator,Number of pupils,,,0,,
pupil_count_sex_male,Indicator,Number of male pupils,Sex,,0,,
pupil_count_sex_female,Indicator,Number of female pupils,Sex,,0,,
pupil_count_white,Indicator,Number of ethnically white pupils,Ethnicity,,0,,
pupil_count_asian,Indicator,Number of ethnically Asian pupils,Ethnicity,,0,,
pupil_count_black,Indicator,Number of ethnically black pupils,Ethnicity,,0,,
pupil_count_mixed,Indicator,Number of ethnically mixed pupils,Ethnicity,,0,,
pupil_count_other,Indicator,Number of pupils identifying as Other ethnicity,Ethnicity,,0,,
4 changes: 4 additions & 0 deletions tests/testthat/test-mainTests.R
Original file line number Diff line number Diff line change
Expand Up @@ -429,3 +429,7 @@ test_that("ethnicity_characteristic_values", {
"ADVISORY"
)
})

# The indicators_smushed fixture has wide-style indicator names (e.g.
# pupil_count_sex_male), so the check is expected to report FAIL
test_that("indicators_smushed", {
  smushed_result <- testIndividualTest(pathStart, "indicators_smushed")
  expect_equal(smushed_result, "FAIL")
})