diff --git a/.Rbuildignore b/.Rbuildignore index b9c98a3..f4c5d16 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -11,3 +11,4 @@ ^data-raw$ ^doc$ ^Meta$ +^cran-comments\.md$ diff --git a/DESCRIPTION b/DESCRIPTION index 7bbb0b9..c023d75 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,20 +1,22 @@ Type: Package Package: dfeR -Title: Common DfE R tasks -Version: 0.6.1.9000 +Title: Common Department for Education Analysis Tasks +Version: 1.0.1 Authors@R: c( person("Cam", "Race", , "cameron.race@education.gov.uk", role = c("aut", "cre")), + person("Department for Education, England", , , "statistics.development@education.gov.uk", role = "cph"), person("Laura", "Selby", , "laura.selby@education.gov.uk", role = "aut"), person("Adam", "Robinson", role = "aut"), person("Jen", "Machin", , "jen.machin@education.gov.uk", role = "ctb"), person("Jake", "Tufts", , "jake.tufts@education.gov.uk", role = "ctb"), person("Rich", "Bielby", , "richard.bielby@education.gov.uk", role = "ctb", comment = c(ORCID = "0000-0001-9070-9969")), - person("Menna", "Zayed", , "menna.zayed@education.gov.uk", role = "ctb") + person("Menna", "Zayed", , "menna.zayed@education.gov.uk", role = "ctb"), + person("Lauren", "Snaathorst", , "lauren.snaathorst@education.gov.uk", role = "ctb") ) -Description: This package contains R functions to allow DfE analysts to - re-use code for common analytical tasks that are undertaken across the - Department. +Description: Preferred methods for common analytical tasks that are + undertaken across the Department, including number formatting, project + templates and curated reference data. 
License: GPL (>= 3) URL: https://dfe-analytical-services.github.io/dfeR/, https://github.com/dfe-analytical-services/dfeR diff --git a/NEWS.md b/NEWS.md index 39dd128..9dc5187 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,11 @@ -# dfeR (development version) +# dfeR 1.0.1 -Added lookup data geog_time_identifiers +Fixed the spacing and printing of the z_replace() warning message, updated the eesyapi URL in the README and removed extraneous package tests. + +# dfeR 1.0.0 + +Initial CRAN release. +Added lookup data geog_time_identifiers. Added z_replace() to replace NA values in tables except for ones in geography and time columns that match ones in geog_time_identifiers. # dfeR 0.6.1 diff --git a/R/create_project.R b/R/create_project.R index 722741f..fd9c7a1 100644 --- a/R/create_project.R +++ b/R/create_project.R @@ -77,7 +77,7 @@ create_project <- function( "call those functions, run\n", "# `source('R/helper_functions.R')` at the start of your ", "script.\n\n", - "print('Your scripts and functions should be in ", + "message('Your scripts and functions should be in ", "the R folder.')" ) ) @@ -175,7 +175,10 @@ create_project <- function( # Create the readme ----- - file.copy("README_template.md", file.path(path, "README.md")) + file.copy( + system.file(package = "dfeR", "README_template.md"), + file.path(path, "README.md") + ) # .renvignore file.create(paste0(path, "/.renvignore")) @@ -242,8 +245,6 @@ create_project <- function( ) } - - # Create a .Rprofile with a custom welcome message if (!file.exists(paste0(path, "/.Rprofile"))) { file.create(paste0(path, "/.Rprofile")) @@ -267,13 +268,9 @@ create_project <- function( ) writeLines(rprofile_content, paste0(path, "/.Rprofile")) - - - - # Successful project creation message (or delete project if fails) if (successful_creation) { - cat( + message( paste0( "\n\n", "****************************************************************\n", diff --git a/R/datasets_documentation.R b/R/datasets_documentation.R index 
cffb579..28e638a 100644 --- a/R/datasets_documentation.R +++ b/R/datasets_documentation.R @@ -114,5 +114,5 @@ #' @format ## `geog_time_identifiers` #' A character vector with 38 potential column names in snake case format. #' @source curated by explore.statistics@@education.gov.uk. -#' \href{https://shorturl.at/j4532}{Get guidance on time and geography data.} +#' \href{https://www.shorturl.at/j4532}{Guidance on time and geography data.} "geog_time_identifiers" diff --git a/R/datasets_utils.R b/R/datasets_utils.R index 20dded5..db5e98e 100644 --- a/R/datasets_utils.R +++ b/R/datasets_utils.R @@ -19,6 +19,7 @@ #' data frame from R memory #' #' @keywords internal +#' @noRd #' @return a data frame of a tidied lookup file tidy_raw_lookup <- function(raw_lookup_file) { if (!is.data.frame(raw_lookup_file)) { @@ -142,6 +143,7 @@ tidy_raw_lookup <- function(raw_lookup_file) { #' usually the output of tidy_raw_lookup #' #' @return single data.frame of all lookup files combined +#' @noRd create_time_series_lookup <- function(lookups_list) { # Input validation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Added some quick checks based on the assumptions we make in this function @@ -234,6 +236,7 @@ create_time_series_lookup <- function(lookups_list) { #' @return data.frame for the individual year of the lookup #' #' @keywords internal +#' @noRd get_wd_pcon_lad_la <- function(year) { # Crude way to grab 2 digits, works for anything that isn't in the noughties year_end <- year %% 100 @@ -304,6 +307,7 @@ get_wd_pcon_lad_la <- function(year) { #' @return data.frame for the individual year of the lookup #' #' @keywords internal +#' @noRd get_lad_region <- function(year) { # Crude way to grab 2 digits, works for anything that isn't in the noughties year_end <- year %% 100 diff --git a/R/fetch_utils.R b/R/fetch_utils.R index 480e4c7..9437835 100644 --- a/R/fetch_utils.R +++ b/R/fetch_utils.R @@ -5,6 +5,7 @@ #' #' @return nothing, unless a failure, and then it will give 
an error #' @keywords internal +#' @noRd check_fetch_location_inputs <- function(year_input, country_input) { if (year_input != "All") { if (!grepl("^\\d{4}$", as.character(year_input))) { @@ -39,6 +40,7 @@ check_fetch_location_inputs <- function(year_input, country_input) { #' #' @return a data frame of location names and codes #' @keywords internal +#' @noRd fetch_locations <- function(lookup_data, cols, year, countries) { # Return only the cols we specified # We know their position from the dplyr selection of the lookup diff --git a/R/get_ons_api_data.R b/R/get_ons_api_data.R index a7b43da..a0fda85 100644 --- a/R/get_ons_api_data.R +++ b/R/get_ons_api_data.R @@ -22,7 +22,7 @@ #' Geography Portal #' @param query_params query parameters to pass into the API, see the ESRI #' documentation for more information on query parameters - -#' \href{https://shorturl.at/5xrJT}{ESRI Query (Feature Service/Layer)} +#' \href{https://www.shorturl.at/5xrJT}{ESRI Query (Feature Service/Layer)} #' @param batch_size the number of rows per query. This is 250 by default, if #' you hit errors then try lowering this. 
The API has a limit of 1000 to 2000 #' rows per query, and in truth, the actual limit for our method is lower as @@ -36,17 +36,19 @@ #' @return parsed data.frame of geographic names and codes #' #' @examples -#' if (interactive()) { -#' # Specify some parameters -#' get_ons_api_data( -#' data_id = "LAD23_RGN23_EN_LU", -#' query_params = -#' list(outFields = "column1, column2", outSR = "4326", f = "json") -#' ) +#' # Fetch everything from a data set +#' dfeR::get_ons_api_data(data_id = "LAD23_RGN23_EN_LU") #' -#' # Just fetch everything -#' get_ons_api_data(data_id = "LAD23_RGN23_EN_LU") -#' } +#' # Specify the columns you want +#' dfeR::get_ons_api_data( +#' "RGN_DEC_2023_EN_NC", +#' query_params = list( +#' where = "1=1", +#' outFields = "RGN23CD,RGN23NM", +#' outSR = 4326, +#' f = "json" +#' ) +#' ) get_ons_api_data <- function(data_id, query_params = list( diff --git a/R/toggle_message.R b/R/toggle_message.R index 2bfbef5..92b26ea 100644 --- a/R/toggle_message.R +++ b/R/toggle_message.R @@ -10,6 +10,8 @@ #' @param verbose logical, usually a variable passed from the function you are #' using this within #' +#' @return No return value, called for side effects +#' #' @export #' #' @examples diff --git a/R/z_replace.R b/R/z_replace.R index d595f1c..7c27558 100644 --- a/R/z_replace.R +++ b/R/z_replace.R @@ -3,7 +3,7 @@ #' @description #' Replaces `NA` values in tables except for ones in time and geography #' columns that must be included in DfE official statistics. -#' \href{https://shorturl.at/chy76}{Get more guidance on Open Data Standards.} +#' \href{https://www.shorturl.at/chy76}{Guidance on our Open Data Standards.} #' #' @details @@ -87,7 +87,7 @@ z_replace <- function(data, stop( "Your table has geography and/or time column(s) that are not ", "in snake_case.\nPlease amend your column names to match the formatting", - "to dfeR::geog_time_identifiers." + " of dfeR::geog_time_identifiers." 
) } diff --git a/README.Rmd b/README.Rmd index b69e95b..5c6543e 100644 --- a/README.Rmd +++ b/README.Rmd @@ -13,7 +13,7 @@ knitr::opts_chunk$set( ) ``` -# dfeR +# dfeR [![R-CMD-check](https://github.com/dfe-analytical-services/dfeR/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/dfe-analytical-services/dfeR/actions/workflows/R-CMD-check.yaml) @@ -38,13 +38,13 @@ Functionality for dfeR is expected to focus around the following: 4. API wrappers commonly needed in DfE analysis (where they don't have their own separate package) 5. Geography lookup files and helper functions -Documentation for what has been included in the package so far is on our [pkgdown site](http://dfe-analytical-services.github.io/dfeR/). +Documentation for what has been included in the package so far is on our [pkgdown site](https://dfe-analytical-services.github.io/dfeR/). ### Relevant other packages We also maintain the [dfeshiny](https://github.com/dfe-analytical-services/dfeshiny) package, and expect any functions specific to R Shiny applications will live there. -For connecting to data in the [explore education statistics](https://explore-education-statistics.service.gov.uk/), we are building the [eesyapi](https://github.com/dfe-analytical-services/eesyapi) package. +For connecting to data in the [explore education statistics](https://explore-education-statistics.service.gov.uk/), we are building the [eesyapi](https://github.com/dfe-analytical-services/eesyapi.R) package. There is a [giasr](https://github.com/dfe-analytical-services/giasr) package, which has been developed for connecting to data in the [get information about schools service](https://get-information-schools.service.gov.uk/). @@ -54,7 +54,13 @@ While we have some DfE specific data in the dfeR package taken from the [Open Ge ## Installation -dfeR is not currently available on CRAN. For the time being you can install the development version from GitHub. 
+dfeR is available on CRAN and you can install directly from there: + +``` r +install.packages("dfeR") +``` + +You can install the development version from GitHub. If you are using [renv](https://rstudio.github.io/renv/articles/renv.html) in your project (recommended): @@ -73,7 +79,7 @@ devtools::install_github("dfe-analytical-services/dfeR") ## Proxy -The need for setting proxy settings in order to be able to work with R and Git within the DfE estate has now ended. If you previously run the proxy script in previous versions of the dfeR package, then contact the [Statistics Development Team](statistics.development@education.gov.uk) to assist in cleaning out your system settings. +The need for setting proxy settings in order to be able to work with R and Git within the DfE estate has now ended. If you previously ran the proxy script in previous versions of the dfeR package, then contact the [Statistics Development Team](mailto:statistics.development@education.gov.uk) to assist in cleaning out your system settings. 
--- diff --git a/README.md b/README.md index 75192b4..5be6f02 100644 --- a/README.md +++ b/README.md @@ -1,282 +1,288 @@ - - - -# dfeR - - - -[![R-CMD-check](https://github.com/dfe-analytical-services/dfeR/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/dfe-analytical-services/dfeR/actions/workflows/R-CMD-check.yaml) -[![pkgdown](https://github.com/dfe-analytical-services/dfeR/actions/workflows/pkgdown.yaml/badge.svg)](https://github.com/dfe-analytical-services/dfeR/actions/workflows/pkgdown.yaml) -[![Codecov test -coverage](https://codecov.io/gh/dfe-analytical-services/dfeR/branch/main/graph/badge.svg)](https://app.codecov.io/gh/dfe-analytical-services/dfeR?branch=main) -[![Lifecycle: -experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) - - -The goal of dfeR is to help standardise R programming across the -Department for Education (DfE), and facilitate sharing of business -specific functions, making our code easier to read and write. - -Credit to [lauraselby](https://github.com/lauraselby) for the logo -featuring Frederick! - -## Scope - -This package is open to all of DfE and anything we think could be useful -to other programmers and analysts can be contributed. - -Functionality for dfeR is expected to focus around the following: - -1. DfE specific formatting and helper functions -2. Working with DfE databases -3. Templates for analytical projects -4. API wrappers commonly needed in DfE analysis (where they don’t have - their own separate package) -5. Geography lookup files and helper functions - -Documentation for what has been included in the package so far is on our -[pkgdown site](http://dfe-analytical-services.github.io/dfeR/). - -### Relevant other packages - -We also maintain the -[dfeshiny](https://github.com/dfe-analytical-services/dfeshiny) package, -and expect any functions specific to R Shiny applications will live -there. 
- -For connecting to data in the [explore education -statistics](https://explore-education-statistics.service.gov.uk/), we -are building the -[eesyapi](https://github.com/dfe-analytical-services/eesyapi) package. - -There is a [giasr](https://github.com/dfe-analytical-services/giasr) -package, which has been developed for connecting to data in the [get -information about schools -service](https://get-information-schools.service.gov.uk/). - -While we have some DfE specific data in the dfeR package taken from the -[Open Geography Portal](https://geoportal.statistics.gov.uk/). If you’re -looking at getting new data from the portal it is also worth looking at -the [boundr](https://github.com/francisbarton/boundr) package, as this -gives more functions for directly extracting data from there. - ------------------------------------------------------------------------- - -## Installation - -dfeR is not currently available on CRAN. For the time being you can -install the development version from GitHub. - -If you are using -[renv](https://rstudio.github.io/renv/articles/renv.html) in your -project (recommended): - -``` r -renv::install("dfe-analytical-services/dfeR") -``` - -Otherwise: - -``` r -# install.packages("devtools") -devtools::install_github("dfe-analytical-services/dfeR") -``` - ------------------------------------------------------------------------- - -## Proxy - -The need for setting proxy settings in order to be able to work with R -and Git within the DfE estate has now ended. If you previously run the -proxy script in previous versions of the dfeR package, then contact the -[Statistics Development Team](statistics.development@education.gov.uk) -to assist in cleaning out your system settings. 
- ------------------------------------------------------------------------- - -## Contributing - -Ideas for dfeR should first be raised as a [GitHub -issue](https://github.com/dfe-analytical-services/dfeR) after which -anyone is free to write the code and create a pull request for review. - -For more details on contributing to dfeR, see our [contributing -guidelines](https://dfe-analytical-services.github.io/dfeR/CONTRIBUTING.html). - ------------------------------------------------------------------------- - -## Code of Conduct - -Please note that the dfeR project is released with a [Contributor Code -of -Conduct](https://dfe-analytical-services.github.io/dfeR/CODE_OF_CONDUCT.html). -By contributing to this project, you agree to abide by its terms. - ------------------------------------------------------------------------- - -## Examples - -Here are some example functions from within the package: - -``` r -library(dfeR) - -# Prettify large numbers -pretty_num(111111111, gbp = TRUE) -#> [1] "£111.11 million" -pretty_num(-11^8, dp = -1) -#> [1] "-210 million" - -# Convert bytes to readable size -pretty_filesize(77777777) -#> [1] "77.78 MB" - -# Calculate elapsed time and present prettily -start <- Sys.time() -end <- Sys.time() + 789890 -pretty_time_taken(start, end) -#> [1] "219 hours 24 minutes 50 seconds" - -# Round 5's up instead of bankers round used by round() in base R -round_five_up(2.5) -#> [1] 3 -round(2.5) # base R -#> [1] 2 - -# Custom formatting for academic and financial years -format_ay(202425) -#> [1] "2024/25" -format_fy(202425) -#> [1] "2024-25" -format_ay_reverse("2024/25") -#> [1] "202425" -format_fy_reverse("2024-25") -#> [1] "202425" - -# Get Ward to PCon to LAD to LA to Rgn to Ctry lookup file -my_data <- dfeR::wd_pcon_lad_la_rgn_ctry -head(my_data) # show first 5 rows in console -#> first_available_year_included most_recent_year_included -#> 1 2017 2017 -#> 2 2017 2017 -#> 3 2017 2020 -#> 4 2017 2017 -#> 5 2017 2020 -#> 6 2017 2017 -#> 
ward_name pcon_name lad_name la_name -#> 1 Bastwell Blackburn Blackburn with Darwen Blackburn with Darwen -#> 2 Ormesby Redcar Redcar and Cleveland Redcar and Cleveland -#> 3 Burn Valley Hartlepool Hartlepool Hartlepool -#> 4 Beardwood with Lammack Blackburn Blackburn with Darwen Blackburn with Darwen -#> 5 De Bruce Hartlepool Hartlepool Hartlepool -#> 6 St Germain's Redcar Redcar and Cleveland Redcar and Cleveland -#> region_name country_name ward_code pcon_code lad_code new_la_code -#> 1 North West England E05001621 E14000570 E06000008 E06000008 -#> 2 North East England E05001518 E14000891 E06000003 E06000003 -#> 3 North East England E05008942 E14000733 E06000001 E06000001 -#> 4 North West England E05001622 E14000570 E06000008 E06000008 -#> 5 North East England E05008943 E14000733 E06000001 E06000001 -#> 6 North East England E05001519 E14000891 E06000003 E06000003 -#> region_code country_code -#> 1 E12000002 E92000001 -#> 2 E12000001 E92000001 -#> 3 E12000001 E92000001 -#> 4 E12000002 E92000001 -#> 5 E12000001 E92000001 -#> 6 E12000001 E92000001 - -# Get all countries -dfeR::countries -#> country_code country_name -#> 1 E92000001 England -#> 2 K02000001 United Kingdom -#> 3 K03000001 Great Britain -#> 4 K04000001 England and Wales -#> 5 N92000002 Northern Ireland -#> 6 S92000003 Scotland -#> 7 W92000004 Wales -#> 8 z England, Wales and Northern Ireland -#> 9 z Outside of England and unknown -#> 10 z Outside of the United Kingdom and unknown - -# Get all PCon names and codes for 2024 -fetch_pcons(2024) |> - head() # show first 5 rows only -#> pcon_code pcon_name -#> 1 S14000045 Midlothian -#> 2 S14000027 Na h-Eileanan an Iar -#> 3 S14000021 East Renfrewshire -#> 4 S14000048 North Ayrshire and Arran -#> 5 S14000051 Orkney and Shetland -#> 6 E14001440 Redcar - -# Get All LADs in Scotland in 2017 -fetch_lads(2017, "Scotland") |> - head() # show first 5 rows only -#> lad_code lad_name -#> 1 S12000019 Midlothian -#> 2 S12000015 Fife -#> 3 S12000014 Falkirk -#> 4 
S12000013 Na h-Eileanan Siar -#> 5 S12000018 Inverclyde -#> 6 S12000011 East Renfrewshire - -# Get all LAs in Scotland and Northern Ireland in 2022 -fetch_las(2022, c("Scotland", "Northern Ireland")) |> - head() # show first 5 rows only -#> new_la_code la_name -#> 1 N09000003 Belfast -#> 2 N09000004 Causeway Coast and Glens -#> 3 N09000002 Armagh City, Banbridge and Craigavon -#> 4 N09000005 Derry City and Strabane -#> 5 N09000001 Antrim and Newtownabbey -#> 6 N09000006 Fermanagh and Omagh - -# Get all Welsh wards for 2021 -fetch_wards(2021, "Wales") |> - head() # show first 5 rows only -#> ward_code ward_name -#> 1 W05000981 Aethwy -#> 2 W05000982 Bro Aberffraw -#> 3 W05000983 Bro Rhosyr -#> 4 W05000107 Tregarth & Mynydd Llandygai -#> 5 W05000984 Caergybi -#> 6 W05000985 Canolbarth Môn - -# The following have no specific years available and return all values -fetch_regions() -#> region_code region_name -#> 1 E12000001 North East -#> 2 E12000002 North West -#> 3 E12000003 Yorkshire and The Humber -#> 4 E12000004 East Midlands -#> 5 E12000005 West Midlands -#> 6 E12000006 East of England -#> 7 E12000007 London -#> 8 E12000008 South East -#> 9 E12000009 South West -#> 10 E13000001 Inner London -#> 11 E13000002 Outer London -#> 12 z Outside of England and unknown -#> 13 z Outside of the United Kingdom and unknown -#> 14 z Outside of England -#> 15 z Outside of United Kingdom -#> 16 z Unknown - -fetch_countries() -#> country_code country_name -#> 1 E92000001 England -#> 2 K02000001 United Kingdom -#> 3 K03000001 Great Britain -#> 4 K04000001 England and Wales -#> 5 N92000002 Northern Ireland -#> 6 S92000003 Scotland -#> 7 W92000004 Wales -#> 8 z England, Wales and Northern Ireland -#> 9 z Outside of England and unknown -#> 10 z Outside of the United Kingdom and unknown -``` - -For more details on all the functions available in this package, and -examples of how to use them, please see our [dfeR package reference 
-documentation](https://dfe-analytical-services.github.io/dfeR/reference/index.html). + + + +# dfeR + + + +[![R-CMD-check](https://github.com/dfe-analytical-services/dfeR/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/dfe-analytical-services/dfeR/actions/workflows/R-CMD-check.yaml) +[![pkgdown](https://github.com/dfe-analytical-services/dfeR/actions/workflows/pkgdown.yaml/badge.svg)](https://github.com/dfe-analytical-services/dfeR/actions/workflows/pkgdown.yaml) +[![Codecov test +coverage](https://codecov.io/gh/dfe-analytical-services/dfeR/branch/main/graph/badge.svg)](https://app.codecov.io/gh/dfe-analytical-services/dfeR?branch=main) +[![Lifecycle: +experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) + + +The goal of dfeR is to help standardise R programming across the +Department for Education (DfE), and facilitate sharing of business +specific functions, making our code easier to read and write. + +Credit to [lauraselby](https://github.com/lauraselby) for the logo +featuring Frederick! + +## Scope + +This package is open to all of DfE and anything we think could be useful +to other programmers and analysts can be contributed. + +Functionality for dfeR is expected to focus around the following: + +1. DfE specific formatting and helper functions +2. Working with DfE databases +3. Templates for analytical projects +4. API wrappers commonly needed in DfE analysis (where they don’t have + their own separate package) +5. Geography lookup files and helper functions + +Documentation for what has been included in the package so far is on our +[pkgdown site](https://dfe-analytical-services.github.io/dfeR/). + +### Relevant other packages + +We also maintain the +[dfeshiny](https://github.com/dfe-analytical-services/dfeshiny) package, +and expect any functions specific to R Shiny applications will live +there. 
+ +For connecting to data in the [explore education +statistics](https://explore-education-statistics.service.gov.uk/), we +are building the +[eesyapi](https://github.com/dfe-analytical-services/eesyapi.R) package. + +There is a [giasr](https://github.com/dfe-analytical-services/giasr) +package, which has been developed for connecting to data in the [get +information about schools +service](https://get-information-schools.service.gov.uk/). + +While we have some DfE specific data in the dfeR package taken from the +[Open Geography Portal](https://geoportal.statistics.gov.uk/). If you’re +looking at getting new data from the portal it is also worth looking at +the [boundr](https://github.com/francisbarton/boundr) package, as this +gives more functions for directly extracting data from there. + +------------------------------------------------------------------------ + +## Installation + +dfeR is available on CRAN and you can install directly from there: + +``` r +install.packages("dfeR") +``` + +You can install the development version from GitHub. + +If you are using +[renv](https://rstudio.github.io/renv/articles/renv.html) in your +project (recommended): + +``` r +renv::install("dfe-analytical-services/dfeR") +``` + +Otherwise: + +``` r +# install.packages("devtools") +devtools::install_github("dfe-analytical-services/dfeR") +``` + +------------------------------------------------------------------------ + +## Proxy + +The need for setting proxy settings in order to be able to work with R +and Git within the DfE estate has now ended. If you previously ran the +proxy script in previous versions of the dfeR package, then contact the +[Statistics Development +Team](mailto:statistics.development@education.gov.uk) to assist in +cleaning out your system settings. 
+ +------------------------------------------------------------------------ + +## Contributing + +Ideas for dfeR should first be raised as a [GitHub +issue](https://github.com/dfe-analytical-services/dfeR) after which +anyone is free to write the code and create a pull request for review. + +For more details on contributing to dfeR, see our [contributing +guidelines](https://dfe-analytical-services.github.io/dfeR/CONTRIBUTING.html). + +------------------------------------------------------------------------ + +## Code of Conduct + +Please note that the dfeR project is released with a [Contributor Code +of +Conduct](https://dfe-analytical-services.github.io/dfeR/CODE_OF_CONDUCT.html). +By contributing to this project, you agree to abide by its terms. + +------------------------------------------------------------------------ + +## Examples + +Here are some example functions from within the package: + +``` r +library(dfeR) + +# Prettify large numbers +pretty_num(111111111, gbp = TRUE) +#> [1] "£111 million" +pretty_num(-11^8, dp = -1) +#> [1] "-210 million" + +# Convert bytes to readable size +pretty_filesize(77777777) +#> [1] "77.78 MB" + +# Calculate elapsed time and present prettily +start <- Sys.time() +end <- Sys.time() + 789890 +pretty_time_taken(start, end) +#> [1] "219 hours 24 minutes 50 seconds" + +# Round 5's up instead of bankers round used by round() in base R +round_five_up(2.5) +#> [1] 3 +round(2.5) # base R +#> [1] 2 + +# Custom formatting for academic and financial years +format_ay(202425) +#> [1] "2024/25" +format_fy(202425) +#> [1] "2024-25" +format_ay_reverse("2024/25") +#> [1] "202425" +format_fy_reverse("2024-25") +#> [1] "202425" + +# Get Ward to PCon to LAD to LA to Rgn to Ctry lookup file +my_data <- dfeR::wd_pcon_lad_la_rgn_ctry +head(my_data) # show first 5 rows in console +#> first_available_year_included most_recent_year_included +#> 1 2017 2017 +#> 2 2017 2017 +#> 3 2017 2020 +#> 4 2017 2017 +#> 5 2017 2020 +#> 6 2017 2017 +#> ward_name 
pcon_name lad_name la_name +#> 1 Bastwell Blackburn Blackburn with Darwen Blackburn with Darwen +#> 2 Ormesby Redcar Redcar and Cleveland Redcar and Cleveland +#> 3 Burn Valley Hartlepool Hartlepool Hartlepool +#> 4 Beardwood with Lammack Blackburn Blackburn with Darwen Blackburn with Darwen +#> 5 De Bruce Hartlepool Hartlepool Hartlepool +#> 6 St Germain's Redcar Redcar and Cleveland Redcar and Cleveland +#> region_name country_name ward_code pcon_code lad_code new_la_code +#> 1 North West England E05001621 E14000570 E06000008 E06000008 +#> 2 North East England E05001518 E14000891 E06000003 E06000003 +#> 3 North East England E05008942 E14000733 E06000001 E06000001 +#> 4 North West England E05001622 E14000570 E06000008 E06000008 +#> 5 North East England E05008943 E14000733 E06000001 E06000001 +#> 6 North East England E05001519 E14000891 E06000003 E06000003 +#> region_code country_code +#> 1 E12000002 E92000001 +#> 2 E12000001 E92000001 +#> 3 E12000001 E92000001 +#> 4 E12000002 E92000001 +#> 5 E12000001 E92000001 +#> 6 E12000001 E92000001 + +# Get all countries +dfeR::countries +#> country_code country_name +#> 1 E92000001 England +#> 2 K02000001 United Kingdom +#> 3 K03000001 Great Britain +#> 4 K04000001 England and Wales +#> 5 N92000002 Northern Ireland +#> 6 S92000003 Scotland +#> 7 W92000004 Wales +#> 8 z England, Wales and Northern Ireland +#> 9 z Outside of England and unknown +#> 10 z Outside of the United Kingdom and unknown + +# Get all PCon names and codes for 2024 +fetch_pcons(2024) |> + head() # show first 5 rows only +#> pcon_code pcon_name +#> 1 S14000045 Midlothian +#> 2 S14000027 Na h-Eileanan an Iar +#> 3 S14000021 East Renfrewshire +#> 4 S14000048 North Ayrshire and Arran +#> 5 S14000051 Orkney and Shetland +#> 6 E14001440 Redcar + +# Get All LADs in Scotland in 2017 +fetch_lads(2017, "Scotland") |> + head() # show first 5 rows only +#> lad_code lad_name +#> 1 S12000019 Midlothian +#> 2 S12000015 Fife +#> 3 S12000014 Falkirk +#> 4 S12000013 Na 
h-Eileanan Siar +#> 5 S12000018 Inverclyde +#> 6 S12000011 East Renfrewshire + +# Get all LAs in Scotland and Northern Ireland in 2022 +fetch_las(2022, c("Scotland", "Northern Ireland")) |> + head() # show first 5 rows only +#> new_la_code la_name +#> 1 N09000003 Belfast +#> 2 N09000004 Causeway Coast and Glens +#> 3 N09000002 Armagh City, Banbridge and Craigavon +#> 4 N09000005 Derry City and Strabane +#> 5 N09000001 Antrim and Newtownabbey +#> 6 N09000006 Fermanagh and Omagh + +# Get all Welsh wards for 2021 +fetch_wards(2021, "Wales") |> + head() # show first 5 rows only +#> ward_code ward_name +#> 1 W05000981 Aethwy +#> 2 W05000982 Bro Aberffraw +#> 3 W05000983 Bro Rhosyr +#> 4 W05000107 Tregarth & Mynydd Llandygai +#> 5 W05000984 Caergybi +#> 6 W05000985 Canolbarth Môn + +# The following have no specific years available and return all values +fetch_regions() +#> region_code region_name +#> 1 E12000001 North East +#> 2 E12000002 North West +#> 3 E12000003 Yorkshire and The Humber +#> 4 E12000004 East Midlands +#> 5 E12000005 West Midlands +#> 6 E12000006 East of England +#> 7 E12000007 London +#> 8 E12000008 South East +#> 9 E12000009 South West +#> 10 E13000001 Inner London +#> 11 E13000002 Outer London +#> 12 z Outside of England and unknown +#> 13 z Outside of the United Kingdom and unknown +#> 14 z Outside of England +#> 15 z Outside of United Kingdom +#> 16 z Unknown + +fetch_countries() +#> country_code country_name +#> 1 E92000001 England +#> 2 K02000001 United Kingdom +#> 3 K03000001 Great Britain +#> 4 K04000001 England and Wales +#> 5 N92000002 Northern Ireland +#> 6 S92000003 Scotland +#> 7 W92000004 Wales +#> 8 z England, Wales and Northern Ireland +#> 9 z Outside of England and unknown +#> 10 z Outside of the United Kingdom and unknown +``` + +For more details on all the functions available in this package, and +examples of how to use them, please see our [dfeR package reference 
+documentation](https://dfe-analytical-services.github.io/dfeR/reference/index.html). diff --git a/cran-comments.md b/cran-comments.md new file mode 100644 index 0000000..5bdf9eb --- /dev/null +++ b/cran-comments.md @@ -0,0 +1,7 @@ +## R CMD check results + +0 errors | 0 warnings | 1 note + +This submission follows the initial publishing of the package 2 days ago, and fixes the issues raised by the CRAN team. + +* removed package tests that tested the speed of function execution diff --git a/dfeR.Rproj b/dfeR.Rproj index 38b9011..43c8221 100644 --- a/dfeR.Rproj +++ b/dfeR.Rproj @@ -1,4 +1,5 @@ Version: 1.0 +ProjectId: 38a07b34-b094-4238-b7cd-d00ce125499e RestoreWorkspace: No SaveWorkspace: No diff --git a/README_template.md b/inst/README_template.md similarity index 100% rename from README_template.md rename to inst/README_template.md diff --git a/man/check_fetch_location_inputs.Rd b/man/check_fetch_location_inputs.Rd deleted file mode 100644 index 163a1e7..0000000 --- a/man/check_fetch_location_inputs.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fetch_utils.R -\name{check_fetch_location_inputs} -\alias{check_fetch_location_inputs} -\title{Validation for fetch location lookups} -\usage{ -check_fetch_location_inputs(year_input, country_input) -} -\arguments{ -\item{year_input}{the value of the years input} - -\item{country_input}{the value of the countries input} -} -\value{ -nothing, unless a failure, and then it will give an error -} -\description{ -Validation for fetch location lookups -} -\keyword{internal} diff --git a/man/create_time_series_lookup.Rd b/man/create_time_series_lookup.Rd deleted file mode 100644 index 9547671..0000000 --- a/man/create_time_series_lookup.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/datasets_utils.R -\name{create_time_series_lookup} -\alias{create_time_series_lookup} -\title{Smush lookups 
together to make a time series} -\usage{ -create_time_series_lookup(lookups_list) -} -\arguments{ -\item{lookups_list}{list of data frames of new lookup table, -usually the output of tidy_raw_lookup} -} -\value{ -single data.frame of all lookup files combined -} -\description{ -Take a list of tidied files, likely produced by the tidy_raw_lookup -function append together -} -\details{ -Updates the \code{first_available_year_included} and \code{most_recent_year_included} -columns so that they are accurate for the full ser-EES -} -\keyword{internal} diff --git a/man/dfeR-package.Rd b/man/dfeR-package.Rd index 7a25dbb..f4e2ff5 100644 --- a/man/dfeR-package.Rd +++ b/man/dfeR-package.Rd @@ -4,11 +4,11 @@ \name{dfeR-package} \alias{dfeR} \alias{dfeR-package} -\title{dfeR: Common DfE R tasks} +\title{dfeR: Common Department for Education Analysis Tasks} \description{ \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} -This package contains R functions to allow DfE analysts to re-use code for common analytical tasks that are undertaken across the Department. +Preferred methods for common analytical tasks that are undertaken across the Department, including number formatting, project templates and curated reference data. 
} \seealso{ Useful links: @@ -30,10 +30,12 @@ Authors: Other contributors: \itemize{ + \item Department for Education, England \email{statistics.development@education.gov.uk} [copyright holder] \item Jen Machin \email{jen.machin@education.gov.uk} [contributor] \item Jake Tufts \email{jake.tufts@education.gov.uk} [contributor] \item Rich Bielby \email{richard.bielby@education.gov.uk} (\href{https://orcid.org/0000-0001-9070-9969}{ORCID}) [contributor] \item Menna Zayed \email{menna.zayed@education.gov.uk} [contributor] + \item Lauren Snaathorst \email{lauren.snaathorst@education.gov.uk} [contributor] } } diff --git a/man/fetch_locations.Rd b/man/fetch_locations.Rd deleted file mode 100644 index c5d6e92..0000000 --- a/man/fetch_locations.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fetch_utils.R -\name{fetch_locations} -\alias{fetch_locations} -\title{Fetch locations for a given lookup} -\usage{ -fetch_locations(lookup_data, cols, year, countries) -} -\arguments{ -\item{lookup_data}{lookup data to use to extract locations from} - -\item{cols}{columns to extract from the main lookup table} - -\item{year}{year of locations to extract, "All" will skip any filtering and -return all possible locations} - -\item{countries}{countries for locations to be take from, "All" will skip -any filtering and return all} -} -\value{ -a data frame of location names and codes -} -\description{ -Helper function for the fetch_xxx() functions to save repeating code -} -\keyword{internal} diff --git a/man/geog_time_identifiers.Rd b/man/geog_time_identifiers.Rd index f5a0da6..2166f15 100644 --- a/man/geog_time_identifiers.Rd +++ b/man/geog_time_identifiers.Rd @@ -12,7 +12,7 @@ A character vector with 38 potential column names in snake case format. } \source{ curated by explore.statistics@education.gov.uk. 
-\href{https://shorturl.at/j4532}{Get guidance on time and geography data.} +\href{https://www.shorturl.at/j4532}{Guidance on time and geography data.} } \usage{ geog_time_identifiers diff --git a/man/get_lad_region.Rd b/man/get_lad_region.Rd deleted file mode 100644 index 03e8b0f..0000000 --- a/man/get_lad_region.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/datasets_utils.R -\name{get_lad_region} -\alias{get_lad_region} -\title{Get LAD to Region lookup} -\usage{ -get_lad_region(year) -} -\arguments{ -\item{year}{last two digits of the year of the lookup, available years are: -2017, 2018, 2019, 2020, 2022, 2023} -} -\value{ -data.frame for the individual year of the lookup -} -\description{ -Helper function to extract data from the Ward-LAD-Region-County-Country file -} -\keyword{internal} diff --git a/man/get_ons_api_data.Rd b/man/get_ons_api_data.Rd index f754e76..80a3a75 100644 --- a/man/get_ons_api_data.Rd +++ b/man/get_ons_api_data.Rd @@ -17,7 +17,7 @@ Geography Portal} \item{query_params}{query parameters to pass into the API, see the ESRI documentation for more information on query parameters - -\href{https://shorturl.at/5xrJT}{ESRI Query (Feature Service/Layer)}} +\href{https://www.shorturl.at/5xrJT}{ESRI Query (Feature Service/Layer)}} \item{batch_size}{the number of rows per query. This is 250 by default, if you hit errors then try lowering this. The API has a limit of 1000 to 2000 @@ -53,15 +53,17 @@ widely you should also look at the \href{https://github.com/francisbarton/boundr}{boundr package}. 
} \examples{ -if (interactive()) { - # Specify some parameters - get_ons_api_data( - data_id = "LAD23_RGN23_EN_LU", - query_params = - list(outFields = "column1, column2", outSR = "4326", f = "json") - ) +# Fetch everything from a data set +dfeR::get_ons_api_data(data_id = "LAD23_RGN23_EN_LU") - # Just fetch everything - get_ons_api_data(data_id = "LAD23_RGN23_EN_LU") -} +# Specify the columns you want +dfeR::get_ons_api_data( + "RGN_DEC_2023_EN_NC", + query_params = list( + where = "1=1", + outFields = "RGN23CD,RGN23NM", + outSR = 4326, + f = "json" + ) +) } diff --git a/man/get_wd_pcon_lad_la.Rd b/man/get_wd_pcon_lad_la.Rd deleted file mode 100644 index a19f429..0000000 --- a/man/get_wd_pcon_lad_la.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/datasets_utils.R -\name{get_wd_pcon_lad_la} -\alias{get_wd_pcon_lad_la} -\title{Get Ward-PCon-LAD-LA data} -\usage{ -get_wd_pcon_lad_la(year) -} -\arguments{ -\item{year}{last two digits of the year of the lookup, available years are: -2017, 2019, 2020, 2021, 2022, 2023, 2024} -} -\value{ -data.frame for the individual year of the lookup -} -\description{ -Helper function to extract data from the Ward-PCon-LAD-UTLA file -} -\keyword{internal} diff --git a/man/tidy_raw_lookup.Rd b/man/tidy_raw_lookup.Rd deleted file mode 100644 index 1468c29..0000000 --- a/man/tidy_raw_lookup.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/datasets_utils.R -\name{tidy_raw_lookup} -\alias{tidy_raw_lookup} -\title{Tidy a lookup file from the Open Geography Portal} -\usage{ -tidy_raw_lookup(raw_lookup_file) -} -\arguments{ -\item{raw_lookup_file}{data.frame of a lookup file downloaded from Open -Geography Portal, e.g. 
the output of \code{get_ons_api_data()}, or any other -data frame from R memory} -} -\value{ -a data frame of a tidied lookup file -Function to rename columns using the dfeR::ons_geog_shorthands table - -col_name single column name to be updated based on the shorthand -lookup table - -string for new column name if a match was found, if no match found -then the original name is returned -} -\description{ -Takes a file from the open geography portal and tidies it ready for -appending to an existing lookup -} -\keyword{internal} diff --git a/man/toggle_message.Rd b/man/toggle_message.Rd index d50c987..75410b7 100644 --- a/man/toggle_message.Rd +++ b/man/toggle_message.Rd @@ -13,6 +13,9 @@ toggle_message(..., verbose) \item{verbose}{logical, usually a variable passed from the function you are using this within} } +\value{ +No return value, called for side effects +} \description{ Quick expansion to the \code{message()} function aimed for use in functions for an easy addition of a global verbose TRUE / FALSE argument to toggle the diff --git a/man/z_replace.Rd b/man/z_replace.Rd index 687ebf5..53d0707 100644 --- a/man/z_replace.Rd +++ b/man/z_replace.Rd @@ -25,7 +25,7 @@ values for columns that are not for time or geography. \description{ Replaces \code{NA} values in tables except for ones in time and geography columns that must be included in DfE official statistics. 
-\href{https://shorturl.at/chy76}{Get more guidance on Open Data Standards.} +\href{https://www.shorturl.at/chy76}{Guidance on our Open Data Standards.} } \details{ Names of geography and time columns that are used in this function can be diff --git a/tests/testthat/test-z_replace.R b/tests/testthat/test-z_replace.R index e8559c7..2783fcf 100644 --- a/tests/testthat/test-z_replace.R +++ b/tests/testthat/test-z_replace.R @@ -56,7 +56,7 @@ test_that("Error messages are as expected in non-empty frames", { # testing error for non character strings in replacement_alt expect_error( z_replace(df, replacement_alt = 1), - cat( + paste0( "You provided a numeric input for replacement_alt.\n", "Please amend replace it with a character vector." ) @@ -65,7 +65,7 @@ test_that("Error messages are as expected in non-empty frames", { # testing error for multiple vectors in replacement_alt expect_error( z_replace(df, replacement_alt = c("a", "z", "x")), - cat( + paste0( "You provided multiple values for replacement_alt.\n", "Please, only provide a single value." ) @@ -103,45 +103,6 @@ test_that("exclude_columns works", { )) }) - -# Checking speed of the function - -# make this reproducible -set.seed(123) -# create table with randomly generated numbers -df <- data.frame( - a = sample(1:1000, 10000, replace = TRUE), - b = sample(1:1000, 10000, replace = TRUE), - c = sample(1:1000, 10000, replace = TRUE), - d = sample(1:1000, 10000, replace = TRUE), - e = sample(1:1000, 10000, replace = TRUE), - f = sample(1:1000, 10000, replace = TRUE), - e = sample(1:1000, 10000, replace = TRUE), - h = sample(1:1000, 10000, replace = TRUE), - i = sample(1:1000, 10000, replace = TRUE), - j = sample(1:1000, 10000, replace = TRUE), - school_urn = sample(1:1000, 10000, replace = TRUE) -) - -# putting NAs in the table -df <- df %>% - dplyr::mutate(across( - a:school_urn, - ~ dplyr::if_else(. < 300, as.double(NA), .) 
- )) - -start_time <- Sys.time() -z_replace(df) -end_time <- Sys.time() -test_time <- difftime(end_time, start_time, units = "secs") - -# calculating the time it takes - -# testing that the speed is less than 0.25 second -test_that("Speed of the function", { - expect_equal(test_time < 0.25, TRUE) -}) - # Check error message for empty data frame # create table @@ -166,12 +127,5 @@ df <- data.frame( ) test_that("Formatting of column names are checked", { - expect_error( - z_replace(df), - cat( - "Your table has geography and/or time column(s) that are not", - "in snake_case.\nPlease amend your column names to match the formatting", - "to dfeR::geog_time_identifiers." - ) - ) + expect_error(z_replace(df)) })