diff --git a/.Rbuildignore b/.Rbuildignore
index b9c98a3..f4c5d16 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -11,3 +11,4 @@
^data-raw$
^doc$
^Meta$
+^cran-comments\.md$
diff --git a/DESCRIPTION b/DESCRIPTION
index 7bbb0b9..c023d75 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,20 +1,22 @@
Type: Package
Package: dfeR
-Title: Common DfE R tasks
-Version: 0.6.1.9000
+Title: Common Department for Education Analysis Tasks
+Version: 1.0.1
Authors@R: c(
person("Cam", "Race", , "cameron.race@education.gov.uk", role = c("aut", "cre")),
+ person("Department for Education, England", , , "statistics.development@education.gov.uk", role = "cph"),
person("Laura", "Selby", , "laura.selby@education.gov.uk", role = "aut"),
person("Adam", "Robinson", role = "aut"),
person("Jen", "Machin", , "jen.machin@education.gov.uk", role = "ctb"),
person("Jake", "Tufts", , "jake.tufts@education.gov.uk", role = "ctb"),
person("Rich", "Bielby", , "richard.bielby@education.gov.uk", role = "ctb",
comment = c(ORCID = "0000-0001-9070-9969")),
- person("Menna", "Zayed", , "menna.zayed@education.gov.uk", role = "ctb")
+ person("Menna", "Zayed", , "menna.zayed@education.gov.uk", role = "ctb"),
+ person("Lauren", "Snaathorst", , "lauren.snaathorst@education.gov.uk", role = "ctb")
)
-Description: This package contains R functions to allow DfE analysts to
- re-use code for common analytical tasks that are undertaken across the
- Department.
+Description: Preferred methods for common analytical tasks that are
+ undertaken across the Department, including number formatting, project
+ templates and curated reference data.
License: GPL (>= 3)
URL: https://dfe-analytical-services.github.io/dfeR/,
https://github.com/dfe-analytical-services/dfeR
diff --git a/NEWS.md b/NEWS.md
index 39dd128..9dc5187 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,11 @@
-# dfeR (development version)
+# dfeR 1.0.1
-Added lookup data geog_time_identifiers
+Fixed the spacing and printing of the z_replace() warning message, updated the eesyapi URL in the README and removed extraneous package tests.
+
+# dfeR 1.0.0
+
+Initial CRAN release.
+Added lookup data geog_time_identifiers.
Added z_replace() to replace NA values in tables except for ones in geography and time columns that match ones in geog_time_identifiers.
# dfeR 0.6.1
diff --git a/R/create_project.R b/R/create_project.R
index 722741f..fd9c7a1 100644
--- a/R/create_project.R
+++ b/R/create_project.R
@@ -77,7 +77,7 @@ create_project <- function(
"call those functions, run\n",
"# `source('R/helper_functions.R')` at the start of your ",
"script.\n\n",
- "print('Your scripts and functions should be in ",
+ "message('Your scripts and functions should be in ",
"the R folder.')"
)
)
@@ -175,7 +175,10 @@ create_project <- function(
# Create the readme -----
- file.copy("README_template.md", file.path(path, "README.md"))
+ file.copy(
+ system.file(package = "dfeR", "README_template.md"),
+ file.path(path, "README.md")
+ )
# .renvignore
file.create(paste0(path, "/.renvignore"))
@@ -242,8 +245,6 @@ create_project <- function(
)
}
-
-
# Create a .Rprofile with a custom welcome message
if (!file.exists(paste0(path, "/.Rprofile"))) {
file.create(paste0(path, "/.Rprofile"))
@@ -267,13 +268,9 @@ create_project <- function(
)
writeLines(rprofile_content, paste0(path, "/.Rprofile"))
-
-
-
-
# Successful project creation message (or delete project if fails)
if (successful_creation) {
- cat(
+ message(
paste0(
"\n\n",
"****************************************************************\n",
diff --git a/R/datasets_documentation.R b/R/datasets_documentation.R
index cffb579..28e638a 100644
--- a/R/datasets_documentation.R
+++ b/R/datasets_documentation.R
@@ -114,5 +114,5 @@
#' @format ## `geog_time_identifiers`
#' A character vector with 38 potential column names in snake case format.
#' @source curated by explore.statistics@@education.gov.uk.
-#' \href{https://shorturl.at/j4532}{Get guidance on time and geography data.}
+#' \href{https://www.shorturl.at/j4532}{Guidance on time and geography data.}
"geog_time_identifiers"
diff --git a/R/datasets_utils.R b/R/datasets_utils.R
index 20dded5..db5e98e 100644
--- a/R/datasets_utils.R
+++ b/R/datasets_utils.R
@@ -19,6 +19,7 @@
#' data frame from R memory
#'
#' @keywords internal
+#' @noRd
#' @return a data frame of a tidied lookup file
tidy_raw_lookup <- function(raw_lookup_file) {
if (!is.data.frame(raw_lookup_file)) {
@@ -142,6 +143,7 @@ tidy_raw_lookup <- function(raw_lookup_file) {
#' usually the output of tidy_raw_lookup
#'
#' @return single data.frame of all lookup files combined
+#' @noRd
create_time_series_lookup <- function(lookups_list) {
# Input validation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Added some quick checks based on the assumptions we make in this function
@@ -234,6 +236,7 @@ create_time_series_lookup <- function(lookups_list) {
#' @return data.frame for the individual year of the lookup
#'
#' @keywords internal
+#' @noRd
get_wd_pcon_lad_la <- function(year) {
# Crude way to grab 2 digits, works for anything that isn't in the noughties
year_end <- year %% 100
@@ -304,6 +307,7 @@ get_wd_pcon_lad_la <- function(year) {
#' @return data.frame for the individual year of the lookup
#'
#' @keywords internal
+#' @noRd
get_lad_region <- function(year) {
# Crude way to grab 2 digits, works for anything that isn't in the noughties
year_end <- year %% 100
diff --git a/R/fetch_utils.R b/R/fetch_utils.R
index 480e4c7..9437835 100644
--- a/R/fetch_utils.R
+++ b/R/fetch_utils.R
@@ -5,6 +5,7 @@
#'
#' @return nothing, unless a failure, and then it will give an error
#' @keywords internal
+#' @noRd
check_fetch_location_inputs <- function(year_input, country_input) {
if (year_input != "All") {
if (!grepl("^\\d{4}$", as.character(year_input))) {
@@ -39,6 +40,7 @@ check_fetch_location_inputs <- function(year_input, country_input) {
#'
#' @return a data frame of location names and codes
#' @keywords internal
+#' @noRd
fetch_locations <- function(lookup_data, cols, year, countries) {
# Return only the cols we specified
# We know their position from the dplyr selection of the lookup
diff --git a/R/get_ons_api_data.R b/R/get_ons_api_data.R
index a7b43da..a0fda85 100644
--- a/R/get_ons_api_data.R
+++ b/R/get_ons_api_data.R
@@ -22,7 +22,7 @@
#' Geography Portal
#' @param query_params query parameters to pass into the API, see the ESRI
#' documentation for more information on query parameters -
-#' \href{https://shorturl.at/5xrJT}{ESRI Query (Feature Service/Layer)}
+#' \href{https://www.shorturl.at/5xrJT}{ESRI Query (Feature Service/Layer)}
#' @param batch_size the number of rows per query. This is 250 by default, if
#' you hit errors then try lowering this. The API has a limit of 1000 to 2000
#' rows per query, and in truth, the actual limit for our method is lower as
@@ -36,17 +36,19 @@
#' @return parsed data.frame of geographic names and codes
#'
#' @examples
-#' if (interactive()) {
-#' # Specify some parameters
-#' get_ons_api_data(
-#' data_id = "LAD23_RGN23_EN_LU",
-#' query_params =
-#' list(outFields = "column1, column2", outSR = "4326", f = "json")
-#' )
+#' # Fetch everything from a data set
+#' dfeR::get_ons_api_data(data_id = "LAD23_RGN23_EN_LU")
#'
-#' # Just fetch everything
-#' get_ons_api_data(data_id = "LAD23_RGN23_EN_LU")
-#' }
+#' # Specify the columns you want
+#' dfeR::get_ons_api_data(
+#' "RGN_DEC_2023_EN_NC",
+#' query_params = list(
+#' where = "1=1",
+#' outFields = "RGN23CD,RGN23NM",
+#' outSR = 4326,
+#' f = "json"
+#' )
+#' )
get_ons_api_data <- function(data_id,
query_params =
list(
diff --git a/R/toggle_message.R b/R/toggle_message.R
index 2bfbef5..92b26ea 100644
--- a/R/toggle_message.R
+++ b/R/toggle_message.R
@@ -10,6 +10,8 @@
#' @param verbose logical, usually a variable passed from the function you are
#' using this within
#'
+#' @return No return value, called for side effects
+#'
#' @export
#'
#' @examples
diff --git a/R/z_replace.R b/R/z_replace.R
index d595f1c..7c27558 100644
--- a/R/z_replace.R
+++ b/R/z_replace.R
@@ -3,7 +3,7 @@
#' @description
#' Replaces `NA` values in tables except for ones in time and geography
#' columns that must be included in DfE official statistics.
-#' \href{https://shorturl.at/chy76}{Get more guidance on Open Data Standards.}
+#' \href{https://www.shorturl.at/chy76}{Guidance on our Open Data Standards.}
#'
#' @details
@@ -87,7 +87,7 @@ z_replace <- function(data,
stop(
"Your table has geography and/or time column(s) that are not ",
"in snake_case.\nPlease amend your column names to match the formatting",
- "to dfeR::geog_time_identifiers."
+ " of dfeR::geog_time_identifiers."
)
}
diff --git a/README.Rmd b/README.Rmd
index b69e95b..5c6543e 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -13,7 +13,7 @@ knitr::opts_chunk$set(
)
```
-# dfeR
+# dfeR
[![R-CMD-check](https://github.com/dfe-analytical-services/dfeR/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/dfe-analytical-services/dfeR/actions/workflows/R-CMD-check.yaml)
@@ -38,13 +38,13 @@ Functionality for dfeR is expected to focus around the following:
4. API wrappers commonly needed in DfE analysis (where they don't have their own separate package)
5. Geography lookup files and helper functions
-Documentation for what has been included in the package so far is on our [pkgdown site](http://dfe-analytical-services.github.io/dfeR/).
+Documentation for what has been included in the package so far is on our [pkgdown site](https://dfe-analytical-services.github.io/dfeR/).
### Relevant other packages
We also maintain the [dfeshiny](https://github.com/dfe-analytical-services/dfeshiny) package, and expect any functions specific to R Shiny applications will live there.
-For connecting to data in the [explore education statistics](https://explore-education-statistics.service.gov.uk/), we are building the [eesyapi](https://github.com/dfe-analytical-services/eesyapi) package.
+For connecting to data in the [explore education statistics](https://explore-education-statistics.service.gov.uk/), we are building the [eesyapi](https://github.com/dfe-analytical-services/eesyapi.R) package.
There is a [giasr](https://github.com/dfe-analytical-services/giasr) package, which has been developed for connecting to data in the [get information about schools service](https://get-information-schools.service.gov.uk/).
@@ -54,7 +54,13 @@ While we have some DfE specific data in the dfeR package taken from the [Open Ge
## Installation
-dfeR is not currently available on CRAN. For the time being you can install the development version from GitHub.
+dfeR is available on CRAN and you can install directly from there:
+
+``` r
+install.packages("dfeR")
+```
+
+You can install the development version from GitHub.
If you are using [renv](https://rstudio.github.io/renv/articles/renv.html) in your project (recommended):
@@ -73,7 +79,7 @@ devtools::install_github("dfe-analytical-services/dfeR")
## Proxy
-The need for setting proxy settings in order to be able to work with R and Git within the DfE estate has now ended. If you previously run the proxy script in previous versions of the dfeR package, then contact the [Statistics Development Team](statistics.development@education.gov.uk) to assist in cleaning out your system settings.
+The need for setting proxy settings in order to be able to work with R and Git within the DfE estate has now ended. If you previously ran the proxy script in previous versions of the dfeR package, then contact the [Statistics Development Team](mailto:statistics.development@education.gov.uk) to assist in cleaning out your system settings.
---
diff --git a/README.md b/README.md
index 75192b4..5be6f02 100644
--- a/README.md
+++ b/README.md
@@ -1,282 +1,288 @@
-
-
-
-# dfeR
-
-
-
-[![R-CMD-check](https://github.com/dfe-analytical-services/dfeR/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/dfe-analytical-services/dfeR/actions/workflows/R-CMD-check.yaml)
-[![pkgdown](https://github.com/dfe-analytical-services/dfeR/actions/workflows/pkgdown.yaml/badge.svg)](https://github.com/dfe-analytical-services/dfeR/actions/workflows/pkgdown.yaml)
-[![Codecov test
-coverage](https://codecov.io/gh/dfe-analytical-services/dfeR/branch/main/graph/badge.svg)](https://app.codecov.io/gh/dfe-analytical-services/dfeR?branch=main)
-[![Lifecycle:
-experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental)
-
-
-The goal of dfeR is to help standardise R programming across the
-Department for Education (DfE), and facilitate sharing of business
-specific functions, making our code easier to read and write.
-
-Credit to [lauraselby](https://github.com/lauraselby) for the logo
-featuring Frederick!
-
-## Scope
-
-This package is open to all of DfE and anything we think could be useful
-to other programmers and analysts can be contributed.
-
-Functionality for dfeR is expected to focus around the following:
-
-1. DfE specific formatting and helper functions
-2. Working with DfE databases
-3. Templates for analytical projects
-4. API wrappers commonly needed in DfE analysis (where they don’t have
- their own separate package)
-5. Geography lookup files and helper functions
-
-Documentation for what has been included in the package so far is on our
-[pkgdown site](http://dfe-analytical-services.github.io/dfeR/).
-
-### Relevant other packages
-
-We also maintain the
-[dfeshiny](https://github.com/dfe-analytical-services/dfeshiny) package,
-and expect any functions specific to R Shiny applications will live
-there.
-
-For connecting to data in the [explore education
-statistics](https://explore-education-statistics.service.gov.uk/), we
-are building the
-[eesyapi](https://github.com/dfe-analytical-services/eesyapi) package.
-
-There is a [giasr](https://github.com/dfe-analytical-services/giasr)
-package, which has been developed for connecting to data in the [get
-information about schools
-service](https://get-information-schools.service.gov.uk/).
-
-While we have some DfE specific data in the dfeR package taken from the
-[Open Geography Portal](https://geoportal.statistics.gov.uk/). If you’re
-looking at getting new data from the portal it is also worth looking at
-the [boundr](https://github.com/francisbarton/boundr) package, as this
-gives more functions for directly extracting data from there.
-
-------------------------------------------------------------------------
-
-## Installation
-
-dfeR is not currently available on CRAN. For the time being you can
-install the development version from GitHub.
-
-If you are using
-[renv](https://rstudio.github.io/renv/articles/renv.html) in your
-project (recommended):
-
-``` r
-renv::install("dfe-analytical-services/dfeR")
-```
-
-Otherwise:
-
-``` r
-# install.packages("devtools")
-devtools::install_github("dfe-analytical-services/dfeR")
-```
-
-------------------------------------------------------------------------
-
-## Proxy
-
-The need for setting proxy settings in order to be able to work with R
-and Git within the DfE estate has now ended. If you previously run the
-proxy script in previous versions of the dfeR package, then contact the
-[Statistics Development Team](statistics.development@education.gov.uk)
-to assist in cleaning out your system settings.
-
-------------------------------------------------------------------------
-
-## Contributing
-
-Ideas for dfeR should first be raised as a [GitHub
-issue](https://github.com/dfe-analytical-services/dfeR) after which
-anyone is free to write the code and create a pull request for review.
-
-For more details on contributing to dfeR, see our [contributing
-guidelines](https://dfe-analytical-services.github.io/dfeR/CONTRIBUTING.html).
-
-------------------------------------------------------------------------
-
-## Code of Conduct
-
-Please note that the dfeR project is released with a [Contributor Code
-of
-Conduct](https://dfe-analytical-services.github.io/dfeR/CODE_OF_CONDUCT.html).
-By contributing to this project, you agree to abide by its terms.
-
-------------------------------------------------------------------------
-
-## Examples
-
-Here are some example functions from within the package:
-
-``` r
-library(dfeR)
-
-# Prettify large numbers
-pretty_num(111111111, gbp = TRUE)
-#> [1] "£111.11 million"
-pretty_num(-11^8, dp = -1)
-#> [1] "-210 million"
-
-# Convert bytes to readable size
-pretty_filesize(77777777)
-#> [1] "77.78 MB"
-
-# Calculate elapsed time and present prettily
-start <- Sys.time()
-end <- Sys.time() + 789890
-pretty_time_taken(start, end)
-#> [1] "219 hours 24 minutes 50 seconds"
-
-# Round 5's up instead of bankers round used by round() in base R
-round_five_up(2.5)
-#> [1] 3
-round(2.5) # base R
-#> [1] 2
-
-# Custom formatting for academic and financial years
-format_ay(202425)
-#> [1] "2024/25"
-format_fy(202425)
-#> [1] "2024-25"
-format_ay_reverse("2024/25")
-#> [1] "202425"
-format_fy_reverse("2024-25")
-#> [1] "202425"
-
-# Get Ward to PCon to LAD to LA to Rgn to Ctry lookup file
-my_data <- dfeR::wd_pcon_lad_la_rgn_ctry
-head(my_data) # show first 5 rows in console
-#> first_available_year_included most_recent_year_included
-#> 1 2017 2017
-#> 2 2017 2017
-#> 3 2017 2020
-#> 4 2017 2017
-#> 5 2017 2020
-#> 6 2017 2017
-#> ward_name pcon_name lad_name la_name
-#> 1 Bastwell Blackburn Blackburn with Darwen Blackburn with Darwen
-#> 2 Ormesby Redcar Redcar and Cleveland Redcar and Cleveland
-#> 3 Burn Valley Hartlepool Hartlepool Hartlepool
-#> 4 Beardwood with Lammack Blackburn Blackburn with Darwen Blackburn with Darwen
-#> 5 De Bruce Hartlepool Hartlepool Hartlepool
-#> 6 St Germain's Redcar Redcar and Cleveland Redcar and Cleveland
-#> region_name country_name ward_code pcon_code lad_code new_la_code
-#> 1 North West England E05001621 E14000570 E06000008 E06000008
-#> 2 North East England E05001518 E14000891 E06000003 E06000003
-#> 3 North East England E05008942 E14000733 E06000001 E06000001
-#> 4 North West England E05001622 E14000570 E06000008 E06000008
-#> 5 North East England E05008943 E14000733 E06000001 E06000001
-#> 6 North East England E05001519 E14000891 E06000003 E06000003
-#> region_code country_code
-#> 1 E12000002 E92000001
-#> 2 E12000001 E92000001
-#> 3 E12000001 E92000001
-#> 4 E12000002 E92000001
-#> 5 E12000001 E92000001
-#> 6 E12000001 E92000001
-
-# Get all countries
-dfeR::countries
-#> country_code country_name
-#> 1 E92000001 England
-#> 2 K02000001 United Kingdom
-#> 3 K03000001 Great Britain
-#> 4 K04000001 England and Wales
-#> 5 N92000002 Northern Ireland
-#> 6 S92000003 Scotland
-#> 7 W92000004 Wales
-#> 8 z England, Wales and Northern Ireland
-#> 9 z Outside of England and unknown
-#> 10 z Outside of the United Kingdom and unknown
-
-# Get all PCon names and codes for 2024
-fetch_pcons(2024) |>
- head() # show first 5 rows only
-#> pcon_code pcon_name
-#> 1 S14000045 Midlothian
-#> 2 S14000027 Na h-Eileanan an Iar
-#> 3 S14000021 East Renfrewshire
-#> 4 S14000048 North Ayrshire and Arran
-#> 5 S14000051 Orkney and Shetland
-#> 6 E14001440 Redcar
-
-# Get All LADs in Scotland in 2017
-fetch_lads(2017, "Scotland") |>
- head() # show first 5 rows only
-#> lad_code lad_name
-#> 1 S12000019 Midlothian
-#> 2 S12000015 Fife
-#> 3 S12000014 Falkirk
-#> 4 S12000013 Na h-Eileanan Siar
-#> 5 S12000018 Inverclyde
-#> 6 S12000011 East Renfrewshire
-
-# Get all LAs in Scotland and Northern Ireland in 2022
-fetch_las(2022, c("Scotland", "Northern Ireland")) |>
- head() # show first 5 rows only
-#> new_la_code la_name
-#> 1 N09000003 Belfast
-#> 2 N09000004 Causeway Coast and Glens
-#> 3 N09000002 Armagh City, Banbridge and Craigavon
-#> 4 N09000005 Derry City and Strabane
-#> 5 N09000001 Antrim and Newtownabbey
-#> 6 N09000006 Fermanagh and Omagh
-
-# Get all Welsh wards for 2021
-fetch_wards(2021, "Wales") |>
- head() # show first 5 rows only
-#> ward_code ward_name
-#> 1 W05000981 Aethwy
-#> 2 W05000982 Bro Aberffraw
-#> 3 W05000983 Bro Rhosyr
-#> 4 W05000107 Tregarth & Mynydd Llandygai
-#> 5 W05000984 Caergybi
-#> 6 W05000985 Canolbarth Môn
-
-# The following have no specific years available and return all values
-fetch_regions()
-#> region_code region_name
-#> 1 E12000001 North East
-#> 2 E12000002 North West
-#> 3 E12000003 Yorkshire and The Humber
-#> 4 E12000004 East Midlands
-#> 5 E12000005 West Midlands
-#> 6 E12000006 East of England
-#> 7 E12000007 London
-#> 8 E12000008 South East
-#> 9 E12000009 South West
-#> 10 E13000001 Inner London
-#> 11 E13000002 Outer London
-#> 12 z Outside of England and unknown
-#> 13 z Outside of the United Kingdom and unknown
-#> 14 z Outside of England
-#> 15 z Outside of United Kingdom
-#> 16 z Unknown
-
-fetch_countries()
-#> country_code country_name
-#> 1 E92000001 England
-#> 2 K02000001 United Kingdom
-#> 3 K03000001 Great Britain
-#> 4 K04000001 England and Wales
-#> 5 N92000002 Northern Ireland
-#> 6 S92000003 Scotland
-#> 7 W92000004 Wales
-#> 8 z England, Wales and Northern Ireland
-#> 9 z Outside of England and unknown
-#> 10 z Outside of the United Kingdom and unknown
-```
-
-For more details on all the functions available in this package, and
-examples of how to use them, please see our [dfeR package reference
-documentation](https://dfe-analytical-services.github.io/dfeR/reference/index.html).
+
+
+
+# dfeR
+
+
+
+[![R-CMD-check](https://github.com/dfe-analytical-services/dfeR/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/dfe-analytical-services/dfeR/actions/workflows/R-CMD-check.yaml)
+[![pkgdown](https://github.com/dfe-analytical-services/dfeR/actions/workflows/pkgdown.yaml/badge.svg)](https://github.com/dfe-analytical-services/dfeR/actions/workflows/pkgdown.yaml)
+[![Codecov test
+coverage](https://codecov.io/gh/dfe-analytical-services/dfeR/branch/main/graph/badge.svg)](https://app.codecov.io/gh/dfe-analytical-services/dfeR?branch=main)
+[![Lifecycle:
+experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental)
+
+
+The goal of dfeR is to help standardise R programming across the
+Department for Education (DfE), and facilitate sharing of business
+specific functions, making our code easier to read and write.
+
+Credit to [lauraselby](https://github.com/lauraselby) for the logo
+featuring Frederick!
+
+## Scope
+
+This package is open to all of DfE and anything we think could be useful
+to other programmers and analysts can be contributed.
+
+Functionality for dfeR is expected to focus around the following:
+
+1. DfE specific formatting and helper functions
+2. Working with DfE databases
+3. Templates for analytical projects
+4. API wrappers commonly needed in DfE analysis (where they don’t have
+ their own separate package)
+5. Geography lookup files and helper functions
+
+Documentation for what has been included in the package so far is on our
+[pkgdown site](https://dfe-analytical-services.github.io/dfeR/).
+
+### Relevant other packages
+
+We also maintain the
+[dfeshiny](https://github.com/dfe-analytical-services/dfeshiny) package,
+and expect any functions specific to R Shiny applications will live
+there.
+
+For connecting to data in the [explore education
+statistics](https://explore-education-statistics.service.gov.uk/), we
+are building the
+[eesyapi](https://github.com/dfe-analytical-services/eesyapi.R) package.
+
+There is a [giasr](https://github.com/dfe-analytical-services/giasr)
+package, which has been developed for connecting to data in the [get
+information about schools
+service](https://get-information-schools.service.gov.uk/).
+
+While we have some DfE specific data in the dfeR package taken from the
+[Open Geography Portal](https://geoportal.statistics.gov.uk/). If you’re
+looking at getting new data from the portal it is also worth looking at
+the [boundr](https://github.com/francisbarton/boundr) package, as this
+gives more functions for directly extracting data from there.
+
+------------------------------------------------------------------------
+
+## Installation
+
+dfeR is available on CRAN and you can install directly from there:
+
+``` r
+install.packages("dfeR")
+```
+
+You can install the development version from GitHub.
+
+If you are using
+[renv](https://rstudio.github.io/renv/articles/renv.html) in your
+project (recommended):
+
+``` r
+renv::install("dfe-analytical-services/dfeR")
+```
+
+Otherwise:
+
+``` r
+# install.packages("devtools")
+devtools::install_github("dfe-analytical-services/dfeR")
+```
+
+------------------------------------------------------------------------
+
+## Proxy
+
+The need for setting proxy settings in order to be able to work with R
+and Git within the DfE estate has now ended. If you previously ran the
+proxy script in previous versions of the dfeR package, then contact the
+[Statistics Development
+Team](mailto:statistics.development@education.gov.uk) to assist in
+cleaning out your system settings.
+
+------------------------------------------------------------------------
+
+## Contributing
+
+Ideas for dfeR should first be raised as a [GitHub
+issue](https://github.com/dfe-analytical-services/dfeR) after which
+anyone is free to write the code and create a pull request for review.
+
+For more details on contributing to dfeR, see our [contributing
+guidelines](https://dfe-analytical-services.github.io/dfeR/CONTRIBUTING.html).
+
+------------------------------------------------------------------------
+
+## Code of Conduct
+
+Please note that the dfeR project is released with a [Contributor Code
+of
+Conduct](https://dfe-analytical-services.github.io/dfeR/CODE_OF_CONDUCT.html).
+By contributing to this project, you agree to abide by its terms.
+
+------------------------------------------------------------------------
+
+## Examples
+
+Here are some example functions from within the package:
+
+``` r
+library(dfeR)
+
+# Prettify large numbers
+pretty_num(111111111, gbp = TRUE)
+#> [1] "£111 million"
+pretty_num(-11^8, dp = -1)
+#> [1] "-210 million"
+
+# Convert bytes to readable size
+pretty_filesize(77777777)
+#> [1] "77.78 MB"
+
+# Calculate elapsed time and present prettily
+start <- Sys.time()
+end <- Sys.time() + 789890
+pretty_time_taken(start, end)
+#> [1] "219 hours 24 minutes 50 seconds"
+
+# Round 5's up instead of bankers round used by round() in base R
+round_five_up(2.5)
+#> [1] 3
+round(2.5) # base R
+#> [1] 2
+
+# Custom formatting for academic and financial years
+format_ay(202425)
+#> [1] "2024/25"
+format_fy(202425)
+#> [1] "2024-25"
+format_ay_reverse("2024/25")
+#> [1] "202425"
+format_fy_reverse("2024-25")
+#> [1] "202425"
+
+# Get Ward to PCon to LAD to LA to Rgn to Ctry lookup file
+my_data <- dfeR::wd_pcon_lad_la_rgn_ctry
+head(my_data) # show first 5 rows in console
+#> first_available_year_included most_recent_year_included
+#> 1 2017 2017
+#> 2 2017 2017
+#> 3 2017 2020
+#> 4 2017 2017
+#> 5 2017 2020
+#> 6 2017 2017
+#> ward_name pcon_name lad_name la_name
+#> 1 Bastwell Blackburn Blackburn with Darwen Blackburn with Darwen
+#> 2 Ormesby Redcar Redcar and Cleveland Redcar and Cleveland
+#> 3 Burn Valley Hartlepool Hartlepool Hartlepool
+#> 4 Beardwood with Lammack Blackburn Blackburn with Darwen Blackburn with Darwen
+#> 5 De Bruce Hartlepool Hartlepool Hartlepool
+#> 6 St Germain's Redcar Redcar and Cleveland Redcar and Cleveland
+#> region_name country_name ward_code pcon_code lad_code new_la_code
+#> 1 North West England E05001621 E14000570 E06000008 E06000008
+#> 2 North East England E05001518 E14000891 E06000003 E06000003
+#> 3 North East England E05008942 E14000733 E06000001 E06000001
+#> 4 North West England E05001622 E14000570 E06000008 E06000008
+#> 5 North East England E05008943 E14000733 E06000001 E06000001
+#> 6 North East England E05001519 E14000891 E06000003 E06000003
+#> region_code country_code
+#> 1 E12000002 E92000001
+#> 2 E12000001 E92000001
+#> 3 E12000001 E92000001
+#> 4 E12000002 E92000001
+#> 5 E12000001 E92000001
+#> 6 E12000001 E92000001
+
+# Get all countries
+dfeR::countries
+#> country_code country_name
+#> 1 E92000001 England
+#> 2 K02000001 United Kingdom
+#> 3 K03000001 Great Britain
+#> 4 K04000001 England and Wales
+#> 5 N92000002 Northern Ireland
+#> 6 S92000003 Scotland
+#> 7 W92000004 Wales
+#> 8 z England, Wales and Northern Ireland
+#> 9 z Outside of England and unknown
+#> 10 z Outside of the United Kingdom and unknown
+
+# Get all PCon names and codes for 2024
+fetch_pcons(2024) |>
+ head() # show first 5 rows only
+#> pcon_code pcon_name
+#> 1 S14000045 Midlothian
+#> 2 S14000027 Na h-Eileanan an Iar
+#> 3 S14000021 East Renfrewshire
+#> 4 S14000048 North Ayrshire and Arran
+#> 5 S14000051 Orkney and Shetland
+#> 6 E14001440 Redcar
+
+# Get All LADs in Scotland in 2017
+fetch_lads(2017, "Scotland") |>
+ head() # show first 5 rows only
+#> lad_code lad_name
+#> 1 S12000019 Midlothian
+#> 2 S12000015 Fife
+#> 3 S12000014 Falkirk
+#> 4 S12000013 Na h-Eileanan Siar
+#> 5 S12000018 Inverclyde
+#> 6 S12000011 East Renfrewshire
+
+# Get all LAs in Scotland and Northern Ireland in 2022
+fetch_las(2022, c("Scotland", "Northern Ireland")) |>
+ head() # show first 5 rows only
+#> new_la_code la_name
+#> 1 N09000003 Belfast
+#> 2 N09000004 Causeway Coast and Glens
+#> 3 N09000002 Armagh City, Banbridge and Craigavon
+#> 4 N09000005 Derry City and Strabane
+#> 5 N09000001 Antrim and Newtownabbey
+#> 6 N09000006 Fermanagh and Omagh
+
+# Get all Welsh wards for 2021
+fetch_wards(2021, "Wales") |>
+ head() # show first 5 rows only
+#> ward_code ward_name
+#> 1 W05000981 Aethwy
+#> 2 W05000982 Bro Aberffraw
+#> 3 W05000983 Bro Rhosyr
+#> 4 W05000107 Tregarth & Mynydd Llandygai
+#> 5 W05000984 Caergybi
+#> 6 W05000985 Canolbarth Môn
+
+# The following have no specific years available and return all values
+fetch_regions()
+#> region_code region_name
+#> 1 E12000001 North East
+#> 2 E12000002 North West
+#> 3 E12000003 Yorkshire and The Humber
+#> 4 E12000004 East Midlands
+#> 5 E12000005 West Midlands
+#> 6 E12000006 East of England
+#> 7 E12000007 London
+#> 8 E12000008 South East
+#> 9 E12000009 South West
+#> 10 E13000001 Inner London
+#> 11 E13000002 Outer London
+#> 12 z Outside of England and unknown
+#> 13 z Outside of the United Kingdom and unknown
+#> 14 z Outside of England
+#> 15 z Outside of United Kingdom
+#> 16 z Unknown
+
+fetch_countries()
+#> country_code country_name
+#> 1 E92000001 England
+#> 2 K02000001 United Kingdom
+#> 3 K03000001 Great Britain
+#> 4 K04000001 England and Wales
+#> 5 N92000002 Northern Ireland
+#> 6 S92000003 Scotland
+#> 7 W92000004 Wales
+#> 8 z England, Wales and Northern Ireland
+#> 9 z Outside of England and unknown
+#> 10 z Outside of the United Kingdom and unknown
+```
+
+For more details on all the functions available in this package, and
+examples of how to use them, please see our [dfeR package reference
+documentation](https://dfe-analytical-services.github.io/dfeR/reference/index.html).
diff --git a/cran-comments.md b/cran-comments.md
new file mode 100644
index 0000000..5bdf9eb
--- /dev/null
+++ b/cran-comments.md
@@ -0,0 +1,7 @@
+## R CMD check results
+
+0 errors | 0 warnings | 1 note
+
+This submission follows the initial publishing of the package 2 days ago, and fixes the issues raised by the CRAN team.
+
+* removed package tests that tested the speed of function execution
diff --git a/dfeR.Rproj b/dfeR.Rproj
index 38b9011..43c8221 100644
--- a/dfeR.Rproj
+++ b/dfeR.Rproj
@@ -1,4 +1,5 @@
Version: 1.0
+ProjectId: 38a07b34-b094-4238-b7cd-d00ce125499e
RestoreWorkspace: No
SaveWorkspace: No
diff --git a/README_template.md b/inst/README_template.md
similarity index 100%
rename from README_template.md
rename to inst/README_template.md
diff --git a/man/check_fetch_location_inputs.Rd b/man/check_fetch_location_inputs.Rd
deleted file mode 100644
index 163a1e7..0000000
--- a/man/check_fetch_location_inputs.Rd
+++ /dev/null
@@ -1,20 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/fetch_utils.R
-\name{check_fetch_location_inputs}
-\alias{check_fetch_location_inputs}
-\title{Validation for fetch location lookups}
-\usage{
-check_fetch_location_inputs(year_input, country_input)
-}
-\arguments{
-\item{year_input}{the value of the years input}
-
-\item{country_input}{the value of the countries input}
-}
-\value{
-nothing, unless a failure, and then it will give an error
-}
-\description{
-Validation for fetch location lookups
-}
-\keyword{internal}
diff --git a/man/create_time_series_lookup.Rd b/man/create_time_series_lookup.Rd
deleted file mode 100644
index 9547671..0000000
--- a/man/create_time_series_lookup.Rd
+++ /dev/null
@@ -1,24 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/datasets_utils.R
-\name{create_time_series_lookup}
-\alias{create_time_series_lookup}
-\title{Smush lookups together to make a time series}
-\usage{
-create_time_series_lookup(lookups_list)
-}
-\arguments{
-\item{lookups_list}{list of data frames of new lookup table,
-usually the output of tidy_raw_lookup}
-}
-\value{
-single data.frame of all lookup files combined
-}
-\description{
-Take a list of tidied files, likely produced by the tidy_raw_lookup
-function append together
-}
-\details{
-Updates the \code{first_available_year_included} and \code{most_recent_year_included}
-columns so that they are accurate for the full ser-EES
-}
-\keyword{internal}
diff --git a/man/dfeR-package.Rd b/man/dfeR-package.Rd
index 7a25dbb..f4e2ff5 100644
--- a/man/dfeR-package.Rd
+++ b/man/dfeR-package.Rd
@@ -4,11 +4,11 @@
\name{dfeR-package}
\alias{dfeR}
\alias{dfeR-package}
-\title{dfeR: Common DfE R tasks}
+\title{dfeR: Common Department for Education Analysis Tasks}
\description{
\if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}}
-This package contains R functions to allow DfE analysts to re-use code for common analytical tasks that are undertaken across the Department.
+Preferred methods for common analytical tasks that are undertaken across the Department, including number formatting, project templates and curated reference data.
}
\seealso{
Useful links:
@@ -30,10 +30,12 @@ Authors:
Other contributors:
\itemize{
+ \item Department for Education, England \email{statistics.development@education.gov.uk} [copyright holder]
\item Jen Machin \email{jen.machin@education.gov.uk} [contributor]
\item Jake Tufts \email{jake.tufts@education.gov.uk} [contributor]
\item Rich Bielby \email{richard.bielby@education.gov.uk} (\href{https://orcid.org/0000-0001-9070-9969}{ORCID}) [contributor]
\item Menna Zayed \email{menna.zayed@education.gov.uk} [contributor]
+ \item Lauren Snaathorst \email{lauren.snaathorst@education.gov.uk} [contributor]
}
}
diff --git a/man/fetch_locations.Rd b/man/fetch_locations.Rd
deleted file mode 100644
index c5d6e92..0000000
--- a/man/fetch_locations.Rd
+++ /dev/null
@@ -1,26 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/fetch_utils.R
-\name{fetch_locations}
-\alias{fetch_locations}
-\title{Fetch locations for a given lookup}
-\usage{
-fetch_locations(lookup_data, cols, year, countries)
-}
-\arguments{
-\item{lookup_data}{lookup data to use to extract locations from}
-
-\item{cols}{columns to extract from the main lookup table}
-
-\item{year}{year of locations to extract, "All" will skip any filtering and
-return all possible locations}
-
-\item{countries}{countries for locations to be take from, "All" will skip
-any filtering and return all}
-}
-\value{
-a data frame of location names and codes
-}
-\description{
-Helper function for the fetch_xxx() functions to save repeating code
-}
-\keyword{internal}
diff --git a/man/geog_time_identifiers.Rd b/man/geog_time_identifiers.Rd
index f5a0da6..2166f15 100644
--- a/man/geog_time_identifiers.Rd
+++ b/man/geog_time_identifiers.Rd
@@ -12,7 +12,7 @@ A character vector with 38 potential column names in snake case format.
}
\source{
curated by explore.statistics@education.gov.uk.
-\href{https://shorturl.at/j4532}{Get guidance on time and geography data.}
+\href{https://www.shorturl.at/j4532}{Guidance on time and geography data.}
}
\usage{
geog_time_identifiers
diff --git a/man/get_lad_region.Rd b/man/get_lad_region.Rd
deleted file mode 100644
index 03e8b0f..0000000
--- a/man/get_lad_region.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/datasets_utils.R
-\name{get_lad_region}
-\alias{get_lad_region}
-\title{Get LAD to Region lookup}
-\usage{
-get_lad_region(year)
-}
-\arguments{
-\item{year}{last two digits of the year of the lookup, available years are:
-2017, 2018, 2019, 2020, 2022, 2023}
-}
-\value{
-data.frame for the individual year of the lookup
-}
-\description{
-Helper function to extract data from the Ward-LAD-Region-County-Country file
-}
-\keyword{internal}
diff --git a/man/get_ons_api_data.Rd b/man/get_ons_api_data.Rd
index f754e76..80a3a75 100644
--- a/man/get_ons_api_data.Rd
+++ b/man/get_ons_api_data.Rd
@@ -17,7 +17,7 @@ Geography Portal}
\item{query_params}{query parameters to pass into the API, see the ESRI
documentation for more information on query parameters -
-\href{https://shorturl.at/5xrJT}{ESRI Query (Feature Service/Layer)}}
+\href{https://www.shorturl.at/5xrJT}{ESRI Query (Feature Service/Layer)}}
\item{batch_size}{the number of rows per query. This is 250 by default, if
you hit errors then try lowering this. The API has a limit of 1000 to 2000
@@ -53,15 +53,17 @@ widely you should also look at the
\href{https://github.com/francisbarton/boundr}{boundr package}.
}
\examples{
-if (interactive()) {
- # Specify some parameters
- get_ons_api_data(
- data_id = "LAD23_RGN23_EN_LU",
- query_params =
- list(outFields = "column1, column2", outSR = "4326", f = "json")
- )
+# Fetch everything from a data set
+dfeR::get_ons_api_data(data_id = "LAD23_RGN23_EN_LU")
- # Just fetch everything
- get_ons_api_data(data_id = "LAD23_RGN23_EN_LU")
-}
+# Specify the columns you want
+dfeR::get_ons_api_data(
+ "RGN_DEC_2023_EN_NC",
+ query_params = list(
+ where = "1=1",
+ outFields = "RGN23CD,RGN23NM",
+ outSR = 4326,
+ f = "json"
+ )
+)
}
diff --git a/man/get_wd_pcon_lad_la.Rd b/man/get_wd_pcon_lad_la.Rd
deleted file mode 100644
index a19f429..0000000
--- a/man/get_wd_pcon_lad_la.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/datasets_utils.R
-\name{get_wd_pcon_lad_la}
-\alias{get_wd_pcon_lad_la}
-\title{Get Ward-PCon-LAD-LA data}
-\usage{
-get_wd_pcon_lad_la(year)
-}
-\arguments{
-\item{year}{last two digits of the year of the lookup, available years are:
-2017, 2019, 2020, 2021, 2022, 2023, 2024}
-}
-\value{
-data.frame for the individual year of the lookup
-}
-\description{
-Helper function to extract data from the Ward-PCon-LAD-UTLA file
-}
-\keyword{internal}
diff --git a/man/tidy_raw_lookup.Rd b/man/tidy_raw_lookup.Rd
deleted file mode 100644
index 1468c29..0000000
--- a/man/tidy_raw_lookup.Rd
+++ /dev/null
@@ -1,28 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/datasets_utils.R
-\name{tidy_raw_lookup}
-\alias{tidy_raw_lookup}
-\title{Tidy a lookup file from the Open Geography Portal}
-\usage{
-tidy_raw_lookup(raw_lookup_file)
-}
-\arguments{
-\item{raw_lookup_file}{data.frame of a lookup file downloaded from Open
-Geography Portal, e.g. the output of \code{get_ons_api_data()}, or any other
-data frame from R memory}
-}
-\value{
-a data frame of a tidied lookup file
-Function to rename columns using the dfeR::ons_geog_shorthands table
-
-col_name single column name to be updated based on the shorthand
-lookup table
-
-string for new column name if a match was found, if no match found
-then the original name is returned
-}
-\description{
-Takes a file from the open geography portal and tidies it ready for
-appending to an existing lookup
-}
-\keyword{internal}
diff --git a/man/toggle_message.Rd b/man/toggle_message.Rd
index d50c987..75410b7 100644
--- a/man/toggle_message.Rd
+++ b/man/toggle_message.Rd
@@ -13,6 +13,9 @@ toggle_message(..., verbose)
\item{verbose}{logical, usually a variable passed from the function you are
using this within}
}
+\value{
+No return value, called for side effects
+}
\description{
Quick expansion to the \code{message()} function aimed for use in functions for
an easy addition of a global verbose TRUE / FALSE argument to toggle the
diff --git a/man/z_replace.Rd b/man/z_replace.Rd
index 687ebf5..53d0707 100644
--- a/man/z_replace.Rd
+++ b/man/z_replace.Rd
@@ -25,7 +25,7 @@ values for columns that are not for time or geography.
\description{
Replaces \code{NA} values in tables except for ones in time and geography
columns that must be included in DfE official statistics.
-\href{https://shorturl.at/chy76}{Get more guidance on Open Data Standards.}
+\href{https://www.shorturl.at/chy76}{Guidance on our Open Data Standards.}
}
\details{
Names of geography and time columns that are used in this function can be
diff --git a/tests/testthat/test-z_replace.R b/tests/testthat/test-z_replace.R
index e8559c7..2783fcf 100644
--- a/tests/testthat/test-z_replace.R
+++ b/tests/testthat/test-z_replace.R
@@ -56,7 +56,7 @@ test_that("Error messages are as expected in non-empty frames", {
# testing error for non character strings in replacement_alt
expect_error(
z_replace(df, replacement_alt = 1),
- cat(
+ paste0(
"You provided a numeric input for replacement_alt.\n",
"Please amend replace it with a character vector."
)
@@ -65,7 +65,7 @@ test_that("Error messages are as expected in non-empty frames", {
# testing error for multiple vectors in replacement_alt
expect_error(
z_replace(df, replacement_alt = c("a", "z", "x")),
- cat(
+ paste0(
"You provided multiple values for replacement_alt.\n",
"Please, only provide a single value."
)
@@ -103,45 +103,6 @@ test_that("exclude_columns works", {
))
})
-
-# Checking speed of the function
-
-# make this reproducible
-set.seed(123)
-# create table with randomly generated numbers
-df <- data.frame(
- a = sample(1:1000, 10000, replace = TRUE),
- b = sample(1:1000, 10000, replace = TRUE),
- c = sample(1:1000, 10000, replace = TRUE),
- d = sample(1:1000, 10000, replace = TRUE),
- e = sample(1:1000, 10000, replace = TRUE),
- f = sample(1:1000, 10000, replace = TRUE),
- e = sample(1:1000, 10000, replace = TRUE),
- h = sample(1:1000, 10000, replace = TRUE),
- i = sample(1:1000, 10000, replace = TRUE),
- j = sample(1:1000, 10000, replace = TRUE),
- school_urn = sample(1:1000, 10000, replace = TRUE)
-)
-
-# putting NAs in the table
-df <- df %>%
- dplyr::mutate(across(
- a:school_urn,
- ~ dplyr::if_else(. < 300, as.double(NA), .)
- ))
-
-start_time <- Sys.time()
-z_replace(df)
-end_time <- Sys.time()
-test_time <- difftime(end_time, start_time, units = "secs")
-
-# calculating the time it takes
-
-# testing that the speed is less than 0.25 second
-test_that("Speed of the function", {
- expect_equal(test_time < 0.25, TRUE)
-})
-
# Check error message for empty data frame
# create table
@@ -166,12 +127,5 @@ df <- data.frame(
)
test_that("Formatting of column names are checked", {
- expect_error(
- z_replace(df),
- cat(
- "Your table has geography and/or time column(s) that are not",
- "in snake_case.\nPlease amend your column names to match the formatting",
- "to dfeR::geog_time_identifiers."
- )
- )
+ expect_error(z_replace(df))
})