From 3a142cf708798a3c25f0d04f1833807c2d284f2a Mon Sep 17 00:00:00 2001 From: cjrace Date: Tue, 27 Feb 2024 11:05:33 +0000 Subject: [PATCH 01/22] fix typo in contributing guide --- .github/CONTRIBUTING.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 54df7a8..5e46bb2 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -46,8 +46,8 @@ Where possible, we'd recommend following the [Test Driven Development (TDD)](htt 1. Write tests for the behaviour you want. Either edit an existing test script, or if adding a new function, create a test script using: -``` r -usethis::usetest("name_of_new_function") +``` r_ +usethis::use_test("name_of_new_function") ``` 2. Write just enough code so that the tests pass. Again, either edit an existing function, or add a new R script using: From fa575a3511f9295915c8413306c5df5df2959a2a Mon Sep 17 00:00:00 2001 From: cjrace Date: Tue, 27 Feb 2024 17:16:42 +0000 Subject: [PATCH 02/22] start to add get_clean_sql and tests (WIP) --- R/get_clean_sql.R | 66 +++++++++++++++++++++++++++++ tests/sql_scripts/simple.sql | 3 ++ tests/testthat/test-get_clean_sql.R | 6 +++ 3 files changed, 75 insertions(+) create mode 100644 R/get_clean_sql.R create mode 100644 tests/sql_scripts/simple.sql create mode 100644 tests/testthat/test-get_clean_sql.R diff --git a/R/get_clean_sql.R b/R/get_clean_sql.R new file mode 100644 index 0000000..87719a2 --- /dev/null +++ b/R/get_clean_sql.R @@ -0,0 +1,66 @@ +#' Get clean SQL +#' +#' @description +#' This function cleans a SQL script, ready for using within R in the DfE. +#' +#' @param filepath path to a SQL script +#' @param additional_settings TRUE or FALSE boolean for the addition of +#' settings at the start of the SQL script +#' @return Cleaned string containing SQL query +#' @export +#' @examples +#' # This assumes you have already set up a database connection +#' # and have assigned to 'con' + +# vignette potential? + +#' # Pull a cleaned version of the SQL file into R +#' sql_query <- get_clean_sql("path_to_sql_file.sql") +#' +#' # Use the cleaned SQL query to query the database +#' \dontrun{dbGetQuery(con, statement = sql_query)} + +get_clean_sql <- function(filepath, additional_settings = FALSE) { + + # check that additional_settings is always NULL or TRUE + + # check filepath can be found and is a SQL file? + + + con <- file(filepath, "r") + sql.string <- "" + + while (TRUE) { + line <- readLines(con, n = 1) + + if (length(line) == 0) { + break + } + + line <- gsub("\\t", " ", line) + line <- gsub("\\n", " ", line) + + if (grepl("--", line) == TRUE) { + line <- paste(sub("--", "/*", line), "*/") + } + + sql.string <- paste(sql.string, line) + } + + close(con) + + if(is.null(additional_settings)){ + # clean up SQL - remove weird sign that appears sometimes + sql.string <- gsub("", " ", sql.string) + + } else if (additional_settings == TRUE){ + # Clean SQL and prefix with settings that sometimes help + sql.string <- paste( + "SET ANSI_PADDING OFF", + "SET NOCOUNT ON;", + gsub("", " ", sql.string) + ) + } + + return(sql.string) +} diff --git a/tests/sql_scripts/simple.sql b/tests/sql_scripts/simple.sql new file mode 100644 index 0000000..8e3865d --- /dev/null +++ b/tests/sql_scripts/simple.sql @@ -0,0 +1,3 @@ +-- Simple SQL script to get all data from my database table + +SELECT * FROM [my_database_table] diff --git a/tests/testthat/test-get_clean_sql.R b/tests/testthat/test-get_clean_sql.R new file mode 100644 index 0000000..cda5b5a --- /dev/null +++ b/tests/testthat/test-get_clean_sql.R @@ -0,0 +1,6 @@ +test_that("Can retrieve basic script", { + expect_equal( + get_clean_sql("sql_scripts/simple.sql"), + TRUE + ) +}) From 1bde4a3ed326133b992cd707be5fc31798fe1e6f Mon Sep 17 00:00:00 2001 From: cjrace <52536248+cjrace@users.noreply.github.com> Date: Tue, 27 Feb 2024 20:56:57 +0000 Subject: [PATCH 03/22] add code cov details to contributing file --- .github/CONTRIBUTING.md | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 5e46bb2..26b87e9 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -46,7 +46,7 @@ Where possible, we'd recommend following the [Test Driven Development (TDD)](htt 1. Write tests for the behaviour you want. Either edit an existing test script, or if adding a new function, create a test script using: -``` r_ +``` r usethis::use_test("name_of_new_function") ``` @@ -87,7 +87,7 @@ We recommend using the [usethis](https://usethis.r-lib.org/index.html) package w Add any packages the package users will need with: ``` r -usethis::use_package(pkgname, type = "imports") +usethis::use_package(pkgname) ``` Add any packages that package developers only may need with: @@ -161,6 +161,16 @@ lintr::lint_package() We use [testthat](https://cran.r-project.org/package=testthat) for unit tests, we expect all new functions to have some level of test coverage. +### Test coverage + +There are GitHub Actions workflows that check and link the package to [codecov.io](https://app.codecov.io/gh/dfe-analytical-services/), this runs automatic scans to check the % of lines in functions that we are testing. On the [dfeR codecov pages](https://app.codecov.io/gh/dfe-analytical-services/dfeR) you can preview the variation by branch and commit to see the impact of changes made. + +You will need to create an account or login using GitHub to see the pages. + +The current % of coverage is shown as a badge on the package [README on GitHub](https://github.com/dfe-analytical-services/dfeR). + +It is worth noting that 100% coverage does not mean that the tests are perfect, it only means that all lines are ran in tests, so it's more a measure of quantity rather than quality. Interesting to see all the same though, and we'd recommend using it to spot any potential elements of more complicated functions that you may have forgotten to test. + ### Spelling The [spelling](https://docs.ropensci.org/spelling/) package is used to check spelling. A custom word list of exceptions for this package exists in the `inst/` folder. From 27d73f5aacfdf53ebca9be2b66b4b4be1005c465 Mon Sep 17 00:00:00 2001 From: cjrace <52536248+cjrace@users.noreply.github.com> Date: Tue, 27 Feb 2024 20:59:23 +0000 Subject: [PATCH 04/22] Add link to reference documentation in the examples section --- README.Rmd | 2 ++ README.md | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/README.Rmd b/README.Rmd index 4c3ad5d..3feb848 100644 --- a/README.Rmd +++ b/README.Rmd @@ -91,3 +91,5 @@ This is a basic example showing the `format_ay()` function: library(dfeR) format_ay(202425) ``` + +For more details on all the functions available in this package, and examples of how to use them, please see our [dfeR package reference documentation](https://dfe-analytical-services.github.io/dfeR/reference/index.html). diff --git a/README.md b/README.md index 703c509..5cfc5b4 100644 --- a/README.md +++ b/README.md @@ -104,3 +104,7 @@ library(dfeR) format_ay(202425) #> [1] "2024/25" ``` + +For more details on all the functions available in this package, and +examples of how to use them, please see our [dfeR package reference +documentation](https://dfe-analytical-services.github.io/dfeR/reference/index.html). From af9a49cf53bfdc946a0c6308264ad0a76490ac0a Mon Sep 17 00:00:00 2001 From: cjrace <52536248+cjrace@users.noreply.github.com> Date: Wed, 28 Feb 2024 00:29:59 +0000 Subject: [PATCH 05/22] commit current progress adding unit tests (WIP) --- NAMESPACE | 1 + R/get_clean_sql.R | 7 ++++++- man/get_clean_sql.Rd | 29 +++++++++++++++++++++++++++++ tests/testthat/test-get_clean_sql.R | 20 +++++++++++++++++++- 4 files changed, 55 insertions(+), 2 deletions(-) create mode 100644 man/get_clean_sql.Rd diff --git a/NAMESPACE b/NAMESPACE index c00d9fd..0d9896d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand export(format_ay) +export(get_clean_sql) export(round_five_up) importFrom(lifecycle,deprecated) diff --git a/R/get_clean_sql.R b/R/get_clean_sql.R index 87719a2..ddbd9d6 100644 --- a/R/get_clean_sql.R +++ b/R/get_clean_sql.R @@ -22,7 +22,12 @@ get_clean_sql <- function(filepath, additional_settings = FALSE) { - # check that additional_settings is always NULL or TRUE + if(!additional_settings %in% c(TRUE, FALSE, T, F)){ + stop( + "additional_settings must be either TRUE or FALSE" + ) + } + # check filepath can be found and is a SQL file? diff --git a/man/get_clean_sql.Rd b/man/get_clean_sql.Rd new file mode 100644 index 0000000..318745a --- /dev/null +++ b/man/get_clean_sql.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_clean_sql.R +\name{get_clean_sql} +\alias{get_clean_sql} +\title{Get clean SQL} +\usage{ +get_clean_sql(filepath, additional_settings = FALSE) +} +\arguments{ +\item{filepath}{path to a SQL script} + +\item{additional_settings}{TRUE or FALSE boolean for the addition of +settings at the start of the SQL script} +} +\value{ +Cleaned string containing SQL query +} +\description{ +This function cleans a SQL script, ready for using within R in the DfE. +} +\examples{ +# This assumes you have already set up a database connection +# and have assigned to 'con' +# Pull a cleaned version of the SQL file into R +sql_query <- get_clean_sql("path_to_sql_file.sql") + +# Use the cleaned SQL query to query the database +\dontrun{dbGetQuery(con, statement = sql_query)} +} diff --git a/tests/testthat/test-get_clean_sql.R b/tests/testthat/test-get_clean_sql.R index cda5b5a..ad2dabc 100644 --- a/tests/testthat/test-get_clean_sql.R +++ b/tests/testthat/test-get_clean_sql.R @@ -1,6 +1,24 @@ test_that("Can retrieve basic script", { + expect_equal( + get_clean_sql("../sql_scripts/simple.sql"), + " /* Simple SQL script to get all data from my database table */ + SELECT * FROM [my_database_table]", + fixed + ) +}) + +test_that("Adds additional settings", { + # Check that the output starts with the desired lines + expect_true( + grepl( + "^SET ANSI_PADDING OFF SET NOCOUNT ON;", + get_clean_sql("sql_scripts/simple.sql", additional_settings = T) + ) + ) +}) + +test_that("Rejects non-boolean values for additional_settings", { expect_equal( get_clean_sql("sql_scripts/simple.sql"), - TRUE ) }) From 57f892c523447dba44456eec381bdfc1b7b0a916 Mon Sep 17 00:00:00 2001 From: cjrace Date: Tue, 27 Feb 2024 11:05:33 +0000 Subject: [PATCH 06/22] fix typo in contributing guide --- .github/CONTRIBUTING.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 54df7a8..5e46bb2 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -46,8 +46,8 @@ Where possible, we'd recommend following the [Test Driven Development (TDD)](htt 1. Write tests for the behaviour you want. Either edit an existing test script, or if adding a new function, create a test script using: -``` r -usethis::usetest("name_of_new_function") +``` r_ +usethis::use_test("name_of_new_function") ``` 2. Write just enough code so that the tests pass. Again, either edit an existing function, or add a new R script using: From 272bcad632df6aef602793feb9c8fbd57bf06847 Mon Sep 17 00:00:00 2001 From: cjrace Date: Tue, 27 Feb 2024 17:16:42 +0000 Subject: [PATCH 07/22] start to add get_clean_sql and tests (WIP) --- R/get_clean_sql.R | 66 +++++++++++++++++++++++++++++ tests/sql_scripts/simple.sql | 3 ++ tests/testthat/test-get_clean_sql.R | 6 +++ 3 files changed, 75 insertions(+) create mode 100644 R/get_clean_sql.R create mode 100644 tests/sql_scripts/simple.sql create mode 100644 tests/testthat/test-get_clean_sql.R diff --git a/R/get_clean_sql.R b/R/get_clean_sql.R new file mode 100644 index 0000000..87719a2 --- /dev/null +++ b/R/get_clean_sql.R @@ -0,0 +1,66 @@ +#' Get clean SQL +#' +#' @description +#' This function cleans a SQL script, ready for using within R in the DfE. +#' +#' @param filepath path to a SQL script +#' @param additional_settings TRUE or FALSE boolean for the addition of +#' settings at the start of the SQL script +#' @return Cleaned string containing SQL query +#' @export +#' @examples +#' # This assumes you have already set up a database connection +#' # and have assigned to 'con' + +# vignette potential? + +#' # Pull a cleaned version of the SQL file into R +#' sql_query <- get_clean_sql("path_to_sql_file.sql") +#' +#' # Use the cleaned SQL query to query the database +#' \dontrun{dbGetQuery(con, statement = sql_query)} + +get_clean_sql <- function(filepath, additional_settings = FALSE) { + + # check that additional_settings is always NULL or TRUE + + # check filepath can be found and is a SQL file? + + + con <- file(filepath, "r") + sql.string <- "" + + while (TRUE) { + line <- readLines(con, n = 1) + + if (length(line) == 0) { + break + } + + line <- gsub("\\t", " ", line) + line <- gsub("\\n", " ", line) + + if (grepl("--", line) == TRUE) { + line <- paste(sub("--", "/*", line), "*/") + } + + sql.string <- paste(sql.string, line) + } + + close(con) + + if(is.null(additional_settings)){ + # clean up SQL - remove weird sign that appears sometimes + sql.string <- gsub("", " ", sql.string) + + } else if (additional_settings == TRUE){ + # Clean SQL and prefix with settings that sometimes help + sql.string <- paste( + "SET ANSI_PADDING OFF", + "SET NOCOUNT ON;", + gsub("", " ", sql.string) + ) + } + + return(sql.string) +} diff --git a/tests/sql_scripts/simple.sql b/tests/sql_scripts/simple.sql new file mode 100644 index 0000000..8e3865d --- /dev/null +++ b/tests/sql_scripts/simple.sql @@ -0,0 +1,3 @@ +-- Simple SQL script to get all data from my database table + +SELECT * FROM [my_database_table] diff --git a/tests/testthat/test-get_clean_sql.R b/tests/testthat/test-get_clean_sql.R new file mode 100644 index 0000000..cda5b5a --- /dev/null +++ b/tests/testthat/test-get_clean_sql.R @@ -0,0 +1,6 @@ +test_that("Can retrieve basic script", { + expect_equal( + get_clean_sql("sql_scripts/simple.sql"), + TRUE + ) +}) From 47fee5176073f9f5a79c994a9e7058ec237cc845 Mon Sep 17 00:00:00 2001 From: cjrace <52536248+cjrace@users.noreply.github.com> Date: Tue, 27 Feb 2024 20:56:57 +0000 Subject: [PATCH 08/22] add code cov details to contributing file --- .github/CONTRIBUTING.md | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 5e46bb2..26b87e9 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -46,7 +46,7 @@ Where possible, we'd recommend following the [Test Driven Development (TDD)](htt 1. Write tests for the behaviour you want. Either edit an existing test script, or if adding a new function, create a test script using: -``` r_ +``` r usethis::use_test("name_of_new_function") ``` @@ -87,7 +87,7 @@ We recommend using the [usethis](https://usethis.r-lib.org/index.html) package w Add any packages the package users will need with: ``` r -usethis::use_package(pkgname, type = "imports") +usethis::use_package(pkgname) ``` Add any packages that package developers only may need with: @@ -161,6 +161,16 @@ lintr::lint_package() We use [testthat](https://cran.r-project.org/package=testthat) for unit tests, we expect all new functions to have some level of test coverage. +### Test coverage + +There are GitHub Actions workflows that check and link the package to [codecov.io](https://app.codecov.io/gh/dfe-analytical-services/), this runs automatic scans to check the % of lines in functions that we are testing. On the [dfeR codecov pages](https://app.codecov.io/gh/dfe-analytical-services/dfeR) you can preview the variation by branch and commit to see the impact of changes made. + +You will need to create an account or login using GitHub to see the pages. + +The current % of coverage is shown as a badge on the package [README on GitHub](https://github.com/dfe-analytical-services/dfeR). + +It is worth noting that 100% coverage does not mean that the tests are perfect, it only means that all lines are ran in tests, so it's more a measure of quantity rather than quality. Interesting to see all the same though, and we'd recommend using it to spot any potential elements of more complicated functions that you may have forgotten to test. + ### Spelling The [spelling](https://docs.ropensci.org/spelling/) package is used to check spelling. A custom word list of exceptions for this package exists in the `inst/` folder. From 5a2b219f9c56261277accaee07eaa2cabee3c508 Mon Sep 17 00:00:00 2001 From: cjrace <52536248+cjrace@users.noreply.github.com> Date: Tue, 27 Feb 2024 20:59:23 +0000 Subject: [PATCH 09/22] Add link to reference documentation in the examples section --- README.Rmd | 2 ++ README.md | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/README.Rmd b/README.Rmd index 4c3ad5d..3feb848 100644 --- a/README.Rmd +++ b/README.Rmd @@ -91,3 +91,5 @@ This is a basic example showing the `format_ay()` function: library(dfeR) format_ay(202425) ``` + +For more details on all the functions available in this package, and examples of how to use them, please see our [dfeR package reference documentation](https://dfe-analytical-services.github.io/dfeR/reference/index.html). diff --git a/README.md b/README.md index 703c509..5cfc5b4 100644 --- a/README.md +++ b/README.md @@ -104,3 +104,7 @@ library(dfeR) format_ay(202425) #> [1] "2024/25" ``` + +For more details on all the functions available in this package, and +examples of how to use them, please see our [dfeR package reference +documentation](https://dfe-analytical-services.github.io/dfeR/reference/index.html). From 75aea11f98ed8a8249d047e459ec76353f6dc745 Mon Sep 17 00:00:00 2001 From: cjrace <52536248+cjrace@users.noreply.github.com> Date: Wed, 28 Feb 2024 00:29:59 +0000 Subject: [PATCH 10/22] commit current progress adding unit tests (WIP) --- NAMESPACE | 1 + R/get_clean_sql.R | 7 ++++++- man/get_clean_sql.Rd | 29 +++++++++++++++++++++++++++++ tests/testthat/test-get_clean_sql.R | 20 +++++++++++++++++++- 4 files changed, 55 insertions(+), 2 deletions(-) create mode 100644 man/get_clean_sql.Rd diff --git a/NAMESPACE b/NAMESPACE index 47667d4..28e6a27 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,5 +4,6 @@ export(format_ay) export(format_ay_reverse) export(format_fy) export(format_fy_reverse) +export(get_clean_sql) export(round_five_up) importFrom(lifecycle,deprecated) diff --git a/R/get_clean_sql.R b/R/get_clean_sql.R index 87719a2..ddbd9d6 100644 --- a/R/get_clean_sql.R +++ b/R/get_clean_sql.R @@ -22,7 +22,12 @@ get_clean_sql <- function(filepath, additional_settings = FALSE) { - # check that additional_settings is always NULL or TRUE + if(!additional_settings %in% c(TRUE, FALSE, T, F)){ + stop( + "additional_settings must be either TRUE or FALSE" + ) + } + # check filepath can be found and is a SQL file? diff --git a/man/get_clean_sql.Rd b/man/get_clean_sql.Rd new file mode 100644 index 0000000..318745a --- /dev/null +++ b/man/get_clean_sql.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_clean_sql.R +\name{get_clean_sql} +\alias{get_clean_sql} +\title{Get clean SQL} +\usage{ +get_clean_sql(filepath, additional_settings = FALSE) +} +\arguments{ +\item{filepath}{path to a SQL script} + +\item{additional_settings}{TRUE or FALSE boolean for the addition of +settings at the start of the SQL script} +} +\value{ +Cleaned string containing SQL query +} +\description{ +This function cleans a SQL script, ready for using within R in the DfE. +} +\examples{ +# This assumes you have already set up a database connection +# and have assigned to 'con' +# Pull a cleaned version of the SQL file into R +sql_query <- get_clean_sql("path_to_sql_file.sql") + +# Use the cleaned SQL query to query the database +\dontrun{dbGetQuery(con, statement = sql_query)} +} diff --git a/tests/testthat/test-get_clean_sql.R b/tests/testthat/test-get_clean_sql.R index cda5b5a..ad2dabc 100644 --- a/tests/testthat/test-get_clean_sql.R +++ b/tests/testthat/test-get_clean_sql.R @@ -1,6 +1,24 @@ test_that("Can retrieve basic script", { + expect_equal( + get_clean_sql("../sql_scripts/simple.sql"), + " /* Simple SQL script to get all data from my database table */ + SELECT * FROM [my_database_table]", + fixed + ) +}) + +test_that("Adds additional settings", { + # Check that the output starts with the desired lines + expect_true( + grepl( + "^SET ANSI_PADDING OFF SET NOCOUNT ON;", + get_clean_sql("sql_scripts/simple.sql", additional_settings = T) + ) + ) +}) + +test_that("Rejects non-boolean values for additional_settings", { expect_equal( get_clean_sql("sql_scripts/simple.sql"), - TRUE ) }) From 6c5cf74baf6320bd7ce8682772414768dd517e93 Mon Sep 17 00:00:00 2001 From: cjrace Date: Fri, 8 Mar 2024 23:17:25 +0000 Subject: [PATCH 11/22] fix tests for sql function --- R/get_clean_sql.R | 47 +++++++++++++------------- man/get_clean_sql.Rd | 9 ++++- tests/testthat/test-get_clean_sql.R | 51 +++++++++++++++++++++++++---- 3 files changed, 76 insertions(+), 31 deletions(-) diff --git a/R/get_clean_sql.R b/R/get_clean_sql.R index ddbd9d6..2881ef9 100644 --- a/R/get_clean_sql.R +++ b/R/get_clean_sql.R @@ -11,29 +11,34 @@ #' @examples #' # This assumes you have already set up a database connection #' # and have assigned to 'con' - -# vignette potential? - +#' +#' # For more details see [vignette] +#' #' # Pull a cleaned version of the SQL file into R +#' \dontrun{ #' sql_query <- get_clean_sql("path_to_sql_file.sql") +#' } #' #' # Use the cleaned SQL query to query the database -#' \dontrun{dbGetQuery(con, statement = sql_query)} - +#' \dontrun{ +#' dbGetQuery(con, statement = sql_query) +#' } get_clean_sql <- function(filepath, additional_settings = FALSE) { - - if(!additional_settings %in% c(TRUE, FALSE, T, F)){ + if (!additional_settings %in% c(TRUE, FALSE)) { stop( "additional_settings must be either TRUE or FALSE" ) } + # check filepath leads to a SQL file + if (tolower(tools::file_ext(filepath)) != "sql") { + stop("filepath must point to a SQL script, with a .sql extension") + } - # check filepath can be found and is a SQL file? - - + # The file() function will error if the file can't be found + # Open a connection to the file con <- file(filepath, "r") - sql.string <- "" + sql_string <- "" while (TRUE) { line <- readLines(con, n = 1) @@ -49,23 +54,19 @@ get_clean_sql <- function(filepath, additional_settings = FALSE) { line <- paste(sub("--", "/*", line), "*/") } - sql.string <- paste(sql.string, line) + sql_string <- paste(sql_string, line) } + # Close connection to the file close(con) - if(is.null(additional_settings)){ - # clean up SQL - remove weird sign that appears sometimes - sql.string <- gsub("", " ", sql.string) - - } else if (additional_settings == TRUE){ - # Clean SQL and prefix with settings that sometimes help - sql.string <- paste( + if (additional_settings == TRUE) { + # Prefix with settings that sometimes help + sql_string <- paste( "SET ANSI_PADDING OFF", - "SET NOCOUNT ON;", - gsub("", " ", sql.string) - ) + "SET NOCOUNT ON;" + ) } - return(sql.string) + return(sql_string) } diff --git a/man/get_clean_sql.Rd b/man/get_clean_sql.Rd index 318745a..7637f2f 100644 --- a/man/get_clean_sql.Rd +++ b/man/get_clean_sql.Rd @@ -21,9 +21,16 @@ This function cleans a SQL script, ready for using within R in the DfE. \examples{ # This assumes you have already set up a database connection # and have assigned to 'con' + +# For more details see [vignette] + # Pull a cleaned version of the SQL file into R +\dontrun{ sql_query <- get_clean_sql("path_to_sql_file.sql") +} # Use the cleaned SQL query to query the database -\dontrun{dbGetQuery(con, statement = sql_query)} +\dontrun{ +dbGetQuery(con, statement = sql_query) +} } diff --git a/tests/testthat/test-get_clean_sql.R b/tests/testthat/test-get_clean_sql.R index ad2dabc..100a4fa 100644 --- a/tests/testthat/test-get_clean_sql.R +++ b/tests/testthat/test-get_clean_sql.R @@ -1,9 +1,17 @@ test_that("Can retrieve basic script", { expect_equal( get_clean_sql("../sql_scripts/simple.sql"), - " /* Simple SQL script to get all data from my database table */ - SELECT * FROM [my_database_table]", - fixed + paste0( + " /* Simple SQL script to get all data from my database table", + " */ SELECT * FROM [my_database_table]" + ) + ) + expect_equal( + get_clean_sql("../sql_scripts/simple.SQL"), + paste0( + " /* Simple SQL script to get all data from my database table", + " */ SELECT * FROM [my_database_table]" + ) ) }) @@ -12,13 +20,42 @@ test_that("Adds additional settings", { expect_true( grepl( "^SET ANSI_PADDING OFF SET NOCOUNT ON;", - get_clean_sql("sql_scripts/simple.sql", additional_settings = T) - ) + get_clean_sql("../sql_scripts/simple.sql", additional_settings = TRUE) + ) + ) +}) + +test_that("Doesn't add additional settings", { + # Check that the output doesn't start with the additional lines + expect_false( + grepl( + "^SET ANSI_PADDING OFF SET NOCOUNT ON;", + get_clean_sql("../sql_scripts/simple.sql", additional_settings = FALSE) + ) + ) + # Check that the output doesn't start with the additional lines + expect_false( + grepl( + "^SET ANSI_PADDING OFF SET NOCOUNT ON;", + get_clean_sql("../sql_scripts/simple.sql") + ) ) }) test_that("Rejects non-boolean values for additional_settings", { - expect_equal( - get_clean_sql("sql_scripts/simple.sql"), + expect_error( + get_clean_sql("../sql_scripts/simple.sql", additional_settings = "True"), + "additional_settings must be either TRUE or FALSE" + ) + expect_error( + get_clean_sql("../sql_scripts/simple.sql", additional_settings = ""), + "additional_settings must be either TRUE or FALSE" + ) +}) + +test_that("Rejects file that don't have SQL extension", { + expect_error( + get_clean_sql("../spelling.R"), + "" ) }) From 23b2db1bfc815ddcf4e884fd86470813a1bfc725 Mon Sep 17 00:00:00 2001 From: cjrace Date: Fri, 8 Mar 2024 23:33:57 +0000 Subject: [PATCH 12/22] add roxygen2 examples to contributing file --- .github/CONTRIBUTING.md | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 26b87e9..8dc648e 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -44,7 +44,7 @@ install.packages("styler") Where possible, we'd recommend following the [Test Driven Development (TDD)](https://testdriven.io/test-driven-development/) approach: -1. Write tests for the behaviour you want. Either edit an existing test script, or if adding a new function, create a test script using: +1. Write tests using [testthat](https://r-pkgs.org/testing-basics.html) for the behaviour you want. Either edit an existing test script, or if adding a new function, create a test script using: ``` r usethis::use_test("name_of_new_function") @@ -56,7 +56,21 @@ usethis::use_test("name_of_new_function") usethis::use_r("name_of_new_function") ``` -3. Add documentation for what you've done. Follow the [roxygen2](https://roxygen2.r-lib.org/articles/rd.html) pattern for comments. +3. Add documentation for what you've done. Follow the [roxygen2](https://roxygen2.r-lib.org/articles/rd.html) pattern for comments. Here's an example of what it looks like for a basic `add()` function: + +``` +#' @description Add together two numbers +#' +#' @param x A number. +#' @param y A number. +#' @return A number. +#' @examples +#' add(1, 1) +#' add(10, 1) +add <- function(x, y) { + x + y +} +``` 4. Continue to improve code while keeping tests passing. You can automatically style code using: From d945bfa86f50e9a0b746c83fc6aa29da53e22422 Mon Sep 17 00:00:00 2001 From: cjrace Date: Sat, 9 Mar 2024 10:26:24 +0000 Subject: [PATCH 13/22] add connecting to sql vignette --- .gitignore | 1 + DESCRIPTION | 3 ++ inst/WORDLIST | 5 +-- vignettes/.gitignore | 2 + vignettes/connecting_to_sql.Rmd | 78 +++++++++++++++++++++++++++++++++ 5 files changed, 86 insertions(+), 3 deletions(-) create mode 100644 vignettes/.gitignore create mode 100644 vignettes/connecting_to_sql.Rmd diff --git a/.gitignore b/.gitignore index 9168bf8..16add95 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ .httr-oauth .DS_Store docs +inst/doc diff --git a/DESCRIPTION b/DESCRIPTION index d0b083b..9d10d08 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,6 +17,8 @@ BugReports: https://github.com/dfe-analytical-services/dfeR/issues Imports: lifecycle Suggests: + knitr, + rmarkdown, spelling, testthat (>= 3.0.0) Config/testthat/edition: 3 @@ -24,3 +26,4 @@ Encoding: UTF-8 Language: en-GB Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.1 +VignetteBuilder: knitr diff --git a/inst/WORDLIST b/inst/WORDLIST index 226aeef..a4f4c2c 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -3,10 +3,9 @@ Codecov DfE Lifecycle ay -ch +dbplyr dfeshiny -ethz lauraselby +odbc pkgdown renv -stat diff --git a/vignettes/.gitignore b/vignettes/.gitignore new file mode 100644 index 0000000..097b241 --- /dev/null +++ b/vignettes/.gitignore @@ -0,0 +1,2 @@ +*.html +*.R diff --git a/vignettes/connecting_to_sql.Rmd b/vignettes/connecting_to_sql.Rmd new file mode 100644 index 0000000..13ff653 --- /dev/null +++ b/vignettes/connecting_to_sql.Rmd @@ -0,0 +1,78 @@ +--- +title: "Connecting to SQL" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{connecting_to_sql} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +R can be used to execute SQL scripts to extract data from a database as well as querying the database directly via R. There are three primary ways to do this: + +1. executing a separate SQL script from R +2. writing strings of SQL code in your R script +3. using [dbplyr](https://dbplyr.tidyverse.org/) to query a database using R code + +Which you use will depend on how comfortable with SQL and R and also if you already have existing SQL scripts that you want to execute or you’re writing new database queries. This vignette focuses on that first example, using the `get_clean_sql()` function to read in a separate SQL script and execute from R. + +For more information on the other two methods, or on troubleshooting the connection between R and SQL in the Department for Education (DfE), please see the [Interacting with a SQL database section of our Analysts' Guide](https://dfe-analytical-services.github.io/analysts-guide/learning-development/r.html#interacting-with-a-sql-database-from-within-r-scripts). + +## Connecting to a database + +Usually in the DfE we use a combination of [odbc](https://odbc.r-dbi.org/) and [DBI](https://dbi.r-dbi.org/) to connect to our SQL databases. + +How you connect will vary depending on whether you're running R code on your laptop, or as a part of a deployed R Shiny, for running code on your laptop you can automatically use your Windows login (a trusted connection) to grant you access to the database, as the package can automatically detect your user details. + +```{r local connection, eval=FALSE} +# Library calls ==== + +library(odbc) +library(DBI) + +# Database connection ==== + +con <- DBI::dbConnect(odbc::odbc(), + Driver = "ODBC Driver 17 for SQL Server", + Server = "server_name", + Database = "database_name", + UID = "", + PWD = "", + Trusted_Connection = "Yes" +) + +``` + +For advice on finding your database details, or connecting to a SQL database from an R Shiny app that is deployed on a server, please contact the [Statistics Development Team](mailto:statistics.development@education.gov.uk) who will be able to advise on the setup and steps required. + +## Reading a SQL script into R + +There are a number of standard characters found in SQL scripts that can cause issues when reading in a SQL script within R, we have created the `get_clean_sql()` function to assist with this. Assume you have connected to the database and assigned that connection to a `con` variable, you would then use the following line to read a cleaned version of your SQL script into R. + +```{r reading clean sql, eval=FALSE} +sql_query <- dfeR::get_clean_sql("path_to_sql_file.sql") +``` + +## Executing the SQL query + +Now that the SQL query is saved as a variable in the R environment you can pass that into a function to execute against the database. There's a number of potential ways to do this, though a common way is to use `dbGetQuery()` from the [DBI package](https://dbi.r-dbi.org/), setting the statement as your cleaned SQL query. + +```{r executing sql query, eval=FALSE} +sql_query_result <- DBI::dbGetQuery(con, statement = sql_query) +``` + +## Troubleshooting + +Our first advice if you hit an error, would be to set additional settings while cleaning the SQL script. You can do this with the `additional_settings` argument in the `get_clean_sql()` function. + +```{r reading clean sql additional, eval=FALSE} +sql_query <- dfeR::get_clean_sql("path_to_sql_file.sql", additional_settings = TRUE) +``` + +This will add additional settings to the start of your SQL query that are sometimes necessary for the odbc and DBI connection to correctly execute your query. For further troubleshooting tips, please see the [Interacting with a SQL database section of our Analysts' Guide](https://dfe-analytical-services.github.io/analysts-guide/learning-development/r.html#interacting-with-a-sql-database-from-within-r-scripts), or contact the [Statistics Development Team](mailto:statistics.development@education.gov.uk) for support. From 72375589bb8209c81da09912964c18b88f80c3cb Mon Sep 17 00:00:00 2001 From: cjrace Date: Sat, 9 Mar 2024 10:26:45 +0000 Subject: [PATCH 14/22] update get clean sql, contributing and reference documentation --- .github/CONTRIBUTING.md | 28 ++++++++++++++++++++-------- R/get_clean_sql.R | 16 +++++----------- _pkgdown.yml | 13 +++++++++++++ man/get_clean_sql.Rd | 16 +++++----------- 4 files changed, 43 insertions(+), 30 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 8dc648e..939ee06 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -42,7 +42,7 @@ install.packages("lintr") install.packages("styler") ``` -Where possible, we'd recommend following the [Test Driven Development (TDD)](https://testdriven.io/test-driven-development/) approach: +Where possible, we'd recommend following the [Test Driven Development (TDD)](https://testdriven.io/test-driven-development/) approach. Though if you're new to package development, or already have code for a specific function feel free to start with step 2, to copy the function into the package and then go back to step 1 afterwards. 1. Write tests using [testthat](https://r-pkgs.org/testing-basics.html) for the behaviour you want. Either edit an existing test script, or if adding a new function, create a test script using: @@ -78,11 +78,11 @@ add <- function(x, y) { styler::style_pkg() ``` -5. Run a full check of the package. Here's a few ways you can do this: +5. Run a full check of the package using the following functions: ``` r devtools::check() # General package check, can also use Ctrl-Shift-E -lintr::lint_pkg() # Check styling of code +lintr::lint_package() # Check formatting of code spelling::spell_check() # Check for spelling mistakes ``` @@ -90,10 +90,12 @@ spelling::spell_check() # Check for spelling mistakes Keyboard shortcuts for the `devtools` package to use while in RStudio: -* `load_all()` (Ctrl-Shift-L): Load code with dfeR package -* `test()` (Ctrl-Shift-T): Run tests -* `document()` (Ctrl-Shift-D): Rebuild docs and NAMESPACE -* `check()` (Ctrl-Shift-E): Check complete package +``` r +load_all() # (Ctrl-Shift-L): Load code with dfeR package +test() # (Ctrl-Shift-T): Run tests +document() # (Ctrl-Shift-D): Rebuild docs and NAMESPACE +check() # (Ctrl-Shift-E): Check complete package +``` We recommend using the [usethis](https://usethis.r-lib.org/index.html) package where possible for consistency and simplicity. @@ -173,7 +175,9 @@ lintr::lint_package() ### Testing -We use [testthat](https://cran.r-project.org/package=testthat) for unit tests, we expect all new functions to have some level of test coverage. +We use [testthat](https://cran.r-project.org/package=testthat) for unit tests, we expect all new functions to have some level of test coverage. + +If you want to see examples of existing tests for inspiration, take a look inside the `tests/testthat/` folder. ### Test coverage @@ -203,6 +207,14 @@ To automatically pick up genuine new words in the package and add to this list, spelling::update_wordlist() ``` +## Adding vignettes + +Vignettes can be found in the `vignettes/` folder as .Rmd files. To start a new one use: + +``` r +usethis::use_vignette("name_of_vignette") +``` + ## Code of Conduct Please note that the dfeR project is released with a [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By contributing to this project you agree to abide by its terms. diff --git a/R/get_clean_sql.R b/R/get_clean_sql.R index 2881ef9..b99e6b4 100644 --- a/R/get_clean_sql.R +++ b/R/get_clean_sql.R @@ -1,4 +1,4 @@ -#' Get clean SQL +#' Get a cleaned SQL script into R #' #' @description #' This function cleans a SQL script, ready for using within R in the DfE. @@ -10,18 +10,12 @@ #' @export #' @examples #' # This assumes you have already set up a database connection -#' # and have assigned to 'con' -#' -#' # For more details see [vignette] +#' # and that the filepath for the function exists +#' # For more details see the vignette on connecting to SQL #' #' # Pull a cleaned version of the SQL file into R -#' \dontrun{ -#' sql_query <- get_clean_sql("path_to_sql_file.sql") -#' } -#' -#' # Use the cleaned SQL query to query the database -#' \dontrun{ -#' dbGetQuery(con, statement = sql_query) +#' if(file.exists("your_script.sql")){ +#' sql_query <- get_clean_sql("your_script.sql") #' } get_clean_sql <- function(filepath, additional_settings = FALSE) { if (!additional_settings %in% c(TRUE, FALSE)) { diff --git a/_pkgdown.yml b/_pkgdown.yml index 8c9b13d..99f7549 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -5,3 +5,16 @@ template: bslib: pkgdown-nav-height: 81.4468px code-color: "ffffff" + +reference: +- title: Helper functions + contents: + - round_five_up + +- title: Formatting + contents: + - starts_with("format_") + +- title: Database connections + contents: + - get_clean_sql diff --git a/man/get_clean_sql.Rd b/man/get_clean_sql.Rd index 7637f2f..790400b 100644 --- a/man/get_clean_sql.Rd +++ b/man/get_clean_sql.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/get_clean_sql.R \name{get_clean_sql} \alias{get_clean_sql} -\title{Get clean SQL} +\title{Get a cleaned SQL script into R} \usage{ get_clean_sql(filepath, additional_settings = FALSE) } @@ -20,17 +20,11 @@ This function cleans a SQL script, ready for using within R in the DfE. } \examples{ # This assumes you have already set up a database connection -# and have assigned to 'con' - -# For more details see [vignette] +# and that the filepath for the function exists +# For more details see the vignette on connecting to SQL # Pull a cleaned version of the SQL file into R -\dontrun{ -sql_query <- get_clean_sql("path_to_sql_file.sql") -} - -# Use the cleaned SQL query to query the database -\dontrun{ -dbGetQuery(con, statement = sql_query) +if(file.exists("your_script.sql")){ +sql_query <- get_clean_sql("your_script.sql") } } From 7e6bb473638f4c94f1fc97b1cfaf4cdb9a7344ba Mon Sep 17 00:00:00 2001 From: cjrace Date: Sat, 9 Mar 2024 10:29:55 +0000 Subject: [PATCH 15/22] Increment version number to 0.2.0 --- DESCRIPTION | 2 +- NEWS.md | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9d10d08..711547a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: dfeR Title: Common DfE R tasks -Version: 0.1.1 +Version: 0.2.0 Authors@R: c( person("Cam", "Race", , "cameron.race@education.gov.uk", role = c("aut", "cre")), person("Laura", "Selby", , "laura.selby@education.gov.uk", role = "aut"), diff --git a/NEWS.md b/NEWS.md index cb6ad3e..098dd4a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,18 @@ +# dfeR 0.2.0 + +Add function for formatting financial years: + +- format_fy() + +Add reversing functions for academic and financial years: + +- format_ay_reverse() +- format_fy_reverse() + +Add function for grabbing and cleaning a SQL script, and vignette for connecting to SQL. + +- get_clean_sql() + # dfeR 0.1.1 Add default value to decimal place argument of round_five_up() function. From 31441c6f7d7b2f2a6b6289fcfd366f16ab6bc7b8 Mon Sep 17 00:00:00 2001 From: cjrace Date: Sat, 9 Mar 2024 10:31:22 +0000 Subject: [PATCH 16/22] tidy package following lintr advice --- R/get_clean_sql.R | 4 ++-- man/get_clean_sql.Rd | 4 ++-- vignettes/connecting_to_sql.Rmd | 18 ++++++++++-------- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/R/get_clean_sql.R b/R/get_clean_sql.R index b99e6b4..4512258 100644 --- a/R/get_clean_sql.R +++ b/R/get_clean_sql.R @@ -14,8 +14,8 @@ #' # For more details see the vignette on connecting to SQL #' #' # Pull a cleaned version of the SQL file into R -#' if(file.exists("your_script.sql")){ -#' sql_query <- get_clean_sql("your_script.sql") +#' if (file.exists("your_script.sql")) { +#' sql_query <- get_clean_sql("your_script.sql") #' } get_clean_sql <- function(filepath, additional_settings = FALSE) { if (!additional_settings %in% c(TRUE, FALSE)) { diff --git a/man/get_clean_sql.Rd b/man/get_clean_sql.Rd index 790400b..49a5c38 100644 --- a/man/get_clean_sql.Rd +++ b/man/get_clean_sql.Rd @@ -24,7 +24,7 @@ This function cleans a SQL script, ready for using within R in the DfE. # For more details see the vignette on connecting to SQL # Pull a cleaned version of the SQL file into R -if(file.exists("your_script.sql")){ -sql_query <- get_clean_sql("your_script.sql") +if (file.exists("your_script.sql")) { + sql_query <- get_clean_sql("your_script.sql") } } diff --git a/vignettes/connecting_to_sql.Rmd b/vignettes/connecting_to_sql.Rmd index 13ff653..f926d59 100644 --- a/vignettes/connecting_to_sql.Rmd +++ b/vignettes/connecting_to_sql.Rmd @@ -39,14 +39,13 @@ library(DBI) # Database connection ==== con <- DBI::dbConnect(odbc::odbc(), - Driver = "ODBC Driver 17 for SQL Server", - Server = "server_name", - Database = "database_name", - UID = "", - PWD = "", - Trusted_Connection = "Yes" + Driver = "ODBC Driver 17 for SQL Server", + Server = "server_name", + Database = "database_name", + UID = "", + PWD = "", + Trusted_Connection = "Yes" ) - ``` For advice on finding your database details, or connecting to a SQL database from an R Shiny app that is deployed on a server, please contact the [Statistics Development Team](mailto:statistics.development@education.gov.uk) who will be able to advise on the setup and steps required. @@ -72,7 +71,10 @@ sql_query_result <- DBI::dbGetQuery(con, statement = sql_query) Our first advice if you hit an error, would be to set additional settings while cleaning the SQL script. You can do this with the `additional_settings` argument in the `get_clean_sql()` function. ```{r reading clean sql additional, eval=FALSE} -sql_query <- dfeR::get_clean_sql("path_to_sql_file.sql", additional_settings = TRUE) +sql_query <- dfeR::get_clean_sql( + "path_to_sql_file.sql", + additional_settings = TRUE +) ``` This will add additional settings to the start of your SQL query that are sometimes necessary for the odbc and DBI connection to correctly execute your query. For further troubleshooting tips, please see the [Interacting with a SQL database section of our Analysts' Guide](https://dfe-analytical-services.github.io/analysts-guide/learning-development/r.html#interacting-with-a-sql-database-from-within-r-scripts), or contact the [Statistics Development Team](mailto:statistics.development@education.gov.uk) for support. From e39ea6ab841405eb2246d30b376a64c80f2381fc Mon Sep 17 00:00:00 2001 From: cjrace Date: Sat, 9 Mar 2024 10:40:23 +0000 Subject: [PATCH 17/22] remove capitalised extension test (causes test errors on linux) --- tests/testthat/test-get_clean_sql.R | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/testthat/test-get_clean_sql.R b/tests/testthat/test-get_clean_sql.R index 100a4fa..7aa4ea2 100644 --- a/tests/testthat/test-get_clean_sql.R +++ b/tests/testthat/test-get_clean_sql.R @@ -6,13 +6,6 @@ test_that("Can retrieve basic script", { " */ SELECT * FROM [my_database_table]" ) ) - expect_equal( - get_clean_sql("../sql_scripts/simple.SQL"), - paste0( - " /* Simple SQL script to get all data from my database table", - " */ SELECT * FROM [my_database_table]" - ) - ) }) test_that("Adds additional settings", { From eafd155344d85f68c6a0362910e06b532da2f171 Mon Sep 17 00:00:00 2001 From: cjrace Date: Mon, 11 Mar 2024 15:04:30 +0000 Subject: [PATCH 18/22] add Jen as a contributor to the package --- DESCRIPTION | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 711547a..9752033 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -5,7 +5,8 @@ Version: 0.2.0 Authors@R: c( person("Cam", "Race", , "cameron.race@education.gov.uk", role = c("aut", "cre")), person("Laura", "Selby", , "laura.selby@education.gov.uk", role = "aut"), - person("Adam", "Robinson", role = "aut") + person("Adam", "Robinson", role = "aut"), + person("Jen", "Machin", , "jen.machin@education.gov.uk", role = "ctb") ) Description: This package contains R functions to allow DfE analysts to re-use code for common analytical tasks that are undertaken across the @@ -21,9 +22,10 @@ Suggests: rmarkdown, spelling, testthat (>= 3.0.0) +VignetteBuilder: + knitr Config/testthat/edition: 3 Encoding: UTF-8 Language: en-GB Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.1 -VignetteBuilder: knitr From 0b1c56aac6d6502b71c6be9b6b36731c7deee6ba Mon Sep 17 00:00:00 2001 From: cjrace Date: Tue, 12 Mar 2024 08:46:51 +0000 Subject: [PATCH 19/22] Initial response to PR comments (more to complete on troubleshooting) --- DESCRIPTION | 4 +++- vignettes/connecting_to_sql.Rmd | 33 +++++++++++++++++++++++++++++---- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9752033..6b627b0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -6,7 +6,9 @@ Authors@R: c( person("Cam", "Race", , "cameron.race@education.gov.uk", role = c("aut", "cre")), person("Laura", "Selby", , "laura.selby@education.gov.uk", role = "aut"), person("Adam", "Robinson", role = "aut"), - person("Jen", "Machin", , "jen.machin@education.gov.uk", role = "ctb") + person("Jen", "Machin", , "jen.machin@education.gov.uk", role = "ctb"), + person("Rich", "Bielby", , "richard.bielby@education.gov.uk", role = "ctb", + comment = c(ORCID = "0000-0001-9070-9969")) ) Description: This package contains R functions to allow DfE analysts to re-use code for common analytical tasks that are undertaken across the diff --git a/vignettes/connecting_to_sql.Rmd b/vignettes/connecting_to_sql.Rmd index f926d59..0bb9f27 100644 --- a/vignettes/connecting_to_sql.Rmd +++ b/vignettes/connecting_to_sql.Rmd @@ -20,7 +20,7 @@ R can be used to execute SQL scripts to extract data from a database as well as 2. writing strings of SQL code in your R script 3. using [dbplyr](https://dbplyr.tidyverse.org/) to query a database using R code -Which you use will depend on how comfortable with SQL and R and also if you already have existing SQL scripts that you want to execute or you’re writing new database queries. This vignette focuses on that first example, using the `get_clean_sql()` function to read in a separate SQL script and execute from R. +Which you use will depend on how comfortable you are with SQL and R and if you already have existing SQL scripts that you want to execute or you’re writing new database queries. This vignette focuses on that first example, using the `get_clean_sql()` function to read in a separate SQL script and execute from R. For more information on the other two methods, or on troubleshooting the connection between R and SQL in the Department for Education (DfE), please see the [Interacting with a SQL database section of our Analysts' Guide](https://dfe-analytical-services.github.io/analysts-guide/learning-development/r.html#interacting-with-a-sql-database-from-within-r-scripts). @@ -28,7 +28,7 @@ For more information on the other two methods, or on troubleshooting the connect Usually in the DfE we use a combination of [odbc](https://odbc.r-dbi.org/) and [DBI](https://dbi.r-dbi.org/) to connect to our SQL databases. -How you connect will vary depending on whether you're running R code on your laptop, or as a part of a deployed R Shiny, for running code on your laptop you can automatically use your Windows login (a trusted connection) to grant you access to the database, as the package can automatically detect your user details. +How you connect will vary depending on whether you're running R code on your laptop, or as a part of a deployed R Shiny app. For running code on your laptop you can automatically use your Windows login (a trusted connection) to grant you access to the database, as the package can automatically detect your user details. ```{r local connection, eval=FALSE} # Library calls ==== @@ -52,7 +52,7 @@ For advice on finding your database details, or connecting to a SQL database fro ## Reading a SQL script into R -There are a number of standard characters found in SQL scripts that can cause issues when reading in a SQL script within R, we have created the `get_clean_sql()` function to assist with this. Assume you have connected to the database and assigned that connection to a `con` variable, you would then use the following line to read a cleaned version of your SQL script into R. +There are a number of standard characters found in SQL scripts that can cause issues when reading in a SQL script within R and we have created the `get_clean_sql()` function to assist with this. Assume you have connected to the database and assigned that connection to a `con` variable, you would then use the following line to read a cleaned version of your SQL script into R. ```{r reading clean sql, eval=FALSE} sql_query <- dfeR::get_clean_sql("path_to_sql_file.sql") @@ -68,6 +68,29 @@ sql_query_result <- DBI::dbGetQuery(con, statement = sql_query) ## Troubleshooting +### Known issues + +The `get_clean_sql()` function does not automatically make every SQL script executable using `dbGetQuery()` from the [DBI package](https://dbi.r-dbi.org/). Here are the currently known scenarios that may cause unexpected behaviours, and need addressing in the SQL script itself. + +1. Title? +``` {SQL title} +USE [SWFC_Project_Restricted] +GO +``` + +2. Stop or end symbols + +The following script shows where two separate outputs would be generated by the SQL script - you'll find that... + +``` {SQL stop symbols} +Something; + +Something 2; +``` + + +### Other errors + Our first advice if you hit an error, would be to set additional settings while cleaning the SQL script. You can do this with the `additional_settings` argument in the `get_clean_sql()` function. ```{r reading clean sql additional, eval=FALSE} @@ -77,4 +100,6 @@ sql_query <- dfeR::get_clean_sql( ) ``` -This will add additional settings to the start of your SQL query that are sometimes necessary for the odbc and DBI connection to correctly execute your query. For further troubleshooting tips, please see the [Interacting with a SQL database section of our Analysts' Guide](https://dfe-analytical-services.github.io/analysts-guide/learning-development/r.html#interacting-with-a-sql-database-from-within-r-scripts), or contact the [Statistics Development Team](mailto:statistics.development@education.gov.uk) for support. +This will add additional settings to the start of your SQL query that are sometimes necessary for the odbc and DBI connection to correctly execute your query. + +For further troubleshooting tips, please see the [Interacting with a SQL database section of our Analysts' Guide](https://dfe-analytical-services.github.io/analysts-guide/learning-development/r.html#interacting-with-a-sql-database-from-within-r-scripts), or contact the [Statistics Development Team](mailto:statistics.development@education.gov.uk) for support. From ef627e20e198d77047d2a1bc9b7f6d7ab02d8180 Mon Sep 17 00:00:00 2001 From: cjrace Date: Tue, 12 Mar 2024 18:09:54 +0000 Subject: [PATCH 20/22] fix lint issues --- vignettes/connecting_to_sql.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vignettes/connecting_to_sql.Rmd b/vignettes/connecting_to_sql.Rmd index 0bb9f27..3d82067 100644 --- a/vignettes/connecting_to_sql.Rmd +++ b/vignettes/connecting_to_sql.Rmd @@ -83,9 +83,9 @@ GO The following script shows where two separate outputs would be generated by the SQL script - you'll find that... ``` {SQL stop symbols} -Something; +Something -Something 2; +Somethingelse ``` From 6d07780763f3791cdac10172bd83005093f4836a Mon Sep 17 00:00:00 2001 From: cjrace <52536248+cjrace@users.noreply.github.com> Date: Sat, 23 Mar 2024 15:45:33 +0000 Subject: [PATCH 21/22] update connecting to sql vignette to specific select style queries only --- vignettes/connecting_to_sql.Rmd | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/vignettes/connecting_to_sql.Rmd b/vignettes/connecting_to_sql.Rmd index 3d82067..6605b48 100644 --- a/vignettes/connecting_to_sql.Rmd +++ b/vignettes/connecting_to_sql.Rmd @@ -62,36 +62,26 @@ sql_query <- dfeR::get_clean_sql("path_to_sql_file.sql") Now that the SQL query is saved as a variable in the R environment you can pass that into a function to execute against the database. There's a number of potential ways to do this, though a common way is to use `dbGetQuery()` from the [DBI package](https://dbi.r-dbi.org/), setting the statement as your cleaned SQL query. +It's important to note that `dbGetQuery()` is intended to work with 'SELECT' style queries only. If you're doing something that isn't a 'SELECT' query, such as writing back into SQL, consider using the `dbExecute()` or `dbSendQuery()` functions from the [DBI package](https://dbi.r-dbi.org/) instead. + ```{r executing sql query, eval=FALSE} sql_query_result <- DBI::dbGetQuery(con, statement = sql_query) ``` -## Troubleshooting - -### Known issues - -The `get_clean_sql()` function does not automatically make every SQL script executable using `dbGetQuery()` from the [DBI package](https://dbi.r-dbi.org/). Here are the currently known scenarios that may cause unexpected behaviours, and need addressing in the SQL script itself. - -1. Title? -``` {SQL title} -USE [SWFC_Project_Restricted] -GO -``` +As a side note, if your SQL query is short, you could write it directly into the function such as: -2. Stop or end symbols - -The following script shows where two separate outputs would be generated by the SQL script - you'll find that... - -``` {SQL stop symbols} -Something - -Somethingelse +```{r executing sql query inline, eval=FALSE} +sql_query_result <- DBI::dbGetQuery( + con, + statement = "SELECT * FROM [my_database_table]" +) ``` +### Troubleshooting -### Other errors +Our first advice if you hit an error, would be to check that your SQL script runs in SQL Server Management Studio (SSMS) and is a valid SQL 'SELECT' query that returns a single output. -Our first advice if you hit an error, would be to set additional settings while cleaning the SQL script. You can do this with the `additional_settings` argument in the `get_clean_sql()` function. +Assuming that it runs fine in SSMS, the next thing to try is to set additional settings while cleaning the SQL script. You can do this with the `additional_settings` argument in the `get_clean_sql()` function. ```{r reading clean sql additional, eval=FALSE} sql_query <- dfeR::get_clean_sql( From a7ece2f0de78ce762bcf618e6a1696e092219b82 Mon Sep 17 00:00:00 2001 From: cjrace <52536248+cjrace@users.noreply.github.com> Date: Sat, 23 Mar 2024 15:45:49 +0000 Subject: [PATCH 22/22] update wordlist and rebuild package docs --- inst/WORDLIST | 4 ++++ man/dfeR-package.Rd | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/inst/WORDLIST b/inst/WORDLIST index a4f4c2c..8ffe17f 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -2,10 +2,14 @@ CMD Codecov DfE Lifecycle +ORCID +SSMS ay dbplyr dfeshiny +fy lauraselby odbc pkgdown renv +sql diff --git a/man/dfeR-package.Rd b/man/dfeR-package.Rd index 8ef5953..7bb801b 100644 --- a/man/dfeR-package.Rd +++ b/man/dfeR-package.Rd @@ -28,5 +28,11 @@ Authors: \item Adam Robinson } +Other contributors: +\itemize{ + \item Jen Machin \email{jen.machin@education.gov.uk} [contributor] + \item Rich Bielby \email{richard.bielby@education.gov.uk} (\href{https://orcid.org/0000-0001-9070-9969}{ORCID}) [contributor] +} + } \keyword{internal}