From 32049afb75aa6b950d459a2797da0e059353ddac Mon Sep 17 00:00:00 2001 From: Hugo Gruson <10783929+Bisaloo@users.noreply.github.com> Date: Fri, 7 Jun 2024 14:23:56 +0200 Subject: [PATCH 01/11] Draft sequence_linter linter --- R/sequence_linter.R | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 R/sequence_linter.R diff --git a/R/sequence_linter.R b/R/sequence_linter.R new file mode 100644 index 000000000..90cc75784 --- /dev/null +++ b/R/sequence_linter.R @@ -0,0 +1,36 @@ +#' Require usage of `sequence()` where possible +#' +#' [sequence()] is a base R function equivalent to `unlist(lapply(x, seq_len))`, +#' but faster and more readable. +#' +#' @examples +#' # will produce lints +#' lint( +#' text = "unlist(sapply(x, seq_len))", +#' linters = sequence_linter() +#' ) +#' +#' lint( +#' text = "unlist(lapply(x, seq_len))", +#' linters = sequence_linter() +#' ) +#' +#' # okay +#' lint( +#' text = "sapply(x, seq_len)", +#' linters = lengths_linter() +#' ) +#' +#' @evalRd rd_tags("sequence_linter") +#' @seealso [linters] for a complete list of linters available in lintr. +#' @export +sequence_linter <- make_linter_from_function_xpath( + # only a small subset of loop functionals need to be considered because + # seq_len() output will not have constant length + function_names = c("sapply", "lapply", "map"), + xpath = "parent::expr/parent::expr[ + preceding-sibling::expr/SYMBOL_FUNCTION_CALL[text() = 'unlist'] + and expr/SYMBOL[text() = 'seq_len'] + ]", + lint_message = "Use sequence() to generate a concatenated sequence of seq_len()." +) From 332f861c5addebe72aae5bebee3b4be6270cab31 Mon Sep 17 00:00:00 2001 From: Hugo Gruson <10783929+Bisaloo@users.noreply.github.com> Date: Fri, 21 Jun 2024 09:50:53 +0200 Subject: [PATCH 02/11] Fold sequence_linter() into seq_linter() --- R/seq_linter.R | 31 ++++++++++++++++++++++++++++++- R/sequence_linter.R | 36 ------------------------------------ man/seq_linter.Rd | 10 ++++++++++ 3 files changed, 40 insertions(+), 37 deletions(-) delete mode 100644 R/sequence_linter.R diff --git a/R/seq_linter.R b/R/seq_linter.R index decc02c66..2d91592a1 100644 --- a/R/seq_linter.R +++ b/R/seq_linter.R @@ -26,6 +26,11 @@ #' linters = seq_linter() #' ) #' +#' lint( +#' text = "unlist(lapply(x, seq_len))", +#' linters = seq_linter() +#' ) +#' #' # okay #' lint( #' text = "seq_along(x)", @@ -42,6 +47,11 @@ #' linters = seq_linter() #' ) #' +#' lint( +#' text = "sapply(x, seq_len)", +#' linters = seq_linter() +#' ) +#' #' @evalRd rd_tags("seq_linter") #' @seealso [linters] for a complete list of linters available in lintr. #' @export @@ -66,6 +76,15 @@ seq_linter <- function() { ] ") + map_funcs <- xp_text_in_table(c("sapply", "lapply", "map")) + sequence_xpath <- glue(" + //SYMBOL_FUNCTION_CALL[ { map_funcs } ] + /parent::expr/parent::expr[ + preceding-sibling::expr/SYMBOL_FUNCTION_CALL[text() = 'unlist'] + and expr/SYMBOL[text() = 'seq_len'] + ]" + ) + ## The actual order of the nodes is document order ## In practice we need to handle length(x):1 get_fun <- function(expr, n) { @@ -113,6 +132,16 @@ seq_linter <- function() { ) ) - xml_nodes_to_lints(badx, source_expression, lint_message, type = "warning") + seq_lints <- xml_nodes_to_lints(badx, source_expression, lint_message, type = "warning") + + potential_sequence_calls <- xml_find_all(xml, sequence_xpath) + sequence_lints <- xml_nodes_to_lints( + potential_sequence_calls, + source_expression, + "Use sequence() to generate a concatenated sequence of seq_len().", + type = "warning" + ) + + c(seq_lints, sequence_lints) }) } diff --git a/R/sequence_linter.R b/R/sequence_linter.R deleted file mode 100644 index 90cc75784..000000000 --- a/R/sequence_linter.R +++ /dev/null @@ -1,36 +0,0 @@ -#' Require usage of `sequence()` where possible -#' -#' [sequence()] is a base R function equivalent to `unlist(lapply(x, seq_len))`, -#' but faster and more readable. -#' -#' @examples -#' # will produce lints -#' lint( -#' text = "unlist(sapply(x, seq_len))", -#' linters = sequence_linter() -#' ) -#' -#' lint( -#' text = "unlist(lapply(x, seq_len))", -#' linters = sequence_linter() -#' ) -#' -#' # okay -#' lint( -#' text = "sapply(x, seq_len)", -#' linters = lengths_linter() -#' ) -#' -#' @evalRd rd_tags("sequence_linter") -#' @seealso [linters] for a complete list of linters available in lintr. -#' @export -sequence_linter <- make_linter_from_function_xpath( - # only a small subset of loop functionals need to be considered because - # seq_len() output will not have constant length - function_names = c("sapply", "lapply", "map"), - xpath = "parent::expr/parent::expr[ - preceding-sibling::expr/SYMBOL_FUNCTION_CALL[text() = 'unlist'] - and expr/SYMBOL[text() = 'seq_len'] - ]", - lint_message = "Use sequence() to generate a concatenated sequence of seq_len()." -) diff --git a/man/seq_linter.Rd b/man/seq_linter.Rd index 02c46af72..0666aca40 100644 --- a/man/seq_linter.Rd +++ b/man/seq_linter.Rd @@ -34,6 +34,11 @@ lint( linters = seq_linter() ) +lint( + text = "unlist(lapply(x, seq_len))", + linters = seq_linter() +) + # okay lint( text = "seq_along(x)", @@ -50,6 +55,11 @@ lint( linters = seq_linter() ) +lint( + text = "sapply(x, seq_len)", + linters = seq_linter() +) + } \seealso{ \link{linters} for a complete list of linters available in lintr. From 405644ec45583be605eb28467ed1d73564ea3c08 Mon Sep 17 00:00:00 2001 From: Hugo Gruson <10783929+Bisaloo@users.noreply.github.com> Date: Fri, 21 Jun 2024 09:56:50 +0200 Subject: [PATCH 03/11] Add tests --- tests/testthat/test-seq_linter.R | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/testthat/test-seq_linter.R b/tests/testthat/test-seq_linter.R index 04333f752..c0813dc87 100644 --- a/tests/testthat/test-seq_linter.R +++ b/tests/testthat/test-seq_linter.R @@ -123,6 +123,24 @@ test_that("reverse seq is ok", { ) }) +test_that("finds potential sequence() replacements", { + linter <- seq_linter() + lint_msg <- rex::rex("Use sequence()") + + expect_lint( + "unlist(lapply(x, seq_len))", + lint_msg, + linter + ) + + # Even for prefixed purrr:: calls + expect_lint( + "unlist(purrr::map(x, seq_len))", + lint_msg, + linter + ) +}) + test_that("Message vectorization works for multiple lints", { linter <- seq_linter() @@ -173,6 +191,18 @@ test_that("Message vectorization works for multiple lints", { ), linter ) + + expect_lint( + trim_some("{ + 1:NROW(x) + unlist(lapply(y, seq_len)) + }"), + list( + list(rex::rex("seq_len(NROW(...))", anything, "1:NROW(...)"), line_number = 2L), + list(rex::rex("sequence()"), line_number = 3L) + ), + linter + ) }) test_that("Message recommends rev() correctly", { From 999034c5757cc6bcd57e80025b3b99e4066b1d51 Mon Sep 17 00:00:00 2001 From: Hugo Gruson <10783929+Bisaloo@users.noreply.github.com> Date: Fri, 21 Jun 2024 09:58:45 +0200 Subject: [PATCH 04/11] Add NEWS bullet --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 77ace633f..f8fe8a805 100644 --- a/NEWS.md +++ b/NEWS.md @@ -32,6 +32,7 @@ ## New and improved features +* `seq_linter()` now includes lints to inform about missed opportunities to use the `sequence()` base R function (#2618, @Bisaloo) * More helpful errors for invalid configs (#2253, @MichaelChirico). * `library_call_linter()` is extended + to encourage all packages to be attached with `library(symbol)`, not `library("symbol", character.only = TRUE)` or "vectorized" approaches looping over package names (part of #884, @MichaelChirico). From 75e56a4fb0fd6d842f0dff3284a34516b8ad6b37 Mon Sep 17 00:00:00 2001 From: Hugo Gruson <10783929+Bisaloo@users.noreply.github.com> Date: Sat, 22 Jun 2024 09:01:28 +0200 Subject: [PATCH 05/11] Switch to xml_find_function_calls --- R/seq_linter.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/seq_linter.R b/R/seq_linter.R index 2d91592a1..902da501c 100644 --- a/R/seq_linter.R +++ b/R/seq_linter.R @@ -76,10 +76,9 @@ seq_linter <- function() { ] ") - map_funcs <- xp_text_in_table(c("sapply", "lapply", "map")) + map_funcs <- c("sapply", "lapply", "map") sequence_xpath <- glue(" - //SYMBOL_FUNCTION_CALL[ { map_funcs } ] - /parent::expr/parent::expr[ + parent::expr/parent::expr[ preceding-sibling::expr/SYMBOL_FUNCTION_CALL[text() = 'unlist'] and expr/SYMBOL[text() = 'seq_len'] ]" @@ -134,7 +133,8 @@ seq_linter <- function() { seq_lints <- xml_nodes_to_lints(badx, source_expression, lint_message, type = "warning") - potential_sequence_calls <- xml_find_all(xml, sequence_xpath) + xml_map_calls <- source_expression$xml_find_function_calls(map_funcs) + potential_sequence_calls <- xml_find_all(xml_map_calls, sequence_xpath) sequence_lints <- xml_nodes_to_lints( potential_sequence_calls, source_expression, From 69b4ad2b9ed860dbfda64e7c03fbc9e358fa8a6a Mon Sep 17 00:00:00 2001 From: Hugo Gruson <10783929+Bisaloo@users.noreply.github.com> Date: Sat, 22 Jun 2024 09:04:37 +0200 Subject: [PATCH 06/11] Change element order in xpath --- R/seq_linter.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/seq_linter.R b/R/seq_linter.R index 902da501c..55deec789 100644 --- a/R/seq_linter.R +++ b/R/seq_linter.R @@ -78,9 +78,9 @@ seq_linter <- function() { map_funcs <- c("sapply", "lapply", "map") sequence_xpath <- glue(" - parent::expr/parent::expr[ - preceding-sibling::expr/SYMBOL_FUNCTION_CALL[text() = 'unlist'] - and expr/SYMBOL[text() = 'seq_len'] + parent::expr[following-sibling::expr/SYMBOL[text() = 'seq_len']] + /parent::expr/parent::expr[ + expr/SYMBOL_FUNCTION_CALL[text() = 'unlist'] ]" ) From 396a239227ee06af3543ff794ad6e9ca57fde702 Mon Sep 17 00:00:00 2001 From: Hugo Gruson <10783929+Bisaloo@users.noreply.github.com> Date: Sat, 22 Jun 2024 09:06:21 +0200 Subject: [PATCH 07/11] Support lapply(x, seq) --- R/seq_linter.R | 5 +++-- tests/testthat/test-seq_linter.R | 6 ++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/R/seq_linter.R b/R/seq_linter.R index 55deec789..a1011aed2 100644 --- a/R/seq_linter.R +++ b/R/seq_linter.R @@ -76,9 +76,10 @@ seq_linter <- function() { ] ") - map_funcs <- c("sapply", "lapply", "map") + map_funcs <- c("sapply", "lapply", "map") + seq_funcs <- xp_text_in_table(c("seq_len", "seq")) sequence_xpath <- glue(" - parent::expr[following-sibling::expr/SYMBOL[text() = 'seq_len']] + parent::expr[following-sibling::expr/SYMBOL[ {seq_funcs} ]] /parent::expr/parent::expr[ expr/SYMBOL_FUNCTION_CALL[text() = 'unlist'] ]" diff --git a/tests/testthat/test-seq_linter.R b/tests/testthat/test-seq_linter.R index c0813dc87..0717e56f1 100644 --- a/tests/testthat/test-seq_linter.R +++ b/tests/testthat/test-seq_linter.R @@ -133,6 +133,12 @@ test_that("finds potential sequence() replacements", { linter ) + expect_lint( + "unlist(lapply(x, seq))", + lint_msg, + linter + ) + # Even for prefixed purrr:: calls expect_lint( "unlist(purrr::map(x, seq_len))", From 231abff604d831ff0b2263dc2eaebb0f2bfe1829 Mon Sep 17 00:00:00 2001 From: Hugo Gruson <10783929+Bisaloo@users.noreply.github.com> Date: Sat, 22 Jun 2024 09:21:50 +0200 Subject: [PATCH 08/11] Simplify xpath --- R/seq_linter.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/R/seq_linter.R b/R/seq_linter.R index a1011aed2..3291de2bc 100644 --- a/R/seq_linter.R +++ b/R/seq_linter.R @@ -80,9 +80,7 @@ seq_linter <- function() { seq_funcs <- xp_text_in_table(c("seq_len", "seq")) sequence_xpath <- glue(" parent::expr[following-sibling::expr/SYMBOL[ {seq_funcs} ]] - /parent::expr/parent::expr[ - expr/SYMBOL_FUNCTION_CALL[text() = 'unlist'] - ]" + /parent::expr[preceding-sibling::expr/SYMBOL_FUNCTION_CALL[text() = 'unlist']]" ) ## The actual order of the nodes is document order From cfc17ee392ffcb0b4daed2f4188a0b205ce9ee68 Mon Sep 17 00:00:00 2001 From: Hugo Gruson <10783929+Bisaloo@users.noreply.github.com> Date: Sat, 22 Jun 2024 09:37:55 +0200 Subject: [PATCH 09/11] Do not lint seq() calls with extra arguments --- R/seq_linter.R | 10 +++++++--- tests/testthat/test-seq_linter.R | 10 ++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/R/seq_linter.R b/R/seq_linter.R index 3291de2bc..79cbeb29d 100644 --- a/R/seq_linter.R +++ b/R/seq_linter.R @@ -78,10 +78,14 @@ seq_linter <- function() { map_funcs <- c("sapply", "lapply", "map") seq_funcs <- xp_text_in_table(c("seq_len", "seq")) + # count(expr) = 3 because we only want seq() calls without extra arguments sequence_xpath <- glue(" - parent::expr[following-sibling::expr/SYMBOL[ {seq_funcs} ]] - /parent::expr[preceding-sibling::expr/SYMBOL_FUNCTION_CALL[text() = 'unlist']]" - ) + parent::expr/parent::expr[ + count(expr) = 3 + and expr/SYMBOL[ {seq_funcs} ] + and preceding-sibling::expr/SYMBOL_FUNCTION_CALL[text() = 'unlist'] + ] + ") ## The actual order of the nodes is document order ## In practice we need to handle length(x):1 diff --git a/tests/testthat/test-seq_linter.R b/tests/testthat/test-seq_linter.R index 0717e56f1..be92fc7eb 100644 --- a/tests/testthat/test-seq_linter.R +++ b/tests/testthat/test-seq_linter.R @@ -147,6 +147,16 @@ test_that("finds potential sequence() replacements", { ) }) +test_that("sequence() is not recommended for complex seq() calls", { + linter <- seq_linter() + + expect_lint( + "unlist(lapply(x, seq, from = 2))", + NULL, + linter + ) +}) + test_that("Message vectorization works for multiple lints", { linter <- seq_linter() From 072ef1317e46d54d4dc44173ab8469d5f9c3f59f Mon Sep 17 00:00:00 2001 From: Hugo Gruson <10783929+Bisaloo@users.noreply.github.com> Date: Sat, 22 Jun 2024 09:49:46 +0200 Subject: [PATCH 10/11] Edit false positive example --- R/seq_linter.R | 2 +- man/seq_linter.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/seq_linter.R b/R/seq_linter.R index 79cbeb29d..06ab5c70f 100644 --- a/R/seq_linter.R +++ b/R/seq_linter.R @@ -48,7 +48,7 @@ #' ) #' #' lint( -#' text = "sapply(x, seq_len)", +#' text = "sequence(x)", #' linters = seq_linter() #' ) #' diff --git a/man/seq_linter.Rd b/man/seq_linter.Rd index 0666aca40..f2eca1c41 100644 --- a/man/seq_linter.Rd +++ b/man/seq_linter.Rd @@ -56,7 +56,7 @@ lint( ) lint( - text = "sapply(x, seq_len)", + text = "sequence(x)", linters = seq_linter() ) From 2a8d243920e44930a966fe9f0d223910818e1f6d Mon Sep 17 00:00:00 2001 From: Hugo Gruson <10783929+Bisaloo@users.noreply.github.com> Date: Sat, 22 Jun 2024 10:06:01 +0200 Subject: [PATCH 11/11] Add more info about new lints in NEWS --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index f8fe8a805..d4c3288d9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -32,7 +32,7 @@ ## New and improved features -* `seq_linter()` now includes lints to inform about missed opportunities to use the `sequence()` base R function (#2618, @Bisaloo) +* `seq_linter()` now includes lints to inform about missed opportunities to use the `sequence()` base R function, e.g. `unlist(lapply(ints, seq))` (#2618, @Bisaloo) * More helpful errors for invalid configs (#2253, @MichaelChirico). * `library_call_linter()` is extended + to encourage all packages to be attached with `library(symbol)`, not `library("symbol", character.only = TRUE)` or "vectorized" approaches looping over package names (part of #884, @MichaelChirico).