Skip to content

Commit

Permalink
Refine select inline criteria to keep arranged computed columns (#…
Browse files Browse the repository at this point in the history
…1446)

Fixes #1437
  • Loading branch information
ejneer authored Mar 4, 2024
1 parent 39d9acf commit 0739c75
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 4 deletions.
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# dbplyr (development version)

* Refined the `select()` inlining criteria to keep computed columns used to
`arrange()` subqueries that are eliminated by a subsequent select (@ejneer,
#1437).

* dbplyr now exports some tools to work with the internal `table_path` class
which is useful for certain backends that need to work with this
data structure (#1300).
Expand Down
19 changes: 15 additions & 4 deletions R/verb-select.R
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ add_select <- function(lazy_query, vars) {
}

is_select <- is_lazy_select_query(lazy_query)
select_can_be_inlined <- is_bijective_projection(vars, vars_data) || !is_true(lazy_query$distinct)
select_can_be_inlined <- select_can_be_inlined(lazy_query, vars)
if (is_select && select_can_be_inlined) {
idx <- vctrs::vec_match(vars, vars_data)

Expand All @@ -131,9 +131,20 @@ add_select <- function(lazy_query, vars) {
)
}

is_bijective_projection <- function(vars, names_prev) {
vars <- unname(vars)
identical(sort(vars), names_prev)
select_can_be_inlined <- function(lazy_query, vars) {
# all computed columns used for ordering (if any) must be present
computed_flag <- purrr::map_lgl(lazy_query$select$expr, is_quosure)
computed_columns <- lazy_query$select$name[computed_flag]

order_vars <- purrr::map_chr(lazy_query$order_by, as_label)
ordered_present <- all(intersect(computed_columns, order_vars) %in% vars)

# if the projection is distinct, it must be bijective
is_distinct <- is_true(lazy_query$distinct)
is_bijective_projection <- identical(sort(unname(vars)), op_vars(lazy_query))
distinct_is_bijective <- !is_distinct || is_bijective_projection

ordered_present && distinct_is_bijective
}

rename_groups <- function(lazy_query, vars) {
Expand Down
16 changes: 16 additions & 0 deletions tests/testthat/_snaps/verb-select.md
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,22 @@
Error in `select()`:
! This tidyselect interface doesn't support predicates.

# arranged computed columns are not inlined away

Code
lf %>% mutate(z = 1) %>% arrange(x, z) %>% select(x)
Condition
Warning:
ORDER BY is ignored in subqueries without LIMIT
i Do you need to move arrange() later in the pipeline or use window_order() instead?
Output
<SQL>
SELECT `x`
FROM (
SELECT `df`.*, 1.0 AS `z`
FROM `df`
) AS `q01`

# multiple selects are collapsed

Code
Expand Down
52 changes: 52 additions & 0 deletions tests/testthat/test-verb-select.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,31 @@ test_that("select after distinct produces subquery", {
expect_true(lq$x$distinct)
})

test_that("select after arrange produces subquery, if needed", {
lf <- lazy_frame(x = 1)

# shouldn't inline
out <- lf %>% mutate(z = 2) %>% arrange(x, z) %>% select(x)
# should inline
out2 <- lf %>% mutate(z = 2) %>% arrange(x, z) %>% select(x, z)

inner_query <- out$lazy_query$x
expect_s3_class(inner_query, "lazy_select_query")
expect_equal(inner_query$order_by, list(quo(x), quo(z)), ignore_formula_env = TRUE)
expect_equal(
op_vars(inner_query),
c("x", "z")
)
expect_equal(op_vars(out$lazy_query), "x")

# order vars in a subquery are dropped
expect_equal(
inner_query[setdiff(names(inner_query), "order_vars")],
out2$lazy_query[setdiff(names(out2$lazy_query), "order_vars")]
)
})


test_that("rename/relocate after distinct is inlined #1141", {
lf <- lazy_frame(x = 1, y = 1:2)
expect_snapshot({
Expand Down Expand Up @@ -284,6 +309,33 @@ test_that("where() isn't suppored", {
})
})

test_that("arranged computed columns are not inlined away", {
lf <- lazy_frame(x = 1)

# shouldn't inline
out <- lf %>% mutate(z = 2) %>% arrange(x, z) %>% select(x)
# should inline
out2 <- lf %>% mutate(z = 2) %>% arrange(x, z) %>% select(x, z)

inner_query <- out$lazy_query$x
expect_snapshot({
lf %>% mutate(z = 1) %>% arrange(x, z) %>% select(x)
})
expect_s3_class(inner_query, "lazy_select_query")
expect_equal(
inner_query$order_by,
list(quo(x), quo(z)),
ignore_formula_env = TRUE
)
expect_equal(op_vars(inner_query), c("x", "z"))
expect_equal(op_vars(out$lazy_query), "x")
expect_equal(
# order vars in a subquery are dropped
inner_query[setdiff(names(inner_query), "order_vars")],
out2$lazy_query[setdiff(names(out2$lazy_query), "order_vars")]
)
})

# sql_render --------------------------------------------------------------

test_that("multiple selects are collapsed", {
Expand Down

0 comments on commit 0739c75

Please sign in to comment.