Skip to content

Commit

Permalink
Merge pull request #483 from tidyverse/simplify-transmute
Browse files Browse the repository at this point in the history
Use `mutate(.keep = "none")` in `transmute()`
  • Loading branch information
markfairbanks authored Jan 24, 2025
2 parents 0fa0d04 + fcdd5cd commit 75310e3
Show file tree
Hide file tree
Showing 7 changed files with 33 additions and 281 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@

* `print.dtplyr_step()` gains `n`, `max_extra_cols`, and `max_footer_lines` args (#464)

* `transmute()` preserves row count and avoids unnecessary copies (#470)

# dtplyr 1.3.1

* Fix for failing R CMD check.
Expand Down
43 changes: 6 additions & 37 deletions R/step-subset-transmute.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,41 +13,10 @@
#' dt <- lazy_dt(dplyr::starwars)
#' dt %>% transmute(name, sh = paste0(species, "/", homeworld))
transmute.dtplyr_step <- function(.data, ...) {
dots <- capture_new_vars(.data, ...)
dots_list <- process_new_vars(.data, dots)
dots <- dots_list$dots

groups <- group_vars(.data)
if (!is_empty(groups)) {
# TODO could check if there is actually anything mutated, e.g. to avoid
# DT[, .(x = x)]
is_group_var <- names(dots) %in% groups
group_dots <- dots[is_group_var]

.data <- mutate(ungroup(.data), !!!group_dots)
.data <- group_by(.data, !!!syms(groups))

dots <- dots[!is_group_var]
}

if (is_empty(dots)) {
# grouping variables have been removed from `dots` so `select()` would
# produce a message "Adding grouping vars".
# As `dplyr::transmute()` doesn't generate a message when adding group vars
# we can also leave it away here
return(select(.data, !!!group_vars(.data)))
}

if (!dots_list$use_braces) {
j <- call2(".", !!!dots)
} else {
j <- mutate_with_braces(dots)$expr
}
vars <- union(group_vars(.data), names(dots))
out <- step_subset_j(.data, vars = vars, j = j)
if (dots_list$need_removal_step) {
out <- select(out, -tidyselect::all_of(dots_list$vars_removed))
}

out
out <- mutate(.data, ..., .keep = "none")
cols_expr <- names(capture_new_vars(.data, ...))
cols_group <- group_vars(.data)
cols_group <- setdiff(cols_group, cols_expr)
cols_retain <- c(cols_group, cols_expr)
select(out, any_of(cols_retain))
}
7 changes: 7 additions & 0 deletions tests/testthat/_snaps/step-call.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@
Output
setnames(copy(DT), c("a", "b", "c"), toupper)

# can compute distinct computed variables

Code
dt %>% distinct(z = x + y) %>% show_query()
Output
unique(copy(dt)[, `:=`(z = x + y)][, `:=`(c("x", "y"), NULL)])

# errors are raised

Code
Expand Down
5 changes: 2 additions & 3 deletions tests/testthat/test-step-call.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,8 @@ test_that("keeps all variables if requested", {
test_that("can compute distinct computed variables", {
dt <- lazy_dt(data.table(x = c(1, 1), y = c(1, 2)), "dt")

expect_equal(
dt %>% distinct(z = x + y) %>% show_query(),
expr(unique(dt[, .(z = x + y)]))
expect_snapshot(
dt %>% distinct(z = x + y) %>% show_query()
)

expect_equal(
Expand Down
5 changes: 0 additions & 5 deletions tests/testthat/test-step-mutate.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,6 @@ test_that("generates single calls as expect", {
dt %>% group_by(x) %>% mutate(x2 = x * 2) %>% show_query(),
expr(copy(DT)[, `:=`(x2 = x * 2), by = .(x)])
)

expect_equal(
dt %>% transmute(x2 = x * 2) %>% show_query(),
expr(DT[, .(x2 = x * 2)])
)
})

test_that("mutate generates compound expression if needed", {
Expand Down
5 changes: 0 additions & 5 deletions tests/testthat/test-step-subset-summarise.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,6 @@ test_that("simple calls generate expected translations", {
dt %>% summarise(x = mean(x)) %>% show_query(),
expr(DT[, .(x = mean(x))])
)

expect_equal(
dt %>% transmute(x) %>% show_query(),
expr(DT[, .(x = x)])
)
})

test_that("can use with across", {
Expand Down
247 changes: 16 additions & 231 deletions tests/testthat/test-step-subset-transmute.R
Original file line number Diff line number Diff line change
@@ -1,249 +1,34 @@
test_that("simple calls generate expected translations", {
test_that("works", {
dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT")

expect_equal(
dt %>% transmute(x) %>% show_query(),
expr(DT[, .(x = x)])
dt %>% transmute(x) %>% collect(),
dt %>% mutate(x, .keep = "none") %>% collect()
)
})

test_that("transmute generates compound expression if needed", {
dt <- lazy_dt(data.table(x = 1, y = 2), "DT")

expect_equal(
dt %>% transmute(x2 = x * 2, x4 = x2 * 2) %>% show_query(),
expr(DT[, {
x2 <- x * 2
x4 <- x2 * 2
.(x2, x4)
}])
)
})

test_that("allows multiple assignment to the same variable", {
dt <- lazy_dt(data.table(x = 1, y = 2), "DT")

# when nested
expect_equal(
dt %>% transmute(x = x * 2, x = x * 2) %>% show_query(),
expr(DT[, {
x <- x * 2
x <- x * 2
.(x)
}])
)

# when not nested
expect_equal(
dt %>% transmute(z = 2, y = 3) %>% show_query(),
expr(DT[, .(z = 2, y = 3)])
)
})


test_that("groups are respected", {
dt <- lazy_dt(data.table(x = 1), "DT") %>% group_by(x) %>% transmute(y = 2)

expect_equal(dt$vars, c("x", "y"))
expect_equal(
dt %>% show_query(),
expr(DT[, .(y = 2), keyby = .(x)])
)
})

test_that("grouping vars can be transmuted", {
dt <- lazy_dt(data.table(x = 1), "DT") %>% group_by(x) %>% transmute(x = x + 1, y = 2)
test_that("empty dots preserves groups", {
dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") %>%
group_by(y)

expect_equal(dt$vars, c("x", "y"))
expect_equal(dt$groups, "x")
expect_equal(
dt %>% show_query(),
expr(copy(DT)[, `:=`(x = x + 1)][, .(y = 2), keyby = .(x)])
)

skip("transmuting grouping vars with nesting is not supported")
dt <- lazy_dt(data.table(x = 1), "DT") %>%
group_by(x) %>%
transmute(x = x + 1, y = x + 1, x = y + 1)
res <- dt %>% transmute() %>% collect()

expect_equal(dt$vars, c("x", "y"))
expect_equal(
dt %>% collect(),
tibble(x = 4, y = 3) %>% group_by(x)
)
expect_equal(names(res), "y")
})

test_that("empty transmute works", {
dt <- lazy_dt(data.frame(x = 1), "DT")
expect_equal(transmute(dt) %>% show_query(), expr(DT[, 0L]))
expect_equal(transmute(dt)$vars, character())
expect_equal(transmute(dt, !!!list()) %>% show_query(), expr(DT[, 0L]))
test_that("preserves column order", {
dt <- lazy_dt(data.table(x = 1, y = 1), "DT")

dt_grouped <- lazy_dt(data.frame(x = 1), "DT") %>% group_by(x)
expect_equal(transmute(dt_grouped)$vars, "x")
})
res <- dt %>% transmute(y, x) %>% collect()

test_that("only transmuting groups works", {
dt <- lazy_dt(data.frame(x = 1)) %>% group_by(x)
expect_equal(transmute(dt, x) %>% collect(), dt %>% collect())
expect_equal(transmute(dt, x)$vars, "x")
expect_equal(names(res), c("y", "x"))
})

test_that("across() can access previously created variables", {
dt <- lazy_dt(data.frame(x = 1), "DT")
step <- transmute(dt, y = 2, across(y, sqrt))
expect_equal(
collect(step),
tibble(y = sqrt(2))
)
expect_equal(
show_query(step),
expr(DT[, {
y <- 2
y <- sqrt(y)
.(y)
}])
)
})

test_that("new columns take precedence over global variables", {
dt <- lazy_dt(data.frame(x = 1), "DT")
y <- 'global var'
step <- transmute(dt, y = 2, z = y + 1)
expect_equal(
collect(step),
tibble(y = 2, z = 3)
)
expect_equal(
show_query(step),
expr(DT[, {
y <- 2
z <- y + 1
.(y, z)
}])
)
})

# var = NULL -------------------------------------------------------------

test_that("var = NULL when var is in original data", {
dt <- lazy_dt(data.frame(x = 1), "DT")
step <- dt %>% transmute(x = 2, z = x*2, x = NULL)
expect_equal(
collect(step),
tibble(z = 4)
)
expect_equal(
step$vars,
"z"
)
expect_equal(
show_query(step),
expr(DT[, {
x <- 2
z <- x * 2
.(x, z)
}][, `:=`("x", NULL)])
)
})

test_that("var = NULL when var is in final output", {
dt <- lazy_dt(data.frame(x = 1), "DT")
step <- transmute(dt, y = NULL, y = 3)
expect_equal(
collect(step),
tibble(y = 3)
)
expect_equal(
show_query(step),
expr(DT[, {
y <- NULL
y <- 3
.(y)
}])
)
})

test_that("temp var with nested arguments", {
dt <- lazy_dt(data.frame(x = 1), "DT")
step <- transmute(dt, y = 2, z = y*2, y = NULL)
expect_equal(
collect(step),
tibble(z = 4)
)
expect_equal(
step$vars,
"z"
)
expect_equal(
show_query(step),
expr(DT[, {
y <- 2
z <- y * 2
.(y, z)
}][, `:=`("y", NULL)])
)
})

test_that("temp var with no new vars added", {
dt <- lazy_dt(data.frame(x = 1), "DT")
step <- transmute(dt, y = 2, y = NULL)
expect_equal(
collect(step),
tibble()
)
expect_equal(
step$vars,
character()
)
expect_equal(
show_query(step),
expr(DT[, {
y <- 2
.(y)
}][, `:=`("y", NULL)])
)
})

test_that("var = NULL works when data is grouped", {
dt <- lazy_dt(data.frame(x = 1, g = 1), "DT") %>% group_by(g)
test_that("works correctly when column is both added and removed in the same call", {
dt <- lazy_dt(data.table(x = 1, y = 2), "DT")

# when var is in original data
step <- dt %>% transmute(x = 2, z = x*2, x = NULL)
expect_equal(
collect(step),
tibble(g = 1, z = 4) %>% group_by(g)
)
expect_equal(
step$vars,
c("g", "z")
)
expect_equal(
show_query(step),
expr(DT[, {
x <- 2
z <- x * 2
.(x, z)
}, keyby = .(g)][, `:=`("x", NULL)])
)
res <- dt %>% transmute(y, z = 3, z = NULL) %>% collect()

# when var is not in original data
step <- transmute(dt, y = 2, z = y*2, y = NULL)
expect_equal(
collect(step),
tibble(g = 1, z = 4) %>% group_by(g)
)
expect_equal(
step$vars,
c("g", "z")
)
expect_equal(
show_query(step),
expr(DT[, {
y <- 2
z <- y * 2
.(y, z)
}, keyby = .(g)][, `:=`("y", NULL)])
)
expect_equal(names(res), "y")
})

0 comments on commit 75310e3

Please sign in to comment.