ropensci · tanho63 · Dec 30, 2023 · Dec 27, 2023 · Dec 27, 2023 · Dec 27, 2023
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -43,9 +43,8 @@ Imports:
     memoise,
     rlang
 Suggests:
+    arrow,
     spelling,
-    duckdbfs,
-    duckdb,
     readr,
     covr,
     testthat,

diff --git a/NAMESPACE b/NAMESPACE
@@ -6,7 +6,9 @@ export(pb_download)
 export(pb_download_url)
 export(pb_list)
 export(pb_new_release)
+export(pb_read)
 export(pb_release_create)
 export(pb_release_delete)
 export(pb_releases)
 export(pb_upload)
+export(pb_write)
diff --git a/NEWS.md b/NEWS.md
@@ -8,6 +8,8 @@ before trying API download URLs. This should reduce/eliminate effect of API rate
 limits for pb_download. [#109]
 * `"latest"` release now aligns with GitHub's "latest" release definition [#113]
 * `pb_download_url()` now can return choice of "browser" or "api" download URLs [#116]
+* Add new functions `pb_read()` and `pb_write()` as convenience wrappers around
+the pattern of downloading to a temporary file and then reading into memory
+(`pb_read()`), or writing to a temporary file and then uploading (`pb_write()`). [#97]
 
 # piggyback 0.1.5
 

diff --git a/R/pb_download_url.R b/R/pb_download_url.R
@@ -70,7 +70,7 @@ pb_download_url <- function(file = NULL,
     file <- file[file %in% df$file_name]
   }
 
-  if(length(file) == 0) return(cli::cli_abort("No download URLs to return."))
+  if(length(file) == 0) return(cli::cli_abort("No download URLs found"))
 
   switch(
     url_type,

diff --git a/R/pb_read.R b/R/pb_read.R
@@ -0,0 +1,90 @@
+#' Read one file into memory
+#'
+#' A convenience wrapper around downloading a file from a specified
+#' repo/release to a temporary location and then reading it into memory. This
+#' convenience comes at a cost to performance efficiency, since it first
+#' downloads the data to disk and then reads the data from disk into memory. See
+#' `vignette("cloud_native")` for alternative ways to bypass this flow and work
+#' with the data directly.
+#'
+#' @param file string: file name
+#' @param repo string: GH repository name in format "owner/repo". Default
+#' `guess_repo()` tries to guess based on current working directory's git repo
+#' @param tag  string: tag for the GH release, defaults to "latest"
+#' @param read_function function: used to read in the data, where the file is
+#' passed as the first argument and any additional arguments are subsequently
+#' passed in via `...`. Default `guess_read_function(file)` will check the file
+#' extension and try to find an appropriate read function if the extension is one
+#' of rds, csv, tsv, parquet, txt, or json, and will abort if not found.
+#' @param ... additional arguments passed to `read_function` after file
+#' @param .token GitHub authentication token, see [gh::gh_token()]
+#'
+#' @export
+#' @family pb_rw
+#'
+#' @return Result of reading in the file in question.
+#' @examples \donttest{
+#' try({ # try block is to avoid CRAN issues and is not required in ordinary usage
+#'  piggyback::pb_read("mtcars.tsv.gz", repo = "cboettig/piggyback-tests")
+#' })
+#' }
+pb_read <- function(file,
+                    repo = guess_repo(),
+                    tag = "latest",
+                    read_function = guess_read_function(file),
+                    ...,
+                    .token = gh::gh_token()) {
+  stopifnot(
+    is.character(file) && length(file) == 1,
+    is.character(repo) && length(repo) == 1,
+    is.character(tag) && length(tag) == 1,
+    rlang::is_function(read_function)
+  )
+
+  # Resolve the download location once so that the download, the read and the
+  # cleanup all refer to the same path.
+  dest_dir <- tempdir(check = TRUE)
+  dest_file <- file.path(dest_dir, file)
+  on.exit(unlink(dest_file), add = TRUE)
+
+  pb_download(
+    file = file,
+    dest = dest_dir,
+    repo = repo,
+    tag = tag,
+    overwrite = TRUE,
+    .token = .token
+  )
+
+  read_function(dest_file, ...)
+}
+
+#' Guess read function from file extension
+#'
+#' This function accepts a filename and tries to return a valid function for
+#' reading it. A trailing `.gz` or `.xz` is ignored when determining the
+#' extension, and the extension is matched case-insensitively.
+#'
+#' `guess_read_function` understands the following file extensions:
+#' - rds with `readRDS`
+#' - csv, csv.gz, csv.xz with `utils::read.csv`
+#' - tsv, tsv.gz, tsv.xz with `utils::read.delim`
+#' - parquet with `arrow::read_parquet`
+#' - txt, txt.gz, txt.xz with `readLines`
+#' - json, json.gz, json.xz with `jsonlite::fromJSON`
+#'
+#' @family pb_rw
+#' @param file filename to parse
+#' @return function for reading the file, if found
+#' @keywords internal
+guess_read_function <- function(file){
+  # Strip only a literal trailing ".gz"/".xz"; an unescaped "." would also eat
+  # any other character in front of "gz"/"xz" (e.g. "a.csv_gz" -> "a.csv").
+  stem <- sub("\\.(gz|xz)$", "", file, ignore.case = TRUE)
+  file_ext <- tolower(tools::file_ext(stem))
+  if (file_ext == "parquet") rlang::check_installed("arrow")
+
+  # switch() evaluates only the selected branch, so the optional packages are
+  # touched only when their file type is requested.
+  switch(
+    file_ext,
+    "rds" = readRDS,
+    "csv" = utils::read.csv,
+    "tsv" = utils::read.delim,
+    "parquet" = arrow::read_parquet,
+    "txt" = readLines,
+    "json" = jsonlite::fromJSON,
+    cli::cli_abort("File type {.val {file_ext}} is not recognized, please provide a {.arg read_function}")
+  )
+}
diff --git a/R/pb_write.R b/R/pb_write.R
@@ -0,0 +1,84 @@
+#' Write one object to repo/release
+#'
+#' Convenience wrapper: the object is first written to a temporary file, and
+#' that file is then uploaded to the specified repo/release.
+#'
+#' @param x object: memory object to save to piggyback
+#' @param file string: file name
+#' @param repo string: GH repository name in format "owner/repo". Default
+#' `guess_repo()` tries to guess based on current working directory's git repo
+#' @param tag  string: tag for the GH release, defaults to "latest"
+#' @param write_function function: used to write an R object to file. It is
+#' called with the object as the first argument, the filename as the second
+#' argument, and anything supplied via `...` after that. Default
+#' `guess_write_function(file)` picks a write function from the file extension
+#' (one of rds, csv, tsv, parquet, txt, or json) and aborts if none is found.
+#' @param ... additional arguments passed to `write_function`
+#' @param .token GitHub authentication token, see [gh::gh_token()]
+#'
+#' @export
+#' @family pb_rw
+#'
+#' @return Called for its side effect of uploading the file to the release;
+#' returns the value of the `pb_upload()` call.
+#' @examples \donttest{
+#' \dontshow{if (interactive()) \{}
+#'   pb_write(mtcars, "mtcars.rds", repo = "tanho63/piggyback-tests")
+#'   #> ℹ Uploading to latest release: "v0.0.2".
+#'   #> ℹ Uploading mtcars.rds ...
+#'   #> |===============================================================| 100%
+#' \dontshow{\}}
+#'}
+pb_write <- function(x,
+                     file,
+                     repo = guess_repo(),
+                     tag = "latest",
+                     write_function = guess_write_function(file),
+                     ...,
+                     .token = gh::gh_token()) {
+  stopifnot(
+    is.character(file) && length(file) == 1,
+    is.character(repo) && length(repo) == 1,
+    is.character(tag) && length(tag) == 1,
+    rlang::is_function(write_function)
+  )
+
+  # Stage the object under its target name inside the session temp directory;
+  # the staged copy is removed again when the function exits.
+  staging_dir <- tempdir(check = TRUE)
+  staged_path <- file.path(staging_dir, file)
+  on.exit(try(unlink(staged_path)))
+
+  write_function(x, staged_path, ...)
+  pb_upload(staged_path, repo = repo, tag = tag, .token = .token)
+}
+
+#' Guess write function from file extension
+#'
+#' This function accepts a filename and tries to return a valid function for
+#' writing to it. A trailing `.gz` or `.xz` is ignored when determining the
+#' extension, and the extension is matched case-insensitively.
+#'
+#' `guess_write_function` understands the following file extensions:
+#' - rds with `saveRDS`
+#' - csv, csv.gz, csv.xz with `utils::write.csv`
+#' - tsv, tsv.gz, tsv.xz with `utils::write.table`, using the same defaults as
+#'   `utils::write.csv` but with `sep` set to `"\t"`
+#' - parquet with `arrow::write_parquet`
+#' - txt, txt.gz, txt.xz with `writeLines`
+#' - json, json.gz, json.xz with `jsonlite::write_json`
+#'
+#' @family pb_rw
+#' @param file filename to parse
+#' @return function for writing the file, if found
+#' @keywords internal
+guess_write_function <- function(file){
+  # Strip only a literal trailing ".gz"/".xz"; an unescaped "." would also eat
+  # any other character in front of "gz"/"xz" (e.g. "a.csv_gz" -> "a.csv").
+  stem <- sub("\\.(gz|xz)$", "", file, ignore.case = TRUE)
+  file_ext <- tolower(tools::file_ext(stem))
+  if (file_ext == "parquet") rlang::check_installed("arrow")
+
+  # utils::write.csv() ignores `sep` (with a warning) and always writes commas,
+  # so tab-separated output has to go through write.table(). The remaining
+  # settings mirror what write.csv() does: doubled quotes, and a blank header
+  # cell for the row-name column unless row names are switched off.
+  write_tsv <- function(x, file, ..., sep = "\t", row.names = TRUE) {
+    utils::write.table(
+      x = x,
+      file = file,
+      ...,
+      sep = sep,
+      row.names = row.names,
+      col.names = if (isFALSE(row.names)) TRUE else NA,
+      qmethod = "double"
+    )
+  }
+
+  # switch() evaluates only the selected branch, so the optional packages are
+  # touched only when their file type is requested.
+  switch(
+    file_ext,
+    "rds" = saveRDS,
+    "csv" = utils::write.csv,
+    "tsv" = write_tsv,
+    "txt" = writeLines,
+    "parquet" = arrow::write_parquet,
+    "json" = jsonlite::write_json,
+    cli::cli_abort("File type {.val {file_ext}} is not recognized, please provide a {.arg write_function}")
+  )
+}
diff --git a/man/guess_read_function.Rd b/man/guess_read_function.Rd
diff --git a/man/guess_write_function.Rd b/man/guess_write_function.Rd
diff --git a/man/pb_download_url.Rd b/man/pb_download_url.Rd
diff --git a/man/pb_read.Rd b/man/pb_read.Rd