From 21a473705dc4250faca8f78e34f0d2d2c4d5a72d Mon Sep 17 00:00:00 2001
From: Tan <tan@tanho.ca>
Date: Fri, 29 Dec 2023 23:55:37 -0500
Subject: [PATCH] improve pb_read and pb_write documentation

---
 R/pb_read.R                 | 33 +++++++++++++++++++++++++++------
 R/pb_write.R                | 35 +++++++++++++++++++++++++++++------
 man/guess_read_function.Rd  | 37 +++++++++++++++++++++++++++++++++++++
 man/guess_write_function.Rd | 37 +++++++++++++++++++++++++++++++++++++
 man/pb_download_url.Rd      | 29 ++++++++++++++++++++++++++---
 man/pb_read.Rd              | 18 ++++++++++++++----
 man/pb_write.Rd             | 17 ++++++++++++++---
 7 files changed, 184 insertions(+), 22 deletions(-)
 create mode 100644 man/guess_read_function.Rd
 create mode 100644 man/guess_write_function.Rd

diff --git a/R/pb_read.R b/R/pb_read.R
index be61afd..62d8c96 100644
--- a/R/pb_read.R
+++ b/R/pb_read.R
@@ -3,19 +3,23 @@
 #' A convenience wrapper around writing an object to a temporary file and then
 #' uploading to a specified repo/release. This convenience comes at a cost to
 #' performance efficiency, since it first downloads the data to disk and then
-#' reads the data from disk into memory. See `vignette("duckdb_arrow")` for
+#' reads the data from disk into memory. See `vignette("cloud_native")` for
 #' alternative ways to bypass this flow and work with the data directly.
 #'
 #' @param file string: file name
 #' @param repo string: GH repository name in format "owner/repo". Default
 #' `guess_repo()` tries to guess based on current working directory's git repo
 #' @param tag  string: tag for the GH release, defaults to "latest"
-#' @param read_function function: specifies how to read in the data. Default
-#' tries to guess a function based on file extension (csv, rds, parquet, txt, json)
-#' @param ... additional arguments passed to `read_function`
+#' @param read_function function: used to read in the data, where the file is
+#' passed as the first argument and any additional arguments are subsequently
+#' passed in via `...`. Default `guess_read_function(file)` will check the file
+#' extension and try to find an appropriate read function if the extension is one
+#' of rds, csv, tsv, parquet, txt, or json, and will abort if not found.
+#' @param ... additional arguments passed to `read_function` after file
 #' @param .token GitHub authentication token, see [gh::gh_token()]
 #'
 #' @export
+#' @family pb_rw
 #'
 #' @return Result of reading in the file in question.
 #' @examples \donttest{
@@ -50,6 +54,23 @@ pb_read <- function(file,
   read_function(file.path(tempdir(), file), ...)
 }
 
+#' Guess read function from file extension
+#'
+#' This function accepts a filename and tries to return a valid function for
+#' reading it.
+#'
+#' `guess_read_function` understands the following file extensions:
+#' - rds with `readRDS`
+#' - csv, csv.gz, csv.xz with `utils::read.csv`
+#' - tsv, tsv.gz, tsv.xz with `utils::read.delim`
+#' - parquet with `arrow::read_parquet`
+#' - txt, txt.gz, txt.xz with `readLines`
+#' - json, json.gz, json.xz with `jsonlite::fromJSON`
+#'
+#' @family pb_rw
+#' @param file filename to parse
+#' @return function for reading the file, if found
+#' @keywords internal
 guess_read_function <- function(file){
   file_ext <- tools::file_ext(gsub(x = file, pattern = ".gz$|.xz$", replacement = ""))
   if (file_ext == "parquet") rlang::check_installed("arrow")
@@ -57,8 +78,8 @@ guess_read_function <- function(file){
   read_fn <- switch(
     file_ext,
     "rds" = readRDS,
-    "csv" = read.csv,
-    "tsv" = read.delim,
+    "csv" = utils::read.csv,
+    "tsv" = utils::read.delim,
     "parquet" = arrow::read_parquet,
     "txt" = readLines,
     "json" = jsonlite::fromJSON,
diff --git a/R/pb_write.R b/R/pb_write.R
index ff9019e..60b24e1 100644
--- a/R/pb_write.R
+++ b/R/pb_write.R
@@ -8,21 +8,26 @@
 #' @param repo string: GH repository name in format "owner/repo". Default
 #' `guess_repo()` tries to guess based on current working directory's git repo
 #' @param tag  string: tag for the GH release, defaults to "latest"
-#' @param write_function function: specifies how to read in the data. Default
-#' tries to guess a function based on file extension (csv, rds, txt, parquet, json)
+#' @param write_function function: used to write an R object to file, where the
+#' object is passed as the first argument, the filename as the second argument,
+#' and any additional arguments are subsequently passed in via `...`. Default
+#' `guess_write_function(file)` will check the file extension and try to find an
+#' appropriate write function if the extension is one of rds, csv, tsv, parquet,
+#' txt, or json, and will abort if not found.
 #' @param ... additional arguments passed to `write_function`
 #' @param .token GitHub authentication token, see [gh::gh_token()]
 #'
 #' @export
+#' @family pb_rw
 #'
 #' @return Writes file to release and returns github API response
 #' @examples \donttest{
-#' if (interactive()) {
+#' \dontshow{if (interactive()) \{}
 #'   pb_write(mtcars, "mtcars.rds", repo = "tanho63/piggyback-tests")
 #'   #> ℹ Uploading to latest release: "v0.0.2".
 #'   #> ℹ Uploading mtcars.rds ...
 #'   #> |===============================================================| 100%
-#' }
+#' \dontshow{\}}
 #'}
 pb_write <- function(x,
                      file,
@@ -43,6 +48,23 @@ pb_write <- function(x,
   pb_upload(destfile, repo = repo, tag = tag, .token = .token)
 }
 
+#' Guess write function from file extension
+#'
+#' This function accepts a filename and tries to return a valid function for
+#' writing to it.
+#'
+#' `guess_write_function` understands the following file extensions:
+#' - rds with `saveRDS`
+#' - csv, csv.gz, csv.xz with `utils::write.csv`
+#' - tsv, tsv.gz, tsv.xz with a modified `utils::write.csv` where sep is set to `"\t"`
+#' - parquet with `arrow::write_parquet`
+#' - txt, txt.gz, txt.xz with `writeLines`
+#' - json, json.gz, json.xz with `jsonlite::write_json`
+#'
+#' @family pb_rw
+#' @param file filename to parse
+#' @return function for writing the file, if found
+#' @keywords internal
 guess_write_function <- function(file){
   file_ext <- tools::file_ext(gsub(x = file, pattern = ".gz$|.xz$", replacement = ""))
   if (file_ext == "parquet") rlang::check_installed("arrow")
@@ -50,10 +72,11 @@ guess_write_function <- function(file){
   write_fn <- switch(
     file_ext,
     "rds" = saveRDS,
-    "csv" = write.csv,
+    "csv" = utils::write.csv,
+    "tsv" = function(x, file, ..., sep = "\t") utils::write.csv(x = x, file = file, sep = sep, ...),
     "txt" = writeLines,
     "parquet" = arrow::write_parquet,
-    "json" = jsonlite::toJSON,
+    "json" = jsonlite::write_json,
     cli::cli_abort("File type {.val {file_ext}} is not recognized, please provide a {.arg write_function}")
   )
 
diff --git a/man/guess_read_function.Rd b/man/guess_read_function.Rd
new file mode 100644
index 0000000..34480de
--- /dev/null
+++ b/man/guess_read_function.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pb_read.R
+\name{guess_read_function}
+\alias{guess_read_function}
+\title{Guess read function from file extension}
+\usage{
+guess_read_function(file)
+}
+\arguments{
+\item{file}{filename to parse}
+}
+\value{
+function for reading the file, if found
+}
+\description{
+This function accepts a filename and tries to return a valid function for
+reading it.
+}
+\details{
+\code{guess_read_function} understands the following file extensions:
+\itemize{
+\item rds with \code{readRDS}
+\item csv, csv.gz, csv.xz with \code{utils::read.csv}
+\item tsv, tsv.gz, tsv.xz with \code{utils::read.delim}
+\item parquet with \code{arrow::read_parquet}
+\item txt, txt.gz, txt.xz with \code{readLines}
+\item json, json.gz, json.xz with \code{jsonlite::fromJSON}
+}
+}
+\seealso{
+Other pb_rw: 
+\code{\link{guess_write_function}()},
+\code{\link{pb_read}()},
+\code{\link{pb_write}()}
+}
+\concept{pb_rw}
+\keyword{internal}
diff --git a/man/guess_write_function.Rd b/man/guess_write_function.Rd
new file mode 100644
index 0000000..72ee984
--- /dev/null
+++ b/man/guess_write_function.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pb_write.R
+\name{guess_write_function}
+\alias{guess_write_function}
+\title{Guess write function from file extension}
+\usage{
+guess_write_function(file)
+}
+\arguments{
+\item{file}{filename to parse}
+}
+\value{
+function for writing the file, if found
+}
+\description{
+This function accepts a filename and tries to return a valid function for
+writing to it.
+}
+\details{
+\code{guess_write_function} understands the following file extensions:
+\itemize{
+\item rds with \code{saveRDS}
+\item csv, csv.gz, csv.xz with \code{utils::write.csv}
+\item tsv, tsv.gz, tsv.xz with a modified \code{utils::write.csv} where sep is set to \code{"\\t"}
+\item parquet with \code{arrow::write_parquet}
+\item txt, txt.gz, txt.xz with \code{writeLines}
+\item json, json.gz, json.xz with \code{jsonlite::write_json}
+}
+}
+\seealso{
+Other pb_rw: 
+\code{\link{guess_read_function}()},
+\code{\link{pb_read}()},
+\code{\link{pb_write}()}
+}
+\concept{pb_rw}
+\keyword{internal}
diff --git a/man/pb_download_url.Rd b/man/pb_download_url.Rd
index 0172211..757d10d 100644
--- a/man/pb_download_url.Rd
+++ b/man/pb_download_url.Rd
@@ -39,11 +39,34 @@ functions that are able to accept URLs.
 \donttest{
 \dontshow{try(\{}
 
-# returns browser url by default
-pb_download_url("iris.tsv.xz", repo = "cboettig/piggyback-tests", tag = "v0.0.1")
+# returns browser url by default (and all files if none are specified)
+browser_url <- pb_download_url(
+  repo = "tanho63/piggyback-tests",
+  tag = "v0.0.2"
+  )
+print(browser_url)
+utils::read.csv(browser_url[[1]])
 
 # can return api url if desired
-pb_download_url("iris.tsv.xz", repo = "cboettig/piggyback-tests", tag = "v0.0.1", url_type = "api")
+api_url <- pb_download_url(
+  "mtcars.csv",
+  repo = "tanho63/piggyback-tests",
+  tag = "v0.0.2"
+  )
+print(api_url)
+
+# for public repositories, this will still work
+utils::read.csv(api_url)
+
+# for private repos, can use httr or curl to fetch and then pass into read function
+gh_pat <- Sys.getenv("GITHUB_PAT")
+
+if(!identical(gh_pat, "")){
+  resp <- httr::GET(api_url, httr::add_headers(Authorization = paste("Bearer", gh_pat)))
+  utils::read.csv(text = httr::content(resp, as = "text"))
+}
+
+# or use pb_read which bundles some of this for you
 
 \dontshow{\})}
 }
diff --git a/man/pb_read.Rd b/man/pb_read.Rd
index aad97db..7b8bd2c 100644
--- a/man/pb_read.Rd
+++ b/man/pb_read.Rd
@@ -21,10 +21,13 @@ pb_read(
 
 \item{tag}{string: tag for the GH release, defaults to "latest"}
 
-\item{read_function}{function: specifies how to read in the data. Default
-tries to guess a function based on file extension (csv, rds, parquet, txt, json)}
+\item{read_function}{function: used to read in the data, where the file is
+passed as the first argument and any additional arguments are subsequently
+passed in via \code{...}. Default \code{guess_read_function(file)} will check the file
+extension and try to find an appropriate read function if the extension is one
+of rds, csv, tsv, parquet, txt, or json, and will abort if not found.}
 
-\item{...}{additional arguments passed to \code{read_function}}
+\item{...}{additional arguments passed to \code{read_function} after file}
 
 \item{.token}{GitHub authentication token, see \code{\link[gh:gh_token]{gh::gh_token()}}}
 }
@@ -35,7 +38,7 @@ Result of reading in the file in question.
 A convenience wrapper around writing an object to a temporary file and then
 uploading to a specified repo/release. This convenience comes at a cost to
 performance efficiency, since it first downloads the data to disk and then
-reads the data from disk into memory. See \code{vignette("duckdb_arrow")} for
+reads the data from disk into memory. See \code{vignette("cloud_native")} for
 alternative ways to bypass this flow and work with the data directly.
 }
 \examples{
@@ -45,3 +48,10 @@ try({ # try block is to avoid CRAN issues and is not required in ordinary usage
 })
 }
 }
+\seealso{
+Other pb_rw: 
+\code{\link{guess_read_function}()},
+\code{\link{guess_write_function}()},
+\code{\link{pb_write}()}
+}
+\concept{pb_rw}
diff --git a/man/pb_write.Rd b/man/pb_write.Rd
index bec9ec6..07b198d 100644
--- a/man/pb_write.Rd
+++ b/man/pb_write.Rd
@@ -24,8 +24,12 @@ pb_write(
 
 \item{tag}{string: tag for the GH release, defaults to "latest"}
 
-\item{write_function}{function: specifies how to read in the data. Default
-tries to guess a function based on file extension (csv, rds, txt, parquet, json)}
+\item{write_function}{function: used to write an R object to file, where the
+object is passed as the first argument, the filename as the second argument,
+and any additional arguments are subsequently passed in via \code{...}. Default
+\code{guess_write_function(file)} will check the file extension and try to find an
+appropriate write function if the extension is one of rds, csv, tsv, parquet,
+txt, or json, and will abort if not found.}
 
 \item{...}{additional arguments passed to \code{write_function}}
 
@@ -40,11 +44,18 @@ uploading to a specified repo/release.
 }
 \examples{
 \donttest{
-if (interactive()) {
+\dontshow{if (interactive()) \{}
   pb_write(mtcars, "mtcars.rds", repo = "tanho63/piggyback-tests")
   #> ℹ Uploading to latest release: "v0.0.2".
   #> ℹ Uploading mtcars.rds ...
   #> |===============================================================| 100\%
+\dontshow{\}}
 }
 }
+\seealso{
+Other pb_rw: 
+\code{\link{guess_read_function}()},
+\code{\link{guess_write_function}()},
+\code{\link{pb_read}()}
 }
+\concept{pb_rw}