From 08f4667e12f720d36a39b441cc5ad64e7d367e62 Mon Sep 17 00:00:00 2001 From: brshallo Date: Thu, 15 Aug 2024 11:06:20 -0700 Subject: [PATCH 1/7] use `fun()` instead of `fun` across docs, fixes #383 --- R/boot.R | 2 +- R/caret.R | 8 ++++---- R/initial_split.R | 14 +++++++------- R/labels.R | 2 +- R/nest.R | 2 +- R/rsplit.R | 4 ++-- R/tidy.R | 12 ++++++------ man/as.data.frame.rsplit.Rd | 4 ++-- man/bootstraps.Rd | 2 +- man/group_bootstraps.Rd | 2 +- man/initial_split.Rd | 10 +++++----- man/labels.rset.Rd | 2 +- man/nested_cv.Rd | 2 +- man/rsample2caret.Rd | 8 ++++---- man/tidy.rsplit.Rd | 10 +++++----- vignettes/Working_with_rsets.Rmd | 4 ++-- 16 files changed, 44 insertions(+), 44 deletions(-) diff --git a/R/boot.R b/R/boot.R index 887de4bd..fa8fb651 100644 --- a/R/boot.R +++ b/R/boot.R @@ -17,7 +17,7 @@ #' @param times The number of bootstrap samples. #' @param apparent A logical. Should an extra resample be added where the #' analysis and holdout subset are the entire data set. This is required for -#' some estimators used by the `summary` function that require the apparent +#' some estimators used by the `summary()` function that require the apparent #' error rate. #' @export #' @return A tibble with classes `bootstraps`, `rset`, `tbl_df`, `tbl`, and diff --git a/R/caret.R b/R/caret.R index 6b2808b3..609fbc19 100644 --- a/R/caret.R +++ b/R/caret.R @@ -4,10 +4,10 @@ #' \pkg{rsample} and \pkg{caret}. #' #' @param object An `rset` object. Currently, -#' `nested_cv` is not supported. -#' @return `rsample2caret` returns a list that mimics the +#' `nested_cv()` is not supported. +#' @return `rsample2caret()` returns a list that mimics the #' `index` and `indexOut` elements of a -#' `trainControl` object. `caret2rsample` returns an +#' `trainControl` object. `caret2rsample()` returns an #' `rset` object of the appropriate class. 
#' @export rsample2caret <- function(object, data = c("analysis", "assessment")) { @@ -23,7 +23,7 @@ rsample2caret <- function(object, data = c("analysis", "assessment")) { } #' @rdname rsample2caret -#' @param ctrl An object produced by `trainControl` that has +#' @param ctrl An object produced by `caret::trainControl()` that has #' had the `index` and `indexOut` elements populated by #' integers. One method of getting this is to extract the #' `control` objects from an object produced by `train`. diff --git a/R/initial_split.R b/R/initial_split.R index 66157c76..1de20789 100644 --- a/R/initial_split.R +++ b/R/initial_split.R @@ -1,18 +1,18 @@ #' Simple Training/Test Set Splitting #' -#' `initial_split` creates a single binary split of the data into a training -#' set and testing set. `initial_time_split` does the same, but takes the +#' `initial_split()` creates a single binary split of the data into a training +#' set and testing set. `initial_time_split()` does the same, but takes the #' _first_ `prop` samples for training, instead of a random selection. -#' `group_initial_split` creates splits of the data based +#' `group_initial_split()` creates splits of the data based #' on some grouping variable, so that all data in a "group" is assigned to #' the same split. -#' `training` and `testing` are used to extract the resulting data. +#' `training()` and `testing()` are used to extract the resulting data. #' @template strata_details #' @inheritParams vfold_cv #' @inheritParams make_strata #' @param prop The proportion of data to be retained for modeling/analysis. #' @export -#' @return An `rsplit` object that can be used with the `training` and `testing` +#' @return An `rsplit` object that can be used with the `training()` and `testing()` #' functions to extract the data in each split. 
#' @examplesIf rlang::is_installed("modeldata") #' set.seed(1353) @@ -176,12 +176,12 @@ group_initial_split <- function(data, group, prop = 3 / 4, ..., strata = NULL, p attrib <- .get_split_args(res, allow_strata_false = TRUE) res <- res$splits[[1]] - + attrib$times <- NULL for (i in names(attrib)) { attr(res, i) <- attrib[[i]] } class(res) <- c("group_initial_split", "initial_split", class(res)) - + res } diff --git a/R/labels.R b/R/labels.R index 53e7bfe6..9cfe767e 100644 --- a/R/labels.R +++ b/R/labels.R @@ -1,7 +1,7 @@ #' Find Labels from rset Object #' #' Produce a vector of resampling labels (e.g. "Fold1") from -#' an `rset` object. Currently, `nested_cv` +#' an `rset` object. Currently, `nested_cv()` #' is not supported. #' #' @param object An `rset` object diff --git a/R/nest.R b/R/nest.R index d3c6b080..72058c85 100644 --- a/R/nest.R +++ b/R/nest.R @@ -1,6 +1,6 @@ #' Nested or Double Resampling #' -#' `nested_cv` can be used to take the results of one resampling procedure +#' `nested_cv()` can be used to take the results of one resampling procedure #' and conduct further resamples within each split. Any type of resampling #' used in `rsample` can be used. #' diff --git a/R/rsplit.R b/R/rsplit.R index 96234934..d19af5a1 100644 --- a/R/rsplit.R +++ b/R/rsplit.R @@ -66,8 +66,8 @@ as.integer.rsplit <- #' #' The analysis or assessment code can be returned as a data #' frame (as dictated by the `data` argument) using -#' `as.data.frame.rsplit`. `analysis` and -#' `assessment` are shortcuts. +#' `as.data.frame.rsplit()`. `analysis()` and +#' `assessment()` are shortcuts. #' @param x An `rsplit` object. #' @param row.names `NULL` or a character vector giving the row names for the data frame. Missing values are not allowed. #' @param optional A logical: should the column names of the data be checked for legality? 
diff --git a/R/tidy.R b/R/tidy.R index acba8b33..4d268c2a 100644 --- a/R/tidy.R +++ b/R/tidy.R @@ -1,6 +1,6 @@ #' Tidy Resampling Object #' -#' The `tidy` function from the \pkg{broom} package can be used on `rset` and +#' The `tidy()` function from the \pkg{broom} package can be used on `rset` and #' `rsplit` objects to generate tibbles with which rows are in the analysis and #' assessment sets. #' @param x A `rset` or `rsplit` object @@ -9,11 +9,11 @@ #' sample for the same row in the original data. #' @inheritParams rlang::args_dots_empty #' @return A tibble with columns `Row` and `Data`. The latter has possible -#' values "Analysis" or "Assessment". For `rset` inputs, identification columns -#' are also returned but their names and values depend on the type of -#' resampling. `vfold_cv` contains a column "Fold" and, if repeats are used, -#' another called "Repeats". `bootstraps` and `mc_cv` use the column -#' "Resample". +#' values "Analysis" or "Assessment". For `rset` inputs, identification +#' columns are also returned but their names and values depend on the type of +#' resampling. For `vfold_cv()`, contains a column "Fold" and, if repeats are +#' used, another called "Repeats". `bootstraps()` and `mc_cv()` use the column +#' "Resample". #' @details Note that for nested resampling, the rows of the inner resample, #' named `inner_Row`, are *relative* row indices and do not correspond to the #' rows in the original data set. diff --git a/man/as.data.frame.rsplit.Rd b/man/as.data.frame.rsplit.Rd index 6a494c15..6de9ad42 100644 --- a/man/as.data.frame.rsplit.Rd +++ b/man/as.data.frame.rsplit.Rd @@ -38,8 +38,8 @@ assessment(x, ...) \description{ The analysis or assessment code can be returned as a data frame (as dictated by the \code{data} argument) using -\code{as.data.frame.rsplit}. \code{analysis} and -\code{assessment} are shortcuts. +\code{as.data.frame.rsplit()}. \code{analysis()} and +\code{assessment()} are shortcuts. 
} \examples{ library(dplyr) diff --git a/man/bootstraps.Rd b/man/bootstraps.Rd index 164f1835..93854d30 100644 --- a/man/bootstraps.Rd +++ b/man/bootstraps.Rd @@ -33,7 +33,7 @@ of stratifying groups that are too small.} \item{apparent}{A logical. Should an extra resample be added where the analysis and holdout subset are the entire data set. This is required for -some estimators used by the \code{summary} function that require the apparent +some estimators used by the \code{summary()} function that require the apparent error rate.} \item{...}{These dots are for future extensions and must be empty.} diff --git a/man/group_bootstraps.Rd b/man/group_bootstraps.Rd index ad68eb6d..e93dccd3 100644 --- a/man/group_bootstraps.Rd +++ b/man/group_bootstraps.Rd @@ -25,7 +25,7 @@ assessment set within a fold.} \item{apparent}{A logical. Should an extra resample be added where the analysis and holdout subset are the entire data set. This is required for -some estimators used by the \code{summary} function that require the apparent +some estimators used by the \code{summary()} function that require the apparent error rate.} \item{...}{These dots are for future extensions and must be empty.} diff --git a/man/initial_split.Rd b/man/initial_split.Rd index 740bf66f..efc096d0 100644 --- a/man/initial_split.Rd +++ b/man/initial_split.Rd @@ -61,17 +61,17 @@ grouping observations with the same value to either the analysis or assessment set within a fold.} } \value{ -An \code{rsplit} object that can be used with the \code{training} and \code{testing} +An \code{rsplit} object that can be used with the \code{training()} and \code{testing()} functions to extract the data in each split. } \description{ -\code{initial_split} creates a single binary split of the data into a training -set and testing set. \code{initial_time_split} does the same, but takes the +\code{initial_split()} creates a single binary split of the data into a training +set and testing set. 
\code{initial_time_split()} does the same, but takes the \emph{first} \code{prop} samples for training, instead of a random selection. -\code{group_initial_split} creates splits of the data based +\code{group_initial_split()} creates splits of the data based on some grouping variable, so that all data in a "group" is assigned to the same split. -\code{training} and \code{testing} are used to extract the resulting data. +\code{training()} and \code{testing()} are used to extract the resulting data. } \details{ With a \code{strata} argument, the random sampling is conducted diff --git a/man/labels.rset.Rd b/man/labels.rset.Rd index 6aa2ca12..2bc14356 100644 --- a/man/labels.rset.Rd +++ b/man/labels.rset.Rd @@ -22,7 +22,7 @@ A single character or factor vector. } \description{ Produce a vector of resampling labels (e.g. "Fold1") from -an \code{rset} object. Currently, \code{nested_cv} +an \code{rset} object. Currently, \code{nested_cv()} is not supported. } \examples{ diff --git a/man/nested_cv.Rd b/man/nested_cv.Rd index 826a9c47..3bca035e 100644 --- a/man/nested_cv.Rd +++ b/man/nested_cv.Rd @@ -25,7 +25,7 @@ and a column of nested tibbles called \code{inner_resamples} with the additional resamples. } \description{ -\code{nested_cv} can be used to take the results of one resampling procedure +\code{nested_cv()} can be used to take the results of one resampling procedure and conduct further resamples within each split. Any type of resampling used in \code{rsample} can be used. } diff --git a/man/rsample2caret.Rd b/man/rsample2caret.Rd index ca0605f1..ac712c5b 100644 --- a/man/rsample2caret.Rd +++ b/man/rsample2caret.Rd @@ -11,20 +11,20 @@ caret2rsample(ctrl, data = NULL) } \arguments{ \item{object}{An \code{rset} object. 
Currently, -\code{nested_cv} is not supported.} +\code{nested_cv()} is not supported.} \item{data}{The data that was originally used to produce the \code{ctrl} object.} -\item{ctrl}{An object produced by \code{trainControl} that has +\item{ctrl}{An object produced by \code{caret::trainControl()} that has had the \code{index} and \code{indexOut} elements populated by integers. One method of getting this is to extract the \code{control} objects from an object produced by \code{train}.} } \value{ -\code{rsample2caret} returns a list that mimics the +\code{rsample2caret()} returns a list that mimics the \code{index} and \code{indexOut} elements of a -\code{trainControl} object. \code{caret2rsample} returns an +\code{trainControl} object. \code{caret2rsample()} returns an \code{rset} object of the appropriate class. } \description{ diff --git a/man/tidy.rsplit.Rd b/man/tidy.rsplit.Rd index 01c419ec..3d75c389 100644 --- a/man/tidy.rsplit.Rd +++ b/man/tidy.rsplit.Rd @@ -26,14 +26,14 @@ sample for the same row in the original data.} } \value{ A tibble with columns \code{Row} and \code{Data}. The latter has possible -values "Analysis" or "Assessment". For \code{rset} inputs, identification columns -are also returned but their names and values depend on the type of -resampling. \code{vfold_cv} contains a column "Fold" and, if repeats are used, -another called "Repeats". \code{bootstraps} and \code{mc_cv} use the column +values "Analysis" or "Assessment". For \code{rset} inputs, identification +columns are also returned but their names and values depend on the type of +resampling. For \code{vfold_cv()}, contains a column "Fold" and, if repeats are +used, another called "Repeats". \code{bootstraps()} and \code{mc_cv()} use the column "Resample". 
} \description{ -The \code{tidy} function from the \pkg{broom} package can be used on \code{rset} and +The \code{tidy()} function from the \pkg{broom} package can be used on \code{rset} and \code{rsplit} objects to generate tibbles with which rows are in the analysis and assessment sets. } diff --git a/vignettes/Working_with_rsets.Rmd b/vignettes/Working_with_rsets.Rmd index 8f59b2e8..ab112ea1 100644 --- a/vignettes/Working_with_rsets.Rmd +++ b/vignettes/Working_with_rsets.Rmd @@ -109,7 +109,7 @@ example[1:10, setdiff(names(example), names(attrition))] For this model, the `.fitted` value is the linear predictor in log-odds units. -To compute this data set for each of the 100 resamples, we'll use the `map` function from the `purrr` package: +To compute this data set for each of the 100 resamples, we'll use the `map()` function from the `purrr` package: ```{r model_purrr, warning=FALSE} library(purrr) @@ -182,7 +182,7 @@ The calculated 95% confidence interval contains zero, so we don't have evidence ## Bootstrap Estimates of Model Coefficients -Unless there is already a column in the resample object that contains the fitted model, a function can be used to fit the model and save all of the model coefficients. The [`broom` package](https://cran.r-project.org/package=broom) package has a `tidy` function that will save the coefficients in a data frame. Instead of returning a data frame with a row for each model term, we will save a data frame with a single row and columns for each model term. As before, `purrr::map` can be used to estimate and save these values for each split. +Unless there is already a column in the resample object that contains the fitted model, a function can be used to fit the model and save all of the model coefficients. The [`broom` package](https://cran.r-project.org/package=broom) has a `tidy()` function that will save the coefficients in a data frame.
Instead of returning a data frame with a row for each model term, we will save a data frame with a single row and columns for each model term. As before, `purrr::map()` can be used to estimate and save these values for each split. ```{r coefs} From 3ca624dd3f196d37fc79ac061f6a473d29d0cc8e Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 4 Sep 2024 14:26:26 +0100 Subject: [PATCH 2/7] `document()` found change from previous PR --- man/make_strata.Rd | 1 - 1 file changed, 1 deletion(-) diff --git a/man/make_strata.Rd b/man/make_strata.Rd index c2b7b434..9d9c50bc 100644 --- a/man/make_strata.Rd +++ b/man/make_strata.Rd @@ -64,7 +64,6 @@ x3 <- factor(x2) table(x3) table(make_strata(x3)) -# `oilType` data from x4 <- rep(LETTERS[1:7], c(37, 26, 3, 7, 11, 10, 2)) table(x4) table(make_strata(x4)) From bf9b80b0fdeb7cfbd7dccdd83eeab31c09037675 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 4 Sep 2024 16:49:03 +0100 Subject: [PATCH 3/7] Link to function help --- R/boot.R | 2 +- R/caret.R | 2 +- R/form_pred.R | 2 +- R/labels.R | 3 +-- R/permutations.R | 2 +- R/reg_intervals.R | 2 +- R/tidy.R | 4 ++-- man/bootstraps.Rd | 2 +- man/form_pred.Rd | 2 +- man/labels.rset.Rd | 3 +-- man/permutations.Rd | 2 +- man/reg_intervals.Rd | 2 +- man/rsample2caret.Rd | 2 +- man/tidy.rsplit.Rd | 4 ++-- 14 files changed, 16 insertions(+), 18 deletions(-) diff --git a/R/boot.R b/R/boot.R index fa8fb651..edb40593 100644 --- a/R/boot.R +++ b/R/boot.R @@ -17,7 +17,7 @@ #' @param times The number of bootstrap samples. #' @param apparent A logical. Should an extra resample be added where the #' analysis and holdout subset are the entire data set. This is required for -#' some estimators used by the `summary()` function that require the apparent +#' some estimators used by the [summary()] function that require the apparent #' error rate. 
#' @export #' @return A tibble with classes `bootstraps`, `rset`, `tbl_df`, `tbl`, and diff --git a/R/caret.R b/R/caret.R index 609fbc19..26f66fcc 100644 --- a/R/caret.R +++ b/R/caret.R @@ -4,7 +4,7 @@ #' \pkg{rsample} and \pkg{caret}. #' #' @param object An `rset` object. Currently, -#' `nested_cv()` is not supported. +#' [nested_cv()] is not supported. #' @return `rsample2caret()` returns a list that mimics the #' `index` and `indexOut` elements of a #' `trainControl` object. `caret2rsample()` returns an diff --git a/R/form_pred.R b/R/form_pred.R index 3d166c37..77348511 100644 --- a/R/form_pred.R +++ b/R/form_pred.R @@ -1,6 +1,6 @@ #' Extract Predictor Names from Formula or Terms #' -#' `all.vars` returns all variables used in a formula. This +#' While [all.vars()] returns all variables used in a formula, this #' function only returns the variables explicitly used on the #' right-hand side (i.e., it will not resolve dots unless the #' object is terms with a data set specified). diff --git a/R/labels.R b/R/labels.R index 9cfe767e..131bc635 100644 --- a/R/labels.R +++ b/R/labels.R @@ -1,8 +1,7 @@ #' Find Labels from rset Object #' #' Produce a vector of resampling labels (e.g. "Fold1") from -#' an `rset` object. Currently, `nested_cv()` -#' is not supported. +#' an `rset` object. Currently, [nested_cv()] is not supported. #' #' @param object An `rset` object #' @param make_factor A logical for whether the results should be diff --git a/R/permutations.R b/R/permutations.R index e5a42a13..c9702e48 100644 --- a/R/permutations.R +++ b/R/permutations.R @@ -5,7 +5,7 @@ #' by permuting/shuffling one or more columns. This results in analysis #' samples where some columns are in their original order and some columns #' are permuted to a random order. Unlike other sampling functions in -#' rsample, there is no assessment set and calling `assessment()` on a +#' rsample, there is no assessment set and calling [assessment()] on a #' permutation split will throw an error. 
#' #' @param data A data frame. diff --git a/R/reg_intervals.R b/R/reg_intervals.R index 50a0cf44..ee8bb3b4 100644 --- a/R/reg_intervals.R +++ b/R/reg_intervals.R @@ -13,7 +13,7 @@ #' @param filter A logical expression used to remove rows from the final result, or `NULL` to keep all rows. #' @param keep_reps Should the individual parameter estimates for each bootstrap #' sample be retained? -#' @param ... Options to pass to the model function (such as `family` for `glm()`). +#' @param ... Options to pass to the model function (such as `family` for [stats::glm()]). #' @return A tibble with columns "term", ".lower", ".estimate", ".upper", #' ".alpha", and ".method". If `keep_reps = TRUE`, an additional list column #' called ".replicates" is also returned. diff --git a/R/tidy.R b/R/tidy.R index 4d268c2a..60a5c6a2 100644 --- a/R/tidy.R +++ b/R/tidy.R @@ -11,8 +11,8 @@ #' @return A tibble with columns `Row` and `Data`. The latter has possible #' values "Analysis" or "Assessment". For `rset` inputs, identification #' columns are also returned but their names and values depend on the type of -#' resampling. For `vfold_cv()`, contains a column "Fold" and, if repeats are -#' used, another called "Repeats". `bootstraps()` and `mc_cv()` use the column +#' resampling. For [vfold_cv()], contains a column "Fold" and, if repeats are +#' used, another called "Repeats". [bootstraps()] and [mc_cv()] use the column #' "Resample". #' @details Note that for nested resampling, the rows of the inner resample, #' named `inner_Row`, are *relative* row indices and do not correspond to the diff --git a/man/bootstraps.Rd b/man/bootstraps.Rd index 93854d30..f56a4d8a 100644 --- a/man/bootstraps.Rd +++ b/man/bootstraps.Rd @@ -33,7 +33,7 @@ of stratifying groups that are too small.} \item{apparent}{A logical. Should an extra resample be added where the analysis and holdout subset are the entire data set. 
This is required for -some estimators used by the \code{summary()} function that require the apparent +some estimators used by the \code{\link[=summary]{summary()}} function that require the apparent error rate.} \item{...}{These dots are for future extensions and must be empty.} diff --git a/man/form_pred.Rd b/man/form_pred.Rd index e80e43e9..189f50c0 100644 --- a/man/form_pred.Rd +++ b/man/form_pred.Rd @@ -16,7 +16,7 @@ object.} A character vector of names } \description{ -\code{all.vars} returns all variables used in a formula. This +While \code{\link[=all.vars]{all.vars()}} returns all variables used in a formula, this function only returns the variables explicitly used on the right-hand side (i.e., it will not resolve dots unless the object is terms with a data set specified). diff --git a/man/labels.rset.Rd b/man/labels.rset.Rd index 2bc14356..96dff7d1 100644 --- a/man/labels.rset.Rd +++ b/man/labels.rset.Rd @@ -22,8 +22,7 @@ A single character or factor vector. } \description{ Produce a vector of resampling labels (e.g. "Fold1") from -an \code{rset} object. Currently, \code{nested_cv()} -is not supported. +an \code{rset} object. Currently, \code{\link[=nested_cv]{nested_cv()}} is not supported. } \examples{ labels(vfold_cv(mtcars)) diff --git a/man/permutations.Rd b/man/permutations.Rd index c2773bed..54c14324 100644 --- a/man/permutations.Rd +++ b/man/permutations.Rd @@ -33,7 +33,7 @@ A permutation sample is the same size as the original data set and is made by permuting/shuffling one or more columns. This results in analysis samples where some columns are in their original order and some columns are permuted to a random order. Unlike other sampling functions in -rsample, there is no assessment set and calling \code{assessment()} on a +rsample, there is no assessment set and calling \code{\link[=assessment]{assessment()}} on a permutation split will throw an error. 
} \details{ diff --git a/man/reg_intervals.Rd b/man/reg_intervals.Rd index cf3b04a2..23a96fe2 100644 --- a/man/reg_intervals.Rd +++ b/man/reg_intervals.Rd @@ -38,7 +38,7 @@ NULL, 1,001 are used for t-intervals and 2,001 for percentile intervals.} \item{keep_reps}{Should the individual parameter estimates for each bootstrap sample be retained?} -\item{...}{Options to pass to the model function (such as \code{family} for \code{glm()}).} +\item{...}{Options to pass to the model function (such as \code{family} for \code{\link[stats:glm]{stats::glm()}}).} } \value{ A tibble with columns "term", ".lower", ".estimate", ".upper", diff --git a/man/rsample2caret.Rd b/man/rsample2caret.Rd index ac712c5b..e96fc98b 100644 --- a/man/rsample2caret.Rd +++ b/man/rsample2caret.Rd @@ -11,7 +11,7 @@ caret2rsample(ctrl, data = NULL) } \arguments{ \item{object}{An \code{rset} object. Currently, -\code{nested_cv()} is not supported.} +\code{\link[=nested_cv]{nested_cv()}} is not supported.} \item{data}{The data that was originally used to produce the \code{ctrl} object.} diff --git a/man/tidy.rsplit.Rd b/man/tidy.rsplit.Rd index 3d75c389..eca6c81b 100644 --- a/man/tidy.rsplit.Rd +++ b/man/tidy.rsplit.Rd @@ -28,8 +28,8 @@ sample for the same row in the original data.} A tibble with columns \code{Row} and \code{Data}. The latter has possible values "Analysis" or "Assessment". For \code{rset} inputs, identification columns are also returned but their names and values depend on the type of -resampling. For \code{vfold_cv()}, contains a column "Fold" and, if repeats are -used, another called "Repeats". \code{bootstraps()} and \code{mc_cv()} use the column +resampling. For \code{\link[=vfold_cv]{vfold_cv()}}, contains a column "Fold" and, if repeats are +used, another called "Repeats". \code{\link[=bootstraps]{bootstraps()}} and \code{\link[=mc_cv]{mc_cv()}} use the column "Resample". 
} \description{ From 71f779449f91ee2a0827fa136af753cb666bc8ab Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 4 Sep 2024 16:49:49 +0100 Subject: [PATCH 4/7] but don't link to the page you're already on --- R/make_groups.R | 2 +- man/make_groups.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/make_groups.R b/R/make_groups.R index d5dabf24..47f43021 100644 --- a/R/make_groups.R +++ b/R/make_groups.R @@ -25,7 +25,7 @@ #' only one) assessment set, but rather allow each observation to be in an #' assessment set zero-or-more times. As a result, those functions don't have #' a `balance` argument, and under the hood always specify `balance = "prop"` -#' when they call [make_groups()]. +#' when they call `make_groups()`. #' #' @keywords internal make_groups <- function(data, diff --git a/man/make_groups.Rd b/man/make_groups.Rd index d2bdc62a..508d9c5e 100644 --- a/man/make_groups.Rd +++ b/man/make_groups.Rd @@ -49,6 +49,6 @@ Similarly, \code{\link[=group_mc_cv]{group_mc_cv()}} and its derivatives don't a only one) assessment set, but rather allow each observation to be in an assessment set zero-or-more times. As a result, those functions don't have a \code{balance} argument, and under the hood always specify \code{balance = "prop"} -when they call \code{\link[=make_groups]{make_groups()}}. +when they call \code{make_groups()}. 
} \keyword{internal} From 7e2ca02399b1f083c068be883dfe9af66ca91609 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 4 Sep 2024 16:51:28 +0100 Subject: [PATCH 5/7] misc tidy styling --- R/labels.R | 4 ++-- R/printing.R | 2 +- R/reg_intervals.R | 10 +++++----- R/rsplit.R | 2 +- R/tidy.R | 2 +- man/add_resample_id.Rd | 2 +- man/as.data.frame.rsplit.Rd | 2 +- man/labels.rset.Rd | 2 +- man/reg_intervals.Rd | 10 +++++----- man/tidy.rsplit.Rd | 2 +- 10 files changed, 19 insertions(+), 19 deletions(-) diff --git a/R/labels.R b/R/labels.R index 131bc635..becce98e 100644 --- a/R/labels.R +++ b/R/labels.R @@ -3,7 +3,7 @@ #' Produce a vector of resampling labels (e.g. "Fold1") from #' an `rset` object. Currently, [nested_cv()] is not supported. #' -#' @param object An `rset` object +#' @param object An `rset` object. #' @param make_factor A logical for whether the results should be #' a character or a factor. #' @param ... Not currently used. @@ -67,7 +67,7 @@ labels.rsplit <- function(object, ...) { #' For a data set, `add_resample_id()` will add at least one new column that #' identifies which resample that the data came from. In most cases, a single #' column is added but for some resampling methods, two or more are added. -#' @param .data A data frame +#' @param .data A data frame. #' @param split A single `rset` object. #' @param dots A single logical: should the id columns be prefixed with a "." #' to avoid name conflicts with `.data`? diff --git a/R/printing.R b/R/printing.R index d182597c..b456c12e 100644 --- a/R/printing.R +++ b/R/printing.R @@ -1,4 +1,4 @@ -## The `pretty` methods below are good for when you need to +## The `pretty()` methods below are good for when you need to ## textually describe the resampling procedure. 
Note that they ## can have more than one element (in the case of nesting) diff --git a/R/reg_intervals.R b/R/reg_intervals.R index ee8bb3b4..e820ab77 100644 --- a/R/reg_intervals.R +++ b/R/reg_intervals.R @@ -2,13 +2,13 @@ #' #' @param formula An R model formula with one outcome and at least one predictor. #' @param data A data frame. -#' @param model_fn The model to fit. Allowable values are "lm", "glm", -#' "survreg", and "coxph". The latter two require that the `survival` package +#' @param model_fn The model to fit. Allowable values are `"lm"`, `"glm"`, +#' `"survreg"`, and `"coxph"`. The latter two require that the survival package #' be installed. -#' @param type The type of bootstrap confidence interval. Values of "student-t" and -#' "percentile" are allowed. +#' @param type The type of bootstrap confidence interval. Values of `"student-t"` and +#' `"percentile"` are allowed. #' @param times A single integer for the number of bootstrap samples. If left -#' NULL, 1,001 are used for t-intervals and 2,001 for percentile intervals. +#' `NULL`, 1,001 are used for t-intervals and 2,001 for percentile intervals. #' @param alpha Level of significance. #' @param filter A logical expression used to remove rows from the final result, or `NULL` to keep all rows. #' @param keep_reps Should the individual parameter estimates for each bootstrap diff --git a/R/rsplit.R b/R/rsplit.R index 1bc38128..394145a4 100644 --- a/R/rsplit.R +++ b/R/rsplit.R @@ -71,7 +71,7 @@ as.integer.rsplit <- #' @param x An `rsplit` object. #' @param row.names `NULL` or a character vector giving the row names for the data frame. Missing values are not allowed. #' @param optional A logical: should the column names of the data be checked for legality? -#' @param data Either "analysis" or "assessment" to specify which data are returned. +#' @param data Either `"analysis"` or `"assessment"` to specify which data are returned. #' @param ... Not currently used. 
#' @examples #' library(dplyr) diff --git a/R/tidy.R b/R/tidy.R index 60a5c6a2..39c73ba1 100644 --- a/R/tidy.R +++ b/R/tidy.R @@ -3,7 +3,7 @@ #' The `tidy()` function from the \pkg{broom} package can be used on `rset` and #' `rsplit` objects to generate tibbles with which rows are in the analysis and #' assessment sets. -#' @param x A `rset` or `rsplit` object +#' @param x A `rset` or `rsplit` object #' @param unique_ind Should unique row identifiers be returned? For example, #' if `FALSE` then bootstrapping results will include multiple rows in the #' sample for the same row in the original data. diff --git a/man/add_resample_id.Rd b/man/add_resample_id.Rd index a1d6450a..5b73deb1 100644 --- a/man/add_resample_id.Rd +++ b/man/add_resample_id.Rd @@ -7,7 +7,7 @@ add_resample_id(.data, split, dots = FALSE) } \arguments{ -\item{.data}{A data frame} +\item{.data}{A data frame.} \item{split}{A single \code{rset} object.} diff --git a/man/as.data.frame.rsplit.Rd b/man/as.data.frame.rsplit.Rd index 6de9ad42..1ac6b7cb 100644 --- a/man/as.data.frame.rsplit.Rd +++ b/man/as.data.frame.rsplit.Rd @@ -31,7 +31,7 @@ assessment(x, ...) \item{optional}{A logical: should the column names of the data be checked for legality?} -\item{data}{Either "analysis" or "assessment" to specify which data are returned.} +\item{data}{Either \code{"analysis"} or \code{"assessment"} to specify which data are returned.} \item{...}{Not currently used.} } diff --git a/man/labels.rset.Rd b/man/labels.rset.Rd index 96dff7d1..0f314296 100644 --- a/man/labels.rset.Rd +++ b/man/labels.rset.Rd @@ -10,7 +10,7 @@ \method{labels}{vfold_cv}(object, make_factor = FALSE, ...) 
} \arguments{ -\item{object}{An \code{rset} object} +\item{object}{An \code{rset} object.} \item{make_factor}{A logical for whether the results should be a character or a factor.} diff --git a/man/reg_intervals.Rd b/man/reg_intervals.Rd index 23a96fe2..afc386e9 100644 --- a/man/reg_intervals.Rd +++ b/man/reg_intervals.Rd @@ -21,15 +21,15 @@ reg_intervals( \item{data}{A data frame.} -\item{model_fn}{The model to fit. Allowable values are "lm", "glm", -"survreg", and "coxph". The latter two require that the \code{survival} package +\item{model_fn}{The model to fit. Allowable values are \code{"lm"}, \code{"glm"}, +\code{"survreg"}, and \code{"coxph"}. The latter two require that the survival package be installed.} -\item{type}{The type of bootstrap confidence interval. Values of "student-t" and -"percentile" are allowed.} +\item{type}{The type of bootstrap confidence interval. Values of \code{"student-t"} and +\code{"percentile"} are allowed.} \item{times}{A single integer for the number of bootstrap samples. If left -NULL, 1,001 are used for t-intervals and 2,001 for percentile intervals.} +\code{NULL}, 1,001 are used for t-intervals and 2,001 for percentile intervals.} \item{alpha}{Level of significance.} diff --git a/man/tidy.rsplit.Rd b/man/tidy.rsplit.Rd index eca6c81b..3f2bec7f 100644 --- a/man/tidy.rsplit.Rd +++ b/man/tidy.rsplit.Rd @@ -16,7 +16,7 @@ \method{tidy}{nested_cv}(x, unique_ind = TRUE, ...) } \arguments{ -\item{x}{A \code{rset} or \code{rsplit} object} +\item{x}{A \code{rset} or \code{rsplit} object} \item{unique_ind}{Should unique row identifiers be returned? 
For example, if \code{FALSE} then bootstrapping results will include multiple rows in the From 6b0b52c3b3c1cadf142b5cd370c1f1f81062e844 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 4 Sep 2024 16:51:51 +0100 Subject: [PATCH 6/7] doc leftover --- man/group_bootstraps.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/group_bootstraps.Rd b/man/group_bootstraps.Rd index e93dccd3..9a3c7878 100644 --- a/man/group_bootstraps.Rd +++ b/man/group_bootstraps.Rd @@ -25,7 +25,7 @@ assessment set within a fold.} \item{apparent}{A logical. Should an extra resample be added where the analysis and holdout subset are the entire data set. This is required for -some estimators used by the \code{summary()} function that require the apparent +some estimators used by the \code{\link[=summary]{summary()}} function that require the apparent error rate.} \item{...}{These dots are for future extensions and must be empty.} From 8fdd408ba791c8105b5f938d478c23cba5a5b38c Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 4 Sep 2024 16:52:16 +0100 Subject: [PATCH 7/7] Add acknowledgement --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index d5814b8f..ed7ab225 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,6 +12,8 @@ * Formatting improvement: package names are now not in backticks anymore (@agmurray, #525). +* Improved documentation and formatting: function names are now more easily identifiable through either `()` at the end or being links to the function documentation (@brshallo, #521). + ## Bug fixes * `vfold_cv()` now utilizes the `breaks` argument correctly for repeated cross-validation (@ZWael, #471).