diff --git a/.Rbuildignore b/.Rbuildignore index 11b5480..944b667 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -5,7 +5,6 @@ ^\.travis.yml ^\_pkgdown.yml ^LICENSE -^data/. ^vignettes/. ^logo.png ^BioMathlogo.svg diff --git a/R/data.R b/R/data.R new file mode 100644 index 0000000..ae0acc2 --- /dev/null +++ b/R/data.R @@ -0,0 +1,8 @@ +#' 5002 publications on diabetes prevalence +#' +#' A dataset containing 5002 publications exported from a Citavi project. +#' +#' @format A data frame with 5002 rows and 25 variables. +#' @usage data(diabetesprevalence) +"diabetesprevalence" + diff --git a/R/detect_language.R b/R/detect_language.R index 8d14532..440dbc9 100644 --- a/R/detect_language.R +++ b/R/detect_language.R @@ -12,14 +12,15 @@ #' #' @details #' `r lifecycle::badge("experimental")` \cr -#' Currently this only works for files that were generated while Citavi -#' was set to "English" so that column names are "Short Title" etc. +#' The underlying core function determining the language is \code{textcat::textcat()}. #' #' @examples -#' path <- example_xlsx("3dupsin5refs.xlsx") -#' read_Citavi_xlsx(path) %>% -#' detect_language() %>% -#' dplyr::select(Title, Abstract, det_lang, det_lang_wanted) +#' CitDat <- CitaviR::diabetesprevalence %>% +#' dplyr::slice(1952:1955, 4390:4393) +#' +#' CitDat %>% +#' detect_language() %>% +#' dplyr::select(Abstract, det_lang, det_lang_wanted) #' #' @return A tibble containing at least one additional column: \code{det_lang}. 
#' @importFrom textcat textcat @@ -29,6 +30,18 @@ #' detect_language <- function (CitDat, fieldsToDetectIn = c("Abstract"), wantedLanguage = c("english")) { + # stop if empty arguments ------------------------------------------- + if (is.null(fieldsToDetectIn) || anyNA(fieldsToDetectIn)) { + stop("'fieldsToDetectIn' must not be NULL/NA.") + } + + + # check if fieldsToDetectIn are present ----------------------------------- + if (!all(fieldsToDetectIn %in% names(CitDat))) { + stop(paste("Could not be found in dataset column names:\n", + paste(fieldsToDetectIn[fieldsToDetectIn %not_in% names(CitDat)], collapse = ", "))) + } + # collapse fieldsToDetectIn ----------------------------------------------- CitDat <- CitDat %>% diff --git a/_pkgdown.yml b/_pkgdown.yml index 2a5906b..a7a265f 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -51,7 +51,8 @@ reference: - read_Citavi_xlsx - write_Citavi_xlsx -- title: Utility +- title: Utility & Data contents: - '`%not_in%`' - example_xlsx + - diabetesprevalence diff --git a/data-raw/diabetesprevalence.R b/data-raw/diabetesprevalence.R new file mode 100644 index 0000000..7365d8c --- /dev/null +++ b/data-raw/diabetesprevalence.R @@ -0,0 +1,12 @@ +suppressWarnings( + diabetesprevalence <- + CitaviR::read_Citavi_xlsx( + path = "data-raw/diabetesprevalence.xlsx", + keepMarksCols = TRUE, + useYearDerived = TRUE, + setSuggestedColOrder = TRUE, + setSuggestedColTypes = TRUE + ) +) + +usethis::use_data(diabetesprevalence, overwrite = TRUE, internal = FALSE) diff --git a/data-raw/diabetesprevalence.xlsx b/data-raw/diabetesprevalence.xlsx new file mode 100644 index 0000000..eb50cc5 Binary files /dev/null and b/data-raw/diabetesprevalence.xlsx differ diff --git a/data/diabetesprevalence.rda b/data/diabetesprevalence.rda new file mode 100644 index 0000000..86bc5c8 Binary files /dev/null and b/data/diabetesprevalence.rda differ diff --git a/docs/reference/detect_language.html b/docs/reference/detect_language.html index 479b3e9..221872b 100644 --- 
a/docs/reference/detect_language.html +++ b/docs/reference/detect_language.html @@ -165,22 +165,26 @@
-Currently this only works for files that were generated while Citavi
-was set to "English" so that column names are "Short Title" etc.
textcat::textcat()
.
path <- example_xlsx("3dupsin5refs.xlsx") -read_Citavi_xlsx(path) %>% - detect_language() %>% - dplyr::select(Title, Abstract, det_lang, det_lang_wanted) -#> # A tibble: 5 x 4 -#> Title Abstract det_lang det_lang_wanted -#> <chr> <chr> <chr> <lgl> -#> 1 Estimating broad-sense h~ Broad-sense heritability i~ english TRUE -#> 2 Heritability in plant br~ In plant breeding, heritab~ english TRUE -#> 3 Heritability in Plant Br~ In plant breeding, heritab~ english TRUE -#> 4 Hritability in Plant Bre~ In plant breeding, heritab~ english TRUE -#> 5 More, Larger, Simpler: H~ Traditionally, cultivar ev~ english TRUE+CitDat <- CitaviR::diabetesprevalence %>% + dplyr::slice(1952:1955, 4390:4393) + +CitDat %>% + detect_language() %>% + dplyr::select(Abstract, det_lang, det_lang_wanted) +#> # A tibble: 8 x 3 +#> Abstract det_lang det_lang_wanted +#> <chr> <chr> <lgl> +#> 1 Diabetes mellitus is an important chronic disease ca~ english TRUE +#> 2 BACKGROUND: Cardiovascular diseases are among the mo~ english TRUE +#> 3 BAKGRUNN: Diabetes er assosiert med koronarsykdom, o~ danish FALSE +#> 4 Background: Diabetes is a risk factor for infection ~ english TRUE +#> 5 INTRODUCCIÓN: Las enfermedades que motivan hospitali~ spanish FALSE +#> 6 Type 2 diabetes mellitus prevalence has increased wo~ english TRUE +#> 7 INTRODUCTION: Agenesis of the dorsal pancreas (ADP) ~ english TRUE +#> 8 INTRODUCTION: Diabetic retinopathy (DR) is the leadi~ english TRUE