From 7c3368445002eb72ebf1622651636092a8c42ca3 Mon Sep 17 00:00:00 2001 From: EhrmannS Date: Tue, 13 Feb 2024 02:24:26 +0100 Subject: [PATCH] include datype (data type) and change the previous 'type' to vartype --- NAMESPACE | 1 + R/getClusterVar.R | 2 +- R/getIDVars.R | 2 +- R/getObsVars.R | 4 ++-- R/helpers.R | 13 +++++++++++-- R/schema.R | 38 ++++++++++++++++++++++++-------------- R/setIDVar.R | 46 ++++++++++++++++++++++++++++++++-------------- R/setObsVar.R | 27 +++++++++++++++++++++------ R/validateSchema.R | 8 ++++---- 9 files changed, 97 insertions(+), 44 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 754fc6a..050d428 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -43,6 +43,7 @@ importFrom(dplyr,across) importFrom(dplyr,arrange_at) importFrom(dplyr,bind_cols) importFrom(dplyr,bind_rows) +importFrom(dplyr,case_when) importFrom(dplyr,distinct) importFrom(dplyr,filter) importFrom(dplyr,full_join) diff --git a/R/getClusterVar.R b/R/getClusterVar.R index 15d34d5..3520ebc 100644 --- a/R/getClusterVar.R +++ b/R/getClusterVar.R @@ -37,7 +37,7 @@ getClusterVar <- function(schema = NULL, input = NULL){ listedObs <- map(.x = seq_along(variables), .f = function(ix){ theVar <- variables[[ix]] - if(theVar$type == "observed"){ + if(theVar$vartype == "observed"){ is.numeric(theVar$key) } }) diff --git a/R/getIDVars.R b/R/getIDVars.R index 85d8529..737ba3d 100644 --- a/R/getIDVars.R +++ b/R/getIDVars.R @@ -40,7 +40,7 @@ getIDVars <- function(schema = NULL, input = NULL){ idVars <- map(.x = seq_along(variables), .f = function(ix){ # unselect those id variables that are also cluster or group id - if(variables[[ix]]$type == "id" & !names(variables)[ix] %in% c(clusters$id, clusters$group)){ + if(variables[[ix]]$vartype == "id" & !names(variables)[ix] %in% c(clusters$id, clusters$group)){ variables[ix] } }) diff --git a/R/getObsVars.R b/R/getObsVars.R index 43db16f..42f8810 100644 --- a/R/getObsVars.R +++ b/R/getObsVars.R @@ -36,7 +36,7 @@ getObsVars <- function(schema = NULL, input = NULL){ filter <- schema@filter obsVars <- map(.x = seq_along(variables), .f = function(ix){ - if(variables[[ix]]$type == "observed"){ + if(variables[[ix]]$vartype == "observed"){ variables[ix] } }) @@ -45,7 +45,7 @@ getObsVars <- function(schema = NULL, input = NULL){ # if there are listed observed variables, act as if they were clusters listedObs <- map(.x = seq_along(variables), .f = function(ix){ theVar <- variables[[ix]] - if(theVar$type == "observed"){ + if(theVar$vartype == "observed"){ if(is.numeric(theVar$key) | is.list(theVar$key)){ if(!any(0 %in% theVar$key)){ c(theVar$key, theVar$col) diff --git a/R/helpers.R b/R/helpers.R index 7534378..e66b93e 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -29,13 +29,20 @@ } idVars <- map(.x = seq_along(variables), .f = function(ix){ - if(variables[[ix]]$type == "id"){ + if(variables[[ix]]$vartype == "id"){ variables[ix] } }) idVars <- unlist(idVars, recursive = FALSE) + + for(i in seq_along(idVars)){ + # set the desired type + class(input[[which(names(idVars)[i] == names(input))]]) <- idVars[[i]]$datype + + } + obsVars <- map(.x = seq_along(variables), .f = function(ix){ - if(variables[[ix]]$type == "observed"){ + if(variables[[ix]]$vartype == "observed"){ variables[ix] } }) @@ -107,6 +114,8 @@ input[[which(names(obsVars)[i] == names(input))]] <- unlist(theVar) + # ... also set the desired type + class(input[[which(names(obsVars)[i] == names(input))]]) <- obsVars[[i]]$datype } if(length(format$flags$flag) != 0){ diff --git a/R/schema.R b/R/schema.R index c498d8b..74fc2a5 100755 --- a/R/schema.R +++ b/R/schema.R @@ -246,20 +246,25 @@ setValidity(Class = "schema", function(object){ theVariable <- object@variables[[i]] theName <- names(object@variables)[i] - if(!theVariable$type %in% c("id", "observed")){ + if(!theVariable$vartype %in% c("id", "observed")){ errors <- c(errors, paste0("the variables '", theName, "' does must be of type 'id' or 'observed'.")) return(paste0("\n", errors)) } - if(theVariable$type == "id"){ - if(!all(names(theVariable) %in% c("type", "value", "row", "col", "split", "dist", "merge"))){ - errors <- c(errors, paste0("'names(", theName, ")' must be a permutation of set {type,value,row,col,split,merge,dist}")) + if(theVariable$vartype == "id"){ + if(!all(names(theVariable) %in% c("vartype", "datype", "value", "row", "col", "split", "dist", "merge"))){ + errors <- c(errors, paste0("'names(", theName, ")' must be a permutation of set {vartype,datype,value,row,col,split,merge,dist}")) } - if(!is.null(theVariable$value)){ - if(!is.character(theVariable$value)){ - errors <- c(errors, paste0("'", theName, "$value' must have a character value.")) + if(!is.null(theVariable$datype)){ + if(!is.character(theVariable$datype)){ + errors <- c(errors, paste0("'", theName, "$datype' must have a character value.")) } } + # if(!is.null(theVariable$value)){ + # if(!is.character(theVariable$value)){ + # errors <- c(errors, paste0("'", theName, "$value' must have a character value.")) + # } + # } if(!is.null(theVariable$split)){ if(!is.character(theVariable$split)){ errors <- c(errors, paste0("'", theName, "$split' must have a character value.")) @@ -280,8 +285,13 @@ setValidity(Class = "schema", function(object){ } } else { - if(!all(names(theVariable) %in% c("type", "factor", "row", "col", "dist", "key", "value"))){ - errors <- c(errors, paste0("'names(", theName, ")' must be a permutation of set {type,factor,row,col,dist,key,value}")) + if(!all(names(theVariable) %in% c("vartype", "datype", "factor", "row", "col", "dist", "key", "value"))){ + errors <- c(errors, paste0("'names(", theName, ")' must be a permutation of set {vartype,datype,factor,row,col,dist,key,value}")) + } + if(!is.null(theVariable$datype)){ + if(!is.character(theVariable$datype)){ + errors <- c(errors, paste0("'", theName, "$datype' must have a character value.")) + } } if(!is.null(theVariable$factor)){ if(!is.numeric(theVariable$factor)){ @@ -399,12 +409,12 @@ setMethod(f = "show", }) maxNames <- ifelse(any(nNames > 8), max(nNames), 8) theTypes <- sapply(seq_along(variables), function(x){ - variables[[x]]$type + variables[[x]]$vartype }) # rows theRows <- sapply(seq_along(variables), function(x){ - if(variables[[x]]$type == "id"){ + if(variables[[x]]$vartype == "id"){ if(is.null(variables[[x]]$row)){ "" } else if(is.list(variables[[x]]$row)){ @@ -437,7 +447,7 @@ setMethod(f = "show", } theTops <- sapply(seq_along(variables), function(x){ - if(variables[[x]]$type == "observed"){ + if(variables[[x]]$vartype == "observed"){ if(is.null(variables[[x]]$row)){ "" } else if(is.list(variables[[x]]$row)){ @@ -513,7 +523,7 @@ setMethod(f = "show", # keys theKeys <- sapply(seq_along(variables), function(x){ - if(variables[[x]]$type == "id"){ + if(variables[[x]]$vartype == "id"){ NULL } else { if(!is.null(variables[[x]]$key)){ @@ -540,7 +550,7 @@ setMethod(f = "show", # values theValues <- sapply(seq_along(variables), function(x){ - if(variables[[x]]$type == "id"){ + if(variables[[x]]$vartype == "id"){ NULL } else { if(!is.null(variables[[x]]$value)){ diff --git a/R/setIDVar.R b/R/setIDVar.R index d0d8de9..860c501 100644 --- a/R/setIDVar.R +++ b/R/setIDVar.R @@ -7,6 +7,9 @@ #' already existing schema, provide that schema here (overwrites previous #' information). #' @param name [\code{character(1)}]\cr Name of the new identifying variable. +#' @param type [\code{character(1)}]\cr data type of the new identifying +#' variable. Possible values are \code{"c/character"}, \code{"i/integer"}, +#' \code{"n/numeric"}, \code{"l/logical"}, \code{"D/date"} or \code{"_/skip"}. #' @param value [\code{character(1)}]\cr In case the variable is an implicit #' variable (i.e., which is not in the origin table), specify it here. #' @param columns [\code{integerish(.)}]\cr The column(s) in which the @@ -33,11 +36,12 @@ #' @family functions to describe table arrangement #' @importFrom checkmate assertClass assertCharacter assertLogical #' testIntegerish testList +#' @importFrom dplyr case_when #' @export -setIDVar <- function(schema = NULL, name = NULL, value = NULL, columns = NULL, - rows = NULL, split = NULL, merge = NULL, - distinct = FALSE){ +setIDVar <- function(schema = NULL, name = NULL, type = "character", + value = NULL, columns = NULL, rows = NULL, split = NULL, + merge = NULL, distinct = FALSE){ # assertions ---- assertClass(x = schema, classes = "schema", null.ok = TRUE) @@ -50,23 +54,36 @@ setIDVar <- function(schema = NULL, name = NULL, value = NULL, columns = NULL, rowList <- testList(x = rows, len = 1) assert(rowInt, rowList) if(rowList) assertSubset(x = names(rows), choices = c("find")) - assertCharacter(x = value, len = 1, any.missing = FALSE, null.ok = TRUE) assertCharacter(x = split, len = 1, any.missing = FALSE, null.ok = TRUE) assertCharacter(x = merge, len = 1, any.missing = FALSE, null.ok = TRUE) assertLogical(x = distinct, any.missing = FALSE, len = 1) + data_type <- case_when( + type %in% c("i", "integer") ~ "integer", + type %in% c("n", "numeric") ~ "numeric", + type %in% c("l", "logical") ~ "logical", + type %in% c("D", "Date") ~ "Date", + type %in% c("_", "skip") ~ "skip", + .default = "character" + ) + + # if type-check should be skipped, don't assert class + if(data_type != "skip" & !is.null(value)){ + assertClass(x = value, classes = data_type) + } + if(is.null(schema)){ schema <- schema_default } - nClusters <- max(lengths(schema@clusters)) - if(nClusters == 0) nClusters <- 1 - prevIDcols <- unlist(lapply(seq_along(schema@variables), function(x){ - if(schema@variables[[x]]$typ == "id"){ - if(is.null(schema@variables[[x]]$row)){ - schema@variables[[x]]$col - } - } - })) + # nClusters <- max(lengths(schema@clusters)) + # if(nClusters == 0) nClusters <- 1 + # prevIDcols <- unlist(lapply(seq_along(schema@variables), function(x){ + # if(schema@variables[[x]]$vartype == "id"){ + # if(is.null(schema@variables[[x]]$row)){ + # schema@variables[[x]]$col + # } + # } + # })) # error management ---- # if(!is.null(columns)){ @@ -143,7 +160,8 @@ setIDVar <- function(schema = NULL, name = NULL, value = NULL, columns = NULL, # update schema ---- - temp <- list(type = "id", + temp <- list(vartype = "id", + datype = data_type, value = value, col = columns, row = rows, diff --git a/R/setObsVar.R b/R/setObsVar.R index 3b4a831..5a0e3eb 100644 --- a/R/setObsVar.R +++ b/R/setObsVar.R @@ -7,7 +7,10 @@ #' @param schema [\code{schema(1)}]\cr In case this information is added to an #' already existing schema, provide that schema here (overwrites previous #' information). -#' @param name [\code{character(1)}]\cr Name of the new measured variable. +#' @param name [\code{character(1)}]\cr Name of the new observed variable. +#' @param type [\code{character(1)}]\cr data type of the new observed +#' variable. Possible values are \code{"c/character"}, \code{"i/integer"}, +#' \code{"n/numeric"}, \code{"l/logical"}, \code{"D/date"} or \code{"_/skip"}. #' @param columns [\code{integerish(.)}]\cr The column(s) in which the #' \emph{values} of the new variable are recorded. #' @param top [\code{integerish(.)}]\cr In case the variable is nested in a wide @@ -37,10 +40,12 @@ #' @family functions to describe table arrangement #' @importFrom checkmate assertClass assertIntegerish assertLogical assertSubset #' assertCharacter assertNumeric testIntegerish testCharacter assert +#' @importFrom dplyr case_when #' @export -setObsVar <- function(schema = NULL, name = NULL, columns = NULL, top = NULL, - distinct = FALSE, factor = 1, key = NULL, value = NULL){ +setObsVar <- function(schema = NULL, name = NULL, type = "numeric", + columns = NULL, top = NULL, distinct = FALSE, factor = 1, + key = NULL, value = NULL){ # assertions ---- assertClass(x = schema, classes = "schema", null.ok = TRUE) @@ -59,11 +64,20 @@ setObsVar <- function(schema = NULL, name = NULL, columns = NULL, top = NULL, assertSubset(x = key, choices = "cluster", empty.ok = FALSE) } + data_type <- case_when( + type %in% c("i", "integer") ~ "integer", + type %in% c("n", "numeric") ~ "numeric", + type %in% c("l", "logical") ~ "logical", + type %in% c("D", "Date") ~ "Date", + type %in% c("_", "skip") ~ "skip", + .default = "character" + ) + if(is.null(schema)){ schema <- schema_default } - nClusters <- max(lengths(schema@clusters)) - if(nClusters == 0) nClusters <- 1 + # nClusters <- max(lengths(schema@clusters)) + # if(nClusters == 0) nClusters <- 1 # error management ---- @@ -72,7 +86,8 @@ setObsVar <- function(schema = NULL, name = NULL, columns = NULL, top = NULL, # update schema ---- - temp <- list(type = "observed", + temp <- list(vartype = "observed", + datype = data_type, col = columns, row = top, dist = distinct, diff --git a/R/validateSchema.R b/R/validateSchema.R index a71f087..42a86e5 100644 --- a/R/validateSchema.R +++ b/R/validateSchema.R @@ -115,7 +115,7 @@ validateSchema <- function(schema = NULL, input = NULL){ tempName <- names(variables)[ix] if(!tempName %in% c(groupID, clusterID)){ temp <- variables[[ix]] - if(temp$type == "observed"){ + if(temp$vartype == "observed"){ temp$row } else { NULL @@ -146,7 +146,7 @@ validateSchema <- function(schema = NULL, input = NULL){ } # check whether the variable is wide ---- - if(varProp$type == "observed"){ + if(varProp$vartype == "observed"){ isWide <- map_lgl(.x = seq_along(idCols), function(ix){ if(length(varProp$col) == length(idCols[[ix]])){ all(varProp$col == idCols[[ix]]) @@ -162,7 +162,7 @@ validateSchema <- function(schema = NULL, input = NULL){ # figure out which rows to filter out if(!varProp$dist & !varName %in% c(groupID, clusterID)){ - if(varProp$type == "observed"){ + if(varProp$vartype == "observed"){ if(is.null(varProp$row)){ if(is.null(varProp$key)){ varProp$row <- clusters$row @@ -179,7 +179,7 @@ validateSchema <- function(schema = NULL, input = NULL){ } } - if(varProp$type == "id"){ + if(varProp$vartype == "id"){ if(!is.null(varProp$val)){ varProp$dist <- TRUE }