Skip to content

Commit

Permalink
include datype (data type) and change the previous 'type' to vartype
Browse files Browse the repository at this point in the history
  • Loading branch information
EhrmannS committed Feb 13, 2024
1 parent 76915f9 commit 7c33684
Show file tree
Hide file tree
Showing 9 changed files with 97 additions and 44 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ importFrom(dplyr,across)
importFrom(dplyr,arrange_at)
importFrom(dplyr,bind_cols)
importFrom(dplyr,bind_rows)
importFrom(dplyr,case_when)
importFrom(dplyr,distinct)
importFrom(dplyr,filter)
importFrom(dplyr,full_join)
Expand Down
2 changes: 1 addition & 1 deletion R/getClusterVar.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ getClusterVar <- function(schema = NULL, input = NULL){

listedObs <- map(.x = seq_along(variables), .f = function(ix){
theVar <- variables[[ix]]
if(theVar$type == "observed"){
if(theVar$vartype == "observed"){
is.numeric(theVar$key)
}
})
Expand Down
2 changes: 1 addition & 1 deletion R/getIDVars.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ getIDVars <- function(schema = NULL, input = NULL){

idVars <- map(.x = seq_along(variables), .f = function(ix){
# unselect those id variables that are also cluster or group id
if(variables[[ix]]$type == "id" & !names(variables)[ix] %in% c(clusters$id, clusters$group)){
if(variables[[ix]]$vartype == "id" & !names(variables)[ix] %in% c(clusters$id, clusters$group)){
variables[ix]
}
})
Expand Down
4 changes: 2 additions & 2 deletions R/getObsVars.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ getObsVars <- function(schema = NULL, input = NULL){
filter <- schema@filter

obsVars <- map(.x = seq_along(variables), .f = function(ix){
if(variables[[ix]]$type == "observed"){
if(variables[[ix]]$vartype == "observed"){
variables[ix]
}
})
Expand All @@ -45,7 +45,7 @@ getObsVars <- function(schema = NULL, input = NULL){
# if there are listed observed variables, act as if they were clusters
listedObs <- map(.x = seq_along(variables), .f = function(ix){
theVar <- variables[[ix]]
if(theVar$type == "observed"){
if(theVar$vartype == "observed"){
if(is.numeric(theVar$key) | is.list(theVar$key)){
if(!any(0 %in% theVar$key)){
c(theVar$key, theVar$col)
Expand Down
13 changes: 11 additions & 2 deletions R/helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,20 @@
}

idVars <- map(.x = seq_along(variables), .f = function(ix){
if(variables[[ix]]$type == "id"){
if(variables[[ix]]$vartype == "id"){
variables[ix]
}
})
idVars <- unlist(idVars, recursive = FALSE)

for(i in seq_along(idVars)){
# set the desired type
class(input[[which(names(idVars)[i] == names(input))]]) <- idVars[[i]]$datype

}

obsVars <- map(.x = seq_along(variables), .f = function(ix){
if(variables[[ix]]$type == "observed"){
if(variables[[ix]]$vartype == "observed"){
variables[ix]
}
})
Expand Down Expand Up @@ -107,6 +114,8 @@

input[[which(names(obsVars)[i] == names(input))]] <- unlist(theVar)

# ... also set the desired type
class(input[[which(names(obsVars)[i] == names(input))]]) <- obsVars[[i]]$datype
}

if(length(format$flags$flag) != 0){
Expand Down
38 changes: 24 additions & 14 deletions R/schema.R
Original file line number Diff line number Diff line change
Expand Up @@ -246,20 +246,25 @@ setValidity(Class = "schema", function(object){
theVariable <- object@variables[[i]]
theName <- names(object@variables)[i]

if(!theVariable$type %in% c("id", "observed")){
if(!theVariable$vartype %in% c("id", "observed")){
errors <- c(errors, paste0("the variables '", theName, "' does must be of type 'id' or 'observed'."))
return(paste0("\n", errors))
}

if(theVariable$type == "id"){
if(!all(names(theVariable) %in% c("type", "value", "row", "col", "split", "dist", "merge"))){
errors <- c(errors, paste0("'names(", theName, ")' must be a permutation of set {type,value,row,col,split,merge,dist}"))
if(theVariable$vartype == "id"){
if(!all(names(theVariable) %in% c("vartype", "datype", "value", "row", "col", "split", "dist", "merge"))){
errors <- c(errors, paste0("'names(", theName, ")' must be a permutation of set {vartype,datype,value,row,col,split,merge,dist}"))
}
if(!is.null(theVariable$value)){
if(!is.character(theVariable$value)){
errors <- c(errors, paste0("'", theName, "$value' must have a character value."))
if(!is.null(theVariable$datype)){
if(!is.character(theVariable$datype)){
errors <- c(errors, paste0("'", theName, "$datype' must have a character value."))
}
}
# if(!is.null(theVariable$value)){
# if(!is.character(theVariable$value)){
# errors <- c(errors, paste0("'", theName, "$value' must have a character value."))
# }
# }
if(!is.null(theVariable$split)){
if(!is.character(theVariable$split)){
errors <- c(errors, paste0("'", theName, "$split' must have a character value."))
Expand All @@ -280,8 +285,13 @@ setValidity(Class = "schema", function(object){
}

} else {
if(!all(names(theVariable) %in% c("type", "factor", "row", "col", "dist", "key", "value"))){
errors <- c(errors, paste0("'names(", theName, ")' must be a permutation of set {type,factor,row,col,dist,key,value}"))
if(!all(names(theVariable) %in% c("vartype", "datype", "factor", "row", "col", "dist", "key", "value"))){
errors <- c(errors, paste0("'names(", theName, ")' must be a permutation of set {vartype,datype,factor,row,col,dist,key,value}"))
}
if(!is.null(theVariable$datype)){
if(!is.character(theVariable$datype)){
errors <- c(errors, paste0("'", theName, "$datype' must have a character value."))
}
}
if(!is.null(theVariable$factor)){
if(!is.numeric(theVariable$factor)){
Expand Down Expand Up @@ -399,12 +409,12 @@ setMethod(f = "show",
})
maxNames <- ifelse(any(nNames > 8), max(nNames), 8)
theTypes <- sapply(seq_along(variables), function(x){
variables[[x]]$type
variables[[x]]$vartype
})

# rows
theRows <- sapply(seq_along(variables), function(x){
if(variables[[x]]$type == "id"){
if(variables[[x]]$vartype == "id"){
if(is.null(variables[[x]]$row)){
""
} else if(is.list(variables[[x]]$row)){
Expand Down Expand Up @@ -437,7 +447,7 @@ setMethod(f = "show",
}

theTops <- sapply(seq_along(variables), function(x){
if(variables[[x]]$type == "observed"){
if(variables[[x]]$vartype == "observed"){
if(is.null(variables[[x]]$row)){
""
} else if(is.list(variables[[x]]$row)){
Expand Down Expand Up @@ -513,7 +523,7 @@ setMethod(f = "show",

# keys
theKeys <- sapply(seq_along(variables), function(x){
if(variables[[x]]$type == "id"){
if(variables[[x]]$vartype == "id"){
NULL
} else {
if(!is.null(variables[[x]]$key)){
Expand All @@ -540,7 +550,7 @@ setMethod(f = "show",

# values
theValues <- sapply(seq_along(variables), function(x){
if(variables[[x]]$type == "id"){
if(variables[[x]]$vartype == "id"){
NULL
} else {
if(!is.null(variables[[x]]$value)){
Expand Down
46 changes: 32 additions & 14 deletions R/setIDVar.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
#' already existing schema, provide that schema here (overwrites previous
#' information).
#' @param name [\code{character(1)}]\cr Name of the new identifying variable.
#' @param type [\code{character(1)}]\cr data type of the new identifying
#' variable. Possible values are \code{"c/character"}, \code{"i/integer"},
#' \code{"n/numeric"}, \code{"l/logical"}, \code{"D/date"} or \code{"_/skip"}.
#' @param value [\code{character(1)}]\cr In case the variable is an implicit
#' variable (i.e., which is not in the origin table), specify it here.
#' @param columns [\code{integerish(.)}]\cr The column(s) in which the
Expand All @@ -33,11 +36,12 @@
#' @family functions to describe table arrangement
#' @importFrom checkmate assertClass assertCharacter assertLogical
#' testIntegerish testList
#' @importFrom dplyr case_when
#' @export

setIDVar <- function(schema = NULL, name = NULL, value = NULL, columns = NULL,
rows = NULL, split = NULL, merge = NULL,
distinct = FALSE){
setIDVar <- function(schema = NULL, name = NULL, type = "character",
value = NULL, columns = NULL, rows = NULL, split = NULL,
merge = NULL, distinct = FALSE){

# assertions ----
assertClass(x = schema, classes = "schema", null.ok = TRUE)
Expand All @@ -50,23 +54,36 @@ setIDVar <- function(schema = NULL, name = NULL, value = NULL, columns = NULL,
rowList <- testList(x = rows, len = 1)
assert(rowInt, rowList)
if(rowList) assertSubset(x = names(rows), choices = c("find"))
assertCharacter(x = value, len = 1, any.missing = FALSE, null.ok = TRUE)
assertCharacter(x = split, len = 1, any.missing = FALSE, null.ok = TRUE)
assertCharacter(x = merge, len = 1, any.missing = FALSE, null.ok = TRUE)
assertLogical(x = distinct, any.missing = FALSE, len = 1)

data_type <- case_when(
type %in% c("i", "integer") ~ "integer",
type %in% c("n", "numeric") ~ "numeric",
type %in% c("l", "logical") ~ "logical",
type %in% c("D", "Date") ~ "Date",
type %in% c("_", "skip") ~ "skip",
.default = "character"
)

# if type-check should be skipped, don't assert class
if(data_type != "skip" & !is.null(value)){
assertClass(x = value, classes = data_type)
}

if(is.null(schema)){
schema <- schema_default
}
nClusters <- max(lengths(schema@clusters))
if(nClusters == 0) nClusters <- 1
prevIDcols <- unlist(lapply(seq_along(schema@variables), function(x){
if(schema@variables[[x]]$typ == "id"){
if(is.null(schema@variables[[x]]$row)){
schema@variables[[x]]$col
}
}
}))
# nClusters <- max(lengths(schema@clusters))
# if(nClusters == 0) nClusters <- 1
# prevIDcols <- unlist(lapply(seq_along(schema@variables), function(x){
# if(schema@variables[[x]]$vartype == "id"){
# if(is.null(schema@variables[[x]]$row)){
# schema@variables[[x]]$col
# }
# }
# }))

# error management ----
# if(!is.null(columns)){
Expand Down Expand Up @@ -143,7 +160,8 @@ setIDVar <- function(schema = NULL, name = NULL, value = NULL, columns = NULL,


# update schema ----
temp <- list(type = "id",
temp <- list(vartype = "id",
datype = data_type,
value = value,
col = columns,
row = rows,
Expand Down
27 changes: 21 additions & 6 deletions R/setObsVar.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
#' @param schema [\code{schema(1)}]\cr In case this information is added to an
#' already existing schema, provide that schema here (overwrites previous
#' information).
#' @param name [\code{character(1)}]\cr Name of the new measured variable.
#' @param name [\code{character(1)}]\cr Name of the new observed variable.
#' @param type [\code{character(1)}]\cr data type of the new observed
#' variable. Possible values are \code{"c/character"}, \code{"i/integer"},
#' \code{"n/numeric"}, \code{"l/logical"}, \code{"D/date"} or \code{"_/skip"}.
#' @param columns [\code{integerish(.)}]\cr The column(s) in which the
#' \emph{values} of the new variable are recorded.
#' @param top [\code{integerish(.)}]\cr In case the variable is nested in a wide
Expand Down Expand Up @@ -37,10 +40,12 @@
#' @family functions to describe table arrangement
#' @importFrom checkmate assertClass assertIntegerish assertLogical assertSubset
#' assertCharacter assertNumeric testIntegerish testCharacter assert
#' @importFrom dplyr case_when
#' @export

setObsVar <- function(schema = NULL, name = NULL, columns = NULL, top = NULL,
distinct = FALSE, factor = 1, key = NULL, value = NULL){
setObsVar <- function(schema = NULL, name = NULL, type = "numeric",
columns = NULL, top = NULL, distinct = FALSE, factor = 1,
key = NULL, value = NULL){

# assertions ----
assertClass(x = schema, classes = "schema", null.ok = TRUE)
Expand All @@ -59,11 +64,20 @@ setObsVar <- function(schema = NULL, name = NULL, columns = NULL, top = NULL,
assertSubset(x = key, choices = "cluster", empty.ok = FALSE)
}

data_type <- case_when(
type %in% c("i", "integer") ~ "integer",
type %in% c("n", "numeric") ~ "numeric",
type %in% c("l", "logical") ~ "logical",
type %in% c("D", "Date") ~ "Date",
type %in% c("_", "skip") ~ "skip",
.default = "character"
)

if(is.null(schema)){
schema <- schema_default
}
nClusters <- max(lengths(schema@clusters))
if(nClusters == 0) nClusters <- 1
# nClusters <- max(lengths(schema@clusters))
# if(nClusters == 0) nClusters <- 1

# error management ----

Expand All @@ -72,7 +86,8 @@ setObsVar <- function(schema = NULL, name = NULL, columns = NULL, top = NULL,


# update schema ----
temp <- list(type = "observed",
temp <- list(vartype = "observed",
datype = data_type,
col = columns,
row = top,
dist = distinct,
Expand Down
8 changes: 4 additions & 4 deletions R/validateSchema.R
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ validateSchema <- function(schema = NULL, input = NULL){
tempName <- names(variables)[ix]
if(!tempName %in% c(groupID, clusterID)){
temp <- variables[[ix]]
if(temp$type == "observed"){
if(temp$vartype == "observed"){
temp$row
} else {
NULL
Expand Down Expand Up @@ -146,7 +146,7 @@ validateSchema <- function(schema = NULL, input = NULL){
}

# check whether the variable is wide ----
if(varProp$type == "observed"){
if(varProp$vartype == "observed"){
isWide <- map_lgl(.x = seq_along(idCols), function(ix){
if(length(varProp$col) == length(idCols[[ix]])){
all(varProp$col == idCols[[ix]])
Expand All @@ -162,7 +162,7 @@ validateSchema <- function(schema = NULL, input = NULL){

# figure out which rows to filter out
if(!varProp$dist & !varName %in% c(groupID, clusterID)){
if(varProp$type == "observed"){
if(varProp$vartype == "observed"){
if(is.null(varProp$row)){
if(is.null(varProp$key)){
varProp$row <- clusters$row
Expand All @@ -179,7 +179,7 @@ validateSchema <- function(schema = NULL, input = NULL){
}
}

if(varProp$type == "id"){
if(varProp$vartype == "id"){
if(!is.null(varProp$val)){
varProp$dist <- TRUE
}
Expand Down

0 comments on commit 7c33684

Please sign in to comment.