From 7c3368445002eb72ebf1622651636092a8c42ca3 Mon Sep 17 00:00:00 2001
From: EhrmannS <steffen.ehrmann@posteo.de>
Date: Tue, 13 Feb 2024 02:24:26 +0100
Subject: [PATCH] include datype (data type) and change the previous 'type' to
 vartype

---
 NAMESPACE          |  1 +
 R/getClusterVar.R  |  2 +-
 R/getIDVars.R      |  2 +-
 R/getObsVars.R     |  4 ++--
 R/helpers.R        | 13 +++++++++++--
 R/schema.R         | 38 ++++++++++++++++++++++++--------------
 R/setIDVar.R       | 46 ++++++++++++++++++++++++++++++++--------------
 R/setObsVar.R      | 27 +++++++++++++++++++++------
 R/validateSchema.R |  8 ++++----
 9 files changed, 97 insertions(+), 44 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 754fc6a..050d428 100755
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -43,6 +43,7 @@ importFrom(dplyr,across)
 importFrom(dplyr,arrange_at)
 importFrom(dplyr,bind_cols)
 importFrom(dplyr,bind_rows)
+importFrom(dplyr,case_when)
 importFrom(dplyr,distinct)
 importFrom(dplyr,filter)
 importFrom(dplyr,full_join)
diff --git a/R/getClusterVar.R b/R/getClusterVar.R
index 15d34d5..3520ebc 100644
--- a/R/getClusterVar.R
+++ b/R/getClusterVar.R
@@ -37,7 +37,7 @@ getClusterVar <- function(schema = NULL, input = NULL){
 
   listedObs <- map(.x = seq_along(variables), .f = function(ix){
     theVar <- variables[[ix]]
-    if(theVar$type == "observed"){
+    if(theVar$vartype == "observed"){
       is.numeric(theVar$key)
     }
   })
diff --git a/R/getIDVars.R b/R/getIDVars.R
index 85d8529..737ba3d 100644
--- a/R/getIDVars.R
+++ b/R/getIDVars.R
@@ -40,7 +40,7 @@ getIDVars <- function(schema = NULL, input = NULL){
 
   idVars <- map(.x = seq_along(variables), .f = function(ix){
     # unselect those id variables that are also cluster or group id
-    if(variables[[ix]]$type == "id" & !names(variables)[ix] %in% c(clusters$id, clusters$group)){
+    if(variables[[ix]]$vartype == "id" & !names(variables)[ix] %in% c(clusters$id, clusters$group)){
       variables[ix]
     }
   })
diff --git a/R/getObsVars.R b/R/getObsVars.R
index 43db16f..42f8810 100644
--- a/R/getObsVars.R
+++ b/R/getObsVars.R
@@ -36,7 +36,7 @@ getObsVars <- function(schema = NULL, input = NULL){
   filter <- schema@filter
 
   obsVars <- map(.x = seq_along(variables), .f = function(ix){
-    if(variables[[ix]]$type == "observed"){
+    if(variables[[ix]]$vartype == "observed"){
       variables[ix]
     }
   })
@@ -45,7 +45,7 @@ getObsVars <- function(schema = NULL, input = NULL){
   # if there are listed observed variables, act as if they were clusters
   listedObs <- map(.x = seq_along(variables), .f = function(ix){
     theVar <- variables[[ix]]
-    if(theVar$type == "observed"){
+    if(theVar$vartype == "observed"){
       if(is.numeric(theVar$key) | is.list(theVar$key)){
         if(!any(0 %in% theVar$key)){
           c(theVar$key, theVar$col)
diff --git a/R/helpers.R b/R/helpers.R
index 7534378..e66b93e 100644
--- a/R/helpers.R
+++ b/R/helpers.R
@@ -29,13 +29,20 @@
   }
 
   idVars <- map(.x = seq_along(variables), .f = function(ix){
-    if(variables[[ix]]$type == "id"){
+    if(variables[[ix]]$vartype == "id"){
       variables[ix]
     }
   })
   idVars <- unlist(idVars, recursive = FALSE)
+
+  for(i in seq_along(idVars)){
+    # set the desired type
+    class(input[[which(names(idVars)[i] == names(input))]]) <- idVars[[i]]$datype
+
+  }
+
   obsVars <- map(.x = seq_along(variables), .f = function(ix){
-    if(variables[[ix]]$type == "observed"){
+    if(variables[[ix]]$vartype == "observed"){
       variables[ix]
     }
   })
@@ -107,6 +114,8 @@
 
     input[[which(names(obsVars)[i] == names(input))]] <- unlist(theVar)
 
+    # ... also set the desired type
+    class(input[[which(names(obsVars)[i] == names(input))]]) <- obsVars[[i]]$datype
   }
 
   if(length(format$flags$flag) != 0){
diff --git a/R/schema.R b/R/schema.R
index c498d8b..74fc2a5 100755
--- a/R/schema.R
+++ b/R/schema.R
@@ -246,20 +246,25 @@ setValidity(Class = "schema", function(object){
       theVariable <- object@variables[[i]]
       theName <- names(object@variables)[i]
 
-      if(!theVariable$type %in% c("id", "observed")){
+      if(!theVariable$vartype %in% c("id", "observed")){
         errors <- c(errors, paste0("the variables '", theName, "' does must be of type 'id' or 'observed'."))
         return(paste0("\n", errors))
       }
 
-      if(theVariable$type == "id"){
-        if(!all(names(theVariable) %in% c("type", "value", "row", "col", "split", "dist", "merge"))){
-          errors <- c(errors, paste0("'names(", theName, ")' must be a permutation of set {type,value,row,col,split,merge,dist}"))
+      if(theVariable$vartype == "id"){
+        if(!all(names(theVariable) %in% c("vartype", "datype", "value", "row", "col", "split", "dist", "merge"))){
+          errors <- c(errors, paste0("'names(", theName, ")' must be a permutation of set {vartype,datype,value,row,col,split,merge,dist}"))
         }
-        if(!is.null(theVariable$value)){
-          if(!is.character(theVariable$value)){
-            errors <- c(errors, paste0("'", theName, "$value' must have a character value."))
+        if(!is.null(theVariable$datype)){
+          if(!is.character(theVariable$datype)){
+            errors <- c(errors, paste0("'", theName, "$datype' must have a character value."))
           }
         }
+        # if(!is.null(theVariable$value)){
+        #   if(!is.character(theVariable$value)){
+        #     errors <- c(errors, paste0("'", theName, "$value' must have a character value."))
+        #   }
+        # }
         if(!is.null(theVariable$split)){
           if(!is.character(theVariable$split)){
             errors <- c(errors, paste0("'", theName, "$split' must have a character value."))
@@ -280,8 +285,13 @@ setValidity(Class = "schema", function(object){
         }
 
       } else {
-        if(!all(names(theVariable) %in% c("type", "factor", "row", "col", "dist", "key", "value"))){
-          errors <- c(errors, paste0("'names(", theName, ")' must be a permutation of set {type,factor,row,col,dist,key,value}"))
+        if(!all(names(theVariable) %in% c("vartype", "datype", "factor", "row", "col", "dist", "key", "value"))){
+          errors <- c(errors, paste0("'names(", theName, ")' must be a permutation of set {vartype,datype,factor,row,col,dist,key,value}"))
+        }
+        if(!is.null(theVariable$datype)){
+          if(!is.character(theVariable$datype)){
+            errors <- c(errors, paste0("'", theName, "$datype' must have a character value."))
+          }
         }
         if(!is.null(theVariable$factor)){
           if(!is.numeric(theVariable$factor)){
@@ -399,12 +409,12 @@ setMethod(f = "show",
             })
             maxNames <- ifelse(any(nNames > 8), max(nNames), 8)
             theTypes <- sapply(seq_along(variables), function(x){
-              variables[[x]]$type
+              variables[[x]]$vartype
             })
 
             # rows
             theRows <- sapply(seq_along(variables), function(x){
-              if(variables[[x]]$type == "id"){
+              if(variables[[x]]$vartype == "id"){
                 if(is.null(variables[[x]]$row)){
                   ""
                 } else if(is.list(variables[[x]]$row)){
@@ -437,7 +447,7 @@ setMethod(f = "show",
             }
 
             theTops <- sapply(seq_along(variables), function(x){
-              if(variables[[x]]$type == "observed"){
+              if(variables[[x]]$vartype == "observed"){
                 if(is.null(variables[[x]]$row)){
                   ""
                 } else if(is.list(variables[[x]]$row)){
@@ -513,7 +523,7 @@ setMethod(f = "show",
 
             # keys
             theKeys <- sapply(seq_along(variables), function(x){
-              if(variables[[x]]$type == "id"){
+              if(variables[[x]]$vartype == "id"){
                 NULL
               } else {
                 if(!is.null(variables[[x]]$key)){
@@ -540,7 +550,7 @@ setMethod(f = "show",
 
             # values
             theValues <- sapply(seq_along(variables), function(x){
-              if(variables[[x]]$type == "id"){
+              if(variables[[x]]$vartype == "id"){
                 NULL
               } else {
                 if(!is.null(variables[[x]]$value)){
diff --git a/R/setIDVar.R b/R/setIDVar.R
index d0d8de9..860c501 100644
--- a/R/setIDVar.R
+++ b/R/setIDVar.R
@@ -7,6 +7,9 @@
 #'   already existing schema, provide that schema here (overwrites previous
 #'   information).
 #' @param name [\code{character(1)}]\cr Name of the new identifying variable.
+#' @param type [\code{character(1)}]\cr data type of the new identifying
+#'   variable. Possible values are \code{"c/character"}, \code{"i/integer"},
+#'   \code{"n/numeric"}, \code{"l/logical"}, \code{"D/date"} or \code{"_/skip"}.
 #' @param value [\code{character(1)}]\cr In case the variable is an implicit
 #'   variable (i.e., which is not in the origin table), specify it here.
 #' @param columns [\code{integerish(.)}]\cr The column(s) in which the
@@ -33,11 +36,12 @@
 #' @family functions to describe table arrangement
 #' @importFrom checkmate assertClass assertCharacter assertLogical
 #'   testIntegerish testList
+#' @importFrom dplyr case_when
 #' @export
 
-setIDVar <- function(schema = NULL, name = NULL, value = NULL, columns = NULL,
-                     rows = NULL, split = NULL, merge = NULL,
-                     distinct = FALSE){
+setIDVar <- function(schema = NULL, name = NULL, type = "character",
+                     value = NULL, columns = NULL, rows = NULL, split = NULL,
+                     merge = NULL, distinct = FALSE){
 
   # assertions ----
   assertClass(x = schema, classes = "schema", null.ok = TRUE)
@@ -50,23 +54,36 @@ setIDVar <- function(schema = NULL, name = NULL, value = NULL, columns = NULL,
   rowList <- testList(x = rows, len = 1)
   assert(rowInt, rowList)
   if(rowList) assertSubset(x = names(rows), choices = c("find"))
-  assertCharacter(x = value, len = 1, any.missing = FALSE, null.ok = TRUE)
   assertCharacter(x = split, len = 1, any.missing = FALSE, null.ok = TRUE)
   assertCharacter(x = merge, len = 1, any.missing = FALSE, null.ok = TRUE)
   assertLogical(x = distinct, any.missing = FALSE, len = 1)
 
+  data_type <- case_when(
+    type %in% c("i", "integer") ~ "integer",
+    type %in% c("n", "numeric") ~ "numeric",
+    type %in% c("l", "logical") ~ "logical",
+    type %in% c("D", "Date") ~ "Date",
+    type %in% c("_", "skip") ~ "skip",
+    .default = "character"
+  )
+
+  # if type-check should be skipped, don't assert class
+  if(data_type != "skip" & !is.null(value)){
+    assertClass(x = value, classes = data_type)
+  }
+
   if(is.null(schema)){
     schema <- schema_default
   }
-  nClusters <- max(lengths(schema@clusters))
-  if(nClusters == 0) nClusters <- 1
-  prevIDcols <- unlist(lapply(seq_along(schema@variables), function(x){
-    if(schema@variables[[x]]$typ == "id"){
-      if(is.null(schema@variables[[x]]$row)){
-        schema@variables[[x]]$col
-      }
-    }
-  }))
+  # nClusters <- max(lengths(schema@clusters))
+  # if(nClusters == 0) nClusters <- 1
+  # prevIDcols <- unlist(lapply(seq_along(schema@variables), function(x){
+  #   if(schema@variables[[x]]$vartype == "id"){
+  #     if(is.null(schema@variables[[x]]$row)){
+  #       schema@variables[[x]]$col
+  #     }
+  #   }
+  # }))
 
   # error management ----
   # if(!is.null(columns)){
@@ -143,7 +160,8 @@ setIDVar <- function(schema = NULL, name = NULL, value = NULL, columns = NULL,
 
 
   # update schema ----
-  temp <- list(type = "id",
+  temp <- list(vartype = "id",
+               datype = data_type,
                value = value,
                col = columns,
                row = rows,
diff --git a/R/setObsVar.R b/R/setObsVar.R
index 3b4a831..5a0e3eb 100644
--- a/R/setObsVar.R
+++ b/R/setObsVar.R
@@ -7,7 +7,10 @@
 #' @param schema [\code{schema(1)}]\cr In case this information is added to an
 #'   already existing schema, provide that schema here (overwrites previous
 #'   information).
-#' @param name [\code{character(1)}]\cr Name of the new measured variable.
+#' @param name [\code{character(1)}]\cr Name of the new observed variable.
+#' @param type [\code{character(1)}]\cr data type of the new observed
+#'   variable. Possible values are \code{"c/character"}, \code{"i/integer"},
+#'   \code{"n/numeric"}, \code{"l/logical"}, \code{"D/date"} or \code{"_/skip"}.
 #' @param columns [\code{integerish(.)}]\cr The column(s) in which the
 #'   \emph{values} of the new variable are recorded.
 #' @param top [\code{integerish(.)}]\cr In case the variable is nested in a wide
@@ -37,10 +40,12 @@
 #' @family functions to describe table arrangement
 #' @importFrom checkmate assertClass assertIntegerish assertLogical assertSubset
 #'   assertCharacter assertNumeric testIntegerish testCharacter assert
+#' @importFrom dplyr case_when
 #' @export
 
-setObsVar <- function(schema = NULL, name = NULL, columns = NULL, top = NULL,
-                      distinct = FALSE, factor = 1, key = NULL, value = NULL){
+setObsVar <- function(schema = NULL, name = NULL, type = "numeric",
+                      columns = NULL, top = NULL, distinct = FALSE, factor = 1,
+                      key = NULL, value = NULL){
 
   # assertions ----
   assertClass(x = schema, classes = "schema", null.ok = TRUE)
@@ -59,11 +64,20 @@ setObsVar <- function(schema = NULL, name = NULL, columns = NULL, top = NULL,
     assertSubset(x = key, choices = "cluster", empty.ok = FALSE)
   }
 
+  data_type <- case_when(
+    type %in% c("i", "integer") ~ "integer",
+    type %in% c("n", "numeric") ~ "numeric",
+    type %in% c("l", "logical") ~ "logical",
+    type %in% c("D", "Date") ~ "Date",
+    type %in% c("_", "skip") ~ "skip",
+    .default = "character"
+  )
+
   if(is.null(schema)){
     schema <- schema_default
   }
-  nClusters <- max(lengths(schema@clusters))
-  if(nClusters == 0) nClusters <- 1
+  # nClusters <- max(lengths(schema@clusters))
+  # if(nClusters == 0) nClusters <- 1
 
   # error management ----
 
@@ -72,7 +86,8 @@ setObsVar <- function(schema = NULL, name = NULL, columns = NULL, top = NULL,
 
 
   # update schema ----
-  temp <- list(type = "observed",
+  temp <- list(vartype = "observed",
+               datype = data_type,
                col = columns,
                row = top,
                dist = distinct,
diff --git a/R/validateSchema.R b/R/validateSchema.R
index a71f087..42a86e5 100644
--- a/R/validateSchema.R
+++ b/R/validateSchema.R
@@ -115,7 +115,7 @@ validateSchema <- function(schema = NULL, input = NULL){
     tempName <- names(variables)[ix]
     if(!tempName %in% c(groupID, clusterID)){
       temp <- variables[[ix]]
-      if(temp$type == "observed"){
+      if(temp$vartype == "observed"){
         temp$row
       } else {
         NULL
@@ -146,7 +146,7 @@ validateSchema <- function(schema = NULL, input = NULL){
     }
 
     # check whether the variable is wide ----
-    if(varProp$type == "observed"){
+    if(varProp$vartype == "observed"){
       isWide <- map_lgl(.x = seq_along(idCols), function(ix){
         if(length(varProp$col) == length(idCols[[ix]])){
           all(varProp$col == idCols[[ix]])
@@ -162,7 +162,7 @@ validateSchema <- function(schema = NULL, input = NULL){
 
     # figure out which rows to filter out
     if(!varProp$dist & !varName %in% c(groupID, clusterID)){
-      if(varProp$type == "observed"){
+      if(varProp$vartype == "observed"){
         if(is.null(varProp$row)){
           if(is.null(varProp$key)){
             varProp$row <- clusters$row
@@ -179,7 +179,7 @@ validateSchema <- function(schema = NULL, input = NULL){
       }
     }
 
-    if(varProp$type == "id"){
+    if(varProp$vartype == "id"){
       if(!is.null(varProp$val)){
         varProp$dist <- TRUE
       }