From 4a7d10189996659e9b43ab7949bdfc4378f8c93d Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Wed, 13 Mar 2024 08:23:15 +0100 Subject: [PATCH 1/5] Loop over depth, not over nodes --- R/min_depth_distribution.R | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/R/min_depth_distribution.R b/R/min_depth_distribution.R index 7581eae..ac2bd29 100644 --- a/R/min_depth_distribution.R +++ b/R/min_depth_distribution.R @@ -4,11 +4,8 @@ calculate_tree_depth <- function(frame){ stop("The data frame has to contain columns called 'right daughter' and 'left daughter'! It should be a product of the function getTree(..., labelVar = T).") } - # Both child values of leaf nodes are 0, i.e., lower than min(node_id) frame[["depth"]] <- calculate_tree_depth_( - node_id = seq_len(nrow(frame)), - left_child = frame[["left daughter"]], - right_child = frame[["right daughter"]] + as.matrix(frame[, c("left daughter", "right daughter")]) ) return(frame) } @@ -19,22 +16,29 @@ calculate_tree_depth_ranger <- function(frame){ stop("The data frame has to contain columns called 'rightChild' and 'leftChild'! It should be a product of the function ranger::treeInfo().") } + # Child nodes are zero based, so we increase them by 1 frame[["depth"]] <- calculate_tree_depth_( - node_id = frame[["nodeID"]], - left_child = frame[["leftChild"]], - right_child = frame[["rightChild"]] + as.matrix(frame[, c("leftChild", "rightChild")]) + 1 ) return(frame) } # Internal function used to determine the depth of each node -calculate_tree_depth_ <- function(node_id, left_child, right_child) { - n <- length(node_id) - depth <- numeric(n) - for (i in 2:n) { - parent_node <- left_child %in% node_id[i] | right_child %in% node_id[i] - depth[i] <- depth[parent_node] + 1 +# Node indices are assumed to have values in 1:nrow(childs) +calculate_tree_depth_ <- function(childs) { + n <- nrow(childs) + node_id <- seq_len(n) + depth <- rep(NA, times = n) + j <- depth[1L] <- 0 + ix <- 1 + + # j loops over tree depth + while(anyNA(depth) || j >= n) { + ix <- c(childs[ix, ]) + j <- j + 1 + depth[ix] <- j } + return(depth) } From bc8331ccdbb29aa4be149358303781daed78fa9a Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Wed, 13 Mar 2024 18:40:38 +0100 Subject: [PATCH 2/5] integer index and remove leave nodes from index --- R/min_depth_distribution.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/min_depth_distribution.R b/R/min_depth_distribution.R index ac2bd29..aeeec2c 100644 --- a/R/min_depth_distribution.R +++ b/R/min_depth_distribution.R @@ -27,14 +27,14 @@ calculate_tree_depth_ranger <- function(frame){ # Node indices are assumed to have values in 1:nrow(childs) calculate_tree_depth_ <- function(childs) { n <- nrow(childs) - node_id <- seq_len(n) depth <- rep(NA, times = n) j <- depth[1L] <- 0 - ix <- 1 + ix <- 1L # current nodes, initialized with root node index # j loops over tree depth while(anyNA(depth) || j >= n) { - ix <- c(childs[ix, ]) + ix <- as.integer(childs[ix, ]) + ix <- ix[!is.na(ix) & ix >= 1L] # leave nodes do not have childs j <- j + 1 depth[ix] <- j } From 5f3cbcaaa20209f2e09e2b99bdf33154ff792a9c Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Wed, 13 Mar 2024 20:02:42 +0100 Subject: [PATCH 3/5] fixed security condition in the while loop --- R/min_depth_distribution.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/min_depth_distribution.R b/R/min_depth_distribution.R index aeeec2c..0831d33 100644 --- a/R/min_depth_distribution.R +++ b/R/min_depth_distribution.R @@ -23,8 +23,8 @@ calculate_tree_depth_ranger <- function(frame){ return(frame) } -# Internal function used to determine the depth of each node -# Node indices are assumed to have values in 1:nrow(childs) +# Internal function used to determine the depth of each node. +# The input is a with left and right child node indices in 1:nrow(childs). calculate_tree_depth_ <- function(childs) { n <- nrow(childs) depth <- rep(NA, times = n) @@ -32,9 +32,9 @@ calculate_tree_depth_ <- function(childs) { ix <- 1L # current nodes, initialized with root node index # j loops over tree depth - while(anyNA(depth) || j >= n) { + while(anyNA(depth) && j < n) { # The second condition is never used ix <- as.integer(childs[ix, ]) - ix <- ix[!is.na(ix) & ix >= 1L] # leave nodes do not have childs + ix <- ix[!is.na(ix) & ix >= 1L] # leaf nodes do not have childs j <- j + 1 depth[ix] <- j } From 02a9bafbf18ee5aba60970f58aa67a43360d695c Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Wed, 13 Mar 2024 20:03:51 +0100 Subject: [PATCH 4/5] Fixed comment --- R/min_depth_distribution.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/min_depth_distribution.R b/R/min_depth_distribution.R index 0831d33..6a6ba66 100644 --- a/R/min_depth_distribution.R +++ b/R/min_depth_distribution.R @@ -24,7 +24,7 @@ calculate_tree_depth_ranger <- function(frame){ } # Internal function used to determine the depth of each node. -# The input is a with left and right child node indices in 1:nrow(childs). +# The input is a matrix with left and right child nodes in 1:nrow(childs). calculate_tree_depth_ <- function(childs) { n <- nrow(childs) depth <- rep(NA, times = n) From dbe1920644e2b2ca08c534349f6de78411d63fcd Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Wed, 13 Mar 2024 20:07:15 +0100 Subject: [PATCH 5/5] move as.matrix() into function --- R/min_depth_distribution.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/min_depth_distribution.R b/R/min_depth_distribution.R index 6a6ba66..214edb3 100644 --- a/R/min_depth_distribution.R +++ b/R/min_depth_distribution.R @@ -5,7 +5,7 @@ calculate_tree_depth <- function(frame){ It should be a product of the function getTree(..., labelVar = T).") } frame[["depth"]] <- calculate_tree_depth_( - as.matrix(frame[, c("left daughter", "right daughter")]) + frame[, c("left daughter", "right daughter")] ) return(frame) } @@ -18,14 +18,15 @@ calculate_tree_depth_ranger <- function(frame){ } # Child nodes are zero based, so we increase them by 1 frame[["depth"]] <- calculate_tree_depth_( - as.matrix(frame[, c("leftChild", "rightChild")]) + 1 + frame[, c("leftChild", "rightChild")] + 1 ) return(frame) } # Internal function used to determine the depth of each node. -# The input is a matrix with left and right child nodes in 1:nrow(childs). +# The input is a data.frame with left and right child nodes in 1:nrow(childs). calculate_tree_depth_ <- function(childs) { + childs <- as.matrix(childs) n <- nrow(childs) depth <- rep(NA, times = n) j <- depth[1L] <- 0