Skip to content

Commit

Permalink
adding functionality for non-full rank data and missing data
Browse files Browse the repository at this point in the history
  • Loading branch information
olbeck committed Mar 4, 2024
1 parent 9b4d915 commit 81ecb6e
Showing 1 changed file with 61 additions and 13 deletions.
74 changes: 61 additions & 13 deletions R/get-scores.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,34 @@
#'
#' @param feature.matrix A matrix containing responses to questions for organizations.
#' Output from `get_features` function.
#' Rows represent each organization, and columns represent responses to the following questions:
#' Rows represent each organization, and must contain columns that represent responses
#' to the following questions:
#' "P12_LINE_1", "P4_LINE_12", "P4_LINE_28", "P4_LINE_29_30", "P6_LINE_1", "P6_LINE_11A",
#' "P6_LINE_15A", "P6_LINE_18", "P6_LINE_2", "P6_LINE_3", "P6_LINE_8A", "P6_LINE_12_13_14".
#' feature.matrix if a row has an NA value, the entire row will be removed in the output.
#' See \code{vignette("making-gov-scores", package = "governance")} for details on these features.
#'
#' @return A data frame with the original input data and appended 6 factor scores along with a total score.
#' @param missing From psych::factor.scores. If missing is TRUE, missing items
#' are imputed using either the median or mean. If missing is FALSE, the default,
#' scores are found based upon the mean of the available items for each subject.
#' If missing is FALSE, input rows with NA values will not be included in the output.
#'
#' @details This function generates factor scores for observations in the input `feature.matrix` from pre-loaded
#' factor model ( in `factor-objects.Rdata`)
#' @param imput From psych::factor.scores. If missing == TRUE, then missing data
#' can be imputed using "median" or "mean". The number of missing by subject is
#' reported. If impute = "none", missing data are not scored.
#' Median is the default for our usage because all of our feature values are binary.
#'
#' @param scores.by.hand If FALSE, psych::factor.scores is used to calculate features scores.
#' If TRUE, manual calculations are used to calculate the factors scores. This option
#' should only be used if `features.matrix` has no missing values AND is not of full rank
#' (i.e. at least one column is all 0's or 1's).
#' See Appendix of See \code{vignette("making-gov-scores", package = "governance")} for
#' details on this calculation.
#'
#' @return A data frame with the original `features.matrix` input data and
#' appended 6 factor scores along with a total score.
#'
#' @details This function generates factor scores for observations in the input
#' `feature.matrix` from pre-loaded factor model ( in `data/factor-objects.Rdata`)
#'
#' @references
#' Factor objects are loaded from "governance/pkg-funcs/factor-objects.Rdata".
Expand All @@ -24,7 +43,7 @@
#' @export


get_scores <- function(feature.matrix){
get_scores <- function(feature.matrix, missing = TRUE, impute = "median", scores.by.hand = FALSE){
### Function to read in new data, and return appended scores.

### Inputs
Expand Down Expand Up @@ -64,7 +83,7 @@ get_scores <- function(feature.matrix){

# removing NA rows
has.na <- apply(temp.dat, 1, function(row) any(is.na(row)))
if(any(has.na)){
if(any(has.na) & missing == FALSE){
temp.dat <- temp.dat[!has.na, ]
output.sting <- paste((1:nrow(temp.dat))[has.na], collapse = ",")
message(paste("Rows", output.sting, "have NA values in the features. They will not be included in the output."))
Expand All @@ -73,16 +92,45 @@ get_scores <- function(feature.matrix){


### Get New Factor Scores -------------------------
scores <- psych::factor.scores(temp.dat, #new data
model2.6, #original fitted model of features2
rho = rho2, #polychoric correlation of features2
method = "Thurstone") #using regression equation to "predict" new scores.
if(scores.by.hand == FALSE){
#if data is full rank, we can directly use the factor.scores function
scores <- psych::factor.scores(temp.dat, #new data
model2.6, #original fitted model of features2
rho = rho2, #polychoric correlation of features2
method = "Thurstone", #using regression equation to "predict" new scores.
missing = missing,
impute = impute)

scores.keep <- as.data.frame(scores$scores)

}else{ #if data is not full rank, we can do the calculation by hand

#cannot have NA values
if(any(has.na)){
stop("Cannot calculate scores by hand if input data contains any NA values.")
}

#set up matrix multiplication
D <- as.matrix(temp.dat)
C <- rho2
L <- model2.6$loadings

W <- solve(C) %*% L
S <- D %*% W

scores.keep <- as.data.frame(S)
}


### Append scores to data object
scores.keep <- as.data.frame(scores$scores)
scores.keep$total.score <- rowSums(scores.keep)

feature.matrix <- cbind(feature.matrix[!has.na, ], scores.keep)
if(missing == FALSE){
feature.matrix <- cbind(feature.matrix[!has.na, ], scores.keep)
}else{
feature.matrix <- cbind(feature.matrix, scores.keep)
}


### Return
return(feature.matrix)
Expand Down

0 comments on commit 81ecb6e

Please sign in to comment.