Skip to content

Commit

Permalink
Verify module dependencies are installed (#106)
Browse files Browse the repository at this point in the history
* Initial module verification function
* Prevent execution when modules are not verified
* Fix parameter name mismatch
* Revise to use renv public functions only
  • Loading branch information
anthonysena authored Dec 18, 2023
1 parent a185331 commit 28bacf4
Show file tree
Hide file tree
Showing 13 changed files with 330 additions and 18 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ export(getModuleList)
export(retrieveConnectionDetails)
export(storeConnectionDetails)
export(unlockKeyring)
export(verifyModuleInstallation)
import(CohortGenerator)
import(DatabaseConnector)
import(dplyr)
Expand Down
5 changes: 4 additions & 1 deletion R/Execution.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ execute <- function(analysisSpecifications,
)
}
modules <- ensureAllModulesInstantiated(analysisSpecifications)
if (isFALSE(modules$allModulesInstalled)) {
stop("Stopping execution due to module issues")
}

if (is.null(executionScriptFolder)) {
executionScriptFolder <- tempfile("strategusTempSettings")
Expand All @@ -79,7 +82,7 @@ execute <- function(analysisSpecifications,
keyringName = keyringName
)
}
dependencies <- extractDependencies(modules)
dependencies <- extractDependencies(modules$modules)


fileName <- generateTargetsScript(
Expand Down
237 changes: 231 additions & 6 deletions R/ModuleInstantiation.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,15 @@
#'
#' @template AnalysisSpecifications
#'
#' @template forceVerification
#'
#' @return
#' A tibble listing the instantiated modules.
#' A list containing the install status of all modules
#' (TRUE if all are installed properly) and a tibble listing
#' the instantiated modules.
#'
#' @export
ensureAllModulesInstantiated <- function(analysisSpecifications) {
ensureAllModulesInstantiated <- function(analysisSpecifications, forceVerification = FALSE) {
modules <- getModuleTable(analysisSpecifications, distinct = TRUE)

# Verify only one version per module:
Expand Down Expand Up @@ -73,7 +77,229 @@ ensureAllModulesInstantiated <- function(analysisSpecifications) {
stop(message)
}

return(modules)
# Verify all modules are properly installed
moduleInstallStatus <- list()
for (i in 1:nrow(modules)) {
status <- verifyModuleInstallation(
module = modules$module[i],
version = modules$version[i],
forceVerification = forceVerification
)
moduleInstallStatus[[length(moduleInstallStatus) + 1]] <- status
}
attr(modules, 'moduleInstallStatus') <- moduleInstallStatus

installStatus <- unlist(lapply(moduleInstallStatus, FUN = function(x) { x$moduleInstalled }))
if (!all(installStatus)) {
problemModules <- moduleInstallStatus[!installStatus]
message("There were ", length(problemModules), " issue(s) found with your Strategus modules!")
for (i in seq_along(problemModules)) {
message("Issue #", i, ": Module ", problemModules[[i]]$moduleFolder, " could not install the following R packages:")
print(problemModules[[i]]$issues)
}
message("To fix these issues, open the module project (.Rproj file) at the path specified above and re-run \"renv::restore()\" and correct all issues")
}

return(
list(
allModulesInstalled = all(installStatus),
modules = modules
)
)
}


#' Verify a module is properly installed
#'
#' @description
#' In some instances a module may fail to instantiate and install due to problems
#' when calling renv::restore for the module's renv.lock file. This function
#' will allow you to surface inconsistencies between the module renv.lock file
#' and the module's renv project library. This function will check that a
#' module has been properly installed using the `renv` package
#' (`renv::status()` and `renv::dependencies()`). If a module is verified to
#' work via this function, the hash of the module's renv.lock file will be
#' written to a text file in the module directory to indicate that it is ready
#' for use. This will allow subsequent calls to work faster since the initial
#' verification process can take some time. It is possible to re-run the
#' verification of a module by using the `forceVerification` parameter.
#'
#' To fix issues with a module, you will need to open the module's .Rproj in
#' an RStudio instance and debug the issues when calling renv::restore().
#'
#' @param module The name of the module to verify (i.e. "CohortGeneratorModule")
#'
#' @param version The version of the module to verify (i.e. "0.2.1")
#'
#' @param silent When TRUE output of this verification process is suppressed
#'
#' @template forceVerification
#'
#' @return
#' A list with the output of the consistency check, with elements
#' `moduleFolder` (path of the module folder), `moduleInstalled`
#' (TRUE/FALSE) and `issues` (NULL, or a data.frame of the packages that are
#' recorded in the renv.lock file but are not installed).
#'
#' @export
verifyModuleInstallation <- function(module, version, silent = FALSE, forceVerification = FALSE) {
  # Internal helper function: builds the list returned by every exit path
  verifyModuleInstallationReturnValue <- function(moduleFolder, moduleInstalled, issues = NULL) {
    returnVal <- list(
      moduleFolder = moduleFolder,
      moduleInstalled = moduleInstalled,
      issues = issues
    )
    return(returnVal)
  }

  moduleFolder <- getModuleFolder(module, version)
  if (!dir.exists(moduleFolder)) {
    if (!silent) {
      warning(
        "Module ", module, ", Version: ", version, " not found at: ", moduleFolder,
        ". This means the module was never installed.",
        call. = FALSE
      )
    }
    return(
      verifyModuleInstallationReturnValue(
        moduleFolder = moduleFolder,
        moduleInstalled = FALSE
      )
    )
  }

  if (!silent) {
    # No line feed: the verdict ("MODULE READY!" etc.) is appended to this line
    message("Verifying module: ", module, ", (", version, ") at ", moduleFolder, "...", appendLF = FALSE)
  }
  moduleStatusFileName <- "moduleStatus.txt"
  renvLockFileName <- "renv.lock"
  moduleStatusFile <- file.path(moduleFolder, moduleStatusFileName)
  renvLockFile <- file.path(moduleFolder, renvLockFileName)

  # If the lock file doesn't exist, we're not sure if we're dealing with a module.
  if (!file.exists(renvLockFile)) {
    if (!silent) {
      message("ERROR - renv.lock file missing.")
    }
    return(
      verifyModuleInstallationReturnValue(
        moduleFolder = moduleFolder,
        moduleInstalled = FALSE
      )
    )
  }

  # Check to see if we've already performed the verification by looking at the
  # moduleStatus.txt file to see if the md5 in that file matches the one
  # created by hashing the renv.lock file
  lockfileContents <- ParallelLogger::loadSettingsFromJson(
    fileName = renvLockFile
  )
  lockfileHash <- digest::digest(
    object = lockfileContents,
    algo = "md5"
  )
  if (!forceVerification && file.exists(moduleStatusFile)) {
    lockfileHashFromModuleStatusFile <- SqlRender::readSql(
      sourceFile = moduleStatusFile
    )

    # If the values match, the module is installed correctly
    # return and exit. isTRUE() guards against an NA/empty read from the
    # status file, in which case we fall through and re-verify.
    if (isTRUE(lockfileHashFromModuleStatusFile == lockfileHash)) {
      if (!silent) {
        message("MODULE READY!")
      }
      return(
        verifyModuleInstallationReturnValue(
          moduleFolder = moduleFolder,
          moduleInstalled = TRUE
        )
      )
    }
  }


  # Now perform the consistency check to verify that the renv::restore()
  # process executed successfully. We must do this in the module's context
  Strategus:::withModuleRenv(
    code = {
      # Start by turning off verbose output to hide renv output
      verboseOption <- getOption("renv.verbose")
      options(renv.verbose = FALSE)
      on.exit(options(renv.verbose = verboseOption))

      # Get the renv project status and then identify the packages used
      # in the project to determine if there were issues when restoring
      # the project from the renv.lock file.
      projectStatus <- renv::status()

      # Identify the list of package dependencies by using
      # the data returned from renv::status() and
      # renv::dependencies for the project.
      library <- names(projectStatus$library$Packages)
      lockfile <- names(projectStatus$lockfile$Packages)
      packages <- sort(union(renv::dependencies(quiet = TRUE)$Package, "renv"))
      packages <- sort(unique(c(library, lockfile, packages)))
      projectStatus$packages <- packages
      # NOTE(review): relative path; presumably this code runs with the module
      # folder as working directory, since the file is read back from
      # moduleFolder below - confirm against withModuleRenv.
      saveRDS(
        object = list(
          library = library,
          lockfile = lockfile,
          packages = packages
        ),
        file = "projectStatus.rds"
      )
    },
    moduleFolder = moduleFolder
  )

  # The module's project status is written to the
  # file system. Now we can get the module status and use the information
  # to determine the restoration status
  projectStatus <- readRDS(file.path(moduleFolder, "projectStatus.rds"))

  library <- projectStatus$library
  lockfile <- projectStatus$lockfile
  packages <- projectStatus$packages

  # NOTE: `used` is TRUE for every row because `packages` is the union of
  # library, lockfile and discovered dependencies. As a result the final
  # filter below reports every package that is recorded in the lockfile but
  # not installed. This is kept deliberately so that missing packages which
  # are not referenced directly in code are still reported.
  packageStatus <- data.frame(
    package = packages,
    installed = packages %in% library,
    recorded = packages %in% lockfile,
    used = packages %in% packages
  )

  # If all of the used & recorded packages are installed, then
  # return TRUE for the module installed status. If not, return
  # FALSE and include the issues discovered in the returned list
  ok <- packageStatus$installed & (packageStatus$used == packageStatus$recorded)
  issues <- packageStatus[!ok, , drop = FALSE]
  missing <- !issues$installed
  issues$installed <- ifelse(issues$installed, "y", "n")
  issues$recorded <- ifelse(issues$recorded, "y", "n")
  issues$used <- ifelse(issues$used, "y", if (any(missing)) "?" else "n")
  issues <- issues[issues$installed == "n" & issues$recorded == "y" & issues$used == "y", ]

  moduleInstalled <- nrow(issues) == 0

  if (isTRUE(moduleInstalled)) {
    if (!silent) {
      message("MODULE READY!")
    }
    # Write the contents of the md5 hash of the module's
    # renv.lock file to the file system to note that the
    # module's install status was successful and verified
    SqlRender::writeSql(
      sql = lockfileHash,
      targetFile = moduleStatusFile
    )
  } else {
    if (!silent) {
      message("MODULE HAS ISSUES!")
    }
  }

  return(
    verifyModuleInstallationReturnValue(
      moduleFolder = moduleFolder,
      moduleInstalled = moduleInstalled,
      issues = issues
    )
  )
}

getModuleTable <- function(analysisSpecifications, distinct = FALSE) {
Expand Down Expand Up @@ -121,15 +347,14 @@ getModuleMetaData <- function(moduleFolder) {
}

# Build the path of a module's folder:
# <INSTANTIATED_MODULES_FOLDER>/<module>_<version>.
# The environment variable value is first passed to
# assertModulesFolderSetting(); the path is returned invisibly.
getModuleFolder <- function(module, version) {
  modulesRoot <- Sys.getenv("INSTANTIATED_MODULES_FOLDER")
  assertModulesFolderSetting(x = modulesRoot)
  folderName <- sprintf("%s_%s", module, version)
  invisible(file.path(modulesRoot, folderName))
}

ensureModuleInstantiated <- function(module, version, remoteRepo, remoteUsername) {
assertModulesFolderSetting(x = Sys.getenv("INSTANTIATED_MODULES_FOLDER"))
instantiatedModulesFolder <- Sys.getenv("INSTANTIATED_MODULES_FOLDER")
if (instantiatedModulesFolder == "") {
stop("The INSTANTIATED_MODULES_FOLDER environment variable has not been set.")
}
if (!dir.exists(instantiatedModulesFolder)) {
dir.create(instantiatedModulesFolder, recursive = TRUE)
}
Expand Down
9 changes: 8 additions & 1 deletion R/ResultModelCreation.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ createResultDataModels <- function(analysisSpecifications,
checkmate::reportAssertions(collection = errorMessages)

modules <- ensureAllModulesInstantiated(analysisSpecifications)
if (isFALSE(modules$allModulesInstalled)) {
stop("Stopping execution due to module issues")
}


if (is.null(executionScriptFolder)) {
Expand Down Expand Up @@ -141,7 +144,11 @@ runSchemaCreation <- function(analysisSpecifications, keyringSettings, moduleInd
version <- moduleSpecification$version
remoteRepo <- moduleSpecification$remoteRepo
remoteUsername <- moduleSpecification$remoteUsername
moduleFolder <- ensureModuleInstantiated(module, version, remoteRepo, remoteUsername)
moduleInstallation <- verifyModuleInstallation(module, version)
moduleFolder <- moduleInstallation$moduleFolder
if (isFALSE(moduleInstallation$moduleInstalled)) {
stop("Stopping since module is not properly installed!")
}

# Create job context
moduleExecutionSettings <- executionSettings
Expand Down
6 changes: 5 additions & 1 deletion R/ResultsUpload.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,11 @@ runResultsUpload <- function(analysisSpecifications, keyringSettings, moduleInde
version <- moduleSpecification$version
remoteRepo <- moduleSpecification$remoteRepo
remoteUsername <- moduleSpecification$remoteUsername
moduleFolder <- ensureModuleInstantiated(module, version, remoteRepo, remoteUsername)
moduleInstallation <- verifyModuleInstallation(module, version)
moduleFolder <- moduleInstallation$moduleFolder
if (isFALSE(moduleInstallation$moduleInstalled)) {
stop("Stopping since module is not properly installed!")
}

# Create job context
moduleExecutionSettings <- executionSettings
Expand Down
6 changes: 5 additions & 1 deletion R/RunModule.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@ runModule <- function(analysisSpecifications, keyringSettings, moduleIndex, exec
version <- moduleSpecification$version
remoteRepo <- moduleSpecification$remoteRepo
remoteUsername <- moduleSpecification$remoteUsername
moduleFolder <- ensureModuleInstantiated(module, version, remoteRepo, remoteUsername)
moduleInstallation <- verifyModuleInstallation(module, version)
moduleFolder <- moduleInstallation$moduleFolder
if (isFALSE(moduleInstallation$moduleInstalled)) {
stop("Stopping since module is not properly installed!")
}

# Create job context
moduleExecutionSettings <- executionSettings
Expand Down
9 changes: 9 additions & 0 deletions R/Settings.R
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,15 @@ unlockKeyring <- function(keyringName) {
return(keyringLocked)
}

#' Check function for the INSTANTIATED_MODULES_FOLDER setting.
#'
#' Returns TRUE when `x` is a single, non-missing, non-empty string and
#' otherwise returns a message describing how to set the environment
#' variable. It never raises an error itself, so NULL, NA, zero-length and
#' multi-element input all yield the message.
#'
#' @param x The value of the INSTANTIATED_MODULES_FOLDER environment variable.
#'
#' @return TRUE, or a character string with the problem description.
#'
#' @keywords internal
.checkModuleFolderSetting <- function(x) {
  # length() is tested first so is.na()/nzchar() only ever see a scalar
  if (length(x) != 1 || is.na(x) || !nzchar(x)) {
    return("INSTANTIATED_MODULES_FOLDER environment variable has not been set. INSTANTIATED_MODULES_FOLDER must be set using Sys.setenv(INSTANTIATED_MODULES_FOLDER = \"/somepath\")")
  } else {
    return(TRUE)
  }
}

#' Used when serializing connection details to retain NULL values
#'
#' @keywords internal
Expand Down
1 change: 1 addition & 0 deletions R/Strategus.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@
#' @importFrom methods is
NULL

# Assertion counterpart of .checkModuleFolderSetting, generated by checkmate.
# Callers pass the value of the INSTANTIATED_MODULES_FOLDER environment
# variable as `x`.
assertModulesFolderSetting <- checkmate::makeAssertionFunction(.checkModuleFolderSetting)
Loading

0 comments on commit 28bacf4

Please sign in to comment.