From 043e4d67ec52a2a2e786c3ed90a0a8476b2dda10 Mon Sep 17 00:00:00 2001 From: pneerincx Date: Mon, 3 Jun 2019 10:39:06 +0200 Subject: [PATCH 1/6] Updated .gitignore and README after repo init. --- .gitignore | 22 +++++++++++++- README.md | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 106 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index ecf66f8..f9ac6f3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,23 @@ +*.DS_Store +*.project +*.settings +*.md.html +*.includepath +*.vagrant +*.pydevproject + +# ansible retry files +*.retry + +# vim swap files +*.swp + +# Python stuff +__pycache__/ +*.py[cod] +*$py.class + +# Perl stuff !Build/ .last_cover_stats /META.yml @@ -32,4 +52,4 @@ inc/ /Makefile.old /MANIFEST.bak /pm_to_blib -/*.zip +/*.zip \ No newline at end of file diff --git a/README.md b/README.md index ad222df..ca6e217 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,86 @@ # depad-utils -Utilities for deploy admins + +Utilities for deploy admins. + +## 1. How to use this repo and contribute + +We use a standard GitHub workflow except that we use only one branch "*master*" as this is a relatively small repo and we don't need the additional overhead from branches. +``` + ⎛¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯⎞ ⎛¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯⎞ + ⎜ Shared repo a.k.a. "blessed" ⎜ <<< 7: Merge <<< pull request <<< 6: Send <<< ⎜ Your personal online fork a.k.a. 
"origin" ⎜ + ⎜ github.com/molgenis/depad-utils.git⎜ >>> 1: Fork blessed repo >>>>>>>>>>>>>>>>>>>> ⎜ github.com//depad-utils.git ⎜ + ⎝____________________________________⎠ ⎝__________________________________________________⎠ + v v ʌ + v 2: Clone origin to local disk 5: Push commits to origin + v v ʌ + v ⎛¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯⎞ + `>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 3: pull from blessed >>> ⎜ Your personal local clone ⎜ + ⎜ ~/git/depad-utils ⎜ + ⎝__________________________________________________⎠ + v ʌ + `>>> 4: Commit changes to local clone >>>´ +``` + + 1. Fork this repo on GitHub (Once). + 2. Clone to your local computer and setup remotes (Once). + ``` + # + # Clone repo + # + git clone https://github.com/your_github_account/depad-utils.git + # + # Add blessed remote (the source of the source) and prevent direct push. + # + cd depad-utils + git remote add blessed https://github.com/molgenis/depad-utils.git + git remote set-url --push blessed push.disabled + ``` + + 3. Pull from "*blessed*" (Regularly from 3 onwards). + ``` + # + # Pull from blessed master. + # + cd depad-utils + git pull blessed master + ``` + Make changes: edit, add, delete... + + 4. Commit changes to local clone. + ``` + # + # Commit changes. + # + git status + git add some/changed/files + git commit -m 'Describe your changes in a commit message.' + ``` + + 5. Push commits to "*origin*". + ``` + # + # Push commits. + # + git push origin master + ``` + + 6. Go to your fork on GitHub and create a pull request. + + 7. Have one of the other team members review and eventually merge your pull request. + + 3. Back to 3 to pull from "*blessed*" to get your local clone in sync. + ``` + # + # Pull from blessed master. + # + cd depad-utils + git pull blessed master + ``` + etc. + +## 2. Main sections / topics + +``` + depad-utils/ + `-- bin/: various scripts to manage deployment of software modules. 
+```
\ No newline at end of file
From a5d4da9dad0e6d509583f78048ab8418255f2d14 Mon Sep 17 00:00:00 2001
From: pneerincx
Date: Mon, 3 Jun 2019 10:39:55 +0200
Subject: [PATCH 2/6] Added relocated scripts for deploy admins.

---
 bin/GetPerlModuleDepTreeFromCPAN.pl | 154 +++++++
 bin/generateEasyConfig.R            | 492 ++++++++++++++++++++++
 bin/hpc-environment-sync.bash       | 623 ++++++++++++++++++++++++++++
 bin/hpc-environment-sync.cfg        |  57 +++
 4 files changed, 1326 insertions(+)
 create mode 100755 bin/GetPerlModuleDepTreeFromCPAN.pl
 create mode 100755 bin/generateEasyConfig.R
 create mode 100755 bin/hpc-environment-sync.bash
 create mode 100644 bin/hpc-environment-sync.cfg

diff --git a/bin/GetPerlModuleDepTreeFromCPAN.pl b/bin/GetPerlModuleDepTreeFromCPAN.pl
new file mode 100755
index 0000000..28a288d
--- /dev/null
+++ b/bin/GetPerlModuleDepTreeFromCPAN.pl
@@ -0,0 +1,154 @@
+#!/usr/bin/env perl
+# Report the CPAN dependency tree of the given Perl modules as an indented list or as an EasyBuild exts_list.
+use strict;
+use warnings;
+
+use Getopt::Long;
+use File::Basename;
+use Data::Dumper;
+use Log::Log4perl qw(:easy);
+use CPAN::FindDependencies;
+
+#
+# Define additional global vars.
+#
+my $perl_version = $]; # Version of the running perl; passed to finddeps() as its 'perl' option.
+my $perl_modules; # Input list of Perl Mods for which to report deps.
+my @modules;
+my @all_deps;
+my $basename = basename($0);
+my @moduleArray;
+my $log_level = 'INFO'; # The default log level.
+my %log_levels = (
+    'ALL' => $ALL,
+    'TRACE' => $TRACE,
+    'DEBUG' => $DEBUG,
+    'INFO' => $INFO,
+    'WARN' => $WARN,
+    'ERROR' => $ERROR,
+    'FATAL' => $FATAL,
+    'OFF' => $OFF,
+);
+my $output_format = 'list';
+my %output_formats = (
+    'list' => 'list',
+    'eb' => 'eb',
+);
+
+#
+# Get options.
+#
+Getopt::Long::GetOptions (
+    "pm=s" => \$perl_modules,
+    "ll=s" => \$log_level,
+    "of:s" => \$output_format,
+);
+
+#
+# Configure logging.
+#
+# Reset log level to default if user specified illegal log level.
+$log_level = (
+    defined($log_levels{$log_level})
+    ? $log_levels{$log_level}
+    : $log_levels{'INFO'});
+#Log::Log4perl->init('log4perl.properties');
+Log::Log4perl->easy_init(
+    {
+        level => $log_level,
+        file => "STDOUT",
+        layout => '%d L:%L %p> %m%n'
+    },
+);
+my $logger = Log::Log4perl::get_logger();
+
+#
+# Parse other inputs: -pm is mandatory; split(' ') skips leading and repeated whitespace, so no empty module names.
+#
+unless (defined($perl_modules) && $perl_modules ne '') {
+    _Usage();
+    exit 1;
+} else{
+    @modules = split(' ', $perl_modules);
+}
+
+$output_format = (
+    defined($output_formats{$output_format})
+    ? $output_formats{$output_format}
+    : $output_formats{'list'});
+
+#
+##
+### Main.
+##
+#
+
+foreach my $module (@modules) {
+    my @deps = CPAN::FindDependencies::finddeps("$module", 'perl' => $perl_version);
+    push(@all_deps, @deps);
+}
+
+if ($output_format eq 'list') {
+    foreach my $dep (@all_deps) {
+        print ' ' x $dep->depth;
+        if($dep->warning()) {
+            print '! ';
+        } else {
+            print 'v ';
+        }
+        print $dep->name, ' [', $dep->distribution(), ']' . "\n";
+    }
+} elsif ($output_format eq 'eb') {
+    my @uniq_deps = _Uniq(reverse(@all_deps));
+    print 'exts_list = [' . "\n";
+    foreach my $dep (@uniq_deps) {
+        # ('Text::CSV', '1.33', {
+        # 'source_tmpl': 'Text-CSV-1.33.tar.gz',
+        # 'source_urls': ['https://cpan.metacpan.org/authors/id/M/MA/MAKAMAKA'],
+        # }),
+        # Split the distribution path into author dir, archive name and version; the dots of '.tar.gz' are matched literally. E.g.:
+        #Test::Warnings [E/ET/ETHER/Test-Warnings-0.026.tar.gz
+        my $module = $dep->name();
+        my $distro = $dep->distribution();
+        my $archive;
+        my $author;
+        my $version;
+        if ($distro =~ m|(.+)/(([^/]+)-(v?[0-9.]+)\.tar\.gz)$|) {
+            $author = $1;
+            $archive = $2;
+            $version = $4;
+        } else {
+            $logger->fatal('Cannot parse module details: ' . $distro);
+            exit 1;
+        }
+        my $url = 'https://cpan.metacpan.org/authors/id/' . $author;
+        print ' (\'' . $module . '\', \'' . $version . '\', {' . "\n";
+        print ' \'source_tmpl\': \'' . $archive . '\',' . "\n";
+        print ' \'source_urls\': [\'' . $url . '\'],' . "\n";
+        print ' }),' . "\n";
+    }
+    print ']' . "\n";
+}
+
+#
+##
+### Subs.
+##
+#
+# _Uniq(@deps): return @deps without duplicates; objects are compared by ->name() and the first occurrence wins.
+sub _Uniq {
+    my %seen;
+    return grep(!$seen{$_->name()}++, @_);
+}
+# _Usage(): print the command line help on STDERR and terminate with a non-zero exit status.
+sub _Usage {
+    print STDERR "\n"
+    . 'Usage:' . "\n\n"
+    . ' ' . $basename . ' options' . "\n\n"
+    . 'Available options are:' . "\n\n"
+    . ' -pm \'[PM]\' Quoted and space sperated list of Perl Modules. E.g. \'My::SPPACE::Seperated List::Of::Modules\'' . "\n"
+    . ' -of [format] Output Format. One of: list or eb ("exts_list" format for including in an EasyBuild Bundle easyconfig.")' . "\n"
+    . ' -ll [LEVEL] Log4perl Log Level. One of: ALL, TRACE, DEBUG, INFO (default), WARN, ERROR, FATAL or OFF.' . "\n"
+    . "\n";
+    exit 1; # Help is only shown after invalid input; a bare 'exit' would report success (0) before the caller's 'exit 1' runs.
+}
diff --git a/bin/generateEasyConfig.R b/bin/generateEasyConfig.R
new file mode 100755
index 0000000..5b89db9
--- /dev/null
+++ b/bin/generateEasyConfig.R
@@ -0,0 +1,492 @@
+#!/usr/bin/env Rscript
+
+#
+# Hard-coded list of R package repositories.
+#
+# * Active URLs are used for checking if a package may have been retrieved from that repo.
+# * Archive URLs cannot be used by R commands to query the repo,
+# but will be added to the EasyConfig and may be used by EasyBuild to download packages.
+#
+repos = list()
+repos$cran$active = c('http://cran.r-project.org/src/contrib/')
+repos$cran$archive = c('http://cran.r-project.org/src/contrib/Archive/%(name)s')
+repos$bioconductor$active = c('http://www.bioconductor.org/packages/release/bioc/src/contrib/',
+    'http://www.bioconductor.org/packages/release/data/annotation/src/contrib/',
+    'http://www.bioconductor.org/packages/release/data/experiment/src/contrib/',
+    'http://www.bioconductor.org/packages/release/extra/src/contrib/')
+
+#
+##
+### Setup environment
+##
+#
+suppressPackageStartupMessages(library(stringr))
+suppressPackageStartupMessages(library(logging))
+logging::basicConfig()
+
+#
+##
+### Custom functions
+##
+#
+usage <- function() {
+    cat("
+Description:
+ Generates an EasyBuild EasyConfig file from an existing R environment.
+ Optionally you can first load a specific version of R using module load before generating the *.eb EasyConfig + +Example usage: + module load EasyBuild + module load R + generateEasyConfig.R --tc goolf/1.7.20 \\ + --od /path/to/my/EasyConfigs/r/R/ \\ + --ll WARNING + +Explanation of options: + --tc toolchain/version EasyBuild ToolChain (required). + To get a list of available toolchains (may or may not be already installed): + module load EasyBuild + eb --list-toolchains + To check if a toolchain is already installed and if yes which version is the default: + module -r -t avail -d '^name_of_toolchain$' + --od path Output Directory where the generated *.eb EasyConfig file will be stored (optional). + Will default to the current working directory as determined with getwd(). + Name of the output file follows strict rules + and is automatically generated based on R version and toolchain. + --ll LEVEL Log level (optional). + One of FINEST, FINER, FINE, DEBUG, INFO (default), WARNING, ERROR or CRITICAL. +") + q() +} + +# +# For a list of R packages: +# * Retrieve from a working R installation the package versions +# * Re-order the packages based on their dependencies based on "Depends", "LinkingTo" and "Imports" +# * Try to figure out which repo (or mirror) the package originated from using a list of known repos. +# +# Arguments: +# * packages: A vector with package names (i.e. c('ggplot2', 'RMySQL', 'stringer')) +# * repos: One or more repositories used by packageStatus() to retrieve information on available packages. 
+ +getPackageTree <- function(packages, repos) { + + # + # Local helper function to extract plain package names + # from "Depends", "LinkingTo" and "Imports" statements like for example: + # Depends: R (≥ 3.0.2) + # Imports: evaluate (≥ 0.6), digest, formatR, highr, markdown, stringr (≥ 0.6), yaml (≥ 2.1.5), tools + # + getNamesOnly <- function(string) { + messyPackages <- strsplit(string, ',\\s*', perl=TRUE) + packageNames <- lapply(messyPackages[[1]], function(x) {return(strsplit(x, '[\\s(]', perl=TRUE)[[1]][1])}) + return(unlist(packageNames)) + } + + # + # Local helper function to extract repo name from one of the repo URLs. + getRepoName = function(repo.url) { + repo.name = str_match(repo.url, '(cran)|(bioconductor)')[[1]][1] + return(repo.name) + } + + # + # Local helper function to recursively retrieve a list of all dependencies for a given R package. + # + # Arguments: + # * packageName: Name of a single package. + # * packageStatusOverview: The object returned by packageStatus() + # Returns: + # * packageTree: Character vector with package names, their versions and the repo in which the package was found. + + getDependencies <- function (packageName, packageStatusOverview) { + + packageIndex <- match(packageName, names(packageStatusOverview$inst$Package)) + if (is.na(packageIndex)) { + #logging::levellog(loglevels[['FATAL']], paste('Package', packageName, 'is not installed. 
Aborting!')) + #usage() + logging::levellog(loglevels[['WARNING']], paste('Package', packageName, 'is not installed!')) + return() + } + + dependencies <- c(getNamesOnly(packageStatusOverview$inst$Depends[packageIndex]), + getNamesOnly(packageStatusOverview$inst$Imports[packageIndex]), + getNamesOnly(packageStatusOverview$inst$LinkingTo[packageIndex])) + dependencies <- dependencies[!is.na(dependencies)] + + logging::levellog(loglevels[['FINE']], paste('Package name:', packageName)) + logging::levellog(loglevels[['FINE']], paste('Dependencies:', paste(dependencies, collapse=', '))) + logging::levellog(loglevels[['FINE']], '-----------------------------------------------') + + # don't need the base packages + packageID <- match(dependencies, packageStatusOverview$inst$Package) + isBase <- packageStatusOverview$inst$Priority[packageID] == 'base' + isBase[is.na(isBase)] <- FALSE + + # take out 'R' + cleanDeps <- dependencies[!isBase & dependencies != 'R'] + + # let's recurse + if (length(cleanDeps) == 0) { + # no more dependencies. We terminate returning package name + return(packageName) + } else { + # recurse + deps <- unlist(lapply(cleanDeps, getDependencies, packageStatusOverview)) + allDeps <- unique(c(deps, packageName)) + return(allDeps) + } + } + + logging::levellog(loglevels[['DEBUG']], 'Retrieving status overview of all installed packages...') + + # + # Change available_packages_filters. + # + # Default: options(available_packages_filters = c("R_version", "OS_type", "subarch", "duplicates")) + # This will fail to report packages when + # * They have been updated in the repo's after they were installed locally + # * and the updated version of the packages has a dependency on a more recent R version. + # The older version of the package as installed locally may still be available from a sub folder of the repo + # like for example http://cran.r-project.org/src/contrib/Archive/... 
+ # but these archive folders lack a PACKAGES.gz file, + # which is required for packageStatus() to figure out what is available. + # + #options(available_packages_filters = c("OS_type", "duplicates")) + # + # For some silly reason the options() above no longer work for some CRAN packages as of R 3.4.x. + # E.g. nlme and foreign are no longer listed as "installed" from CRAN unless all filters are disabled with: + options(available_packages_filters = NULL) + + # + # Get status of all packages (installed and available) and append column for repo. + # + flattenedNames <- names(unlist(repos, recursive = FALSE, use.names = TRUE)) + activeRepoURLs <- unlist(subset(unlist(repos, recursive = FALSE, use.names = TRUE), grepl('.active', flattenedNames)), use.names=FALSE) + packageStatusOverview <- packageStatus(repositories = activeRepoURLs) + packageStatusOverview$inst$Repo <- rep(NA, nrow(packageStatusOverview$inst)) + logging::levellog(loglevels[['DEBUG']], 'Trying to figure out which repo(s) the installed package originated from...') + + for (this.package in rownames(packageStatusOverview$inst)) { + logging::levellog(loglevels[['DEBUG']], paste('This package name:', this.package)) + isBase <- packageStatusOverview$inst$Priority[this.package] == 'base' + isBase[is.na(isBase)] <- FALSE + if (isBase) { + packageStatusOverview$inst[this.package,]$Repo = 'base' + } else { + for (this.repo in names(summary(packageStatusOverview)$Repos)) { + logging::levellog(loglevels[['FINEST']], paste(': repo URL:', this.repo)) + logging::levellog(loglevels[['FINEST']], paste(': names:', paste(names(summary(packageStatusOverview)$Repos[[this.repo]]), collapse=', '))) + packages.installed_from_this_repo = as.list(summary(packageStatusOverview)$Repos[[this.repo]])$installed + if (is.element(this.package, packages.installed_from_this_repo)) { + logging::levellog(loglevels[['FINE']], paste(': found pkg in:', this.repo)) + packageStatusOverview$inst[this.package,]$Repo = getRepoName(this.repo) + } 
+ } + } + logging::levellog(loglevels[['DEBUG']], paste(': repo:', packageStatusOverview$inst[this.package,]$Repo)) + } + + # + # Recursively find installed packages and their dependencies (Names only). + # + allPackages.names <- unique(unlist(lapply(packages, getDependencies, packageStatusOverview))) + allPackages.IDs = match(allPackages.names, packageStatusOverview$inst$Package) + + # + # Report packages. + # + colsOfInterest <- c("Package", "Version", "Repo") + colIDs <- match(colsOfInterest, names(packageStatusOverview$inst)) + allPackages.df <- packageStatusOverview$inst[allPackages.IDs, colIDs] + + return(allPackages.df) +} + +writeEC <- function (fh, version, packages, repos, toolchain.name, toolchain.version) { + + writeLines(" +# +# This EasyBuild config file for R was generated with generateEasyConfig.R +# +", fh) + writeLines("name = 'R'", fh) + writeLines(paste("version = '", version, "'", sep=''), fh) + writeLines("homepage = 'http://www.r-project.org/'", fh) + writeLines('description = """R is a free software environment for statistical computing and graphics."""', fh) + writeLines("moduleclass = 'lang'", fh) + this.line = paste("toolchain = {'name': '", toolchain.name, "', 'version': '", toolchain.version, "'}", sep='') + writeLines(this.line, fh) + writeLines(" +sources = [SOURCE_TAR_GZ] +source_urls = ['http://cran.us.r-project.org/src/base/R-%(version_major)s'] + +# +# Configure options. +# +# NOTE: LAPACK support is built into BLAS, which will be detected correctly when LAPACK_LIBS is *not* specified. +# The summary at the end of the configure output should contain: +# External libraries: ...., BLAS(OpenBLAS), LAPACK(in blas), .... 
+# +#preconfigopts = 'BLAS_LIBS=\"$LIBBLAS\" LAPACK_LIBS=\"$LIBLAPACK\"' +preconfigopts = 'BLAS_LIBS=\"$LIBBLAS\"' +configopts = '--with-lapack --with-blas --with-pic --enable-threads --with-x=no --enable-R-shlib' +configopts += ' --with-tcl-config=$EBROOTTCL/lib/tclConfig.sh --with-tk-config=$EBROOTTK/lib/tkConfig.sh ' + +# +# Enable graphics capabilities for plotting. +# +configopts += ' --with-cairo --with-libpng --with-jpeglib --with-libtiff' +# +# Some recommended packages may fail in a parallel build (e.g. Matrix) and we're installing them anyway below. +# +configopts += ' --with-recommended-packages=no' + +# +# You may need to include a more recent Python to download R packages from HTTPS based URLs +# when the Python that comes with your OS is too old and you encounter: +# SSL routines:SSL23_GET_SERVER_HELLO:sslv3 alert handshake failure +# In that case make sure to include a Python as builddependency. +# This Python should not be too new: it's dependencies like for example on ncursus should be compatible with R's dependencies. +# For example Python 2.7.11 is too new as it requires ncurses 6.0 whereas our R requires ncurses 5.9. +# The alternative is to replace the https URLs with http URLs in the generated EasyConfig. +# +#builddependencies = [ +# ('Python', '2.7.10') +#] + +dependencies = [ + ('libreadline', '6.3'), + ('ncurses', '5.9'), + ('bzip2', '1.0.6'), + ('XZ', '5.2.2'), + ('libpng', '1.6.29'), # For plotting in R; requires patched libpng.pc.in to add missing zlib dependency. + ('libjpeg-turbo', '1.4.2'), # For plotting in R + ('LibTIFF', '4.0.4'), # For plotting in R + ('Tcl', '8.6.4'), # For Tcl/Tk + ('Tk', '8.6.4', '-no-X11'), # For Tcl/Tk + ('cURL', '7.47.1'), # For RCurl + ('libxml2', '2.9.2'), # For XML + ('cairo', '1.14.10'), # For plotting in R + ('Pango', '1.40.6'), # For plotting in R. + ('Java', '1.8.0_45', '', True), # Java bindings are built if Java is found, might as well provide it. + ('PCRE', '8.38'), # For rphast package. 
+ ('GMP', '6.1.1'), # for igraph +] + +package_name_tmpl = '%(name)s_%(version)s.tar.gz' +", fh) + + for (this.repo in names(repos)) { + writeLines(paste(this.repo, '_options = {', sep=''), fh) + writeLines(" 'source_urls': [", fh) + forget.this = lapply(unlist(repos[this.repo]), + function(url) { + # + # Switch any https URLs to insecure http. + # If you do want to use https make sure you have a recent Python in your build environment. + # See also note on builddependencies above... + # + url = sub('https:', 'http:', url) + writeLines(sprintf(" '%s',", url), fh) + } + ) + writeLines(" ], + 'source_tmpl': package_name_tmpl, +} +", fh) + } + + writeLines(" +# +# R package list. +# * Order of packages is important! +# * Packages should be specified with fixed versions! +# +exts_list = [ + # + # Default libraries; only here to sanity check their presence. + #", fh) + + forget.this = lapply(unlist(subset(packages, Repo == 'base')$Package), function(pkg) {writeLines(sprintf(" '%s',", pkg), fh)}) + + writeLines(" # + # Other packages. + #", fh) + + forget.this = apply(subset(packages, Repo != 'base', select=c('Package', 'Version', 'Repo')), 1, + function(this.pkg) { + this.pkg <- as.list(this.pkg); + writeLines(sprintf(" ('%s', '%s', %s_options),", this.pkg$Package, this.pkg$Version, this.pkg$Repo), fh) + } + ) + writeLines(']',fh) + +} + +# +## +### Main. 
+## +# + +# +# Read script arguments +# +cargs <- commandArgs(TRUE) +args=NULL +if(length(cargs)>0) { + flags = grep("^--.*",cargs) + values = (1:length(cargs))[-flags] + args[values-1] = cargs[values] + if(length(args) 0) { + lapply(as.list(repolessPackages)$Package, function(repolessPackage) { + logging::levellog(loglevels[['WARN']], paste('Failed to determine repo origin for package', repolessPackage, '.')) + } + ) + if (packagesUnavailable > 1) { + logging::levellog(loglevels[['WARN']], paste('Failed to determine repo origin for', packagesUnavailable, 'packages!')) + } else { + logging::levellog(loglevels[['WARN']], paste('Failed to determine repo origin for', packagesUnavailable, 'package!')) + } +} + +# +# Report R package stats. +# +nsmall = 2 +numberwidth = floor(log(packagesTotal,10))+1 +logging::levellog(loglevels[['INFO']], paste('=======================================================================', paste(rep('=', numberwidth), collapse=''), sep='')) +logging::levellog(loglevels[['INFO']], paste(': Total R packages processed: ', format(packagesTotal, width = numberwidth), sep='')) +this.percentage = round(packagesResolved / packagesTotal * 100, 2) +logging::levellog(loglevels[['INFO']], paste(': * Resolved packages (will be added to EasyConfig): ', format(packagesResolved, width = numberwidth), ' (', format(this.percentage, width = 6, nsmall = nsmall), '%)', sep='')) +this.percentage = round(packagesUnavailable / packagesTotal * 100, 2) +logging::levellog(loglevels[['INFO']], paste(': * Unavailable packages (missing from EasyConfig): ', format(packagesUnavailable, width = numberwidth), ' (', format(this.percentage, width = 6, nsmall = nsmall), '%)', sep='')) +logging::levellog(loglevels[['INFO']], paste('=======================================================================', paste(rep('=', numberwidth), collapse=''), sep='')) + +# +# Create EasyBuild EasyConfig +# +writeEC(fh, R.version.full, installedPackages, repos, toolchain.name, 
toolchain.version) + +# +# Close file handle. +# +close(fh) + +# +# We are done! +# +logging::levellog(loglevels[['INFO']], 'Finished!') + diff --git a/bin/hpc-environment-sync.bash b/bin/hpc-environment-sync.bash new file mode 100755 index 0000000..7e28e65 --- /dev/null +++ b/bin/hpc-environment-sync.bash @@ -0,0 +1,623 @@ +#!/bin/bash +# +# Script for syncing software and reference data sets +# from primary install/deploy location (${SOURCE_ROOT_PATH}) (/apps/... in file system layout below) +# to tmp file systems (${DESTINATION_MOUNT_POINT_PARENTS[@]}) (/.envsync/... in file system layout below) +# in the UMCG cluster environment. +# +# +## +### Global shared HPC environment file system layout: +## +# +# /apps/software/${package}/${version}/ Software deployed with EasyBuild. +# +# /modules/all/${package}/${version}/ Module files for use with Lmod to modify environment to load/unload software deployed with EasyBuild. +# +# /sources/[a-z]/${package}/ Source code downloaded by EasyBuild. +# +# /data/${provider}/${data_set}/$version/ Reference data sets available to all (Hence not group specific data). +# E.g. the human reference genome. +# Data is unmodified "as is". +# +# /data/${provider}/${data_set}/$version/${package}/${version}/ Reference data indexed / reformatted for use with specific version of software. +# +# /.envsync/tmp*/apps/ Rsynced copies of /apps on various HP tmp file systems. +# +# /groups/${group}/arc*/ Group specific folder for archived data: slow, cheap, shared, with backups. +# /prm*/ Group specific folder for permanent data: slow, expensive, shared, with backups. +# /tmp*/ Group specific folder for temporary data: fast, expensive, shared, without backups. +# /scr*/ Group specific folder for scratch data: fast, expensive, local, without backups. +# +# /home/${user}/ Individual home dirs. +# + +# +## +### Functions. +## +# +function showHelp() { + # + # Display commandline help on STDOUT. 
+ # + cat < /dev/null) || true + local ERROR_MESSAGE=${ERROR_MESSAGE:-Unknown error.} + local errorMessage=${3:-"${ERROR_MESSAGE}"} + local LOG2STDERR=${LOG2STDERR:-1} + local ROLE_USER="$(whoami)" + local REAL_USER="$(logname)" + if [ "${LOG2STDERR}" -eq 0 ]; then + LOG2STDERR=' ' + else + LOG2STDERR='-s' + fi + local SOURCE="${RSYNC_SOURCES[@]:-"${SOURCE_ROOT_PATH}"}" + local DESTINATION="${RSYNC_DESTINATION:-"${DESTINATION_MOUNT_POINT_PARENTS[@]}"}" + local DETAILED_LOGS="$(cat ${RSYNC_LOG} 2> /dev/null)" || true + # + # Notify syslog. + # + logger ${LOG2STDERR} "$(hostname) - ${SCRIPT_NAME}:${PROBLEMATIC_LINE}: FATAL: rsync of ${SOURCE} to ${DESTINATION} by ${ROLE_USER}(${REAL_USER}) FAILED!" + logger ${LOG2STDERR} "$(hostname) - ${SCRIPT_NAME}:${PROBLEMATIC_LINE}: Exit code = ${exit_status}" + logger ${LOG2STDERR} "$(hostname) - ${SCRIPT_NAME}:${PROBLEMATIC_LINE}: Error message = ${errorMessage}" + logger ${LOG2STDERR} "$(hostname) - ${SCRIPT_NAME}:${PROBLEMATIC_LINE}: Details = ${DETAILED_LOGS:-none.}" + # + # Notify admins by e-mail only if this is an automated (cron) sync job (running in "DUMB" pseudo terminal). + # + if [ ${TERM} == 'dumb' ]; then + echo " +Dear ${SYS_GROUP} group, + +It is I, the ${SCRIPT_NAME} script executing on $(hostname) by ${ROLE_USER} (${REAL_USER}). +I gave up at line ${PROBLEMATIC_LINE} and your rsync of ${SOURCE} to ${DESTINATION} FAILED miserably! +The exit code of the last command was ${exit_status} with error message ${errorMessage}. +Further details follow below if available... +Please fix either me, $(hostname) or ${ROLE_USER} (${REAL_USER}), whichever is broken... + +Morituri te salutant! + +=============================================================================== +${DETAILED_LOGS:-} +" | mail -s "rsync of ${SOURCE} to ${DESTINATION} FAILED!" \ + -r "${EMAIL_FROM}" \ + "${EMAIL_TO}" \ + || logger ${LOG2STDERR} "$(hostname) - ${SCRIPT_NAME}:${LINENO}: FATAL: Could not send email." + fi + # + # Clean up. 
+ # + rm -Rf ${TMP_DIR} + # + # Reset trap and exit. + # + trap - EXIT + exit $exit_status +} + +# +# Perform the rsync for all sources that need to be synced to all destinations. +# +function performSync() { + cd ${SOURCE_ROOT_PATH} + for (( i = 0 ; i < ${#RSYNC_SOURCES[@]:-0} ; i++ )) + do + for (( j = 0 ; j < ${#AVAILABLE_DESTINATION_ROOT_DIRS[@]:-0} ; j++ )) + do + RSYNC_SOURCE="${RSYNC_SOURCES[${i}]}" + RSYNC_DESTINATION="${AVAILABLE_DESTINATION_ROOT_DIRS[${j}]}" + echo "INFO: Rsyncing ${RSYNC_SOURCE} to ${RSYNC_DESTINATION}..." + if [ "${LIST}" -eq 1 ]; then + echo '================================================================================================================' >> "${RSYNC_LOG}" + echo " Dry run stats for syncing ${RSYNC_SOURCE} to ${RSYNC_DESTINATION}:" >> "${RSYNC_LOG}" + echo '================================================================================================================' >> "${RSYNC_LOG}" + fi + set +e + rsync ${RSYNC_OPTIONS} \ + "${RSYNC_SOURCE}" \ + "${RSYNC_DESTINATION}" \ + >> "${RSYNC_LOG}" 2>&1 + XVAL=$? + set -e + if [[ ${XVAL} -ne 0 && ${XVAL} -ne 24 ]]; then + reportError ${LINENO} ${XVAL} "Rsync of source (${RSYNC_SOURCE}) to destination (${RSYNC_DESTINATION}) started on ${START_TS} failed." + fi + done + done +} + +function createConfigTemplate () { + (cat > "${SCRIPT_CONFIG}.template" < ${TMP_LOG} + +# +## +### Process commandline arguments. +## +# + +# +# Get commandline arguments. +# +ALL=0 +REFDATA=0 +MODULE=0 +LIST=0 +SOURCE='' +while getopts ":halr:m:" opt; do + case $opt in + h) + showHelp + ;; + a) + ALL=1 + ;; + l) + LIST=1 + ;; + r) + REFDATA=1 + SOURCE="${OPTARG}" + ;; + m) + MODULE=1 + SOURCE="${OPTARG}" + ;; + \?) + reportError ${LINENO} '1' "Invalid option -${OPTARG}. Try \"$(basename $0) -h\" for help." + ;; + :) + reportError ${LINENO} '1' "Option -${OPTARG} requires an argument. Try \"$(basename $0) -h\" for help." 
+ ;; + esac +done + +# +# Make sure there are no extra arguments we did not expect nor need. +# +shift $(($OPTIND - 1)) +if [ ! -z ${1:-} ]; then + reportError ${LINENO} '1' "Invalid argument \"$1\". Try \"$(basename $0) -h\" for help." +fi + +# +# Check commandline arguments. +# +ARG_SUM=$((${ALL}+${REFDATA}+${MODULE})) + +if [ "${ARG_SUM}" -eq 0 ]; then + # + # No commandline arguments specified. + # + showHelp +elif [ "${ARG_SUM}" -gt 1 ]; then + reportError ${LINENO} '1' "Too many mutually exclusive arguments specified. Try \"$(basename $0) -h\" for help." +fi + +# +## +### Create ToDo list. +## +# +declare -a RSYNC_SOURCES=() +if [ ${ALL} -eq 1 ]; then + # + # Add all applications, their modules and reference data to the list of data to rsync. + # + # Note: basically this includes everything except for the sources, which we don't need on cluster nodes. + # + RSYNC_SOURCES+=(${SOFTWARE_DIR_NAME}) + RSYNC_SOURCES+=(${MODULES_DIR_NAME}) + RSYNC_SOURCES+=(${REFDATA_DIR_NAME}) +elif [ ${REFDATA} -eq 1 ]; then + # + # Remove leading ${SOURCE_ROOT_PATH}/data/ from SOURCE if an absolute path was specified. + # + SOURCE="$(echo ${SOURCE} | sed "s|^${SOURCE_ROOT_PATH}/*${REFDATA_DIR_NAME}/*||")" + # + # Find and add only specified reference data to list of data to rsync. + # + cd "${SOURCE_ROOT_PATH}/" 2> ${TMP_LOG} || reportError ${LINENO} $? + if [ -e "${SOURCE_ROOT_PATH}/${REFDATA_DIR_NAME}/${SOURCE}" ]; then + echo "INFO: Found reference data ${SOURCE}." + else + reportError ${LINENO} $? "Cannot find reference data ${SOURCE} in ${SOURCE_ROOT_PATH}/${REFDATA_DIR_NAME}/." + fi + # Create list of RSYNC SOURCES + RSYNC_SOURCES+=("${REFDATA_DIR_NAME}/${SOURCE}") +elif [ ${MODULE} -eq 1 ]; then + # + # Find and add only specified module to list of data to rsync. + # + MODULE_SPEC=(${SOURCE//\// }) || reportError ${LINENO} $? + if [ ${#MODULE_SPEC[@]} -ne 2 ]; then + reportError ${LINENO} $? "Illegal module specification ${SOURCE}. 
Module must be specified in format MODULE_NAME/MODULE_VERSION." + fi + MODULE_NAME=${MODULE_SPEC[0]} + #echo "BEDUG: MODULE_NAME = ${MODULE_NAME}" + MODULE_VERSION=${MODULE_SPEC[1]} + #echo "DEBUG: MODULE_VERSION = ${MODULE_VERSION}" + if [[ ${MODULE_NAME} == 'ANY' && ${MODULE_VERSION} == 'ANY' ]]; then + # + # Add all applications and their modules to the list of data to rsync. + # + RSYNC_SOURCES+=(${SOFTWARE_DIR_NAME}) + RSYNC_SOURCES+=(${MODULES_DIR_NAME}) + else + # + # Find module: Lmod modules may be present in multiple "category" sub dirs. + # Usually the module file is present only once in the "all" category + # and present in one or more other categories as symlink to the one in "all". + # + # Lmod module files can be in + # * either TCL format for backward compatibility (module file without extension) + # * or Lua format (with *.lua extension). + # + cd "${SOURCE_ROOT_PATH}/${MODULES_DIR_NAME}/" 2> ${TMP_LOG} || reportError ${LINENO} $? + VERSIONED_MODULE_COUNT=$(ls -1 */${MODULE_NAME}/* | grep -c "/${MODULE_NAME}/${MODULE_VERSION}\(\.lua\)\?$") || reportError ${LINENO} $? "Cannot search for module ${MODULE_NAME}/${MODULE_VERSION} in ${SOURCE_ROOT_PATH}/${MODULES_DIR_NAME}/." + declare -a VERSIONED_MODULES=() + if [ ${VERSIONED_MODULE_COUNT} -ge 1 ]; then + VERSIONED_MODULES+=$(ls -1 */${MODULE_NAME}/* | grep "/${MODULE_NAME}/${MODULE_VERSION}\(\.lua\)\?$") + echo "INFO: Found module file(s) for ${SOURCE}." + else + reportError ${LINENO} '1' "Cannot find module ${MODULE_NAME}/${MODULE_VERSION} in ${SOURCE_ROOT_PATH}/${MODULES_DIR_NAME}/." + fi + # + # Find dir where software is installed for this module. + # + if [ -d ${SOURCE_ROOT_PATH}/${SOFTWARE_DIR_NAME}/${MODULE_NAME}/${MODULE_VERSION} ]; then + # + # Create list of RSYNC SOURCES. + # + for VERSIONED_MODULE in ${VERSIONED_MODULES[@]}; do + RSYNC_SOURCES+=("${MODULES_DIR_NAME}/${VERSIONED_MODULE}") + echo "DEBUG: Appended ${MODULES_DIR_NAME}/${VERSIONED_MODULE} to RSYNC_SOURCES." 
+ done + RSYNC_SOURCES+=("${SOFTWARE_DIR_NAME}/${MODULE_NAME}/${MODULE_VERSION}") + echo "DEBUG: Appended ${SOFTWARE_DIR_NAME}/${MODULE_NAME}/${MODULE_VERSION} to RSYNC_SOURCES." + else + reportError ${LINENO} '1' "Cannot find software dir ${MODULE_NAME}/${MODULE_VERSION} in ${SOURCE_ROOT_PATH}/${SOFTWARE_DIR_NAME}/." + fi + fi +fi + +echo "INFO: RSYNC_SOURCES contains ${RSYNC_SOURCES[@]}" + +# +# Define rsync options. +# +# Fairly standard RSYNC_OPTIONS='-avRK' +# where -a = archive mode = -rlptgoD. +# We don't sync ownership of the files. +# Instead all secondary copies on the destinations are owned by ${SYS_USER}. +# +RSYNC_OPTIONS='-rlptgDvRK' +# +# We don't sync permissions and change them explicitly. +# +RSYNC_OPTIONS="${RSYNC_OPTIONS} --perms --chmod=u=rwX,go=rX" +if [ "${DELETE_OLD}" -eq 1 ]; then + echo "WARN: Cleanup of outdated ${SOURCE_ROOT_PATH} data is enabled for ${DESTINATION_MOUNT_POINT_PARENTS[@]}." + RSYNC_OPTIONS="${RSYNC_OPTIONS} --delete-after" +fi +if [ "${LIST}" -eq 1 ]; then + echo 'WARN: List mode enabled: will only list what is out of sync and needs to be updated, but will not perform actual sync.' + RSYNC_OPTIONS="${RSYNC_OPTIONS} -nu" +else + RSYNC_OPTIONS="${RSYNC_OPTIONS} -q" +fi + +echo "INFO: RSYNC_OPTIONS contains ${RSYNC_OPTIONS}" + +# +## +### Check environment. +## +# + +# +# Check if we are running with the correct account + permissions. +# +CURRENT_USER=$(whoami) +if [ ${CURRENT_USER} != ${SYS_USER} ]; then + reportError ${LINENO} '1' "This script must be executed by user ${SYS_USER}, but you are ${CURRENT_USER}." +fi +CURRENT_GROUP=$(id -gn) +if [ ${CURRENT_GROUP} != ${SYS_GROUP} ]; then + reportError ${LINENO} '1' "This script must be executed by user ${SYS_USER} with primary group ${SYS_GROUP}, but your current primary group is ${CURRENT_GROUP}." +fi + +# +# Update Lmod cache. 
+# +UPDATE_LMOD_CACHE=$(which update_lmod_system_cache_files 2> /dev/null || echo 'missing') + +if [ "${LMOD_VERSION%%.*}" -gt 6 ] +then + lmod_modulepath=${MODULEPATH} + echo "INFO: found an lmod version higher than 6 (${LMOD_VERSION}), modulepath will be MODULEPATH" +else + lmod_modulepath=${LMOD_DEFAULT_MODULEPATH} + echo "INFO: found an lmod version of 6 or lower (${LMOD_VERSION}), modulepath will be LMOD_DEFAULT_MODULEPATH" + +fi + +if [ -x ${UPDATE_LMOD_CACHE} ]; then + echo -n 'INFO: Updating Lmod cache... ' + ${UPDATE_LMOD_CACHE} -d ${LMOD_CACHE_DIR} \ + -t ${LMOD_TIMESTAMP_FILE} \ + ${lmod_modulepath} \ + 2> ${TMP_LOG} || reportError ${LINENO} $? + echo 'done!' +else + echo 'FAILED' + reportError ${LINENO} '1' 'update_lmod_system_cache_files missing or not executable; Cannot update Lmod cache: Giving up!' +fi + +# +# Recursively fix group + permissions on SOURCE (should not be necessary, but just in case :)) +# +cd ${SOURCE_ROOT_PATH} +for (( i = 0 ; i < ${#RSYNC_SOURCES[@]:-0} ; i++ )) +do + echo "INFO: Trying to fix group and permissions on ${SOURCE_ROOT_PATH}${RSYNC_SOURCES[${i}]} recursively before sync." + echo ' Should not be necessary, but just in case...' + echo " This may fail (depending on current group and permissions) if user '${SYS_USER}' does not own the files/folders." + # + # We use find to try to fix group + perms only when they are not correct. + # This prevents permission denied errors when there is no need to change group or perms and we do not own the files/folders. + # + find "${RSYNC_SOURCES[${i}]}" \! -group "${SYS_GROUP}" -exec chgrp "${SYS_GROUP}" '{}' \; 2> ${TMP_LOG} || reportError ${LINENO} $? + find "${RSYNC_SOURCES[${i}]}" \! -type d -a \! \( -perm ${SYS_FILE_PERMS_EXECUTABLE} -o -perm ${SYS_FILE_PERMS_REGULAR} \) -exec chmod "${SYS_FILE_PERMS_CHMOD}" '{}' \; 2> ${TMP_LOG} || reportError ${LINENO} $? + find "${RSYNC_SOURCES[${i}]}" -type d -a \! 
-perm ${SYS_FOLDER_PERMS} -exec chmod "${SYS_FOLDER_PERMS}" '{}' \; 2> ${TMP_LOG} || reportError ${LINENO} $? +done + +# +# Check if all destinations are available and remove destinations, which are offline! +# +# This is critically essential as syncing to a mount point with missing mount would add the data to the disk containing the mount point, +# which is usually a relatively small disk containing the OS. Running out of space on the local system disk, will crash a server! +# +declare -a AVAILABLE_DESTINATION_ROOT_DIRS +for (( i = 0 ; i < ${#DESTINATION_MOUNT_POINT_PARENTS[@]:-0} ; i++ )) +do + # + # Check for presence of folders for logical file system (LFS) names + # and if present whether they contain a copy of ${SOURCE_ROOT_PATH}. + # + declare -a LFS_MOUNT_POINTS="$(find ${DESTINATION_MOUNT_POINT_PARENTS[${i}]} -mindepth 1 -maxdepth 1 -type d)" + for (( j = 0 ; j < ${#LFS_MOUNT_POINTS[@]:-0} ; j++ )) + do + DESTINATION_ROOT_DIR="${LFS_MOUNT_POINTS[${j}]}${SOURCE_ROOT_PATH}" + if [ -e ${DESTINATION_ROOT_DIR} ] && \ + [ -r ${DESTINATION_ROOT_DIR} ] && \ + [ -w ${DESTINATION_ROOT_DIR} ]; then + if [ "${#AVAILABLE_DESTINATION_ROOT_DIRS[@]:-0}" -eq 0 ]; then + AVAILABLE_DESTINATION_ROOT_DIRS=("${DESTINATION_ROOT_DIR}") + else + AVAILABLE_DESTINATION_ROOT_DIRS=("${AVAILABLE_DESTINATION_ROOT_DIRS[@]:-}" "${DESTINATION_ROOT_DIR}") + fi + else + echo "WARN: ${DESTINATION_ROOT_DIR} not available (symlink dead or mount missing). Skipping rsync to ${DESTINATION_ROOT_DIR}." + fi + done +done + +# +## +### Rsync. +## +# +if [ "${#AVAILABLE_DESTINATION_ROOT_DIRS[@]:-0}" -gt 0 ]; then + echo "INFO: AVAILABLE_DESTINATION_ROOT_DIRS contains ${AVAILABLE_DESTINATION_ROOT_DIRS[@]}" + # + # Perform the rsync for all sources that need to be synced to all destinations. + # + performSync +else + echo "WARN: None of the destinations is available: skipping rsync!" +fi + +# +## +### Sanity check. +## +# + +# +# Parse log: rsync log should exist and should be empty. 
+# +if [ "${LIST}" -eq 1 ]; then + cat "${RSYNC_LOG}" || reportError ${LINENO} $? "Listing differences between sources (${RSYNC_SOURCES[@]}) and destinations (${AVAILABLE_DESTINATION_ROOT_DIRS[@]}) started on ${START_TS} failed: cannot display ${RSYNC_LOG} contents!" +elif [[ ! -f "${RSYNC_LOG}" || -s "${RSYNC_LOG}" ]]; then + reportError ${LINENO} $? "Rsync of sources (${RSYNC_SOURCES[@]}) to destinations (${AVAILABLE_DESTINATION_ROOT_DIRS[@]}) started on ${START_TS} failed: error log not empty!" +fi + +# +# Cleanup. +# +if [ -e "${TMP_DIR}" ]; then + (rm -f "${TMP_LOG}" ; rm -f "${RSYNC_LOG}" ; rmdir "${TMP_DIR}") || reportError ${LINENO} $? "Cannot cleanup tmp dir ${TMP_DIR}." +fi + +# +# Signal success. +# +echo "INFO: Finished successfully." + +# +# Reset trap and exit. +# +trap - EXIT +exit 0 diff --git a/bin/hpc-environment-sync.cfg b/bin/hpc-environment-sync.cfg new file mode 100644 index 0000000..98a257c --- /dev/null +++ b/bin/hpc-environment-sync.cfg @@ -0,0 +1,57 @@ + +########################################################## +# Configuration file for the hpc-environment-sync script. +# +# * Listing variables in bash syntax +# * To activate this config: +# * Edit this file and +# * Remove the .template suffix from the filename +# +########################################################## + +# +# System account, group used for the rsync. +# * Group on SOURCE will be recursively changed to this one before sync. +# +SYS_USER='umcg-envsync' +SYS_GROUP='umcg-depad' +# +# Perms for environment on SOURCE +# * These permissions will be applied recursively on SOURCE before sync. +# * These are NOT the permissions applied to the DESTINATION (those are controlled by rsync options.) +# +SYS_FILE_PERMS_EXECUTABLE='0775' +SYS_FILE_PERMS_REGULAR='0664' +SYS_FILE_PERMS_CHMOD='ug+rwX,o+rX,o-w' +SYS_FOLDER_PERMS='2775' + +# +# Original location where we deployed our software, their modules and reference data. 
+# +SOURCE_ROOT_PATH='/apps/' +SOFTWARE_DIR_NAME='software' +MODULES_DIR_NAME='modules' +REFDATA_DIR_NAME='data' + +# +# Locations of env* file system mount points, where we want a copy of our deployed tools + resources. +# +declare -a DESTINATION_MOUNT_POINT_PARENTS=('/mnt/') + +# +# Should the script delete old stuff in DESTINATION when it is no longer present in SOURCE? +# +DELETE_OLD=1 + +# +# Errors are always logged to syslog. +# Errors are in addition logged to STDERR (default). +# The latter can be disabled by setting LOG2STDERR to 0. +# +LOG2STDERR=1 + +# +# Email reporting of failures. +# +EMAIL_FROM='sysop.gcc.groningen@gmail.com' +EMAIL_TO='gcc-analysis@googlegroups.com' From 593d0c5a89310149f3529461b8d96214b0ef410c Mon Sep 17 00:00:00 2001 From: pneerincx Date: Mon, 3 Jun 2019 11:15:58 +0200 Subject: [PATCH 3/6] Improved documentation of tools in README and minor updates. --- README.md | 98 ++++++++++++++++++++++++++++++++++++---- bin/generateEasyConfig.R | 2 +- 2 files changed, 91 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index ca6e217..18d8599 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,96 @@ Utilities for deploy admins. -## 1. How to use this repo and contribute +## 1. List of tools. + +### Repo layout. + +``` + depad-utils/ + |-- bin/: various scripts to manage deployment of software modules. + |-- .gitignore + |-- LICENSE + `-- README.md +``` + +### Tools. + +- [hpc-environment-sync.bash](#-hpc-environment-sync): Synchronize deployed software, modules and reference data from primary to a secondary location. +- [GetPerlModuleDepTreeFromCPAN.pl](#-GetPerlModuleDepTreeFromCPAN): Helper script for making EasyConfigs for Bundles of Perl modules. +- [generateEasyConfig.R](#-generateEasyConfig): Helper script for making EasyConfigs for Bundles of R packages. + +#### hpc-environment-sync.bash + +Use the ```hpc-environment-sync.cfg``` config file in the same location as the script to configure various defaults. 
+ +``` +Usage: + + hpc-environment-sync.bash [-l] -a + hpc-environment-sync.bash [-l] -r relative/path/to/ReferenceData/ + hpc-environment-sync.bash [-l] -m ModuleName/ModuleVersion + +Details: + + -l List: Do not perform actual sync, but only list changes instead (dry-run). + + -a All: syncs complete HPC environment (software, modules & reference data) from /apps/. + + -r Reference data: syncs only the specified data. + Path may be either an absolute path or relative to ${SOURCE_ROOT_PATH}/${REFDATA_DIR_NAME} as specified in hpc-environment-sync.cfg. + + -m Module: syncs only the specified module. + The tool must have been deployed with EasyBuild, with accompanying "module" file + and specified using NAME/VERSION as per "module" command syntax. + Will search for modules in ${SOURCE_ROOT_PATH}/${MODULES_DIR_NAME} as specified in hpc-environment-sync.cfg + and for software installed in ${SOURCE_ROOT_PATH}/${SOFTWARE_DIR_NAME} as specified in hpc-environment-sync.cfg. + The special NAME/VERSION combination ANY/ANY will sync all modules. +``` + +#### generateEasyConfig.R + +``` +Description: + Generates an EasyBuild EasyConfig file from an existing R environment. + Optionally you can first load a specific version of R using module load before generating the *.eb EasyConfig. + +Example usage: + module load EasyBuild + module load R + generateEasyConfig.R --tc foss/2018b \ + --od /path/to/my/EasyConfigs/r/R/ \ + --ll WARNING + +Explanation of options: + --tc toolchain/version EasyBuild ToolChain (required). + To get a list of available toolchains (may or may not be already installed): + module load EasyBuild + eb --list-toolchains + To check if a toolchain is already installed and if yes which version is the default: + module -r -t avail -d '^name_of_toolchain$' + --od path Output Directory where the generated *.eb EasyConfig file will be stored (optional). + Will default to the current working directory as determined with getwd().
+ Name of the output file follows strict rules + and is automatically generated based on R version and toolchain. + --ll LEVEL Log level (optional). + One of FINEST, FINER, FINE, DEBUG, INFO (default), WARNING, ERROR or CRITICAL. +``` + +#### GetPerlModuleDepTreeFromCPAN.pl + +``` +Usage: + + GetPerlModuleDepTreeFromCPAN.pl options + +Available options are: + + -pm '[PM]' Quoted and space separated list of Perl Modules. E.g. 'My::Space::Separated List::Of::Modules' + -of [format] Output Format. One of: list or eb ("exts_list" format for including in an EasyBuild Bundle easyconfig). + -ll [LEVEL] Log4perl Log Level. One of: ALL, TRACE, DEBUG, INFO (default), WARN, ERROR, FATAL or OFF. +``` + +## 2. How to use this repo and contribute We use a standard GitHub workflow except that we use only one branch "*master*" as this is a relatively small repo and we don't need the additional overhead from branches. ``` @@ -77,10 +166,3 @@ We use a standard GitHub workflow except that we use only one branch "*master*" git pull blessed master ``` etc. - -## 2. Main sections / topics - -``` - depad-utils/ - `-- bin/: various scripts to manage deployment of software modules. -``` \ No newline at end of file diff --git a/bin/generateEasyConfig.R b/bin/generateEasyConfig.R index 5b89db9..58ca3c5 100755 --- a/bin/generateEasyConfig.R +++ b/bin/generateEasyConfig.R @@ -38,7 +38,7 @@ Description: Example usage: module load EasyBuild module load R - generateEasyConfig.R --tc goolf/1.7.20 \\ + generateEasyConfig.R --tc foss/2018b \\ --od /path/to/my/EasyConfigs/r/R/ \\ --ll WARNING From e760aebec24701a724e5a1b4ce4132cc2331e530 Mon Sep 17 00:00:00 2001 From: pneerincx Date: Mon, 3 Jun 2019 11:28:35 +0200 Subject: [PATCH 4/6] Fixed anchors for relative URLs. --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 18d8599..defa31c 100644 --- a/README.md +++ b/README.md @@ -16,11 +16,11 @@ Utilities for deploy admins. ### Tools.
-- [hpc-environment-sync.bash](#-hpc-environment-sync): Synchronize deployed software, modules and reference data from primary to a secondary location. -- [GetPerlModuleDepTreeFromCPAN.pl](#-GetPerlModuleDepTreeFromCPAN): Helper script for making EasyConfigs for Bundles of Perl modules. -- [generateEasyConfig.R](#-generateEasyConfig): Helper script for making EasyConfigs for Bundles of R packages. +- [hpc-environment-sync.bash](#-hpc-environment-syncbash): Synchronize deployed software, modules and reference data from primary to a secondary location. +- [generateEasyConfig.R](#-generateeasyconfigr): Helper script for making EasyConfigs for Bundles of R packages. +- [GetPerlModuleDepTreeFromCPAN.pl](#-getperlmodulepeptreefromdpanpl): Helper script for making EasyConfigs for Bundles of Perl modules. -#### hpc-environment-sync.bash +#### hpc-environment-sync.bash Use the ```hpc-environment-sync.cfg``` config file in the same location as the script to configure various defaults. @@ -48,7 +48,7 @@ Details: The special NAME/VERSION combination ANY/ANY will sync all modules. ``` -#### generateEasyConfig.R +#### generateEasyConfig.R ``` Description: @@ -77,7 +77,7 @@ Explanation of options: One of FINEST, FINER, FINE, DEBUG, INFO (default), WARNING, ERROR or CRITICAL. ``` -#### GetPerlModuleDepTreeFromCPAN.pl +#### GetPerlModuleDepTreeFromCPAN.pl ``` Usage: From 414e9ceab4276f4c886cdcecaf946d7dfac25524 Mon Sep 17 00:00:00 2001 From: pneerincx Date: Mon, 3 Jun 2019 11:30:04 +0200 Subject: [PATCH 5/6] Fixed anchors for relative URLs. --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index defa31c..b4ccb40 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ Utilities for deploy admins. - [hpc-environment-sync.bash](#-hpc-environment-syncbash): Synchronize deployed software, modules and reference data from primary to a secondary location. 
- [generateEasyConfig.R](#-generateeasyconfigr): Helper script for making EasyConfigs for Bundles of R packages. -- [GetPerlModuleDepTreeFromCPAN.pl](#-getperlmodulepeptreefromdpanpl): Helper script for making EasyConfigs for Bundles of Perl modules. +- [GetPerlModuleDepTreeFromCPAN.pl](#-getperlmodulepeptreefromcpanpl): Helper script for making EasyConfigs for Bundles of Perl modules. #### hpc-environment-sync.bash @@ -77,7 +77,7 @@ Explanation of options: One of FINEST, FINER, FINE, DEBUG, INFO (default), WARNING, ERROR or CRITICAL. ``` -#### GetPerlModuleDepTreeFromCPAN.pl +#### GetPerlModuleDepTreeFromCPAN.pl ``` Usage: From 0a78df70699d19b13182b37b43ad55934587a5dc Mon Sep 17 00:00:00 2001 From: pneerincx Date: Mon, 3 Jun 2019 11:31:15 +0200 Subject: [PATCH 6/6] Fixed anchors for relative URLs. --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b4ccb40..511de14 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ Utilities for deploy admins. - [hpc-environment-sync.bash](#-hpc-environment-syncbash): Synchronize deployed software, modules and reference data from primary to a secondary location. - [generateEasyConfig.R](#-generateeasyconfigr): Helper script for making EasyConfigs for Bundles of R packages. -- [GetPerlModuleDepTreeFromCPAN.pl](#-getperlmodulepeptreefromcpanpl): Helper script for making EasyConfigs for Bundles of Perl modules. +- [GetPerlModuleDepTreeFromCPAN.pl](#-getperlmoduledeptreefromcpanpl): Helper script for making EasyConfigs for Bundles of Perl modules. #### hpc-environment-sync.bash @@ -77,7 +77,7 @@ Explanation of options: One of FINEST, FINER, FINE, DEBUG, INFO (default), WARNING, ERROR or CRITICAL. ``` -#### GetPerlModuleDepTreeFromCPAN.pl +#### GetPerlModuleDepTreeFromCPAN.pl ``` Usage: