From 846bdff6ed61a0e746aec629cd9d1ee6cfd94297 Mon Sep 17 00:00:00 2001
From: Erik Fasterius
Date: Fri, 8 Nov 2024 16:52:11 +0100
Subject: [PATCH] Add subworkflow for merging per-sample SpatialData

---
 bin/merge_sdata.py                | 39 +++++++++++++++++++++++++++++++
 modules/local/merge_sdata.nf      | 39 +++++++++++++++++++++++++++++++
 subworkflows/local/aggregation.nf | 34 +++++++++++++++++++++++++++
 workflows/spatialvi.nf            |  9 ++++++++
 4 files changed, 121 insertions(+)
 create mode 100755 bin/merge_sdata.py
 create mode 100644 modules/local/merge_sdata.nf
 create mode 100644 subworkflows/local/aggregation.nf

diff --git a/bin/merge_sdata.py b/bin/merge_sdata.py
new file mode 100755
index 0000000..49168bd
--- /dev/null
+++ b/bin/merge_sdata.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+"""Merge several per-sample SpatialData Zarr stores into a single store."""
+
+import argparse
+
+import spatialdata
+
+
+def parse_args(argv=None):
+    """Parse the command line: one or more input stores, then the output path."""
+    parser = argparse.ArgumentParser(description="Merge SpatialData objects")
+    parser.add_argument("files", nargs="+", help="List of SpatialData files to merge")
+    parser.add_argument("output", help="Output file name")
+    return parser.parse_args(argv)
+
+
+def main(argv=None):
+    """Read every input store, concatenate them and write the merged result."""
+    args = parse_args(argv)
+
+    # Read all zarr SpatialData folders
+    sdatas = [spatialdata.read_zarr(path) for path in args.files]
+
+    # Merge the data; tables are not concatenated (concatenate_tables=False)
+    output_sdata = spatialdata.concatenate(
+        sdatas,
+        region_key=None,
+        instance_key=None,
+        concatenate_tables=False,
+        obs_names_make_unique=True,
+        modify_tables_inplace=False,
+    )
+
+    # Save the concatenated data, overwriting any existing output store
+    output_sdata.write(args.output, overwrite=True)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/modules/local/merge_sdata.nf b/modules/local/merge_sdata.nf
new file mode 100644
index 0000000..fb4f756
--- /dev/null
+++ b/modules/local/merge_sdata.nf
@@ -0,0 +1,39 @@
+//
+// Merge per-sample SpatialData into a single SpatialData
+//
+process MERGE_SDATA {
+
+    label 'process_low'
+    container "docker.io/erikfas/spatialvi"
+
+    input:
+    // "?/*" stages each input under its own numbered directory (see Nextflow `stageAs`)
+    path(sdata, stageAs: "?/*")
+
+    output:
+    path("aggregated-sdata.zarr"), emit: sdata
+    path("versions.yml")         , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+        exit 1, "The MERGE_SDATA module does not support Conda/Mamba, please use Docker / Singularity / Podman instead."
+    }
+    """
+    # Set environment variables
+    export XDG_CACHE_HOME="./.xdg_cache_home"
+    export XDG_DATA_HOME="./.xdg_data_home"
+
+    # Execute script
+    merge_sdata.py \\
+        ${sdata} \\
+        aggregated-sdata.zarr
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        spatialdata: \$(python -c "import spatialdata; print(spatialdata.__version__)")
+    END_VERSIONS
+    """
+}
diff --git a/subworkflows/local/aggregation.nf b/subworkflows/local/aggregation.nf
new file mode 100644
index 0000000..4a1ebf3
--- /dev/null
+++ b/subworkflows/local/aggregation.nf
@@ -0,0 +1,34 @@
+//
+// Subworkflow for aggregation of sample data
+//
+
+include { MERGE_SDATA } from '../../modules/local/merge_sdata'
+
+workflow AGGREGATION {
+
+    take:
+    ch_sdata // Channel: [ meta, zarr ]
+
+    main:
+
+    ch_versions = Channel.empty()
+
+    //
+    // MODULE: Merge per-sample SpatialData objects into one
+    //
+    ch_sdata_files = ch_sdata
+        | map {
+            meta, zarr ->
+            return [zarr]
+        }
+    MERGE_SDATA (
+        ch_sdata_files.collect()
+    )
+    ch_versions = ch_versions.mix(MERGE_SDATA.out.versions)
+    ch_merged_sdata = MERGE_SDATA.out.sdata
+
+    emit:
+    merged_sdata = ch_merged_sdata // channel: [ aggregated-sdata.zarr ]
+    versions     = ch_versions     // channel: [ versions.yml ]
+
+}
diff --git a/workflows/spatialvi.nf b/workflows/spatialvi.nf
index b51044c..0f380e2 100644
--- a/workflows/spatialvi.nf
+++ b/workflows/spatialvi.nf
@@ -10,6 +10,7 @@ include { MULTIQC } from '../modules/nf-core/multiqc/main'
 include { INPUT_CHECK            } from '../subworkflows/local/input_check'
 include { SPACERANGER            } from '../subworkflows/local/spaceranger'
 include { DOWNSTREAM             } from '../subworkflows/local/downstream'
+include { AGGREGATION            } from '../subworkflows/local/aggregation'
 include { paramsSummaryMultiqc   } from '../subworkflows/nf-core/utils_nfcore_pipeline'
 include { paramsSummaryMap       } from 'plugin/nf-schema'
 include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
@@ -83,6 +84,14 @@ workflow SPATIALVI {
     )
     ch_versions = ch_versions.mix(DOWNSTREAM.out.versions)
 
+    //
+    // SUBWORKFLOW: Sample aggregation (optional)
+    //
+    AGGREGATION (
+        DOWNSTREAM.out.svg_sdata
+    )
+    ch_versions = ch_versions.mix(AGGREGATION.out.versions)
+
     //
     // Collate and save software versions
     //