Add subworkflow for merging per-sample SpatialData
fasterius committed Nov 8, 2024
1 parent 822876d commit 846bdff
Showing 4 changed files with 113 additions and 0 deletions.
32 changes: 32 additions & 0 deletions bin/merge_sdata.py
@@ -0,0 +1,32 @@
#!/usr/bin/env python

import argparse
import spatialdata

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Merge SpatialData objects")
    parser.add_argument("files", nargs="+", help="List of SpatialData files to merge")
    parser.add_argument("output", help="Output file name")
    args = parser.parse_args()

    # Read all zarr SpatialData folders
    sdatas = []
    for file in args.files:
        sdata = spatialdata.read_zarr(file)
        sdatas.append(sdata)

    # Merge the data
    output_sdata = spatialdata.concatenate(
        sdatas,
        region_key=None,
        instance_key=None,
        concatenate_tables=False,
        obs_names_make_unique=True,
        modify_tables_inplace=False,
    )

    # Save the concatenated data
    output_sdata.write(
        args.output,
        overwrite=True
    )
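
For reference, a minimal sketch of how the merged store could be inspected after the script has run; this snippet is illustrative only and not part of the commit:

import spatialdata

# Read back the Zarr store written by merge_sdata.py and print a summary
# of its elements (images, shapes, tables) to confirm the merge succeeded.
merged = spatialdata.read_zarr("aggregated-sdata.zarr")
print(merged)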
38 changes: 38 additions & 0 deletions modules/local/merge_sdata.nf
@@ -0,0 +1,38 @@
//
// Merge per-sample SpatialData into a single SpatialData
//
process MERGE_SDATA {

    label 'process_low'
    container "docker.io/erikfas/spatialvi"

    input:
    path(sdata, stageAs: "?/*")

    output:
    path("aggregated-sdata.zarr"), emit: sdata
    path("versions.yml")         , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
        exit 1, "The MERGE_SDATA module does not support Conda/Mamba, please use Docker / Singularity / Podman instead."
    }
    """
    # Set environment variables
    export XDG_CACHE_HOME="./.xdg_cache_home"
    export XDG_DATA_HOME="./.xdg_data_home"

    # Execute script
    merge_sdata.py \\
        ${sdata} \\
        aggregated-sdata.zarr

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        spatialdata_io: \$(python -c "import spatialdata_io; print(spatialdata_io.__version__)")
    END_VERSIONS
    """
}
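
As a usage note, where the merged Zarr store gets published is typically configured outside the module in nf-core-style pipelines; a hypothetical conf/modules.config entry might look like the sketch below (the output path and mode are assumptions, not part of this commit):

// Hypothetical conf/modules.config entry; path and mode are assumed values.
process {
    withName: 'MERGE_SDATA' {
        publishDir = [
            path: { "${params.outdir}/aggregation" },
            mode: 'copy'
        ]
    }
}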
34 changes: 34 additions & 0 deletions subworkflows/local/aggregation.nf
@@ -0,0 +1,34 @@
//
// Subworkflow for aggregation of sample data
//

include { MERGE_SDATA } from '../../modules/local/merge_sdata'

workflow AGGREGATION {

    take:
    ch_sdata // Channel: [ meta, zarr ]

    main:

    ch_versions = Channel.empty()

    //
    // MODULE: Merge per-sample SpatialData objects into one
    //
    ch_sdata_files = ch_sdata
        | map {
            meta, zarr ->
                return [zarr]
        }
    MERGE_SDATA (
        ch_sdata_files.collect()
    )
    ch_versions = ch_versions.mix(MERGE_SDATA.out.versions)
    ch_merged_sdata = MERGE_SDATA.out.sdata

    emit:
    merged_sdata = ch_merged_sdata // channel: [ aggregated-sdata.zarr ]
    versions     = ch_versions     // channel: [ versions.yml ]

}
9 changes: 9 additions & 0 deletions workflows/spatialvi.nf
@@ -10,6 +10,7 @@ include { MULTIQC } from '../modules/nf-core/multiqc/main'
include { INPUT_CHECK } from '../subworkflows/local/input_check'
include { SPACERANGER } from '../subworkflows/local/spaceranger'
include { DOWNSTREAM } from '../subworkflows/local/downstream'
include { AGGREGATION } from '../subworkflows/local/aggregation'
include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline'
include { paramsSummaryMap } from 'plugin/nf-schema'
include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
@@ -83,6 +84,14 @@ workflow SPATIALVI {
    )
    ch_versions = ch_versions.mix(DOWNSTREAM.out.versions)

    //
    // SUBWORKFLOW: Sample aggregation (optional)
    //
    AGGREGATION (
        DOWNSTREAM.out.svg_sdata
    )
    ch_versions = ch_versions.mix(AGGREGATION.out.versions)

    //
    // Collate and save software versions
    //