Standardize conversion workflow #8
GitHub Actions / JUnit Test Report
failed
Oct 30, 2024 in 0s
1 tests run, 0 passed, 0 skipped, 1 failed.
Annotations
github-actions / JUnit Test Report
nf.test-dataset_cellranger_aligner
Assertion failed:
11 of 13 assertions failed
Raw output
Nextflow stdout:
ERROR ~ Error executing process > 'NFCORE_SCRNASEQ:SCRNASEQ:MTX_CONVERSION:MTX_TO_H5AD (Sample_X)'
Caused by:
Process `NFCORE_SCRNASEQ:SCRNASEQ:MTX_CONVERSION:MTX_TO_H5AD (Sample_X)` terminated with an error exit status (1)
Command executed [/home/runner/work/scrnaseq/scrnaseq/./workflows/../subworkflows/local/../../modules/local/templates/mtx_to_h5ad_cellranger.py]:
#!/usr/bin/env python
# Set numba chache dir to current working directory (which is a writable mount also in containers)
import os
os.environ["NUMBA_CACHE_DIR"] = "."
import scanpy as sc
import pandas as pd
import argparse
from anndata import AnnData
import platform
def _mtx_to_adata(
input: str,
sample: str,
):
adata = sc.read_10x_h5(input)
adata.var["gene_symbols"] = adata.var_names
adata.var.set_index("gene_ids", inplace=True)
adata.obs["sample"] = sample
# reorder columns for 10x mtx files
adata.var = adata.var[["gene_symbols", "feature_types", "genome"]]
return adata
def format_yaml_like(data: dict, indent: int = 0) -> str:
"""Formats a dictionary to a YAML-like string.
Args:
data (dict): The dictionary to format.
indent (int): The current indentation level.
Returns:
str: A string formatted as YAML.
"""
yaml_str = ""
for key, value in data.items():
spaces = " " * indent
if isinstance(value, dict):
yaml_str += f"{spaces}{key}:\n{format_yaml_like(value, indent + 1)}"
else:
yaml_str += f"{spaces}{key}: {value}\n"
return yaml_str
def dump_versions():
versions = {
"NFCORE_SCRNASEQ:SCRNASEQ:MTX_CONVERSION:MTX_TO_H5AD": {
"python": platform.python_version(),
"scanpy": sc.__version__,
"pandas": pd.__version__
}
}
with open("versions.yml", "w") as f:
f.write(format_yaml_like(versions))
def input_to_adata(
input_data: str,
output: str,
sample: str,
):
print(f"Reading in {input_data}")
# open main data
adata = _mtx_to_adata(input_data, sample)
# standard format
# index are gene IDs and symbols are a column
adata.var['gene_versions'] = adata.var.index
adata.var.index = adata.var['gene_versions'].str.split('.').str[0]
adata.var_names_make_unique()
# write results
adata.write_h5ad(f"{output}", compression="gzip")
print(f"Wrote h5ad file to {output}")
# dump versions
dump_versions()
return adata
#
# Run main script
#
# create the directory with the sample name
os.makedirs("Sample_X", exist_ok=True)
# input_type comes from NF module
adata = input_to_adata(
input_data="filtered_feature_bc_matrix.h5",
output="Sample_X/Sample_X_filtered_matrix.h5ad",
sample="Sample_X"
)
Command exit status:
1
Command output:
Reading in filtered_feature_bc_matrix.h5
Command error:
4f4fb700ef54: Pull complete
7834e8feb904: Pull complete
5ac55ff04773: Pull complete
77c7a930b7cc: Pull complete
c864db06f68b: Pull complete
f628b9cff8d1: Pull complete
ba94160d36b7: Verifying Checksum
ba94160d36b7: Download complete
53269d96152a: Verifying Checksum
53269d96152a: Download complete
ba94160d36b7: Pull complete
6f63df1cb8dd: Verifying Checksum
6f63df1cb8dd: Download complete
6f63df1cb8dd: Pull complete
53269d96152a: Pull complete
54ba407d13f5: Verifying Checksum
54ba407d13f5: Download complete
54ba407d13f5: Pull complete
Digest: sha256:fbd40d3d00751ac0df11564b3697006ecf8604af48960833910d32755033575f
Status: Downloaded newer image for community.wave.seqera.io/library/scanpy:1.10.2--e83da2205b92a538
/opt/conda/lib/python3.12/site-packages/anndata/_core/anndata.py:1820: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.
utils.warn_names_duplicates("var")
/opt/conda/lib/python3.12/site-packages/anndata/_core/anndata.py:1820: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.
utils.warn_names_duplicates("var")
Reading in filtered_feature_bc_matrix.h5
Traceback (most recent call last):
File ".command.sh", line 92, in <module>
adata = input_to_adata(
^^^^^^^^^^^^^^^
File ".command.sh", line 76, in input_to_adata
adata.write_h5ad(f"{output}", compression="gzip")
File "/opt/conda/lib/python3.12/site-packages/anndata/_core/anndata.py", line 1929, in write_h5ad
write_h5ad(
File "/opt/conda/lib/python3.12/site-packages/anndata/_io/h5ad.py", line 105, in write_h5ad
write_elem(f, "var", adata.var, dataset_kwargs=dataset_kwargs)
File "/opt/conda/lib/python3.12/site-packages/anndata/_io/specs/registry.py", line 359, in write_elem
Writer(_REGISTRY).write_elem(store, k, elem, dataset_kwargs=dataset_kwargs)
File "/opt/conda/lib/python3.12/site-packages/anndata/_io/utils.py", line 243, in func_wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/lib/python3.12/site-packages/anndata/_io/specs/registry.py", line 309, in write_elem
return write_func(store, k, elem, dataset_kwargs=dataset_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/lib/python3.12/site-packages/anndata/_io/specs/registry.py", line 57, in wrapper
result = func(g, k, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/lib/python3.12/site-packages/anndata/_io/specs/methods.py", line 691, in write_dataframe
raise ValueError(
ValueError: DataFrame.index.name ('gene_versions') is also used by a column whose values are different. This is not supported. Please make sure the values are the same, or use a different name.
Error raised while writing key 'var' of <class 'h5py._hl.group.Group'> to /
Work dir:
/home/runner/work/scrnaseq/scrnaseq/.nf-test/tests/17493f5b507e3df5263c89c8572a97f1/work/8f/a867bef0daf7245c50216b049bdd70
Tip: when you have fixed the problem you can continue the execution adding the option `-resume` to the run command line
-- Check '/home/runner/work/scrnaseq/scrnaseq/.nf-test/tests/17493f5b507e3df5263c89c8572a97f1/meta/nextflow.log' file for details
ERROR ~ Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting
-- Check '/home/runner/work/scrnaseq/scrnaseq/.nf-test/tests/17493f5b507e3df5263c89c8572a97f1/meta/nextflow.log' file for details
Nextflow stderr:
Nextflow 24.10.0 is available - Please consider updating your version to it
Loading