Format with black (#4645)
* Format with black

* style: Add pyproject.toml to match tools' ruff config

* style: ruff format

* style: pyproject.toml => ruff.toml

Don't want people getting the wrong idea that this is a python project
or something.

* Update cellranger_count.py

* style: Clean up whitespace

---------

Co-authored-by: Simon Pearce <[email protected]>
Co-authored-by: Edmund Miller <[email protected]>
Co-authored-by: Edmund Miller <[email protected]>
4 people authored Jun 14, 2024
1 parent 612271e commit 82024cf
Showing 9 changed files with 170 additions and 96 deletions.
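
All of the visible hunks below come from running ruff's black-compatible formatter: single quotes become double quotes, calls and signatures that exceed the configured line length are split one argument per line with a trailing comma, and top-level definitions get two blank lines between them. The sketch below shows the effect on a hypothetical function (annotate_genes is illustrative only, not a file touched by this commit); the line-length setting is presumably configured in the ruff.toml added by the commit, whose contents are not among the visible hunks.

# Hypothetical snippet, for illustration only; not part of this commit.

# Before formatting:
def annotate_genes(query, species=None, scopes=None, entrezonly=False, ensemblonly=False, go_category=None, go_evidence=None):
    return {'query': query, 'species': species}


# After `ruff format` (black-compatible): double quotes, one argument per line
# with a trailing comma, and two blank lines between top-level definitions.
def annotate_genes(
    query,
    species=None,
    scopes=None,
    entrezonly=False,
    ensemblonly=False,
    go_category=None,
    go_evidence=None,
):
    return {"query": query, "species": species}
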
11 changes: 6 additions & 5 deletions modules/nf-core/cellranger/count/templates/cellranger_count.py
@@ -4,6 +4,7 @@
Copyright (c) Gregor Sturm 2023 - MIT License
"""

from subprocess import run
from pathlib import Path
from textwrap import dedent
@@ -34,11 +35,11 @@ def chunk_iter(seq, size):
# Match R1 in the filename, but only if it is followed by a non-digit or non-character
# match "file_R1.fastq.gz", "file.R1_000.fastq.gz", etc. but
# do not match "SRR12345", "file_INFIXR12", etc
filename_pattern = r'([^a-zA-Z0-9])R1([^a-zA-Z0-9])'
filename_pattern = r"([^a-zA-Z0-9])R1([^a-zA-Z0-9])"

for i, (r1, r2) in enumerate(chunk_iter(fastqs, 2), start=1):
# double escapes are required because nextflow processes this python 'template'
if re.sub(filename_pattern, r'\\1R2\\2', r1.name) != r2.name:
if re.sub(filename_pattern, r"\\1R2\\2", r1.name) != r2.name:
raise AssertionError(
dedent(
f"""\
@@ -55,20 +56,20 @@ def chunk_iter(seq, size):
r1.rename(fastq_all / f"{sample_id}_S1_L{i:03d}_R1_001.fastq.gz")
r2.rename(fastq_all / f"{sample_id}_S1_L{i:03d}_R2_001.fastq.gz")

# fmt: off
run(
# fmt: off
[
"cellranger", "count",
"--id", "${prefix}",
"--fastqs", str(fastq_all),
"--transcriptome", "${reference.name}",
"--localcores", "${task.cpus}",
"--localmem", "${task.memory.toGiga()}",
*shlex.split("""${args}""")
*shlex.split("""${args}"""),
],
# fmt: on
check=True,
)
# fmt: on

# Output version information
version = run(
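
The cellranger_count.py hunk above is not purely mechanical: the # fmt: off / # fmt: on markers move inside the run(...) call so that only the hand-aligned flag/value list is exempt from formatting, while the rest of the call (including the new trailing comma after the shlex.split(...) expansion) stays formatter-managed. A minimal sketch of that marker placement, using a placeholder command instead of the real cellranger invocation:

from subprocess import run

# Placeholder command and flags; the real module passes the cellranger
# arguments here. Only the bracketed list between the markers keeps its
# hand alignment (flag and value on one line); the surrounding call is
# still formatted normally.
run(
    # fmt: off
    [
        "echo",
        "--id", "sample1",
        "--cores", "4",
    ],
    # fmt: on
    check=True,
)
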
12 changes: 2 additions & 10 deletions modules/nf-core/custom/catadditionalfasta/templates/fasta2gtf.py
@@ -114,16 +114,8 @@ def main() -> None:
fasta_to_gtf("$add_fasta", f"{add_name}.gtf", "$biotype")

# Concatenate new fasta to existing fasta, and the GTF we just generated to the GTF
genome_name = (
"$params.genome"
if "$params.genome" != "null"
else os.path.splitext(os.path.basename("$fasta"))[0]
)
output_prefix = (
"$task.ext.prefix"
if "$task.ext.prefix" != "null"
else f"{genome_name}_{add_name}"
)
genome_name = "$params.genome" if "$params.genome" != "null" else os.path.splitext(os.path.basename("$fasta"))[0]
output_prefix = "$task.ext.prefix" if "$task.ext.prefix" != "null" else f"{genome_name}_{add_name}"

os.mkdir("out")
os.system(f"cat $fasta $add_fasta > out/{output_prefix}.fasta")
@@ -3,7 +3,6 @@

"""Provide functions to merge multiple versions.yml files."""


import yaml
import platform
from textwrap import dedent
24 changes: 17 additions & 7 deletions modules/nf-core/custom/tx2gene/templates/tx2gene.py
@@ -17,6 +17,7 @@
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


def format_yaml_like(data: dict, indent: int = 0) -> str:
"""Formats a dictionary to a YAML-like string.
@@ -36,6 +37,7 @@ def format_yaml_like(data: dict, indent: int = 0) -> str:
yaml_str += f"{spaces}{key}: {value}\\n"
return yaml_str


def read_top_transcripts(quant_dir: str, file_pattern: str) -> Set[str]:
"""
Read the top 100 transcripts from the quantification file.
@@ -123,7 +125,12 @@ def parse_attributes(attributes_text: str) -> Dict[str, str]:


def map_transcripts_to_gene(
quant_type: str, gtf_file: str, quant_dir: str, gene_id: str, extra_id_field: str, output_file: str
quant_type: str,
gtf_file: str,
quant_dir: str,
gene_id: str,
extra_id_field: str,
output_file: str,
) -> bool:
"""
Map transcripts to gene names and write the output to a file.
@@ -156,7 +163,10 @@ def map_transcripts_to_gene(
attr_dict = parse_attributes(cols[8])
if gene_id in attr_dict and transcript_attribute in attr_dict:
# Create a unique identifier for the transcript-gene combination
transcript_gene_pair = (attr_dict[transcript_attribute], attr_dict[gene_id])
transcript_gene_pair = (
attr_dict[transcript_attribute],
attr_dict[gene_id],
)

# Check if the combination has already been seen
if transcript_gene_pair not in seen:
@@ -170,14 +180,14 @@

# Main function to parse arguments and call the mapping function
if __name__ == "__main__":
if '${task.ext.prefix}' != "null":
if "${task.ext.prefix}" != "null":
prefix = "${task.ext.prefix}."
elif '$meta.id' != "null":
prefix = '${meta.id}.'
elif "$meta.id" != "null":
prefix = "${meta.id}."
else:
prefix = ''
prefix = ""

if not map_transcripts_to_gene('$quant_type', '$gtf', 'quants', '$id', '$extra', f"{prefix}tx2gene.tsv"):
if not map_transcripts_to_gene("$quant_type", "$gtf", "quants", "$id", "$extra", f"{prefix}tx2gene.tsv"):
logger.error("Failed to map transcripts to genes.")

# Write the versions
166 changes: 103 additions & 63 deletions modules/nf-core/mygene/templates/mygene.py
@@ -23,13 +23,10 @@ class Arguments:
"""
Parses the argments, including the ones coming from $task.ext.args.
"""

def __init__(self) -> None:
self.input = "$gene_list"
self.prefix = (
"$task.ext.prefix"
if "$task.ext.prefix" != "null"
else "$meta.id"
)
self.prefix = "$task.ext.prefix" if "$task.ext.prefix" != "null" else "$meta.id"
self.output_gmt = self.prefix + ".gmt"
self.output_tsv = self.prefix + ".tsv"
self.parse_ext_args("$task.ext.args")
@@ -46,17 +43,49 @@ def parse_ext_args(self, args_string: str) -> None:
args_list = shlex.split(args_string) # Split the string into a list of arguments
parser = argparse.ArgumentParser()
# input parameters
parser.add_argument('--columname', default='gene_id', help='Name of the column where the gene ids are stored in the input file. Default: gene_id')
parser.add_argument(
"--columname",
default="gene_id",
help="Name of the column where the gene ids are stored in the input file. Default: gene_id",
)
# filtering parameters
parser.add_argument('--species', default=None, help="Comma separated of common name of the species or taxon ids")
parser.add_argument('--go_category', default=None, help="Comma separated list of GO categories to keep. Default: all")
parser.add_argument('--go_evidence', default=None, help="Comma separated list of GO evidence codes to keep. Default: all")
parser.add_argument(
"--species",
default=None,
help="Comma separated of common name of the species or taxon ids",
)
parser.add_argument(
"--go_category",
default=None,
help="Comma separated list of GO categories to keep. Default: all",
)
parser.add_argument(
"--go_evidence",
default=None,
help="Comma separated list of GO evidence codes to keep. Default: all",
)
# additional parameters for querymany
parser.add_argument('--scopes', default=None, help="Comma separated list of scopes to search for.")
parser.add_argument('--entrezonly', default=False, help="When true, the query returns only the hits with valid Entrez gene ids. Default: false.")
parser.add_argument('--ensemblonly', default=False, help="When true, the query returns only the hits with valid Ensembl gene ids. Default: False")
parser.add_argument(
"--scopes",
default=None,
help="Comma separated list of scopes to search for.",
)
parser.add_argument(
"--entrezonly",
default=False,
help="When true, the query returns only the hits with valid Entrez gene ids. Default: false.",
)
parser.add_argument(
"--ensemblonly",
default=False,
help="When true, the query returns only the hits with valid Ensembl gene ids. Default: False",
)
# output parameters
parser.add_argument('--generate_tsv', default=False, help="Also generate a tsv file with the gene based information. Default: False")
parser.add_argument(
"--generate_tsv",
default=False,
help="Also generate a tsv file with the gene based information. Default: False",
)
args = parser.parse_args(args_list)

# Convert "null" values to default values
@@ -135,7 +164,17 @@ class MyGene:
of the query gene. Then, it queries for the annotations, and parses the go
terms all together with all the other information.
"""
def __init__(self, query: list, species: str, scopes: str, entrezonly: bool, ensemblonly: bool, go_category: str = None, go_evidence: str = None) -> None:

def __init__(
self,
query: list,
species: str,
scopes: str,
entrezonly: bool,
ensemblonly: bool,
go_category: str = None,
go_evidence: str = None,
) -> None:
self.query = query
self.fields = "go,symbol,name,taxid"
self.species = species
@@ -158,9 +197,9 @@ def query2idmap(self) -> dict:
species=self.species,
entrezonly=self.entrezonly,
ensemblonly=self.ensemblonly,
returnall=True
returnall=True,
)
return {dic['_id']: dic['query'] for dic in q['out'] if '_id' in dic}
return {dic["_id"]: dic["query"] for dic in q["out"] if "_id" in dic}

def id2info(self) -> list:
"""
@@ -177,22 +216,23 @@ def parse_go_based_info(self) -> dict:
"""
info = {}
for dic in self.id2info():

if 'go' not in dic:
if "go" not in dic:
continue
if self.go_category:
dic['go'] = {category: dic['go'][category] for category in self.go_category.split(",") if category in dic['go']}
for category, go_list in dic['go'].items():
dic["go"] = {
category: dic["go"][category] for category in self.go_category.split(",") if category in dic["go"]
}
for category, go_list in dic["go"].items():
if not isinstance(go_list, list):
go_list = [go_list]
for go in go_list:
if (self.go_evidence) and (go['evidence'] not in self.go_evidence.split(",")):
if (self.go_evidence) and (go["evidence"] not in self.go_evidence.split(",")):
continue

if go['id'] not in info:
info[go['id']] = [go['term'], self.idmap[dic['_id']]]
if go["id"] not in info:
info[go["id"]] = [go["term"], self.idmap[dic["_id"]]]
else:
info[go['id']].append(self.idmap[dic['_id']])
info[go["id"]].append(self.idmap[dic["_id"]])
return info

def parse_gene_based_info(self) -> dict:
@@ -206,30 +246,31 @@ def parse_gene_based_info(self) -> dict:
"""
info = {}
for dic in self.id2info():

if 'go' not in dic:
if "go" not in dic:
continue
if self.go_category:
dic['go'] = {category: dic['go'][category] for category in self.go_category.split(",") if category in dic['go']}
for category, go_list in dic['go'].items():
dic["go"] = {
category: dic["go"][category] for category in self.go_category.split(",") if category in dic["go"]
}
for category, go_list in dic["go"].items():
if not isinstance(go_list, list):
go_list = [go_list]
for go in go_list:
if (self.go_evidence) and (go['evidence'] not in self.go_evidence.split(",")):
if (self.go_evidence) and (go["evidence"] not in self.go_evidence.split(",")):
continue

current_info = {
'query': self.idmap[dic['_id']],
'mygene_id': dic['_id'],
'go_id': go['id'],
'go_term': go['term'],
'go_evidence': go['evidence'],
'go_category': category,
'symbol': dic['symbol'],
'name': dic['name'],
'taxid': dic['taxid']
"query": self.idmap[dic["_id"]],
"mygene_id": dic["_id"],
"go_id": go["id"],
"go_term": go["term"],
"go_evidence": go["evidence"],
"go_category": category,
"symbol": dic["symbol"],
"name": dic["name"],
"taxid": dic["taxid"],
}
info[self.idmap[dic['_id']]] = current_info
info[self.idmap[dic["_id"]]] = current_info
return info

def parse_and_save_to_gmt(self, filename: str) -> list:
Expand All @@ -239,7 +280,7 @@ def parse_and_save_to_gmt(self, filename: str) -> list:
"""
info = self.parse_go_based_info()
info = dict(sorted(info.items(), key=lambda x: x[0]))
with open(filename, 'w') as f:
with open(filename, "w") as f:
for go_id, go_list in info.items():
tmp = sorted(go_list[1:])
f.write(go_id + "\\t" + go_list[0] + "\\t" + "\\t".join(tmp) + "\\n")
@@ -251,7 +292,7 @@ def parse_and_save_to_tsv(self, filename: str) -> None:
The final tsv output will be sorted following the input query gene list order.
"""
info = self.parse_gene_based_info()
with open(filename, 'w') as f:
with open(filename, "w") as f:
f.write("\\t".join(info[self.query[0]].keys()) + "\\n")
for gene in self.query: # sorted by query gene list
if gene in info:
@@ -260,23 +301,22 @@ def parse_and_save_to_tsv(self, filename: str) -> None:


def load_list(filename: str, columname: str) -> list:
"""
It loads the list of gene ids from a file.
The columname is the name of the column where the gene ids are stored.
"""
if filename.split('.')[-1] == 'tsv':
sep = "\\t"
elif filename.split('.')[-1] == 'csv':
sep = ","
else:
raise ValueError("The input file extension should be either tsv or csv.")
with open(filename, 'r') as f:
idx = f.readline().strip().split(sep).index(columname)
return [line.strip().split(sep)[idx] for line in f]
"""
It loads the list of gene ids from a file.
The columname is the name of the column where the gene ids are stored.
"""
if filename.split(".")[-1] == "tsv":
sep = "\\t"
elif filename.split(".")[-1] == "csv":
sep = ","
else:
raise ValueError("The input file extension should be either tsv or csv.")
with open(filename, "r") as f:
idx = f.readline().strip().split(sep).index(columname)
return [line.strip().split(sep)[idx] for line in f]


if __name__ == "__main__":

# parse and print arguments
args = Arguments()
args.print_args()
@@ -286,14 +326,14 @@ def load_list(filename: str, columname: str) -> list:

# run mygene api
mg = MyGene(
gene_list,
species=args.species,
scopes=args.scopes,
entrezonly=args.entrezonly,
ensemblonly=args.ensemblonly,
go_category=args.go_category,
go_evidence=args.go_evidence
)
gene_list,
species=args.species,
scopes=args.scopes,
entrezonly=args.entrezonly,
ensemblonly=args.ensemblonly,
go_category=args.go_category,
go_evidence=args.go_evidence,
)

# parse annotations and save output files
mg.parse_and_save_to_gmt(args.output_gmt)