From b815d548f6be503e60727a202115fed08b5a4046 Mon Sep 17 00:00:00 2001 From: Tom White Date: Tue, 29 Oct 2024 12:00:24 +0000 Subject: [PATCH] Formatting fixes from ruff --- bio2zarr/plink.py | 4 ++-- bio2zarr/typing.py | 3 +-- bio2zarr/vcf2zarr/icf.py | 6 ++++-- bio2zarr/vcf2zarr/vcz.py | 1 + bio2zarr/vcf2zarr/verification.py | 2 +- bio2zarr/vcf_utils.py | 20 +++++++++----------- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/bio2zarr/plink.py b/bio2zarr/plink.py index f558ebd..157eb60 100644 --- a/bio2zarr/plink.py +++ b/bio2zarr/plink.py @@ -185,11 +185,11 @@ def validate(bed_path, zarr_path): assert call_genotype.shape[2] == 2 row_id = 0 - for bed_row, zarr_row in zip(bed_genotypes, call_genotype): + for bed_row, zarr_row in zip(bed_genotypes, call_genotype, strict=False): # print("ROW", row_id) # print(bed_row, zarr_row) row_id += 1 - for bed_call, zarr_call in zip(bed_row, zarr_row): + for bed_call, zarr_call in zip(bed_row, zarr_row, strict=False): if bed_call == -127: assert list(zarr_call) == [-1, -1] elif bed_call == 0: diff --git a/bio2zarr/typing.py b/bio2zarr/typing.py index 35e595d..527e4e2 100644 --- a/bio2zarr/typing.py +++ b/bio2zarr/typing.py @@ -1,4 +1,3 @@ from pathlib import Path -from typing import Union -PathType = Union[str, Path] +PathType = str | Path diff --git a/bio2zarr/vcf2zarr/icf.py b/bio2zarr/vcf2zarr/icf.py index 8e313f8..7006cf0 100644 --- a/bio2zarr/vcf2zarr/icf.py +++ b/bio2zarr/vcf2zarr/icf.py @@ -289,7 +289,7 @@ def scan_vcf(path, target_num_partitions, *, local_alleles): samples=[Sample(sample_id) for sample_id in vcf.samples], contigs=[ Contig(contig_id, length) - for contig_id, length in zip(vcf.seqnames, contig_lengths) + for contig_id, length in zip(vcf.seqnames, contig_lengths, strict=False) ], filters=filters, fields=fields, @@ -764,7 +764,9 @@ def chunks(self, partition_id, start_chunk=0): chunk_cumulative_records = self.chunk_record_index(partition_id) chunk_num_records = np.diff(chunk_cumulative_records) for count, cumulative in zip( - chunk_num_records[start_chunk:], chunk_cumulative_records[start_chunk + 1 :] + chunk_num_records[start_chunk:], + chunk_cumulative_records[start_chunk + 1 :], + strict=False, ): path = partition_path / f"{cumulative}" chunk = self.read_chunk(path) diff --git a/bio2zarr/vcf2zarr/vcz.py b/bio2zarr/vcf2zarr/vcz.py index dcaef3f..b170bff 100644 --- a/bio2zarr/vcf2zarr/vcz.py +++ b/bio2zarr/vcf2zarr/vcz.py @@ -760,6 +760,7 @@ def encode_alleles_partition(self, partition_index): for ref, alt in zip( ref_field.iter_values(partition.start, partition.stop), alt_field.iter_values(partition.start, partition.stop), + strict=False, ): j = alleles.next_buffer_row() alleles.buff[j, :] = constants.STR_FILL diff --git a/bio2zarr/vcf2zarr/verification.py b/bio2zarr/vcf2zarr/verification.py index 27e86fe..35ef914 100644 --- a/bio2zarr/vcf2zarr/verification.py +++ b/bio2zarr/vcf2zarr/verification.py @@ -114,7 +114,7 @@ def assert_format_val_equal(vcf_val, zarr_val, vcf_type, vcf_number): assert isinstance(vcf_val, np.ndarray) if vcf_type in ("String", "Character"): assert len(vcf_val) == len(zarr_val) - for v, z in zip(vcf_val, zarr_val): + for v, z in zip(vcf_val, zarr_val, strict=False): if vcf_number == "1": assert v == z else: diff --git a/bio2zarr/vcf_utils.py b/bio2zarr/vcf_utils.py index 70dfea2..b586890 100644 --- a/bio2zarr/vcf_utils.py +++ b/bio2zarr/vcf_utils.py @@ -7,7 +7,7 @@ from collections.abc import Sequence from dataclasses import dataclass from enum import Enum -from typing import IO, Any, Optional, Union +from typing import IO, Any import cyvcf2 import humanfriendly @@ -33,7 +33,7 @@ def get_file_offset(vfp: int) -> int: return vfp >> 16 & address_mask -def read_bytes_as_value(f: IO[Any], fmt: str, nodata: Optional[Any] = None) -> Any: +def read_bytes_as_value(f: IO[Any], fmt: str, nodata: Any | None = None) -> Any: """Read bytes using a `struct` format string and return the unpacked data value. Parameters @@ -85,8 +85,8 @@ class Region: """ contig: str - start: Optional[int] = None - end: Optional[int] = None + start: int | None = None + end: int | None = None def __post_init__(self): if self.start is not None: @@ -194,9 +194,7 @@ def get_first_locus_in_bin(csi: CSIIndex, bin: int) -> int: return (bin - first_bin_on_level) * (max_span // level_size) + 1 -def read_csi( - file: PathType, storage_options: Optional[dict[str, str]] = None -) -> CSIIndex: +def read_csi(file: PathType, storage_options: dict[str, str] | None = None) -> CSIIndex: """Parse a CSI file into a `CSIIndex` object. Parameters @@ -311,7 +309,7 @@ def offsets(self) -> Any: def read_tabix( - file: PathType, storage_options: Optional[dict[str, str]] = None + file: PathType, storage_options: dict[str, str] | None = None ) -> TabixIndex: """Parse a tabix file into a `TabixIndex` object. @@ -452,7 +450,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): return False def contig_record_counts(self): - d = dict(zip(self.sequence_names, self.index.record_counts)) + d = dict(zip(self.sequence_names, self.index.record_counts, strict=False)) if self.file_type == VcfFileType.BCF: d = {k: v for k, v in d.items() if v > 0} return d @@ -483,8 +481,8 @@ def _filter_empty_and_refine(self, regions): def partition_into_regions( self, - num_parts: Optional[int] = None, - target_part_size: Union[None, int, str] = None, + num_parts: int | None = None, + target_part_size: None | int | str = None, ): if num_parts is None and target_part_size is None: raise ValueError("One of num_parts or target_part_size must be specified")