sgkit-dev · tomwhite · Nov 11, 2024 · Oct 29, 2024 · Oct 29, 2024 · Nov 4, 2024
diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml
@@ -18,7 +18,7 @@ jobs:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
         with:
-          python-version: '3.9'
+          python-version: '3.10'
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -24,7 +24,7 @@ jobs:
         # Use macos-13 because pip binary packages for ARM aren't
         # available for many dependencies
         os: [macos-13, macos-14, ubuntu-latest]
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.10", "3.11", "3.12"]
         exclude:
           # Just run macos tests on one Python version
           - os: macos-13
@@ -33,8 +33,6 @@ jobs:
             python-version: "3.11"
           - os: macos-13
             python-version: "3.12"
-          - os: macos-14
-            python-version: "3.9"
           - os: macos-14
             python-version: "3.10"
           - os: macos-14

diff --git a/bio2zarr/core.py b/bio2zarr/core.py
@@ -7,10 +7,8 @@
 import multiprocessing
 import os
 import os.path
-import sys
 import threading
 import time
-import warnings
 
 import humanfriendly
 import numcodecs
@@ -216,22 +214,6 @@ def setup_progress_counter(counter):
     _progress_counter = counter
 
 
-def warn_py39_mac():
-    if sys.platform == "darwin" and sys.version_info[:2] == (3, 9):
-        warnings.warn(
-            "There is a known issue with bio2zarr on MacOS Python 3.9 "
-            "in which OS-level named semaphores are leaked. "
-            "You will also probably see warnings like 'There appear to be N "
-            "leaked semaphore objects at shutdown'. "
-            "While this is likely harmless for a few runs, it could lead to "
-            "issues if you do a lot of conversion. To get prevent this issue "
-            "either: (1) use --worker-processes=0 or (2) upgrade to a newer "
-            "Python version. See https://github.com/sgkit-dev/bio2zarr/issues/209 "
-            "for more details.",
-            stacklevel=2,
-        )
-
-
 class ParallelWorkManager(contextlib.AbstractContextManager):
     def __init__(self, worker_processes=1, progress_config=None):
         # Need to specify this explicitly to suppport Macs and
@@ -244,7 +226,6 @@ def __init__(self, worker_processes=1, progress_config=None):
             # production. See note on the SynchronousExecutor class.
             self.executor = SynchronousExecutor()
         else:
-            warn_py39_mac()
             self.executor = cf.ProcessPoolExecutor(
                 max_workers=worker_processes,
                 mp_context=ctx,

diff --git a/bio2zarr/plink.py b/bio2zarr/plink.py
@@ -185,11 +185,11 @@ def validate(bed_path, zarr_path):
     assert call_genotype.shape[2] == 2
 
     row_id = 0
-    for bed_row, zarr_row in zip(bed_genotypes, call_genotype):
+    for bed_row, zarr_row in zip(bed_genotypes, call_genotype, strict=True):
         # print("ROW", row_id)
         # print(bed_row, zarr_row)
         row_id += 1
-        for bed_call, zarr_call in zip(bed_row, zarr_row):
+        for bed_call, zarr_call in zip(bed_row, zarr_row, strict=True):
             if bed_call == -127:
                 assert list(zarr_call) == [-1, -1]
             elif bed_call == 0:

diff --git a/bio2zarr/typing.py b/bio2zarr/typing.py
@@ -1,4 +1,3 @@
 from pathlib import Path
-from typing import Union
 
-PathType = Union[str, Path]
+PathType = str | Path
diff --git a/bio2zarr/vcf2zarr/icf.py b/bio2zarr/vcf2zarr/icf.py
@@ -289,7 +289,7 @@ def scan_vcf(path, target_num_partitions, *, local_alleles):
             samples=[Sample(sample_id) for sample_id in vcf.samples],
             contigs=[
                 Contig(contig_id, length)
-                for contig_id, length in zip(vcf.seqnames, contig_lengths)
+                for contig_id, length in zip(vcf.seqnames, contig_lengths, strict=True)
             ],
             filters=filters,
             fields=fields,
@@ -764,7 +764,9 @@ def chunks(self, partition_id, start_chunk=0):
         chunk_cumulative_records = self.chunk_record_index(partition_id)
         chunk_num_records = np.diff(chunk_cumulative_records)
         for count, cumulative in zip(
-            chunk_num_records[start_chunk:], chunk_cumulative_records[start_chunk + 1 :]
+            chunk_num_records[start_chunk:],
+            chunk_cumulative_records[start_chunk + 1 :],
+            strict=True,
         ):
             path = partition_path / f"{cumulative}"
             chunk = self.read_chunk(path)

diff --git a/bio2zarr/vcf2zarr/vcz.py b/bio2zarr/vcf2zarr/vcz.py
@@ -760,6 +760,7 @@ def encode_alleles_partition(self, partition_index):
         for ref, alt in zip(
             ref_field.iter_values(partition.start, partition.stop),
             alt_field.iter_values(partition.start, partition.stop),
+            strict=True,
         ):
             j = alleles.next_buffer_row()
             alleles.buff[j, :] = constants.STR_FILL

diff --git a/bio2zarr/vcf2zarr/verification.py b/bio2zarr/vcf2zarr/verification.py
@@ -114,7 +114,7 @@ def assert_format_val_equal(vcf_val, zarr_val, vcf_type, vcf_number):
     assert isinstance(vcf_val, np.ndarray)
     if vcf_type in ("String", "Character"):
         assert len(vcf_val) == len(zarr_val)
-        for v, z in zip(vcf_val, zarr_val):
+        for v, z in zip(vcf_val, zarr_val, strict=True):
             if vcf_number == "1":
                 assert v == z
             else:

diff --git a/bio2zarr/vcf_utils.py b/bio2zarr/vcf_utils.py
@@ -7,7 +7,7 @@
 from collections.abc import Sequence
 from dataclasses import dataclass
 from enum import Enum
-from typing import IO, Any, Optional, Union
+from typing import IO, Any
 
 import cyvcf2
 import humanfriendly
@@ -33,7 +33,7 @@ def get_file_offset(vfp: int) -> int:
     return vfp >> 16 & address_mask
 
 
-def read_bytes_as_value(f: IO[Any], fmt: str, nodata: Optional[Any] = None) -> Any:
+def read_bytes_as_value(f: IO[Any], fmt: str, nodata: Any | None = None) -> Any:
     """Read bytes using a `struct` format string and return the unpacked data value.
 
     Parameters
@@ -85,8 +85,8 @@ class Region:
     """
 
     contig: str
-    start: Optional[int] = None
-    end: Optional[int] = None
+    start: int | None = None
+    end: int | None = None
 
     def __post_init__(self):
         if self.start is not None:
@@ -194,9 +194,7 @@ def get_first_locus_in_bin(csi: CSIIndex, bin: int) -> int:
     return (bin - first_bin_on_level) * (max_span // level_size) + 1
 
 
-def read_csi(
-    file: PathType, storage_options: Optional[dict[str, str]] = None
-) -> CSIIndex:
+def read_csi(file: PathType, storage_options: dict[str, str] | None = None) -> CSIIndex:
     """Parse a CSI file into a `CSIIndex` object.
 
     Parameters
@@ -311,7 +309,7 @@ def offsets(self) -> Any:
 
 
 def read_tabix(
-    file: PathType, storage_options: Optional[dict[str, str]] = None
+    file: PathType, storage_options: dict[str, str] | None = None
 ) -> TabixIndex:
     """Parse a tabix file into a `TabixIndex` object.
 
@@ -452,7 +450,7 @@ def __exit__(self, exc_type, exc_val, exc_tb):
         return False
 
     def contig_record_counts(self):
-        d = dict(zip(self.sequence_names, self.index.record_counts))
+        d = dict(zip(self.sequence_names, self.index.record_counts, strict=True))
         if self.file_type == VcfFileType.BCF:
             d = {k: v for k, v in d.items() if v > 0}
         return d
@@ -483,8 +481,8 @@ def _filter_empty_and_refine(self, regions):
 
     def partition_into_regions(
         self,
-        num_parts: Optional[int] = None,
-        target_part_size: Union[None, int, str] = None,
+        num_parts: int | None = None,
+        target_part_size: None | int | str = None,
     ):
         if num_parts is None and target_part_size is None:
             raise ValueError("One of num_parts or target_part_size must be specified")

diff --git a/pyproject.toml b/pyproject.toml
@@ -24,7 +24,7 @@ dependencies = [
   "cyvcf2",
   "bed_reader",
 ]
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 classifiers = [
   "Development Status :: 4 - Beta",
   "License :: OSI Approved :: Apache Software License",
@@ -35,7 +35,6 @@ classifiers = [
   "Intended Audience :: Science/Research",
   "Programming Language :: Python",
   "Programming Language :: Python :: 3",
-  "Programming Language :: Python :: 3.9",
   "Programming Language :: Python :: 3.10",
   "Programming Language :: Python :: 3.11",
   "Programming Language :: Python :: 3.12",
@@ -74,8 +73,8 @@ testpaths = "tests"
 addopts = "--cov=bio2zarr --cov-report term-missing"
 
 [tool.ruff]
-# Assume Python 3.9
-target-version = "py39"
+# Assume Python 3.10
+target-version = "py310"
 
 # Same as Black.
 line-length = 88