Skip to content

Commit

Permalink
Check duplicate positions
Browse files Browse the repository at this point in the history
Fixes #888
  • Loading branch information
hyanwong authored and mergify[bot] committed May 15, 2024
1 parent ec3218d commit 2cf0975
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 0 deletions.
16 changes: 16 additions & 0 deletions tests/test_sgkit.py
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,22 @@ def test_ploidy1_unphased(self, tmp_path):
sgkit.save_dataset(ds, path)
tsinfer.SgkitSampleData(path)

def test_duplicate_positions(self, tmp_path):
    """Building SgkitSampleData from a dataset with a repeated position raises."""
    zarr_path = tmp_path / "data.zarr"
    dataset = sgkit.simulate_genotype_call_dataset(
        n_variant=3,
        n_sample=3,
        phased=True,
    )
    # Overwrite the third variant's position with the second's so that two
    # adjacent variants share the same position.
    dataset["variant_position"][2] = dataset["variant_position"][1]
    sgkit.save_dataset(dataset, zarr_path)
    with pytest.raises(ValueError, match="duplicate or out-of-order values"):
        tsinfer.SgkitSampleData(zarr_path)

def test_bad_order_positions(self, tmp_path):
    """Building SgkitSampleData from out-of-order positions raises ValueError.

    The first and last variant positions are swapped before saving, so the
    stored positions decrease rather than increase (assuming the simulated
    positions start out strictly increasing) without introducing any
    duplicate value.
    """
    path = tmp_path / "data.zarr"
    ds = sgkit.simulate_genotype_call_dataset(n_variant=3, n_sample=3, phased=True)
    # Swap rather than subtract a fraction: writing a fractional value into
    # the position array depends on how it is cast on assignment, and if the
    # positions are stored as integers it can truncate onto a neighbouring
    # position, turning this into a second duplicate-position test.
    first_position = ds["variant_position"][0].item()
    last_position = ds["variant_position"][2].item()
    ds["variant_position"][0] = last_position
    ds["variant_position"][2] = first_position
    sgkit.save_dataset(ds, path)
    with pytest.raises(ValueError, match="duplicate or out-of-order values"):
        tsinfer.SgkitSampleData(path)

def test_empty_alleles_not_at_end(self, tmp_path):
path = tmp_path / "data.zarr"
ds = sgkit.simulate_genotype_call_dataset(n_variant=3, n_sample=3, n_ploidy=1)
Expand Down
6 changes: 6 additions & 0 deletions tsinfer/formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -2309,6 +2309,12 @@ def __init__(self, path):
" sgkit dataset, indicating that all the genotypes are"
" unphased"
)
if np.any(np.diff(self.sites_position) <= 0):
raise ValueError(
"Values taken from the variant_position array are not strictly "
"increasing (i.e. have duplicate or out-of-order values). "
"These must be masked out to run tsinfer."
)

@functools.cached_property
def format_name(self):
Expand Down

0 comments on commit 2cf0975

Please sign in to comment.