Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CI: enable codspeed for Python #3262

Draft
wants to merge 1 commit into
base: latest
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions .github/workflows/codspeed.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,22 @@ jobs:
with:
run: "cd src/core && cargo codspeed run"
token: ${{ secrets.CODSPEED_TOKEN }}

# Python benchmark job: checks out the repository, sets up Python 3.12,
# installs tox, then runs `tox -e codspeed` under the CodSpeed action using
# the CODSPEED_TOKEN secret.
# NOTE(review): assumes a `codspeed` tox environment exists — its definition
# is not visible in this diff; confirm before merging.
# NOTE(review): checkout@v3 / setup-python@v3 are older major versions of
# those actions — confirm whether they should be bumped.
benchmarks-python:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v3
with:
python-version: "3.12"

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install tox

- name: Run benchmarks
uses: CodSpeedHQ/action@v2
with:
token: ${{ secrets.CODSPEED_TOKEN }}
run: tox -e codspeed
124 changes: 0 additions & 124 deletions benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,74 +33,6 @@ def load_sequences():
return sequences


class TimeMinHashSuite:
    """Timing benchmarks for MinHash sketches created with track_abundance=False."""

    def setup(self):
        """Create the empty sketches, the input sequences and a populated sketch."""
        self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)
        self.protein_mh = MinHash(
            MINHASH_NUM, MINHASH_K, is_protein=True, track_abundance=False
        )
        self.sequences = load_sequences()

        # Second sketch that already contains every generated sequence.
        self.populated_mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)
        for sequence in self.sequences:
            self.populated_mh.add_sequence(sequence)

    def time_add_sequence(self):
        """Add every generated sequence to the nucleotide sketch."""
        sketch = self.mh
        inputs = self.sequences
        for sequence in inputs:
            sketch.add_sequence(sequence)

    def time_add_protein(self):
        """Add every generated sequence to the protein sketch via add_protein."""
        sketch = self.protein_mh
        inputs = self.sequences
        for sequence in inputs:
            sketch.add_protein(sequence)

    def time_get_mins(self):
        """Call get_mins on the populated sketch GET_MINS_RANGE times."""
        sketch = self.populated_mh
        for _ in range(GET_MINS_RANGE):
            sketch.get_mins()

    def time_add_hash(self):
        """Add the integers 0..ADD_HASH_RANGE-1 as hashes, one call each."""
        sketch = self.mh
        for value in range(ADD_HASH_RANGE):
            sketch.add_hash(value)

    def time_add_many(self):
        """Add the integers 0..ADD_MANY_RANGE-1 in a single add_many call."""
        sketch = self.mh
        hashes = list(range(ADD_MANY_RANGE))
        sketch.add_many(hashes)

    def time_similarity(self):
        """Compute similarity against the populated sketch SIMILARITY_TIMES times."""
        sketch = self.mh
        reference = self.populated_mh
        for _ in range(SIMILARITY_TIMES):
            sketch.similarity(reference)

    def time_count_common(self):
        """Call count_common against the populated sketch COUNT_COMMON_TIMES times."""
        sketch = self.mh
        reference = self.populated_mh
        for _ in range(COUNT_COMMON_TIMES):
            sketch.count_common(reference)

    def time_merge(self):
        """Merge the populated sketch into self.mh MERGE_TIMES times."""
        sketch = self.mh
        reference = self.populated_mh
        for _ in range(MERGE_TIMES):
            sketch.merge(reference)

    def time_copy(self):
        """Call __copy__ on the populated sketch COPY_TIMES times."""
        sketch = self.populated_mh
        for _ in range(COPY_TIMES):
            sketch.__copy__()

    def time_concat(self):
        """Apply `+=` with the populated sketch CONCAT_TIMES times."""
        sketch = self.mh
        reference = self.populated_mh
        for _ in range(CONCAT_TIMES):
            sketch += reference

class PeakmemMinHashSuite:
def setup(self):
self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)
Expand Down Expand Up @@ -134,33 +66,6 @@ def peakmem_add_many(self):
####################


class TimeMinAbundanceSuite(TimeMinHashSuite):
    """Timing benchmarks re-run with sketches created with track_abundance=True."""

    def setup(self):
        """Run the base setup, then replace both sketches with abundance-tracking ones."""
        TimeMinHashSuite.setup(self)
        self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)

        self.populated_mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)
        for sequence in self.sequences:
            self.populated_mh.add_sequence(sequence)

    def time_get_mins_abundance(self):
        """Call get_mins(with_abundance=True) GET_MINS_RANGE times."""
        sketch = self.populated_mh
        for _ in range(GET_MINS_RANGE):
            sketch.get_mins(with_abundance=True)

    def time_set_abundances(self):
        """Call set_abundances with the populated sketch's mins SET_ABUNDANCES_RANGE times."""
        sketch = self.mh
        abundances = self.populated_mh.get_mins(with_abundance=True)
        for _ in range(SET_ABUNDANCES_RANGE):
            sketch.set_abundances(abundances)

    def time_set_abundances_noclear(self):
        """Same as time_set_abundances, but passing clear=False on every call."""
        sketch = self.mh
        abundances = self.populated_mh.get_mins(with_abundance=True)
        for _ in range(SET_ABUNDANCES_RANGE):
            sketch.set_abundances(abundances, clear=False)


class PeakmemMinAbundanceSuite(PeakmemMinHashSuite):
def setup(self):
PeakmemMinHashSuite.setup(self)
Expand All @@ -170,35 +75,6 @@ def setup(self):
####################


class TimeZipStorageSuite:
    """Timing benchmarks for loading entries through ZipStorage."""

    def setup(self):
        """Write an uncompressed ZIP archive into a named temporary file."""
        import zipfile

        self.zipfile = NamedTemporaryFile()

        with zipfile.ZipFile(
            self.zipfile, mode="w", compression=zipfile.ZIP_STORED
        ) as archive:
            # Many tiny entries, so the archive has lots of members.
            for index in range(ZIP_STORAGE_WRITE):
                archive.writestr(str(index), b"0")
            # A single larger entry.
            archive.writestr("sig1", b"9" * 1_000_000)

    def time_load_from_zipstorage(self):
        """Load the "sig1" entry ZIP_STORAGE_LOAD times from one open ZipStorage."""
        with ZipStorage(self.zipfile.name) as store:
            for _ in range(ZIP_STORAGE_LOAD):
                store.load("sig1")

    def time_load_small_from_zipstorage(self):
        """Load the entry named "99999" ZIP_STORAGE_LOAD times from one open ZipStorage."""
        with ZipStorage(self.zipfile.name) as store:
            for _ in range(ZIP_STORAGE_LOAD):
                store.load("99999")

    def teardown(self):
        """Close the temporary file created in setup."""
        self.zipfile.close()


class PeakmemZipStorageSuite:
def setup(self):
import zipfile
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ test = [
"pytest>=6.2.4,<8.4.0",
"pytest-cov>=4,<6.0",
"pytest-xdist>=3.1",
"pytest-benchmark>=4.0",
"pyyaml>=6,<7",
"diff-cover>=7.3",
"covdefaults>=2.2.2",
Expand Down
176 changes: 176 additions & 0 deletions tests/test_benchmarks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
import random
from tempfile import NamedTemporaryFile

import pytest

from sourmash.sbt_storage import ZipStorage
from sourmash.minhash import MinHash

# Inputs for load_sequences(): the sampling pool holds RANDOM_SEQ_SIZE copies
# of each of A, C, G, T, and every generated sequence is RANDOM_SEQ_NUMBER
# characters long.
RANDOM_SEQ_SIZE = 3000
RANDOM_SEQ_NUMBER = 300

# First two positional arguments passed to every MinHash constructed in this module.
MINHASH_NUM = 500
MINHASH_K = 21

# Per-benchmark sizes and repetition counts.
# NOTE(review): SIMILARITY_TIMES, COUNT_COMMON_TIMES, MERGE_TIMES, COPY_TIMES
# and CONCAT_TIMES are not referenced by any code visible in this module —
# confirm whether they are still needed.
GET_MINS_RANGE = 500
ADD_HASH_RANGE = 10_000
ADD_MANY_RANGE = 1000
SIMILARITY_TIMES = 500
COUNT_COMMON_TIMES = 500
MERGE_TIMES = 500
COPY_TIMES = 500
CONCAT_TIMES = 500
SET_ABUNDANCES_RANGE = 500
# Number of one-byte entries written to the benchmark ZIP archive, and number
# of load() calls performed per ZipStorage benchmark run.
ZIP_STORAGE_WRITE = 100_000
ZIP_STORAGE_LOAD = 20


def load_sequences():
    """Return ten random nucleotide strings for the benchmarks.

    Each string has RANDOM_SEQ_NUMBER characters, sampled without replacement
    from a pool containing RANDOM_SEQ_SIZE copies of each of A, C, G and T.
    """
    alphabet = "A,C,G,T".split(",")
    return [
        "".join(random.sample(alphabet * RANDOM_SEQ_SIZE, RANDOM_SEQ_NUMBER))
        for _ in range(10)
    ]


@pytest.fixture
def mh():
    """Provide a newly constructed MinHash with track_abundance=False."""
    sketch = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)
    return sketch


@pytest.fixture
def mh_protein():
    """Provide a newly constructed protein MinHash with track_abundance=False."""
    sketch = MinHash(
        MINHASH_NUM, MINHASH_K, is_protein=True, track_abundance=False
    )
    return sketch


@pytest.fixture
def sequences():
    """Provide the list of random sequences produced by load_sequences()."""
    generated = load_sequences()
    return generated


@pytest.fixture
def populated_mh(sequences):
    """Provide a MinHash (track_abundance=False) with every sequence already added."""
    filled = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)
    for sequence in sequences:
        filled.add_sequence(sequence)
    return filled


def test_add_sequence(benchmark, mh, sequences):
    """Benchmark adding every generated sequence to the `mh` sketch."""

    def add_all():
        for sequence in sequences:
            mh.add_sequence(sequence)

    benchmark(add_all)


def test_add_protein(benchmark, mh_protein, sequences):
    """Benchmark passing every generated sequence to add_protein on the protein sketch."""

    def add_all():
        for sequence in sequences:
            mh_protein.add_protein(sequence)

    benchmark(add_all)


def test_get_mins(benchmark, populated_mh):
    """Benchmark get_mins on the populated sketch."""
    get_mins = populated_mh.get_mins
    benchmark(get_mins)


def test_add_hash(benchmark, mh):
    """Benchmark adding the integers 0..ADD_HASH_RANGE-1 as hashes, one call each."""

    def add_each():
        for value in range(ADD_HASH_RANGE):
            mh.add_hash(value)

    benchmark(add_each)


def test_add_many(benchmark, mh):
    """Benchmark a single add_many call with the integers 0..ADD_MANY_RANGE-1."""
    add_many = mh.add_many
    hashes = list(range(ADD_MANY_RANGE))
    benchmark(add_many, hashes)


def test_similarity(benchmark, mh, populated_mh):
    """Benchmark `mh.similarity` against the populated sketch."""
    compare = mh.similarity
    benchmark(compare, populated_mh)


def test_count_common(benchmark, mh, populated_mh):
    """Benchmark `mh.count_common` against the populated sketch."""
    count_common = mh.count_common
    benchmark(count_common, populated_mh)


def test_merge(benchmark, mh, populated_mh):
    """Benchmark merging the populated sketch into the `mh` sketch."""
    merge = mh.merge
    benchmark(merge, populated_mh)


def test_copy(benchmark, populated_mh):
    """Benchmark `__copy__` on the populated sketch."""
    make_copy = populated_mh.__copy__
    benchmark(make_copy)


def test_concat(benchmark, mh, populated_mh):
    """Benchmark `__iadd__` (the `+=` operator) with the populated sketch as operand."""
    concat = mh.__iadd__
    benchmark(concat, populated_mh)


####################


# Abundance-tracking benchmarks, written as pytest fixtures and tests like the
# rest of this module. Repetition is left to the `benchmark` fixture, so the
# bodies make a single call each.


@pytest.fixture
def mh_abund():
    """Provide a newly constructed MinHash with track_abundance=True."""
    return MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)


@pytest.fixture
def populated_mh_abund(sequences):
    """Provide a MinHash (track_abundance=True) with every sequence already added."""
    populated = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)
    for seq in sequences:
        populated.add_sequence(seq)
    return populated


def test_get_mins_abundance(benchmark, populated_mh_abund):
    """Benchmark get_mins(with_abundance=True) on the populated sketch."""
    benchmark(populated_mh_abund.get_mins, with_abundance=True)


def test_set_abundances(benchmark, mh_abund, populated_mh_abund):
    """Benchmark set_abundances using the populated sketch's mins as input."""
    # Computed once, outside the measured call.
    mins = populated_mh_abund.get_mins(with_abundance=True)
    benchmark(mh_abund.set_abundances, mins)


def test_set_abundances_noclear(benchmark, mh_abund, populated_mh_abund):
    """Benchmark set_abundances(..., clear=False) using the populated sketch's mins."""
    # Computed once, outside the measured call.
    mins = populated_mh_abund.get_mins(with_abundance=True)
    benchmark(mh_abund.set_abundances, mins, clear=False)


####################


@pytest.fixture
def zipstore():
    """Yield a named temporary file containing the benchmark ZIP archive.

    The archive is written uncompressed (ZIP_STORED) and holds
    ZIP_STORAGE_WRITE one-byte entries named "0", "1", ... plus one
    1,000,000-byte entry named "sig1". The temporary file is closed when the
    requesting test finishes.
    """
    import zipfile

    # The context manager closes the temporary file even when building the
    # archive raises, not only after a successful yield.
    with NamedTemporaryFile() as zf:
        with zipfile.ZipFile(zf, mode="w", compression=zipfile.ZIP_STORED) as storage:
            for i in range(ZIP_STORAGE_WRITE):
                # just so we have lots of entries
                storage.writestr(str(i), b"0")
            # one big-ish entry
            storage.writestr("sig1", b"9" * 1_000_000)

        yield zf


def test_load_from_zipstorage(benchmark, zipstore):
    """Benchmark opening the archive and loading "sig1" ZIP_STORAGE_LOAD times."""

    def load_big():
        with ZipStorage(zipstore.name) as store:
            for _ in range(ZIP_STORAGE_LOAD):
                store.load("sig1")

    benchmark(load_big)


def test_load_small_from_zipstorage(benchmark, zipstore):
    """Benchmark opening the archive and loading entry "99999" ZIP_STORAGE_LOAD times."""

    def load_small():
        with ZipStorage(zipstore.name) as store:
            for _ in range(ZIP_STORAGE_LOAD):
                store.load("99999")

    benchmark(load_small)
Loading
Loading