From 6f13a71cc700171bf6f95d52fbc205cd79d84dfb Mon Sep 17 00:00:00 2001 From: Jonas Dedden Date: Thu, 8 Aug 2024 10:18:39 +0200 Subject: [PATCH] Cleanup --- .github/workflows/lint.yml | 6 +- .travis.yml | 5 +- requirements-dev.txt | 3 + src/crc32c/__init__.py | 70 ++++++++++++++++----- src/crc32c/__init__.pyi | 28 --------- src/crc32c/_crc32c.pyi | 8 +++ test/test_crc32c.py | 124 ++++++++++++++++++++++--------------- 7 files changed, 147 insertions(+), 97 deletions(-) create mode 100644 requirements-dev.txt delete mode 100644 src/crc32c/__init__.pyi create mode 100644 src/crc32c/_crc32c.pyi diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index f410724..299fcf0 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -18,9 +18,9 @@ jobs: architecture: x64 - name: Checkout uses: actions/checkout@v3 - - name: Install mypy & wheel & pytest - run: pip install mypy wheel pytest + - name: Install dev requirements + run: pip install -r requirements-dev.txt - name: Install package run: python setup.py bdist_wheel && pip install dist/crc32c*.whl - name: Run mypy - run: mypy --strict test/test_crc32c.py + run: mypy --strict src test diff --git a/.travis.yml b/.travis.yml index 769359f..07d1e90 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,20 +5,21 @@ dist: focal # Build and test in several python versions jobs: include: - - python: "2.7" - python: "3.5" - python: "3.6" - python: "3.7" - python: "3.8" - python: "3.9" - python: "3.10" + - python: "3.11" + - python: "3.12" - python: "3.8" arch: arm64 - python: "3.8" arch: s390x before_install: -- pip install pytest +- pip install -r requirements-dev.txt install: - pip install -v . 
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..b39507c
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,3 @@
+wheel
+mypy
+pytest
diff --git a/src/crc32c/__init__.py b/src/crc32c/__init__.py
index 799b1d2..dcef917 100644
--- a/src/crc32c/__init__.py
+++ b/src/crc32c/__init__.py
@@ -1,32 +1,77 @@
-import copy
-import base64
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
 
 from ._crc32c import big_endian, crc32, crc32c, hardware_based
 
+if TYPE_CHECKING:
+    from typing_extensions import Buffer, Self
+
+
+class CRC32CHash:
+    """Wrapper class for crc32c. Tries to conform to the interface of `hashlib` classes."""
 
-class Crc32cHash:
     @property
-    def digest_size(self):
+    def digest_size(self) -> int:
+        """
+        The size of the resulting hash in bytes.
+        """
         return 4
 
     @property
-    def name(self):
+    def block_size(self) -> int:
+        """
+        The internal block size of the hash algorithm in bytes.
+        """
+        return 1
+
+    @property
+    def name(self) -> str:
+        """
+        The canonical name of this hash,
+        always lowercase and always suitable as a parameter to new() to create another hash of this type.
+        """
         return "crc32c"
 
-    def __init__(self, data):
+    def __init__(self, data: Buffer = b"") -> None:
+        """
+        Initialise the hash object with an optional bytes-like object.
+        """
         self._checksum = crc32c(data)
 
-    def update(self, __data):
-        self._checksum = crc32c(__data, self._checksum)
-
-    def base64(self):
-        return base64.b64encode(self.digest()).decode(encoding="ascii")
+    def update(self, data: Buffer) -> None:
+        """
+        Update the hash object with the bytes-like object.
+        Repeated calls are equivalent to a single call with the concatenation of all the arguments:
+        m.update(a); m.update(b) is equivalent to m.update(a+b).
+        """
+        self._checksum = crc32c(data, self._checksum)
 
-    def digest(self):
+    def digest(self) -> bytes:
+        """
+        Return the digest of the data passed to the update() method so far.
+        This is a bytes object of size digest_size which may contain bytes in the whole range from 0 to 255.
+        """
         return self._checksum.to_bytes(4, "big")
 
-    def hexdigest(self):
+    def hexdigest(self) -> str:
+        """
+        Like digest() except the digest is returned as a string object of double length,
+        containing only hexadecimal digits.
+        This may be used to exchange the value safely in email or other non-binary environments.
+        """
         return self.digest().hex()
 
-    def copy(self):
-        return copy.copy(self)
+    def copy(self) -> Self:
+        """
+        Return a copy (“clone”) of the hash object. This can be used to efficiently compute
+        the digests of data sharing a common initial substring.
+        """
+        res = type(self)()
+        res._checksum = self._checksum
+        return res
+
+
+# Backward-compatible alias: the class used to be exported as ``Crc32cHash``
+# and test/test_crc32c.py still refers to it under that name.
+Crc32cHash = CRC32CHash
diff --git a/src/crc32c/__init__.pyi b/src/crc32c/__init__.pyi
deleted file mode 100644
index cb8d2ae..0000000
--- a/src/crc32c/__init__.pyi
+++ /dev/null
@@ -1,28 +0,0 @@
-from collections.abc import Buffer
-from typing import Self
-
-big_endian: int
-hardware_based: bool
-
-def crc32(data: Buffer, value: int = 0, gil_release_mode: int = -1) -> int: ...
-def crc32c(data: Buffer, value: int = 0, gil_release_mode: int = -1) -> int: ...
-
-
-class Crc32cHash:
-    @property
-    def digest_size(self) -> int: ...
-
-    @property
-    def name(self) -> str: ...
-
-    def __init__(self, data: Buffer) -> None: ...
-
-    def update(self, __data: Buffer, /) -> None: ...
-
-    def base64(self) -> str: ...
-
-    def digest(self) -> bytes: ...
-
-    def hexdigest(self) -> str: ...
-
-    def copy(self) -> Self: ...
diff --git a/src/crc32c/_crc32c.pyi b/src/crc32c/_crc32c.pyi
new file mode 100644
index 0000000..08ab525
--- /dev/null
+++ b/src/crc32c/_crc32c.pyi
@@ -0,0 +1,8 @@
+from typing_extensions import Buffer
+
+big_endian: int
+hardware_based: bool
+
+
+def crc32(data: Buffer, value: int = 0, gil_release_mode: int = -1) -> int: ...
+def crc32c(data: Buffer, value: int = 0, gil_release_mode: int = -1) -> int: ...
diff --git a/test/test_crc32c.py b/test/test_crc32c.py index a1c91a6..e6c9b23 100644 --- a/test/test_crc32c.py +++ b/test/test_crc32c.py @@ -24,41 +24,42 @@ import struct import unittest import warnings +from typing import Any, Generator, NamedTuple import pytest -from typing import Any, Generator try: import crc32c - sw_mode = os.environ.get('CRC32C_SW_MODE') - if sw_mode == 'none' and not crc32c.hardware_based: + + sw_mode = os.environ.get("CRC32C_SW_MODE") + if sw_mode == "none" and not crc32c.hardware_based: raise RuntimeError('"none" should force hardware support') - elif sw_mode == 'force' and crc32c.hardware_based: + elif sw_mode == "force" and crc32c.hardware_based: raise RuntimeError('"force" should force software support') except ImportError: crc32c = None # type: ignore[assignment] def ulonglong_as_bytes(x: int) -> bytes: - return struct.pack(' bytes: - return struct.pack(' bytes: - return struct.pack(' bytes: - return struct.pack(' Generator[bytes, None, None]: length = len(x) for i in range(0, length, size): - yield x[i: min(i + size, length)] + yield x[i : min(i + size, length)] def as_individual_bytes(x: bytes) -> Generator[bytes, None, None]: @@ -66,14 +67,14 @@ def as_individual_bytes(x: bytes) -> Generator[bytes, None, None]: yield bytes([b]) -@unittest.skipIf(crc32c is None, 'no crc32c support in this platform') +@unittest.skipIf(crc32c is None, "no crc32c support in this platform") class TestMisc(unittest.TestCase): def test_zero(self) -> None: - self.assertEqual(0, crc32c.crc32c(b'')) + self.assertEqual(0, crc32c.crc32c(b"")) def test_keyword(self) -> None: - self.assertEqual(10, crc32c.crc32c(b'', value=10)) + self.assertEqual(10, crc32c.crc32c(b"", value=10)) def test_gil_behaviour(self) -> None: def _test(data: bytes) -> None: @@ -82,21 +83,21 @@ def _test(data: bytes) -> None: self.assertEqual(crc32c.crc32c(data, gil_release_mode=0), expected) self.assertEqual(crc32c.crc32c(data, gil_release_mode=1), expected) - 
_test(b'this_doesnt_release_the_gil_by_default') - _test(b'this_releases_the_gil_by_default' * 1024 * 1024) + _test(b"this_doesnt_release_the_gil_by_default") + _test(b"this_releases_the_gil_by_default" * 1024 * 1024) def test_crc32_deprecated(self) -> None: with warnings.catch_warnings(record=True) as warns: - crc32c.crc32(b'') + crc32c.crc32(b"") self.assertEqual(len(warns), 1) with warnings.catch_warnings(record=True) as warns: - crc32c.crc32c(b'') + crc32c.crc32c(b"") self.assertEqual(len(warns), 0) def test_msvc_examples(self) -> None: # Examples taken from MSVC's online examples. # Values are not xor'd in the examples though, so we do it here - max32 = 0xffffffff + max32 = 0xFFFFFFFF def assert_msvc_vals(b: bytes, crc: int, expected_crc: int) -> None: self.assertEqual(expected_crc ^ max32, crc32c.crc32c(b, crc ^ max32)) @@ -107,7 +108,32 @@ def assert_msvc_vals(b: bytes, crc: int, expected_crc: int) -> None: assert_msvc_vals(ulonglong_as_bytes(0x88889999EEEE3333), 0x5555AAAA, 0x16F57621) -@pytest.mark.skipIf(crc32c is None, reason='no crc32c support in this platform') +class CRCTestValue(NamedTuple): + name: str + data: bytes + crc: int + + +test_values: list[CRCTestValue] = [ + CRCTestValue("Numbers1", b"123456789", 0xE3069283), + CRCTestValue("Numbers2", b"23456789", 0xBFE92A83), + CRCTestValue("Phrase", b"The quick brown fox jumps over the lazy dog", 0x22620404), + CRCTestValue( + "LongPhrase", + b"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc omni virtuti vitium contrario nomine opponitur. " + b"Conferam tecum, quam cuique verso rem subicias; Te ipsum, dignissimum maioribus tuis, voluptasne induxit, ut adolescentulus eriperes " + b"P. Conclusum est enim contra Cyrenaicos satis acute, nihil ad Epicurum. Duo Reges: constructio interrete. Tum Torquatus: Prorsus, inquit, assentior;\n" + b"Quando enim Socrates, qui parens philosophiae iure dici potest, quicquam tale fecit? Sed quid sentiat, non videtis. 
Haec quo modo conveniant, non " + b"sane intellego. Sed ille, ut dixi, vitiose. Dic in quovis conventu te omnia facere, ne doleas. Quod si ita se habeat, non possit beatam praestare " + b"vitam sapientia. Quis suae urbis conservatorem Codrum, quis Erechthei filias non maxime laudat? Primum divisit ineleganter; Huic mori optimum esse " + b"propter desperationem sapientiae, illi propter spem vivere.", + 0xFCB7575A, + ), + CRCTestValue("Empty", b"", 0x0), +] + + +@pytest.mark.skipif(crc32c is None, reason="no crc32c support in this platform") class TestCrc32cHash: def test_misc(self) -> None: crc32c_hash = crc32c.Crc32cHash(b"") @@ -115,52 +141,44 @@ def test_misc(self) -> None: assert crc32c_hash.digest_size == 4 assert crc32c_hash.name == "crc32c" assert len(crc32c_hash.digest()) == crc32c_hash.digest_size + assert len(crc32c_hash.hexdigest()) == crc32c_hash.digest_size * 2 def test_copy(self) -> None: crc32c_hash = crc32c.Crc32cHash(b"") crc32c_hash_copy = crc32c_hash.copy() assert crc32c_hash.digest() == crc32c_hash_copy.digest() + assert crc32c_hash.hexdigest() == crc32c_hash_copy.hexdigest() assert id(crc32c_hash) != id(crc32c_hash_copy) crc32c_hash.update(b"1") - crc32c_hash_copy.update(b"2") + assert crc32c_hash.digest() != crc32c_hash_copy.digest() + assert crc32c_hash.hexdigest() != crc32c_hash_copy.hexdigest() + crc32c_hash_copy.update(b"2") assert crc32c_hash.digest() != crc32c_hash_copy.digest() + assert crc32c_hash.hexdigest() != crc32c_hash_copy.hexdigest() - @pytest.mark.parametrize("data,digest,hexdigest,base64", [ - (b"", b"\x00\x00\x00\x00", "00000000", "AAAAAA=="), - (b"23456789", b"\xbf\xe9\x2a\x83", "bfe92a83", "v+kqgw=="), - (b"123456789", b"\xe3\x06\x92\x83", "e3069283", "4waSgw=="), - ]) + @pytest.mark.parametrize( + "data,crc", + [(value.data, value.crc) for value in test_values], + ) class TestSpecificValues: @staticmethod - def _check_values(crc32c_hash: crc32c.Crc32cHash, digest: bytes, hexdigest: str, base64: str) -> None: - assert 
crc32c_hash.digest() == digest - assert crc32c_hash.hexdigest() == hexdigest - assert crc32c_hash.base64() == base64 + def _check_values(crc32c_hash: crc32c.Crc32cHash, crc: int) -> None: + assert int.from_bytes(crc32c_hash.digest(), "big") == crc + assert len(crc32c_hash.digest()) == 4 + assert int(crc32c_hash.hexdigest(), 16) == crc + assert len(crc32c_hash.hexdigest()) == 8 - def test_piece_by_piece(self, data: bytes, digest: bytes, hexdigest: str, base64: str) -> None: + def test_piece_by_piece(self, data: bytes, crc: int) -> None: crc32c_hash = crc32c.Crc32cHash(b"") for x in as_individual_bytes(data): crc32c_hash.update(x) - self._check_values(crc32c_hash, digest, hexdigest, base64) - - def test_all(self, data: bytes, digest: bytes, hexdigest: str, base64: str) -> None: - self._check_values(crc32c.Crc32cHash(data), digest, hexdigest, base64) - + self._check_values(crc32c_hash, crc) -numbers1 = ('Numbers1', b'123456789', 0xe3069283) -numbers2 = ('Numbers2', b'23456789', 0xBFE92A83) -numbers3 = ('Numbers3', b'1234567890', 0xf3dbd4fe) -phrase = ('Phrase', b'The quick brown fox jumps over the lazy dog', 0x22620404) -long_phrase = ('LongPhrase', (b"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc omni virtuti vitium contrario nomine opponitur. " -b"Conferam tecum, quam cuique verso rem subicias; Te ipsum, dignissimum maioribus tuis, voluptasne induxit, ut adolescentulus eriperes " -b"P. Conclusum est enim contra Cyrenaicos satis acute, nihil ad Epicurum. Duo Reges: constructio interrete. Tum Torquatus: Prorsus, inquit, assentior;\n" -b"Quando enim Socrates, qui parens philosophiae iure dici potest, quicquam tale fecit? Sed quid sentiat, non videtis. Haec quo modo conveniant, non " -b"sane intellego. Sed ille, ut dixi, vitiose. Dic in quovis conventu te omnia facere, ne doleas. Quod si ita se habeat, non possit beatam praestare " -b"vitam sapientia. Quis suae urbis conservatorem Codrum, quis Erechthei filias non maxime laudat? 
Primum divisit ineleganter; Huic mori optimum esse " -b"propter desperationem sapientiae, illi propter spem vivere."), 0xfcb7575a) + def test_all(self, data: bytes, crc: int) -> None: + self._check_values(crc32c.Crc32cHash(data), crc) class Crc32cChecks(object): @@ -190,11 +208,19 @@ def test_by_different_memory_offsets(self) -> None: val = memoryview(self.val) c = crc32c.crc32c(val[0:offset]) c = crc32c.crc32c(val[offset:], c) - self.assertEqual(self.checksum, c, "Invalid checksum when splitting at offset %d" % offset) + self.assertEqual( + self.checksum, + c, + "Invalid checksum when splitting at offset %d" % offset, + ) # Generate the actual unittest classes for each of the testing values if crc32c is not None: - for name, val, checksum in (numbers1, numbers2, numbers3, phrase, long_phrase): - classname = 'Test%s' % name - locals()[classname] = type(classname, (unittest.TestCase, Crc32cChecks), {'val': val, 'checksum': checksum}) \ No newline at end of file + for value in test_values: + classname = "Test%s" % value.name + locals()[classname] = type( + classname, + (unittest.TestCase, Crc32cChecks), + {"val": value.data, "checksum": value.crc}, + )