Skip to content

Commit

Permalink
Cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
jonded94 committed Aug 8, 2024
1 parent c2c8509 commit 6f13a71
Show file tree
Hide file tree
Showing 7 changed files with 147 additions and 97 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ jobs:
architecture: x64
- name: Checkout
uses: actions/checkout@v3
- name: Install mypy & wheel & pytest
run: pip install mypy wheel pytest
- name: Install dev requirements
run: pip install -r requirements-dev.txt
- name: Install package
run: python setup.py bdist_wheel && pip install dist/crc32c*.whl
- name: Run mypy
run: mypy --strict test/test_crc32c.py
run: mypy --strict src test
5 changes: 3 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,21 @@ dist: focal
# Build and test in several python versions
jobs:
include:
- python: "2.7"
- python: "3.5"
- python: "3.6"
- python: "3.7"
- python: "3.8"
- python: "3.9"
- python: "3.10"
- python: "3.11"
- python: "3.12"
- python: "3.8"
arch: arm64
- python: "3.8"
arch: s390x

before_install:
- pip install pytest
- pip install -r requirements-dev.txt

install:
- pip install -v .
Expand Down
3 changes: 3 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
wheel
mypy
pytest
70 changes: 55 additions & 15 deletions src/crc32c/__init__.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,72 @@
import copy
import base64
from __future__ import annotations

from typing import TYPE_CHECKING

from ._crc32c import big_endian, crc32, crc32c, hardware_based

if TYPE_CHECKING:
from typing_extensions import Buffer, Self


class CRC32CHash:
"""Wrapper class for crc32c. Tries to conform to the interface of `hashlib` classes."""

class Crc32cHash:
@property
def digest_size(self):
def digest_size(self) -> int:
"""
The size of the resulting hash in bytes.
"""
return 4

@property
def name(self):
def block_size(self) -> int:
"""
The internal block size of the hash algorithm in bytes.
"""
return 1

@property
def name(self) -> str:
"""
The canonical name of this hash,
always lowercase and always suitable as a parameter to new() to create another hash of this type.
"""
return "crc32c"

def __init__(self, data):
def __init__(self, data: Buffer = b"") -> None:
"""
Initialise the hash object with an optional bytes-like object.
"""
self._checksum = crc32c(data)

def update(self, __data):
self._checksum = crc32c(__data, self._checksum)

def base64(self):
return base64.b64encode(self.digest()).decode(encoding="ascii")
def update(self, data: Buffer) -> None:
"""
Update the hash object with the bytes-like object.
Repeated calls are equivalent to a single call with the concatenation of all the arguments:
m.update(a); m.update(b) is equivalent to m.update(a+b).
"""
self._checksum = crc32c(data, self._checksum)

def digest(self):
def digest(self) -> bytes:
"""
Return the digest of the data passed to the update() method so far.
This is a bytes object of size digest_size which may contain bytes in the whole range from 0 to 255.
"""
return self._checksum.to_bytes(4, "big")

def hexdigest(self):
def hexdigest(self) -> str:
"""
Like digest() except the digest is returned as a string object of double length,
containing only hexadecimal digits.
This may be used to exchange the value safely in email or other non-binary environments.
"""
return self.digest().hex()

def copy(self):
return copy.copy(self)
def copy(self) -> Self:
"""
Return a copy (“clone”) of the hash object. This can be used to efficiently compute
the digests of data sharing a common initial substring.
"""
res = type(self)()
res._checksum = self._checksum
return res
28 changes: 0 additions & 28 deletions src/crc32c/__init__.pyi

This file was deleted.

8 changes: 8 additions & 0 deletions src/crc32c/_crc32c.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from typing_extensions import Buffer

big_endian: int
hardware_based: bool


# Deprecated spelling that shares crc32c()'s signature: the test suite
# (test_crc32_deprecated) expects every call to emit exactly one warning,
# whereas crc32c() emits none.
def crc32(data: Buffer, value: int = 0, gil_release_mode: int = -1) -> int: ...
# Compute the CRC32C checksum of the bytes-like `data` and return it as an int.
# `value` seeds the computation: the hash wrapper's update() passes in the
# checksum of the data seen so far, and the tests expect
# crc32c(b"", value=10) == 10 (empty input leaves the seed unchanged).
# `gil_release_mode`: the tests pass 0 and 1 and expect the same checksum for
# both; presumably it selects whether the GIL is released during the
# computation, with -1 meaning "decide automatically" -- TODO confirm against
# the C extension.
def crc32c(data: Buffer, value: int = 0, gil_release_mode: int = -1) -> int: ...
124 changes: 75 additions & 49 deletions test/test_crc32c.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,56 +24,57 @@
import struct
import unittest
import warnings
from typing import Any, Generator, NamedTuple

import pytest
from typing import Any, Generator

try:
import crc32c
sw_mode = os.environ.get('CRC32C_SW_MODE')
if sw_mode == 'none' and not crc32c.hardware_based:

sw_mode = os.environ.get("CRC32C_SW_MODE")
if sw_mode == "none" and not crc32c.hardware_based:
raise RuntimeError('"none" should force hardware support')
elif sw_mode == 'force' and crc32c.hardware_based:
elif sw_mode == "force" and crc32c.hardware_based:
raise RuntimeError('"force" should force software support')
except ImportError:
crc32c = None # type: ignore[assignment]


def ulonglong_as_bytes(x: int) -> bytes:
return struct.pack('<Q', x)
return struct.pack("<Q", x)


def uint_as_bytes(x: int) -> bytes:
return struct.pack('<I', x)
return struct.pack("<I", x)


def ushort_as_bytes(x: int) -> bytes:
return struct.pack('<H', x)
return struct.pack("<H", x)


def uchar_as_bytes(c: int) -> bytes:
return struct.pack('<B', c)
return struct.pack("<B", c)


def batched(x: bytes, size: int) -> Generator[bytes, None, None]:
length = len(x)
for i in range(0, length, size):
yield x[i: min(i + size, length)]
yield x[i : min(i + size, length)]


def as_individual_bytes(x: bytes) -> Generator[bytes, None, None]:
    """Yield every byte of *x*, in order, as its own length-1 ``bytes`` object.

    An empty input yields nothing.
    """
    for b in x:
        # Iterating over bytes produces ints; wrap each one back into bytes.
        yield bytes([b])


@unittest.skipIf(crc32c is None, 'no crc32c support in this platform')
@unittest.skipIf(crc32c is None, "no crc32c support in this platform")
class TestMisc(unittest.TestCase):

def test_zero(self) -> None:
self.assertEqual(0, crc32c.crc32c(b''))
self.assertEqual(0, crc32c.crc32c(b""))

def test_keyword(self) -> None:
self.assertEqual(10, crc32c.crc32c(b'', value=10))
self.assertEqual(10, crc32c.crc32c(b"", value=10))

def test_gil_behaviour(self) -> None:
def _test(data: bytes) -> None:
Expand All @@ -82,21 +83,21 @@ def _test(data: bytes) -> None:
self.assertEqual(crc32c.crc32c(data, gil_release_mode=0), expected)
self.assertEqual(crc32c.crc32c(data, gil_release_mode=1), expected)

_test(b'this_doesnt_release_the_gil_by_default')
_test(b'this_releases_the_gil_by_default' * 1024 * 1024)
_test(b"this_doesnt_release_the_gil_by_default")
_test(b"this_releases_the_gil_by_default" * 1024 * 1024)

def test_crc32_deprecated(self) -> None:
with warnings.catch_warnings(record=True) as warns:
crc32c.crc32(b'')
crc32c.crc32(b"")
self.assertEqual(len(warns), 1)
with warnings.catch_warnings(record=True) as warns:
crc32c.crc32c(b'')
crc32c.crc32c(b"")
self.assertEqual(len(warns), 0)

def test_msvc_examples(self) -> None:
# Examples taken from MSVC's online examples.
# Values are not xor'd in the examples though, so we do it here
max32 = 0xffffffff
max32 = 0xFFFFFFFF

def assert_msvc_vals(b: bytes, crc: int, expected_crc: int) -> None:
self.assertEqual(expected_crc ^ max32, crc32c.crc32c(b, crc ^ max32))
Expand All @@ -107,60 +108,77 @@ def assert_msvc_vals(b: bytes, crc: int, expected_crc: int) -> None:
assert_msvc_vals(ulonglong_as_bytes(0x88889999EEEE3333), 0x5555AAAA, 0x16F57621)


@pytest.mark.skipIf(crc32c is None, reason='no crc32c support in this platform')
class CRCTestValue(NamedTuple):
    """A single CRC32C test vector: a label, the input bytes and the expected checksum."""

    # Label for the vector; the module appends it to "Test" to name the
    # unittest class generated for this vector.
    name: str
    # Input bytes that get checksummed.
    data: bytes
    # Expected CRC32C of `data`, as an unsigned integer.
    crc: int


# Shared CRC32C test vectors. They feed both the pytest parametrization of
# TestSpecificValues (as (data, crc) pairs) and the loop at the bottom of the
# module that generates one unittest class per vector.
# NOTE(review): this module-level annotation is evaluated at import time and
# `list[...]` needs Python >= 3.9 unless annotations are deferred with
# `from __future__ import annotations`; .travis.yml still lists 3.7 and 3.8 --
# confirm.
# NOTE(review): the tuple-based vectors this list replaces also had a
# "Numbers3" entry (b'1234567890', 0xf3dbd4fe) that is not present here --
# confirm the omission is intentional.
test_values: list[CRCTestValue] = [
CRCTestValue("Numbers1", b"123456789", 0xE3069283),
CRCTestValue("Numbers2", b"23456789", 0xBFE92A83),
CRCTestValue("Phrase", b"The quick brown fox jumps over the lazy dog", 0x22620404),
CRCTestValue(
"LongPhrase",
b"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc omni virtuti vitium contrario nomine opponitur. "
b"Conferam tecum, quam cuique verso rem subicias; Te ipsum, dignissimum maioribus tuis, voluptasne induxit, ut adolescentulus eriperes "
b"P. Conclusum est enim contra Cyrenaicos satis acute, nihil ad Epicurum. Duo Reges: constructio interrete. Tum Torquatus: Prorsus, inquit, assentior;\n"
b"Quando enim Socrates, qui parens philosophiae iure dici potest, quicquam tale fecit? Sed quid sentiat, non videtis. Haec quo modo conveniant, non "
b"sane intellego. Sed ille, ut dixi, vitiose. Dic in quovis conventu te omnia facere, ne doleas. Quod si ita se habeat, non possit beatam praestare "
b"vitam sapientia. Quis suae urbis conservatorem Codrum, quis Erechthei filias non maxime laudat? Primum divisit ineleganter; Huic mori optimum esse "
b"propter desperationem sapientiae, illi propter spem vivere.",
0xFCB7575A,
),
CRCTestValue("Empty", b"", 0x0),
]


@pytest.mark.skipif(crc32c is None, reason="no crc32c support in this platform")
class TestCrc32cHash:
    """Basic checks for the hashlib-style hash wrapper exposed by ``crc32c``."""

    # NOTE(review): the same commit also shows a `class CRC32CHash:` line in
    # src/crc32c/__init__.py -- confirm that `crc32c.Crc32cHash` is still the
    # exported name these tests should use.

    def test_misc(self) -> None:
        """Check the fixed attributes and that digest lengths follow ``digest_size``."""
        crc32c_hash = crc32c.Crc32cHash(b"")

        assert crc32c_hash.digest_size == 4
        assert crc32c_hash.name == "crc32c"
        assert len(crc32c_hash.digest()) == crc32c_hash.digest_size
        # Two hexadecimal digits per digest byte.
        assert len(crc32c_hash.hexdigest()) == crc32c_hash.digest_size * 2

    def test_copy(self) -> None:
        """Check that a copy starts out equal but evolves independently of the original."""
        crc32c_hash = crc32c.Crc32cHash(b"")
        crc32c_hash_copy = crc32c_hash.copy()

        # Same state right after copying, but a distinct object.
        assert crc32c_hash.digest() == crc32c_hash_copy.digest()
        assert crc32c_hash.hexdigest() == crc32c_hash_copy.hexdigest()
        assert id(crc32c_hash) != id(crc32c_hash_copy)

        # Feeding different data to each object must make them diverge.
        crc32c_hash.update(b"1")
        crc32c_hash_copy.update(b"2")
        assert crc32c_hash.digest() != crc32c_hash_copy.digest()
        assert crc32c_hash.hexdigest() != crc32c_hash_copy.hexdigest()

        # A further update of the copy alone keeps the two apart.
        crc32c_hash_copy.update(b"2")
        assert crc32c_hash.digest() != crc32c_hash_copy.digest()
        assert crc32c_hash.hexdigest() != crc32c_hash_copy.hexdigest()

@pytest.mark.parametrize("data,digest,hexdigest,base64", [
(b"", b"\x00\x00\x00\x00", "00000000", "AAAAAA=="),
(b"23456789", b"\xbf\xe9\x2a\x83", "bfe92a83", "v+kqgw=="),
(b"123456789", b"\xe3\x06\x92\x83", "e3069283", "4waSgw=="),
])
@pytest.mark.parametrize(
"data,crc",
[(value.data, value.crc) for value in test_values],
)
class TestSpecificValues:
@staticmethod
def _check_values(crc32c_hash: crc32c.Crc32cHash, digest: bytes, hexdigest: str, base64: str) -> None:
assert crc32c_hash.digest() == digest
assert crc32c_hash.hexdigest() == hexdigest
assert crc32c_hash.base64() == base64
def _check_values(crc32c_hash: crc32c.Crc32cHash, crc: int) -> None:
assert int.from_bytes(crc32c_hash.digest(), "big") == crc
assert len(crc32c_hash.digest()) == 4
assert int(crc32c_hash.hexdigest(), 16) == crc
assert len(crc32c_hash.hexdigest()) == 8

def test_piece_by_piece(self, data: bytes, digest: bytes, hexdigest: str, base64: str) -> None:
def test_piece_by_piece(self, data: bytes, crc: int) -> None:
crc32c_hash = crc32c.Crc32cHash(b"")
for x in as_individual_bytes(data):
crc32c_hash.update(x)
self._check_values(crc32c_hash, digest, hexdigest, base64)

def test_all(self, data: bytes, digest: bytes, hexdigest: str, base64: str) -> None:
self._check_values(crc32c.Crc32cHash(data), digest, hexdigest, base64)

self._check_values(crc32c_hash, crc)

numbers1 = ('Numbers1', b'123456789', 0xe3069283)
numbers2 = ('Numbers2', b'23456789', 0xBFE92A83)
numbers3 = ('Numbers3', b'1234567890', 0xf3dbd4fe)
phrase = ('Phrase', b'The quick brown fox jumps over the lazy dog', 0x22620404)
long_phrase = ('LongPhrase', (b"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc omni virtuti vitium contrario nomine opponitur. "
b"Conferam tecum, quam cuique verso rem subicias; Te ipsum, dignissimum maioribus tuis, voluptasne induxit, ut adolescentulus eriperes "
b"P. Conclusum est enim contra Cyrenaicos satis acute, nihil ad Epicurum. Duo Reges: constructio interrete. Tum Torquatus: Prorsus, inquit, assentior;\n"
b"Quando enim Socrates, qui parens philosophiae iure dici potest, quicquam tale fecit? Sed quid sentiat, non videtis. Haec quo modo conveniant, non "
b"sane intellego. Sed ille, ut dixi, vitiose. Dic in quovis conventu te omnia facere, ne doleas. Quod si ita se habeat, non possit beatam praestare "
b"vitam sapientia. Quis suae urbis conservatorem Codrum, quis Erechthei filias non maxime laudat? Primum divisit ineleganter; Huic mori optimum esse "
b"propter desperationem sapientiae, illi propter spem vivere."), 0xfcb7575a)
def test_all(self, data: bytes, crc: int) -> None:
self._check_values(crc32c.Crc32cHash(data), crc)


class Crc32cChecks(object):
Expand Down Expand Up @@ -190,11 +208,19 @@ def test_by_different_memory_offsets(self) -> None:
val = memoryview(self.val)
c = crc32c.crc32c(val[0:offset])
c = crc32c.crc32c(val[offset:], c)
self.assertEqual(self.checksum, c, "Invalid checksum when splitting at offset %d" % offset)
self.assertEqual(
self.checksum,
c,
"Invalid checksum when splitting at offset %d" % offset,
)


# Generate the actual unittest classes for each of the testing values
if crc32c is not None:
for name, val, checksum in (numbers1, numbers2, numbers3, phrase, long_phrase):
classname = 'Test%s' % name
locals()[classname] = type(classname, (unittest.TestCase, Crc32cChecks), {'val': val, 'checksum': checksum})
for value in test_values:
classname = "Test%s" % value.name
locals()[classname] = type(
classname,
(unittest.TestCase, Crc32cChecks),
{"val": value.data, "checksum": value.crc},
)

0 comments on commit 6f13a71

Please sign in to comment.