Skip to content

Commit

Permalink
Merge pull request #32 from borgbackup/separate-format
Browse files (browse the repository at this point in the history)
HashTableNT: give separate formats in value_format namedtuple
  • Loading branch information
ThomasWaldmann authored Nov 17, 2024
2 parents 9cc7509 + b4f8c7e commit 9fa09c9
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 18 deletions.
1 change: 1 addition & 0 deletions src/borghash/HashTableNT.pxd
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
cdef class HashTableNT:
cdef int key_size
cdef object value_type
cdef object value_format
cdef object value_struct
cdef int value_size
cdef object inner
31 changes: 20 additions & 11 deletions src/borghash/HashTableNT.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,23 @@ _NoDefault = object()

cdef class HashTableNT:
def __init__(self, items=None, *,
key_size: int = 0, value_format: str = "", value_type: Any = None,
key_size: int, value_type: Any, value_format: Any,
capacity: int = MIN_CAPACITY) -> None:
if not key_size:
raise ValueError("key_size must be specified and must be > 0.")
if not value_format:
raise ValueError("value_format must be specified and must be non-empty.")
if value_type is None:
raise ValueError("value_type must be specified (a namedtuple type corresponding to value_format).")
if not isinstance(key_size, int) or not key_size > 0:
raise ValueError("key_size must be an integer and > 0.")
if type(value_type) is not type:
raise TypeError("value_type must be a namedtuple type.")
if not isinstance(value_format, tuple):
raise TypeError("value_format must be a namedtuple instance.")
if value_format._fields != value_type._fields:
raise TypeError("value_format's and value_type's element names must correspond.")
if not all(isinstance(fmt, str) and len(fmt) > 0 for fmt in value_format):
raise ValueError("value_format's elements must be str and non-empty.")
self.key_size = key_size
self.value_struct = struct.Struct(value_format)
self.value_size = self.value_struct.size
self.value_type = value_type
self.value_format = value_format
self.value_struct = struct.Struct("".join(value_format))
self.value_size = self.value_struct.size
self.inner = HashTable(key_size=self.key_size, value_size=self.value_size, capacity=capacity)
_fill(self, items)

Expand Down Expand Up @@ -159,9 +164,11 @@ cdef class HashTableNT:
meta = {
'key_size': self.key_size,
'value_size': self.value_size,
'value_format': self.value_struct.format,
'value_type_name': self.value_type.__name__,
'value_type_fields': self.value_type._fields,
'value_format_name': self.value_format.__class__.__name__,
'value_format_fields': self.value_format._fields,
'value_format': self.value_format,
'capacity': self.inner.capacity,
'used': self.inner.used, # count of keys / values
}
Expand Down Expand Up @@ -201,7 +208,9 @@ cdef class HashTableNT:
raise ValueError(f"Invalid file, file is too short.")
meta = json.loads(meta_bytes.decode("utf-8"))
value_type = namedtuple(meta['value_type_name'], meta['value_type_fields'])
ht = cls(key_size=meta['key_size'], value_format=meta['value_format'], value_type=value_type, capacity=meta['capacity'])
value_format_t = namedtuple(meta['value_format_name'], meta['value_format_fields'])
value_format = value_format_t(*meta['value_format'])
ht = cls(key_size=meta['key_size'], value_format=value_format, value_type=value_type, capacity=meta['capacity'])
count = 0
ksize, vsize = meta['key_size'], meta['value_size']
for i in range(meta['used']):
Expand Down
4 changes: 3 additions & 1 deletion src/borghash/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@ def demo():
count = 50000
value_type = namedtuple("Chunk", ["refcount", "size"])
value_format_t = namedtuple("ChunkFormat", ["refcount", "size"])
value_format = value_format_t(refcount="<I", size="I")
# 256bit (32Byte) key, 2x 32bit (4Byte) values
ht = HashTableNT(key_size=32, value_format="<II", value_type=value_type)
ht = HashTableNT(key_size=32, value_type=value_type, value_format=value_format)
t0 = time()
for i in range(count):
Expand Down
5 changes: 3 additions & 2 deletions tests/benchmark_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@
from .hashtable_test import H2

VALUE_TYPE = namedtuple("value_type", "value")
VALUE_FMT = "<I"
VALUE_FMT_TYPE = namedtuple("value_format", "value")
VALUE_FMT = VALUE_FMT_TYPE("<I")
KEY_SIZE = len(H2(0))
VALUE_SIZE = len(struct.pack(VALUE_FMT, 0))
VALUE_SIZE = len(struct.pack("".join(VALUE_FMT), 0))
VALUE_BITS = VALUE_SIZE * 8

@pytest.fixture(scope="module")
Expand Down
9 changes: 5 additions & 4 deletions tests/hashtablent_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@
from .hashtable_test import H2

key_size = 32 # 32 bytes = 256bits key
value_format = "<III" # 3x little endian 32bit unsigned int
value_type = namedtuple("vt", "v1 v2 v3")
value_format_t = namedtuple("vf", "v1 v2 v3")
value_format = value_format_t(v1="<I", v2="I", v3="I") # 3x little endian 32bit unsigned int

key1, value1 = b"a" * 32, value_type(11, 12, 13)
key2, value2 = b"b" * 32, value_type(21, 22, 23)
Expand All @@ -19,7 +20,7 @@

@pytest.fixture
def ntht():
return HashTableNT(key_size=key_size, value_format=value_format, value_type=value_type)
return HashTableNT(key_size=key_size, value_type=value_type, value_format=value_format)


@pytest.fixture
Expand All @@ -30,10 +31,10 @@ def ntht12(ntht):


def test_init():
ht = HashTableNT(key_size=32, value_format=value_format, value_type=value_type)
ht = HashTableNT(key_size=32, value_type=value_type, value_format=value_format)
assert len(ht) == 0
items = [(key1, value1), (key2, value2)]
ht = HashTableNT(items, key_size=32, value_format=value_format, value_type=value_type)
ht = HashTableNT(items, key_size=32, value_type=value_type, value_format=value_format)
assert ht[key1] == value1
assert ht[key2] == value2

Expand Down

0 comments on commit 9fa09c9

Please sign in to comment.