Skip to content

Commit

Permalink
HashTableNT: deal with byte_order separately
Browse files Browse the repository at this point in the history
The first element of the value_format namedtuple should not be
special-cased by carrying the byte order, especially since that
byte order applies to ALL elements.

So specify the byte order separately and prepend it to the joined element formats.
  • Loading branch information
ThomasWaldmann committed Nov 17, 2024
1 parent 9fa09c9 commit 6150f11
Show file tree
Hide file tree
Showing 5 changed files with 14 additions and 6 deletions.
1 change: 1 addition & 0 deletions src/borghash/HashTableNT.pxd
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
cdef class HashTableNT:
cdef int key_size
cdef object byte_order
cdef object value_type
cdef object value_format
cdef object value_struct
Expand Down
13 changes: 10 additions & 3 deletions src/borghash/HashTableNT.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,14 @@ assert len(MAGIC) == 8
VERSION = 1 # version of the on-disk (serialized) format produced by .write().
HEADER_FMT = "<8sII" # magic, version, meta length

BYTE_ORDER = dict(big=">", little="<", network="!", native="=") # struct format chars

_NoDefault = object()

cdef class HashTableNT:
def __init__(self, items=None, *,
key_size: int, value_type: Any, value_format: Any,
capacity: int = MIN_CAPACITY) -> None:
capacity: int = MIN_CAPACITY, byte_order="little") -> None:
if not isinstance(key_size, int) or not key_size > 0:
raise ValueError("key_size must be an integer and > 0.")
if type(value_type) is not type:
Expand All @@ -33,10 +35,13 @@ cdef class HashTableNT:
raise TypeError("value_format's and value_type's element names must correspond.")
if not all(isinstance(fmt, str) and len(fmt) > 0 for fmt in value_format):
raise ValueError("value_format's elements must be str and non-empty.")
if byte_order not in BYTE_ORDER:
    # The message must be an f-string: without the "f" prefix the
    # "{','.join(...)}" placeholder is emitted literally instead of
    # listing the accepted byte order names.
    raise ValueError(f"byte_order must be one of: {','.join(BYTE_ORDER.keys())}")
self.key_size = key_size
self.value_type = value_type
self.value_format = value_format
self.value_struct = struct.Struct("".join(value_format))
self.byte_order = byte_order
self.value_struct = struct.Struct(BYTE_ORDER[byte_order] + "".join(value_format))
self.value_size = self.value_struct.size
self.inner = HashTable(key_size=self.key_size, value_size=self.value_size, capacity=capacity)
_fill(self, items)
Expand Down Expand Up @@ -164,6 +169,7 @@ cdef class HashTableNT:
meta = {
'key_size': self.key_size,
'value_size': self.value_size,
'byte_order': self.byte_order,
'value_type_name': self.value_type.__name__,
'value_type_fields': self.value_type._fields,
'value_format_name': self.value_format.__class__.__name__,
Expand Down Expand Up @@ -210,7 +216,8 @@ cdef class HashTableNT:
value_type = namedtuple(meta['value_type_name'], meta['value_type_fields'])
value_format_t = namedtuple(meta['value_format_name'], meta['value_format_fields'])
value_format = value_format_t(*meta['value_format'])
ht = cls(key_size=meta['key_size'], value_format=value_format, value_type=value_type, capacity=meta['capacity'])
ht = cls(key_size=meta['key_size'], value_format=value_format, value_type=value_type,
capacity=meta['capacity'], byte_order=meta['byte_order'])
count = 0
ksize, vsize = meta['key_size'], meta['value_size']
for i in range(meta['used']):
Expand Down
2 changes: 1 addition & 1 deletion src/borghash/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def demo():
count = 50000
value_type = namedtuple("Chunk", ["refcount", "size"])
value_format_t = namedtuple("ChunkFormat", ["refcount", "size"])
value_format = value_format_t(refcount="<I", size="I")
value_format = value_format_t(refcount="I", size="I")
# 256bit (32Byte) key, 2x 32bit (4Byte) values
ht = HashTableNT(key_size=32, value_type=value_type, value_format=value_format)
Expand Down
2 changes: 1 addition & 1 deletion tests/benchmark_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

VALUE_TYPE = namedtuple("value_type", "value")
VALUE_FMT_TYPE = namedtuple("value_format", "value")
VALUE_FMT = VALUE_FMT_TYPE("<I")
VALUE_FMT = VALUE_FMT_TYPE("I")
KEY_SIZE = len(H2(0))
VALUE_SIZE = len(struct.pack("".join(VALUE_FMT), 0))
VALUE_BITS = VALUE_SIZE * 8
Expand Down
2 changes: 1 addition & 1 deletion tests/hashtablent_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
key_size = 32 # 32 bytes = 256bits key
value_type = namedtuple("vt", "v1 v2 v3")
value_format_t = namedtuple("vf", "v1 v2 v3")
value_format = value_format_t(v1="<I", v2="I", v3="I") # 3x little endian 32bit unsigned int
value_format = value_format_t(v1="I", v2="I", v3="I") # 3x little endian 32bit unsigned int

key1, value1 = b"a" * 32, value_type(11, 12, 13)
key2, value2 = b"b" * 32, value_type(21, 22, 23)
Expand Down

0 comments on commit 6150f11

Please sign in to comment.