Skip to content

Commit

Permalink
Merge pull request #33 from borgbackup/separate-endianness
Browse files Browse the repository at this point in the history
HashTableNT: deal with byte_order separately
  • Loading branch information
ThomasWaldmann authored Nov 17, 2024
2 parents 9fa09c9 + 6150f11 commit d4715e0
Show file tree
Hide file tree
Showing 5 changed files with 14 additions and 6 deletions.
1 change: 1 addition & 0 deletions src/borghash/HashTableNT.pxd
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
cdef class HashTableNT:
cdef int key_size
cdef object byte_order
cdef object value_type
cdef object value_format
cdef object value_struct
Expand Down
13 changes: 10 additions & 3 deletions src/borghash/HashTableNT.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,14 @@ assert len(MAGIC) == 8
VERSION = 1 # version of the on-disk (serialized) format produced by .write().
HEADER_FMT = "<8sII" # magic, version, meta length

BYTE_ORDER = dict(big=">", little="<", network="!", native="=") # struct format chars

_NoDefault = object()

cdef class HashTableNT:
def __init__(self, items=None, *,
key_size: int, value_type: Any, value_format: Any,
capacity: int = MIN_CAPACITY) -> None:
capacity: int = MIN_CAPACITY, byte_order="little") -> None:
if not isinstance(key_size, int) or not key_size > 0:
raise ValueError("key_size must be an integer and > 0.")
if type(value_type) is not type:
Expand All @@ -33,10 +35,13 @@ cdef class HashTableNT:
raise TypeError("value_format's and value_type's element names must correspond.")
if not all(isinstance(fmt, str) and len(fmt) > 0 for fmt in value_format):
raise ValueError("value_format's elements must be str and non-empty.")
if byte_order not in BYTE_ORDER:
raise ValueError(f"byte_order must be one of: {','.join(BYTE_ORDER.keys())}")
self.key_size = key_size
self.value_type = value_type
self.value_format = value_format
self.value_struct = struct.Struct("".join(value_format))
self.byte_order = byte_order
self.value_struct = struct.Struct(BYTE_ORDER[byte_order] + "".join(value_format))
self.value_size = self.value_struct.size
self.inner = HashTable(key_size=self.key_size, value_size=self.value_size, capacity=capacity)
_fill(self, items)
Expand Down Expand Up @@ -164,6 +169,7 @@ cdef class HashTableNT:
meta = {
'key_size': self.key_size,
'value_size': self.value_size,
'byte_order': self.byte_order,
'value_type_name': self.value_type.__name__,
'value_type_fields': self.value_type._fields,
'value_format_name': self.value_format.__class__.__name__,
Expand Down Expand Up @@ -210,7 +216,8 @@ cdef class HashTableNT:
value_type = namedtuple(meta['value_type_name'], meta['value_type_fields'])
value_format_t = namedtuple(meta['value_format_name'], meta['value_format_fields'])
value_format = value_format_t(*meta['value_format'])
ht = cls(key_size=meta['key_size'], value_format=value_format, value_type=value_type, capacity=meta['capacity'])
ht = cls(key_size=meta['key_size'], value_format=value_format, value_type=value_type,
capacity=meta['capacity'], byte_order=meta['byte_order'])
count = 0
ksize, vsize = meta['key_size'], meta['value_size']
for i in range(meta['used']):
Expand Down
2 changes: 1 addition & 1 deletion src/borghash/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def demo():
count = 50000
value_type = namedtuple("Chunk", ["refcount", "size"])
value_format_t = namedtuple("ChunkFormat", ["refcount", "size"])
value_format = value_format_t(refcount="<I", size="I")
value_format = value_format_t(refcount="I", size="I")
# 256bit (32Byte) key, 2x 32bit (4Byte) values
ht = HashTableNT(key_size=32, value_type=value_type, value_format=value_format)
Expand Down
2 changes: 1 addition & 1 deletion tests/benchmark_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

VALUE_TYPE = namedtuple("value_type", "value")
VALUE_FMT_TYPE = namedtuple("value_format", "value")
VALUE_FMT = VALUE_FMT_TYPE("<I")
VALUE_FMT = VALUE_FMT_TYPE("I")
KEY_SIZE = len(H2(0))
VALUE_SIZE = len(struct.pack("".join(VALUE_FMT), 0))
VALUE_BITS = VALUE_SIZE * 8
Expand Down
2 changes: 1 addition & 1 deletion tests/hashtablent_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
key_size = 32 # 32 bytes = 256bits key
value_type = namedtuple("vt", "v1 v2 v3")
value_format_t = namedtuple("vf", "v1 v2 v3")
value_format = value_format_t(v1="<I", v2="I", v3="I") # 3x little endian 32bit unsigned int
value_format = value_format_t(v1="I", v2="I", v3="I") # 3x 32bit unsigned int (little endian via the default byte_order)

key1, value1 = b"a" * 32, value_type(11, 12, 13)
key2, value2 = b"b" * 32, value_type(21, 22, 23)
Expand Down

0 comments on commit d4715e0

Please sign in to comment.