Skip to content

Commit

Permalink
feat: improve unmarshall performance (#35)
Browse files Browse the repository at this point in the history
  • Loading branch information
bdraco authored Sep 24, 2022
1 parent 4a7c4c4 commit db436b7
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 51 deletions.
76 changes: 58 additions & 18 deletions src/dbus_fast/_private/unmarshaller.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,25 @@
HeaderField,
)

# Native byte order of the running interpreter, evaluated once at import time.
# Kept as two separate module-level flags (rather than one) so each can be
# overridden independently.
IS_LITTLE_ENDIAN = sys.byteorder == "little"
IS_BIG_ENDIAN = sys.byteorder == "big"

MAX_UNIX_FDS = 16

# struct byte-order prefix for each message endianness.
UNPACK_SYMBOL = {LITTLE_ENDIAN: "<", BIG_ENDIAN: ">"}
# Precompiled structs that unpack three consecutive uint32 values, one per
# message endianness.
UNPACK_LENGTHS = {BIG_ENDIAN: Struct(">III"), LITTLE_ENDIAN: Struct("<III")}

# uint32 description: format code, size in bytes, and D-Bus type code.
UINT32_CAST = "I"
UINT32_SIZE = 4
UINT32_DBUS_TYPE = "u"
# First (only) type of the parsed "u" signature; looked up once here so
# readers can reuse it instead of re-parsing the signature on every call.
UINT32_SIGNATURE = SignatureTree._get(UINT32_DBUS_TYPE).types[0]

DBUS_TO_CTYPE = {
"y": ("B", 1), # byte
"n": ("h", 2), # int16
"q": ("H", 2), # uint16
"i": ("i", 4), # int32
"u": ("I", 4), # uint32
UINT32_DBUS_TYPE: (UINT32_CAST, UINT32_SIZE), # uint32
"x": ("q", 8), # int64
"t": ("Q", 8), # uint64
"d": ("d", 8), # double
Expand All @@ -37,8 +45,6 @@
HEADER_SIGNATURE_SIZE = 16
HEADER_ARRAY_OF_STRUCT_SIGNATURE_POSITION = 12

UINT32_SIGNATURE = SignatureTree._get("u").types[0]

HEADER_DESTINATION = HeaderField.DESTINATION.name
HEADER_PATH = HeaderField.PATH.name
HEADER_INTERFACE = HeaderField.INTERFACE.name
Expand Down Expand Up @@ -179,12 +185,28 @@ def read_to_offset(self, offset: int) -> None:
def read_boolean(self, _=None):
    """Read a boolean from the buffer.

    The value is read as a uint32 argument and then coerced to ``bool``.
    The second positional parameter is accepted only so the method matches
    the common reader call signature; it is ignored.
    """
    raw_value = self.read_argument(UINT32_SIGNATURE)
    return bool(raw_value)

def read_string(self, _=None):
str_length = self.read_argument(UINT32_SIGNATURE)
def read_string_cast(self, _=None):
"""Read a string using cast."""
self.offset += UINT32_SIZE + (-self.offset & (UINT32_SIZE - 1)) # align
str_start = self.offset
# read terminating '\0' byte as well (str_length + 1)
self.offset += str_length + 1
return self.buf[str_start : str_start + str_length].decode()
self.offset += (
self.view[self.offset - UINT32_SIZE : self.offset].cast(UINT32_CAST)[0] + 1
)
return self.buf[str_start : self.offset - 1].decode()

def read_string_unpack(self, _=None):
    """Read a length-prefixed string, decoding the length with ``unpack_from``.

    The string is laid out as a 4-byte aligned uint32 length, the string
    bytes, and a terminating ``\\0`` byte. On return ``self.offset`` points
    just past the terminator. The second positional parameter is ignored.
    """
    # Skip the alignment padding and the uint32 length prefix in one step.
    self.offset += UINT32_SIZE + (-self.offset & (UINT32_SIZE - 1))
    text_start = self.offset

    # Fourth slot of the uint32 reader entry; presumably a precompiled
    # struct.Struct for the message's byte order -- confirm where the
    # reader tables are built.
    unpack_uint32 = self.readers[UINT32_DBUS_TYPE][3].unpack_from
    text_length = unpack_uint32(self.view, text_start - UINT32_SIZE)[0]

    # Consume the string bytes plus the terminating '\0' byte.
    text_end = text_start + text_length
    self.offset = text_end + 1
    return self.buf[text_start:text_end].decode()

def read_signature(self, _=None):
signature_len = self.view[self.offset] # byte
Expand Down Expand Up @@ -288,10 +310,15 @@ def _read_header(self):
self.msg_len = (
self.header_len + (-self.header_len & 7) + self.body_len
) # align 8
can_cast = bool(sys.byteorder == "little" and endian == LITTLE_ENDIAN) or (
sys.byteorder == "big" and endian == BIG_ENDIAN
)
self.readers = self._readers_by_type[(endian, can_cast)]
self.readers = self._readers_by_type[
(
endian,
bool(
(IS_LITTLE_ENDIAN and endian == LITTLE_ENDIAN)
or (IS_BIG_ENDIAN and endian == BIG_ENDIAN)
),
)
]

def _read_body(self):
"""Read the body of the message."""
Expand Down Expand Up @@ -335,12 +362,25 @@ def unmarshall(self):
return None
return self.message

_complex_parsers: Dict[
# Reader table for the types that are parsed by a dedicated method, keyed by
# D-Bus type code. Each value is a 4-tuple whose first item is the reader
# method; the other three slots are always None in this table.
# This variant routes the string-like codes ("o" and "s") through
# read_string_unpack.
_complex_parsers_unpack: Dict[
str, Tuple[Callable[["Unmarshaller", SignatureType], Any], None, None, None]
] = {
"b": (read_boolean, None, None, None),
"o": (read_string_unpack, None, None, None),
"s": (read_string_unpack, None, None, None),
"g": (read_signature, None, None, None),
"a": (read_array, None, None, None),
"(": (read_struct, None, None, None),
"{": (read_dict_entry, None, None, None),
"v": (read_variant, None, None, None),
}

_complex_parsers_cast: Dict[
str, Tuple[Callable[["Unmarshaller", SignatureType], Any], None, None, None]
] = {
"b": (read_boolean, None, None, None),
"o": (read_string, None, None, None),
"s": (read_string, None, None, None),
"o": (read_string_cast, None, None, None),
"s": (read_string_cast, None, None, None),
"g": (read_signature, None, None, None),
"a": (read_array, None, None, None),
"(": (read_struct, None, None, None),
Expand Down Expand Up @@ -372,18 +412,18 @@ def unmarshall(self):
_readers_by_type: Dict[Tuple[int, bool], READER_TYPE] = {
(LITTLE_ENDIAN, True): {
**_ctype_by_endian[(LITTLE_ENDIAN, True)],
**_complex_parsers,
**_complex_parsers_cast,
},
(LITTLE_ENDIAN, False): {
**_ctype_by_endian[(LITTLE_ENDIAN, False)],
**_complex_parsers,
**_complex_parsers_unpack,
},
(BIG_ENDIAN, True): {
**_ctype_by_endian[(BIG_ENDIAN, True)],
**_complex_parsers,
**_complex_parsers_cast,
},
(BIG_ENDIAN, False): {
**_ctype_by_endian[(BIG_ENDIAN, False)],
**_complex_parsers,
**_complex_parsers_unpack,
},
}
74 changes: 41 additions & 33 deletions tests/test_marshaller.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import json
import os
from typing import Any, Dict
from unittest.mock import patch

import pytest

Expand Down Expand Up @@ -93,39 +94,46 @@ def test_marshalling_with_table():


@pytest.mark.parametrize("unmarshall_table", (table,))
def test_unmarshalling_with_table(unmarshall_table):
for item in unmarshall_table:

stream = io.BytesIO(bytes.fromhex(item["data"]))
unmarshaller = Unmarshaller(stream)
try:
unmarshaller.unmarshall()
except Exception as e:
print("message failed to unmarshall:")
print(json_dump(item["message"]))
raise e

message = json_to_message(item["message"])

body = []
for i, type_ in enumerate(message.signature_tree.types):
body.append(replace_variants(type_, message.body[i]))
message.body = body

for attr in [
"body",
"signature",
"message_type",
"destination",
"path",
"interface",
"member",
"flags",
"serial",
]:
assert getattr(unmarshaller.message, attr) == getattr(
message, attr
), f"attr doesnt match: {attr}"
@pytest.mark.parametrize("endians", ((True, False), (False, True)))
def test_unmarshalling_with_table(unmarshall_table, endians):
    """Unmarshall every table entry and compare it with the expected message.

    ``endians`` is an ``(IS_BIG_ENDIAN, IS_LITTLE_ENDIAN)`` pair that is
    patched onto the unmarshaller module, so the table is run once with the
    host reported as big endian and once as little endian. The intent appears
    to be exercising both reader paths regardless of the real host byte
    order -- confirm against the reader selection in the unmarshaller.
    """
    # Imported under an alias so the per-message ``unmarshaller`` local below
    # does not rebind the module that is being patched.
    from dbus_fast._private import unmarshaller as unmarshaller_module

    is_big_endian, is_little_endian = endians
    compared_attrs = (
        "body",
        "signature",
        "message_type",
        "destination",
        "path",
        "interface",
        "member",
        "flags",
        "serial",
    )

    with patch.object(
        unmarshaller_module, "IS_BIG_ENDIAN", is_big_endian
    ), patch.object(unmarshaller_module, "IS_LITTLE_ENDIAN", is_little_endian):

        for item in unmarshall_table:
            stream = io.BytesIO(bytes.fromhex(item["data"]))
            unmarshaller = Unmarshaller(stream)
            try:
                unmarshaller.unmarshall()
            except Exception:
                print("message failed to unmarshall:")
                print(json_dump(item["message"]))
                # Bare raise keeps the original traceback intact.
                raise

            message = json_to_message(item["message"])

            # Normalise variants in the expected body so it is comparable
            # with what the unmarshaller produced.
            message.body = [
                replace_variants(type_, message.body[i])
                for i, type_ in enumerate(message.signature_tree.types)
            ]

            for attr in compared_attrs:
                assert getattr(unmarshaller.message, attr) == getattr(
                    message, attr
                ), f"attr doesnt match: {attr}"


def test_unmarshall_can_resume():
Expand Down

0 comments on commit db436b7

Please sign in to comment.