Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use orjson to improve JSON marshalling performance #691

Merged
merged 2 commits into from
Jan 30, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,17 @@ Changes for crate
Unreleased
==========

- Switched JSON encoder to use the `orjson`_ library, to improve JSON
marshalling performance. Thanks, @widmogrod.
orjson is fast and in some spots even more correct when compared against
Python's stdlib ``json`` module. Contrary to the stdlib variant, orjson
will serialize to ``bytes`` instead of ``str``. Please also note it
will not deserialize to dataclasses, UUIDs, decimals, etc., or support
``object_hook``. Within ``crate-python``, it is used together with an encoder
function that adds support for Python's ``Decimal`` type and
freezegun's ``FakeDatetime`` type.

.. _orjson: https://github.com/ijl/orjson
amotl marked this conversation as resolved.
Show resolved Hide resolved

2024/11/23 1.0.1
================
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def read(path):
packages=find_namespace_packages("src"),
package_dir={"": "src"},
install_requires=[
"orjson<4",
"urllib3",
"verlib2",
],
Expand Down
60 changes: 33 additions & 27 deletions src/crate/client/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,23 +20,21 @@
# software solely pursuant to the terms of the relevant commercial agreement.


import calendar
import heapq
import io
import json
import logging
import os
import re
import socket
import ssl
import threading
import typing as t
from base64 import b64encode
from datetime import date, datetime, timezone
from decimal import Decimal
from time import time
from urllib.parse import urlparse
from uuid import UUID

import orjson
import urllib3
from urllib3 import connection_from_url
from urllib3.connection import HTTPConnection
Expand Down Expand Up @@ -86,25 +84,33 @@
return None


class CrateJsonEncoder(json.JSONEncoder):
    """
    JSON encoder with support for a few additional Python types.

    - `Decimal` and `UUID` values are serialized as their string form.
    - `datetime` values are serialized as integer milliseconds relative to
      1970-01-01. Values carrying a `tzinfo` are measured against the UTC
      epoch, values without one against a naive epoch.
    - `date` values are serialized as integer milliseconds, computed from
      their time tuple via `calendar.timegm`.
    """

    epoch_aware = datetime(1970, 1, 1, tzinfo=timezone.utc)
    epoch_naive = datetime(1970, 1, 1)

    def default(self, o):
        """
        Return a JSON-serializable replacement for `o`.

        Falls back to `json.JSONEncoder.default`, which raises `TypeError`
        for types it does not know.
        """
        if isinstance(o, (Decimal, UUID)):
            return str(o)
        # `datetime` is a subclass of `date`, so it has to be checked first.
        if isinstance(o, datetime):
            epoch = self.epoch_aware if o.tzinfo is not None else self.epoch_naive
            delta = o - epoch
            # Use exact integer arithmetic. Summing floats loses precision
            # for far-future values, e.g. 9999-12-31T23:59:59.999999 would
            # otherwise be rounded up by one millisecond.
            total_us = (
                delta.days * 24 * 3600 + delta.seconds
            ) * 1000000 + delta.microseconds
            # Truncate toward zero, also for values before the epoch.
            millis = abs(total_us) // 1000
            return millis if total_us >= 0 else -millis
        if isinstance(o, date):
            return calendar.timegm(o.timetuple()) * 1000
        return super().default(o)
def cratedb_json_encoder(obj: t.Any) -> str:
    """
    Encoder function for orjson, with additional type support.

    Meant to be passed as the `default` callable to `orjson.dumps`.

    - Python's `Decimal` type is rendered through `str()`.
    - Any object exposing an `isoformat` attribute, e.g. freezegun's
      `FakeDatetime` type, is rendered through `obj.isoformat()`.

    Raises `TypeError` for any other object, naming the offending type.

    https://github.com/ijl/orjson#default
    """
    if isinstance(obj, Decimal):
        return str(obj)
    if hasattr(obj, "isoformat"):
        return obj.isoformat()
    # Name the type, so the failure is actionable for the caller.
    raise TypeError(
        f"Object of type {type(obj).__name__} is not JSON serializable"
    )

Check warning on line 100 in src/crate/client/http.py

View check run for this annotation

Codecov / codecov/patch

src/crate/client/http.py#L98-L100

Added lines #L98 - L100 were not covered by tests


def json_dumps(obj: t.Any) -> bytes:
    """
    Render `obj` as JSON using `orjson`, returning `bytes`.

    Types orjson does not handle on its own are delegated to
    `cratedb_json_encoder`. The `OPT_NON_STR_KEYS` and `OPT_SERIALIZE_NUMPY`
    options are enabled.

    https://github.com/ijl/orjson
    """
    flags = orjson.OPT_NON_STR_KEYS | orjson.OPT_SERIALIZE_NUMPY
    return orjson.dumps(obj, default=cratedb_json_encoder, option=flags)


class Server:
Expand Down Expand Up @@ -180,7 +186,7 @@

def _json_from_response(response):
try:
return json.loads(response.data.decode("utf-8"))
return orjson.loads(response.data)
except ValueError as ex:
raise ProgrammingError(
"Invalid server response of content-type '{}':\n{}".format(
Expand Down Expand Up @@ -223,7 +229,7 @@
if response.status == 503:
raise ConnectionError(message)
if response.headers.get("content-type", "").startswith("application/json"):
data = json.loads(response.data.decode("utf-8"))
data = orjson.loads(response.data)
error = data.get("error", {})
error_trace = data.get("error_trace", None)
if "results" in data:
Expand Down Expand Up @@ -323,7 +329,7 @@
kwargs["ssl_minimum_version"] = ssl.TLSVersion.MINIMUM_SUPPORTED


def _create_sql_payload(stmt, args, bulk_args):
def _create_sql_payload(stmt, args, bulk_args) -> bytes:
if not isinstance(stmt, str):
raise ValueError("stmt is not a string")
if args and bulk_args:
Expand All @@ -334,7 +340,7 @@
data["args"] = args
if bulk_args:
data["bulk_args"] = bulk_args
return json.dumps(data, cls=CrateJsonEncoder)
return json_dumps(data)


def _get_socket_opts(
Expand Down
14 changes: 7 additions & 7 deletions tests/client/test_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@
)
from crate.client.http import (
Client,
CrateJsonEncoder,
_get_socket_opts,
_remove_certs_for_non_https,
json_dumps,
)

REQUEST = "crate.client.http.Server.request"
Expand Down Expand Up @@ -318,7 +318,7 @@ def test_datetime_is_converted_to_ts(self, request):
# convert string to dict
# because the order of the keys isn't deterministic
data = json.loads(request.call_args[1]["data"])
self.assertEqual(data["args"], [1425108700000])
self.assertEqual(data["args"], ["2015-02-28T07:31:40"])
client.close()

@patch(REQUEST, autospec=True)
Expand All @@ -329,7 +329,7 @@ def test_date_is_converted_to_ts(self, request):
day = dt.date(2016, 4, 21)
client.sql("insert into users (dt) values (?)", (day,))
data = json.loads(request.call_args[1]["data"])
self.assertEqual(data["args"], [1461196800000])
self.assertEqual(data["args"], ["2016-04-21"])
client.close()

def test_socket_options_contain_keepalive(self):
Expand Down Expand Up @@ -724,10 +724,10 @@ def test_username(self):
class TestCrateJsonEncoder(TestCase):
def test_naive_datetime(self):
data = dt.datetime.fromisoformat("2023-06-26T09:24:00.123")
result = json.dumps(data, cls=CrateJsonEncoder)
self.assertEqual(result, "1687771440123")
result = json_dumps(data)
self.assertEqual(result, b'"2023-06-26T09:24:00.123000"')

def test_aware_datetime(self):
data = dt.datetime.fromisoformat("2023-06-26T09:24:00.123+02:00")
result = json.dumps(data, cls=CrateJsonEncoder)
self.assertEqual(result, "1687764240123")
result = json_dumps(data)
self.assertEqual(result, b'"2023-06-26T09:24:00.123000+02:00"')
amotl marked this conversation as resolved.
Show resolved Hide resolved
Loading