Skip to content

Commit

Permalink
fix: replace pydantic with msgspec
Browse files Browse the repository at this point in the history
This ends up giving us a roughly 1.5x speed improvement (hyperfine measured 1.55 ± 0.08 times faster; full benchmark output below):

```
❯ hyperfine --warmup 1 ".\old\Scripts\python.exe test_nzb_old.py" ".\.venv\Scripts\python.exe test_nzb.py"
Benchmark 1: .\old\Scripts\python.exe test_nzb_old.py
  Time (mean ± σ):      6.042 s ±  0.295 s    [User: 5.752 s, System: 0.259 s]
  Range (min … max):    5.754 s …  6.413 s    10 runs

Benchmark 2: .\.venv\Scripts\python.exe test_nzb.py
  Time (mean ± σ):      3.902 s ±  0.019 s    [User: 3.624 s, System: 0.222 s]
  Range (min … max):    3.871 s …  3.930 s    10 runs

Summary
  .\.venv\Scripts\python.exe test_nzb.py ran
    1.55 ± 0.08 times faster than .\old\Scripts\python.exe test_nzb_old.py
```
  • Loading branch information
Ravencentric committed Feb 3, 2025
1 parent 0846a73 commit d17e8b1
Show file tree
Hide file tree
Showing 8 changed files with 119 additions and 145 deletions.
22 changes: 9 additions & 13 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ classifiers = [
"Programming Language :: Python :: 3.13",
"Typing :: Typed",
]
dependencies = ["natsort>=8.4.0", "pydantic>=2.9.2", "xmltodict>=0.13.0"]
dependencies = ["msgspec>=0.19.0", "natsort>=8.4.0", "xmltodict>=0.14.2"]

[project.urls]
Homepage = "https://nzb.ravencentric.cc"
Expand All @@ -24,20 +24,16 @@ Repository = "https://github.com/Ravencentric/nzb"

[dependency-groups]
docs = [
"mkdocs-autorefs>=1.3.0",
"mkdocs-material>=9.5.50",
"mkdocstrings[python]>=0.27.0",
]
test = [
"coverage[toml]>=7.6.10",
"pytest>=8.3.4",
"rnzb>=0.3.1",
"mkdocs-autorefs>=1.3.0",
"mkdocs-material>=9.5.50",
"mkdocstrings[python]>=0.27.0",
]
test = ["coverage[toml]>=7.6.10", "pytest>=8.3.4", "rnzb>=0.3.1"]
lint = [
"mypy>=1.14.1",
"ruff>=0.9.3",
"types-xmltodict>=0.14.0.20241009",
"typing-extensions>=4.12.2",
"mypy>=1.14.1",
"ruff>=0.9.3",
"types-xmltodict>=0.14.0.20241009",
"typing-extensions>=4.12.2",
]
dev = [
{ include-group = "docs" },
Expand Down
15 changes: 10 additions & 5 deletions src/nzb/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
from pathlib import Path
from typing import TYPE_CHECKING, Literal, overload

import msgspec
import xmltodict
from natsort import natsorted

from nzb._models import File, Meta, ParentModel
from nzb._models import File, Meta
from nzb._parser import parse_doctype, parse_files, parse_metadata
from nzb._utils import construct_meta, nzb_to_dict, read_nzb_file, realpath, remove_meta_fields, sort_meta

Expand All @@ -20,7 +21,7 @@
from nzb._types import StrPath


class Nzb(ParentModel):
class Nzb(msgspec.Struct, frozen=True, eq=True, kw_only=True, cache_hash=True, dict=True):
"""
Represents a complete NZB file.
Expand Down Expand Up @@ -147,7 +148,7 @@ def from_json(cls, json: str, /) -> Nzb:
Raised if the NZB is invalid.
"""
return cls.model_validate_json(json)
return msgspec.json.decode(json, type=cls)

def to_json(self, *, pretty: bool = False) -> str:
"""
Expand All @@ -164,8 +165,12 @@ def to_json(self, *, pretty: bool = False) -> str:
JSON string representing the NZB.
"""
indent = 2 if pretty else None
return self.model_dump_json(indent=indent)
jsonified = msgspec.json.encode(self).decode()

if pretty:
return msgspec.json.format(jsonified)

return jsonified

@cached_property
def size(self) -> int:
Expand Down
16 changes: 6 additions & 10 deletions src/nzb/_models.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,16 @@
from __future__ import annotations

import re
from datetime import datetime
from functools import cached_property
from os.path import splitext

from pydantic import BaseModel, ConfigDict
from msgspec import Struct

from nzb._types import UTCDateTime
from nzb._utils import name_is_par2, name_is_rar, stem_is_obfuscated


class ParentModel(BaseModel):
model_config = ConfigDict(frozen=True, str_strip_whitespace=True)


class Meta(ParentModel):
class Meta(Struct, frozen=True, eq=True, kw_only=True, cache_hash=True, dict=True):
"""Optional creator-definable metadata for the contents of the NZB."""

title: str | None = None
Expand All @@ -30,7 +26,7 @@ class Meta(ParentModel):
"""Category."""


class Segment(ParentModel):
class Segment(Struct, frozen=True, eq=True, kw_only=True, cache_hash=True, dict=True):
"""One part segment of a file."""

size: int
Expand All @@ -41,13 +37,13 @@ class Segment(ParentModel):
"""Message ID of the segment."""


class File(ParentModel):
class File(Struct, frozen=True, eq=True, kw_only=True, cache_hash=True, dict=True):
"""Represents a complete file, consisting of segments that make up a file."""

poster: str
"""The poster of the file."""

posted_at: UTCDateTime
posted_at: datetime
"""The date and time when the file was posted, in UTC."""

subject: str
Expand Down
35 changes: 21 additions & 14 deletions src/nzb/_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import re
from typing import Any, TypeAlias, cast

import msgspec
from natsort import natsorted

from nzb._exceptions import InvalidNzbError
Expand Down Expand Up @@ -82,8 +83,8 @@ def parse_metadata(nzb: dict[str, Any]) -> Meta:

return Meta(
title=title,
passwords=passwords, # type: ignore[arg-type]
tags=tags, # type: ignore[arg-type]
passwords=tuple(passwords),
tags=tuple(tags),
category=category,
)

Expand Down Expand Up @@ -124,16 +125,16 @@ def parse_segments(segmentdict: dict[str, list[dict[str, str]] | dict[str, str]

for segment in segments:
try:
size = segment["@bytes"]
number = segment["@number"]
size = int(segment["@bytes"])
number = int(segment["@number"])
message_id = segment["#text"]
except KeyError:
except (KeyError, ValueError):
# This segment is broken
# We do not error here because a few missing
# segments don't invalidate the nzb.
continue

segmentlist.append(Segment(size=size, number=number, message_id=message_id)) # type: ignore[arg-type]
segmentlist.append(Segment(size=size, number=number, message_id=message_id))

return tuple(natsorted(segmentlist, key=lambda seg: seg.number))

Expand Down Expand Up @@ -197,15 +198,21 @@ def parse_files(nzb: dict[str, Any]) -> tuple[File, ...]:
else:
grouplist.extend(groups)

filelist.append(
File(
poster=file.get("@poster"),
posted_at=file.get("@date"),
subject=file.get("@subject"),
groups=natsorted(grouplist), # type: ignore[arg-type]
segments=parse_segments(file.get("segments")),
try:
_file = msgspec.convert(
{
"poster": file.get("@poster"),
"posted_at": file.get("@date"),
"subject": file.get("@subject"),
"groups": natsorted(grouplist),
"segments": parse_segments(file.get("segments")),
},
type=File,
strict=False,
)
)
except msgspec.ValidationError as e:
raise InvalidNzbError(str(e)) from None
filelist.append(_file)

if not filelist: # pragma: no cover
# I cannot think of any case where this will ever be raised
Expand Down
8 changes: 1 addition & 7 deletions src/nzb/_types.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,7 @@
from __future__ import annotations

from datetime import datetime, timezone
from os import PathLike
from typing import Annotated, TypeAlias

from pydantic import AfterValidator
from typing import TypeAlias

StrPath: TypeAlias = str | PathLike[str]
"""String or pathlib.Path"""

UTCDateTime = Annotated[datetime, AfterValidator(lambda dt: dt.astimezone(timezone.utc))]
"""datetime that's always in UTC."""
28 changes: 28 additions & 0 deletions tests/test_exceptions.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import textwrap
from pathlib import Path

import pytest
Expand Down Expand Up @@ -105,3 +106,30 @@ def test_read_nzb_file(tmp_path: Path) -> None:

with pytest.raises(InvalidNzbError, match="^Failed to read NZB file"):
read_nzb_file(tmp_file)


def test_nzb_with_missing_file_attributes() -> None:
# Verifies that parsing an NZB whose <file> `date` attribute is not a valid
# datetime raises InvalidNzbError.
#
# Despite the test name, no <file> attribute is absent in this fixture: poster,
# date and subject are all present, and only `date` ("not a date") is invalid.
#
# NOTE(review): the "[email protected]" values in the poster attribute and in both
# <segment> bodies look like email-obfuscation placeholders rather than the
# fixture's real address / message IDs -- confirm against the repository.
# NOTE(review): this view shows the body without indentation; presumably the
# statements below are indented under the `def` in the real file -- confirm.
nzb = textwrap.dedent("""
<?xml version="1.0" encoding="iso-8859-1" ?>
<!DOCTYPE nzb PUBLIC "-//newzBin//DTD NZB 1.1//EN" "http://www.newzbin.com/DTD/nzb/nzb-1.1.dtd">
<nzb xmlns="http://www.newzbin.com/DTD/2003/nzb">
<head>
<meta type="title">Your File!</meta>
<meta type="password">secret</meta>
<meta type="tag">HD</meta>
<meta type="category">TV</meta>
</head>
<file poster="Joe Bloggs &lt;[email protected]&gt;" date="not a date" subject="Here's your file! abc-mr2a.r01 (1/2)">
<groups>
<group>alt.binaries.newzbin</group>
<group>alt.binaries.mojo</group>
</groups>
<segments>
<segment bytes="102394" number="1">[email protected]</segment>
<segment bytes="4501" number="2">[email protected]</segment>
</segments>
</file>
</nzb>
""").strip()
# `.strip()` above drops the newline that follows the opening triple quote, so
# the document handed to the parser starts at the XML declaration.
#
# The expected message names the failing field path (`$.posted_at`); `$` and `.`
# are backslash-escaped because `match` is applied as a regular expression.
with pytest.raises(InvalidNzbError, match=r"Invalid RFC3339 encoded datetime - at `\$\.posted_at`"):
Nzb.from_str(nzb)
2 changes: 2 additions & 0 deletions tests/test_nzbparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,3 +242,5 @@ def test_json_roundtrip(nzb_file: str) -> None:

assert original == deserialized
assert original_rnzb == deserialized_rnzb

assert deserialized.to_json(pretty=True) == deserialized_rnzb.to_json(pretty=True)
Loading

0 comments on commit d17e8b1

Please sign in to comment.