diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 467cc1c..1cecc59 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -63,7 +63,7 @@ jobs: run: rustup show - uses: mozilla-actions/sccache-action@v0.0.3 - run: make venv - - run: .venv/bin/python -m pip install polars==0.20.6 # min version + - run: .venv/bin/python -m pip install polars==1.5.0 # min version - run: make install - run: make test diff --git a/Cargo.lock b/Cargo.lock index be8877f..ac1838f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -131,6 +131,15 @@ version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50" +[[package]] +name = "castaway" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0abae9be0aaf9ea96a3b1b8b1b55c602ca751eba1b1500220cea4ecbafe7c0d5" +dependencies = [ + "rustversion", +] + [[package]] name = "cc" version = "1.0.100" @@ -217,6 +226,21 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "compact_str" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6050c3a16ddab2e412160b31f2c871015704239bca62f72f6e5f0be631d3f644" +dependencies = [ + "castaway", + "cfg-if", + "itoa", + "rustversion", + "ryu", + "serde", + "static_assertions", +] + [[package]] name = "core-foundation-sys" version = "0.8.6" @@ -438,6 +462,7 @@ checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" dependencies = [ "equivalent", "hashbrown", + "serde", ] [[package]] @@ -464,26 +489,6 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9028f49264629065d057f340a86acb84867925865f73bbf8d47b4d149a7e88b8" -[[package]] -name = "jemalloc-sys" -version = "0.5.4+5.3.0-patched" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac6c1946e1cea1788cbfde01c993b52a10e2da07f4bac608228d1bed20bfebf2" -dependencies = [ - "cc", - "libc", -] - -[[package]] -name = "jemallocator" -version = "0.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0de374a9f8e63150e6f5e8a60cc14c668226d7a347d8aee1a45766e3c4dd3bc" -dependencies = [ - "jemalloc-sys", - "libc", -] - [[package]] name = "jobserver" version = "0.1.31" @@ -812,9 +817,9 @@ dependencies = [ [[package]] name = "polars" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad002eb9c541b4f7e0c7c759cefe884a0350e15d241231ac4be31c5568c15070" +checksum = "0e248cf2f0069277f8fe80d413cfb9240c7dd1cfa382b5674c1b4afa57222747" dependencies = [ "getrandom", "polars-arrow", @@ -832,9 +837,9 @@ dependencies = [ [[package]] name = "polars-arrow" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32d19c6db79cb6a3c55af3b5a3976276edaab64cbf7f69b392617c2af30d7742" +checksum = "2981d5b2f34c84069a39fceca0d36dffeb97db8cadba101e7ea6605c8d42294d" dependencies = [ "ahash", "atoi", @@ -856,6 +861,7 @@ dependencies = [ "parking_lot", "polars-arrow-format", "polars-error", + "polars-schema", "polars-utils", "ryu", "simdutf8", @@ -877,9 +883,9 @@ dependencies = [ [[package]] name = "polars-compute" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30194a5ff325f61d6fcb62dc215c9210f308fc4fc85a493ef777dbcd938cba24" +checksum = "5a97b2a5c9b880ab7e52553c40a336fdb6e3244bf896b4d4917700defe8085d5" dependencies = [ "bytemuck", "either", @@ -893,9 +899,9 @@ dependencies = [ [[package]] name = "polars-core" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ba2a3b736d55b92a12889672d0197dc25ad321ab23eba4168a3b6316a6b6349" +checksum = "d5bc2cadcca904a9dc4d2c2b437c346712806e9a678bf17c7e94ebf622faae76" dependencies = [ "ahash", "bitflags", @@ -912,12 +918,12 @@ dependencies = [ "polars-compute", "polars-error", "polars-row", + "polars-schema", "polars-utils", "rand", "rand_distr", "rayon", "regex", - "smartstring", "thiserror", "version_check", "xxhash-rust", @@ -925,9 +931,9 @@ dependencies = [ [[package]] name = "polars-error" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07101d1803ca2046cdb3a8adb1523ddcc879229860f0ac56a853034269dec1e1" +checksum = "56b0a8eb9b1e56a4640de6887d613cb4de73c4e09d491f3b779855d4c3bcb9ba" dependencies = [ "polars-arrow-format", "regex", @@ -937,14 +943,15 @@ dependencies = [ [[package]] name = "polars-expr" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd5c69634ddbb0f44186cd1c42d166963fc756f9cc994438e941bc2703ddbbab" +checksum = "34e9c0e8c7ba93aac64051b92dc68eac5a0e9543cf44ca784467db2c035821fe" dependencies = [ "ahash", "bitflags", "once_cell", "polars-arrow", + "polars-compute", "polars-core", "polars-io", "polars-ops", @@ -952,14 +959,13 @@ dependencies = [ "polars-time", "polars-utils", "rayon", - "smartstring", ] [[package]] name = "polars-ffi" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "121eb464d69f70284fa675f791e8a8fa5539efebb588ba9193ac90998c9c647c" +checksum = "d6740f858cc3f26445322a3a16aed0a1daabf01be1baef55b73f30d64deb7c93" dependencies = [ "polars-arrow", "polars-core", @@ -967,9 +973,9 @@ dependencies = [ [[package]] name = "polars-io" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a48ddf416ae185336c3d7880d2e05b7e55686e3e0da1014e5e7325eff9c7d722" +checksum = "454ebbebe1cb8cb4768adca44b8fc9431abc3c91d5927f6824e73f916bced911" dependencies = [ "ahash", "atoi_simd", @@ -978,6 +984,7 @@ dependencies = [ "chrono-tz 0.8.6", "fast-float", "glob", + "hashbrown", "home", "itoa", "memchr", @@ -989,20 +996,20 @@ dependencies = [ "polars-core", "polars-error", "polars-json", + "polars-schema", "polars-time", "polars-utils", "rayon", "regex", "ryu", "simdutf8", - "smartstring", ] [[package]] name = "polars-json" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0a43388585a922524e8bbaa1ed1391c9c4b0768a644585609afa9a2fd5fc702" +checksum = "4ca086fbbff6e46efbc97032e93f92690c1fc9c662fd5e1f13a42922bd7d3aa4" dependencies = [ "ahash", "chrono", @@ -1022,9 +1029,9 @@ dependencies = [ [[package]] name = "polars-lazy" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a514a85df9e7d501c71c96f094861d0608b05a3f533447b1c0ea9cf714162fcb" +checksum = "7e61c062e833d2376de0a4cf745504449215cbf499cea293cb592e674ffb39ca" dependencies = [ "ahash", "bitflags", @@ -1041,15 +1048,14 @@ dependencies = [ "polars-time", "polars-utils", "rayon", - "smartstring", "version_check", ] [[package]] name = "polars-mem-engine" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d057df81b17b4f0ea0e4424ee34f755e6b9ccfba432ecb2fe57dc4da6da2713" +checksum = "c0643812829cc990e1533a5bf48c21a1b3eaa46aabf2967b0f53f99097cbc74c" dependencies = [ "memmap2", "polars-arrow", @@ -1066,9 +1072,9 @@ dependencies = [ [[package]] name = "polars-ops" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01ba44233249b7937491b5d2bdbf14e4ad534c0a65d06548c3bc418fc3e60791" +checksum = "5ac14a136d87bea798f3db51d5987556deb2293da34bfc8b105ebffa05f6e810" dependencies = [ "ahash", "argminmax", @@ -1086,24 +1092,25 @@ dependencies = [ "polars-compute", "polars-core", "polars-error", + "polars-schema", "polars-utils", "rayon", "regex", - "smartstring", "unicode-reverse", "version_check", ] [[package]] name = "polars-parquet" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb2993265079ffa07dd16277189444424f8d787b00b01c6f6e001f58bab543ce" +checksum = "491f5af321169259d5b1294c9fe8ed89faaeac34b4dec4abcedc0d1b3d11013a" dependencies = [ "ahash", "base64", "bytemuck", "ethnum", + "hashbrown", "num-traits", "parquet-format-safe", "polars-arrow", @@ -1116,9 +1123,9 @@ dependencies = [ [[package]] name = "polars-pipe" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ccba94c4fa9fded0f41730f7649574c72d6d938a840731c7e4eea4e7ed5cecf" +checksum = "29215c31f599295cc0f803c42fc812cc518db6d5ed4d6c7cc03daf3976a0add5" dependencies = [ "crossbeam-channel", "crossbeam-queue", @@ -1135,20 +1142,20 @@ dependencies = [ "polars-row", "polars-utils", "rayon", - "smartstring", "uuid", "version_check", ] [[package]] name = "polars-plan" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d6b29cc53d6c086c09b11050b01c25c28f6a91339036ba1fb1250fcf0d89e74" +checksum = "e3f728df4bc643492a2057a0a125c7e550cbcfe35b391444653ad294be9ab190" dependencies = [ "ahash", "bitflags", "bytemuck", + "bytes", "chrono", "chrono-tz 0.8.6", "either", @@ -1165,16 +1172,15 @@ dependencies = [ "rayon", "recursive", "regex", - "smartstring", "strum_macros", "version_check", ] [[package]] name = "polars-row" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e11f43f48466c4b1caa6dc61c381dc10c2d67b87fcb74bc996e21c4f7b0a311" +checksum = "4eb931f0929ca7498b3ed5056357d2d364cad42cce95383a7e3983dbceb4bed1" dependencies = [ "bytemuck", "polars-arrow", @@ -1182,11 +1188,23 @@ dependencies = [ "polars-utils", ] +[[package]] +name = "polars-schema" +version = "0.43.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c7e1234b942d3244024ecbac9c7f5a48a52a815f8ca4b9d075fbba16afb1a39" +dependencies = [ + "indexmap", + "polars-error", + "polars-utils", + "version_check", +] + [[package]] name = "polars-sql" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e9338806e7254618eb819cc632c34b75b71d462222a913f9c1035ed81911ddc" +checksum = "ce52bfd2ef1e2e18ac26d7d7ea3f9132b199cff06d975156703fa5badcfae187" dependencies = [ "hex", "once_cell", @@ -1197,6 +1215,7 @@ dependencies = [ "polars-ops", "polars-plan", "polars-time", + "polars-utils", "rand", "serde", "serde_json", @@ -1205,9 +1224,9 @@ dependencies = [ [[package]] name = "polars-time" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30a601ab9a62e733b8b560b37642321cb1933faa194864739f6a59d6dfc4d686" +checksum = "9925ab75e1d859ae2283ca09d7683198b0b9ff5afecd03f2c9180f3e36e35056" dependencies = [ "atoi", "bytemuck", @@ -1221,27 +1240,27 @@ dependencies = [ "polars-ops", "polars-utils", "regex", - "smartstring", ] [[package]] name = "polars-utils" -version = "0.42.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19dd73207bd15efb0ae5c9c3ece3227927ed6a16ad63578acec342378e6bdcb4" +checksum = "b44846e1fc6ae1dfdc7f65a37af7d270d0a6a17a58fff76716561f5b887a8ad7" dependencies = [ "ahash", "bytemuck", "bytes", + "compact_str", "hashbrown", "indexmap", + "libc", "memmap2", "num-traits", "once_cell", "polars-error", "raw-cpuid", "rayon", - "smartstring", "stacker", "sysinfo", "version_check", @@ -1249,11 +1268,10 @@ dependencies = [ [[package]] name = "polars_xdt" -version = "0.15.2" +version = "0.15.3" dependencies = [ "chrono", "chrono-tz 0.9.0", - "jemallocator", "polars", "polars-arrow", "polars-ops", @@ -1363,9 +1381,9 @@ dependencies = [ [[package]] name = "pyo3-polars" -version = "0.16.0" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ffa7f0af4fb0ec561a736c75f4090938dc4c6c01ea235fc70b97fc5512d5688" +checksum = "55eebe3e4ead5efa995ede4017c2706f200fd0f02c4c1f48e21d63678fcac8d0" dependencies = [ "libc", "once_cell", @@ -1382,9 +1400,9 @@ dependencies = [ [[package]] name = "pyo3-polars-derive" -version = "0.10.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ec018e2563ba4e42b7311c84b10da132c10a56bf91bd2cddd45ffb985becb9c" +checksum = "6f6de264b871e9a1da6d4a0bc98801601defd5eeb25d4869463f3f231edde5c0" dependencies = [ "polars-core", "polars-ffi", @@ -1648,17 +1666,6 @@ version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" -[[package]] -name = "smartstring" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29" -dependencies = [ - "autocfg", - "static_assertions", - "version_check", -] - [[package]] name = "sqlparser" version = "0.49.0" diff --git a/Cargo.toml b/Cargo.toml index e391d0f..c757442 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,13 +10,11 @@ crate-type = ["cdylib"] [dependencies] pyo3 = { version = "0.22.2", features = ["extension-module", "abi3-py38"] } -pyo3-polars = { version = "0.16.0", features = ["derive"] } +pyo3-polars = { version = "0.17.0", features = ["derive"] } serde = { version = "1", features = ["derive"] } chrono = { version = "0.4.38", default-features = false, features = ["std", "unstable-locales"] } chrono-tz = "0.9.0" -polars = { version = "0.42.0", features = ["strings", "timezones"]} -polars-ops = { version = "0.42.0", default-features = false } -polars-arrow = { version = "0.42.0", default-features = false } +polars = { version = "0.43.1", features = ["strings", "timezones"]} +polars-ops = { version = "0.43.1", default-features = false } +polars-arrow = { version = "0.43.1", default-features = false } -[target.'cfg(target_os = "linux")'.dependencies] -jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] } diff --git a/polars_xdt/functions.py b/polars_xdt/functions.py index 6efcf04..bb22aea 100644 --- a/polars_xdt/functions.py +++ b/polars_xdt/functions.py @@ -5,8 +5,9 @@ from typing import TYPE_CHECKING, Literal, Sequence import polars as pl +from polars.plugins import register_plugin_function -from polars_xdt.utils import parse_into_expr, parse_version, register_plugin +from polars_xdt.utils import parse_into_expr if sys.version_info >= (3, 10): from typing import TypeAlias @@ -16,27 +17,20 @@ if TYPE_CHECKING: from datetime import date + from polars import Expr + from polars_xdt.typing import IntoExpr + Ambiguous: TypeAlias = Literal["earliest", "latest", "raise", "null"] + RollStrategy: TypeAlias = Literal["raise", "forward", "backward"] -if parse_version(pl.__version__) < parse_version("0.20.16"): - from polars.utils.udfs import ( # type: ignore[import-not-found] - _get_shared_lib_location, - ) - - lib: str | Path = _get_shared_lib_location(__file__) -else: - lib = Path(__file__).parent +PLUGIN_PATH = Path(__file__).parent mapping = {"Mon": 1, "Tue": 2, "Wed": 3, "Thu": 4, "Fri": 5, "Sat": 6, "Sun": 7} reverse_mapping = {value: key for key, value in mapping.items()} -if TYPE_CHECKING: - from polars import Expr - from polars.type_aliases import Ambiguous - def get_weekmask(weekend: Sequence[str]) -> list[bool]: if weekend == ("Sat", "Sun"): @@ -178,9 +172,9 @@ def from_local_datetime( """ expr = parse_into_expr(expr) from_tz = parse_into_expr(from_tz, str_as_lit=True) - return register_plugin( - lib=lib, - symbol="from_local_datetime", + return register_plugin_function( + plugin_path=PLUGIN_PATH, + function_name="from_local_datetime", is_elementwise=True, args=[expr, from_tz], kwargs={ @@ -245,9 +239,9 @@ def to_local_datetime( """ expr = parse_into_expr(expr) time_zone = parse_into_expr(time_zone, str_as_lit=True) - return register_plugin( - lib=lib, - symbol="to_local_datetime", + return register_plugin_function( + plugin_path=PLUGIN_PATH, + function_name="to_local_datetime", is_elementwise=True, args=[expr, time_zone], ) @@ -303,9 +297,9 @@ def format_localized( """ expr = parse_into_expr(expr) - return register_plugin( - lib=lib, - symbol="format_localized", + return register_plugin_function( + plugin_path=PLUGIN_PATH, + function_name="format_localized", is_elementwise=True, args=[expr], kwargs={"format": format, "locale": locale}, @@ -342,9 +336,9 @@ def to_julian_date(expr: str | pl.Expr) -> pl.Expr: """ expr = parse_into_expr(expr) - return register_plugin( - lib=lib, - symbol="to_julian_date", + return register_plugin_function( + plugin_path=PLUGIN_PATH, + function_name="to_julian_date", is_elementwise=True, args=[expr], ) @@ -573,9 +567,9 @@ def month_delta( start_dates = parse_into_expr(start_dates) end_dates = parse_into_expr(end_dates) - return register_plugin( - lib=lib, - symbol="month_delta", + return register_plugin_function( + plugin_path=PLUGIN_PATH, + function_name="month_delta", is_elementwise=True, args=[start_dates, end_dates], ) @@ -670,9 +664,9 @@ def arg_previous_greater(expr: IntoExpr) -> pl.Expr: """ expr = parse_into_expr(expr) - return register_plugin( - lib=lib, - symbol="arg_previous_greater", + return register_plugin_function( + plugin_path=PLUGIN_PATH, + function_name="arg_previous_greater", is_elementwise=False, args=[expr], ) diff --git a/polars_xdt/ranges.py b/polars_xdt/ranges.py index c284fa4..5b683ff 100644 --- a/polars_xdt/ranges.py +++ b/polars_xdt/ranges.py @@ -1,16 +1,25 @@ from __future__ import annotations import re -from typing import TYPE_CHECKING, Literal, Sequence, overload +from typing import TYPE_CHECKING, Literal, Sequence, Union, overload import polars as pl mapping = {"Mon": 1, "Tue": 2, "Wed": 3, "Thu": 4, "Fri": 5, "Sat": 6, "Sun": 7} if TYPE_CHECKING: + import sys + + if sys.version_info >= (3, 10): + from typing import TypeAlias + else: + from typing_extensions import TypeAlias from datetime import date, datetime, timedelta - from polars.type_aliases import ClosedInterval, IntoExprColumn + ClosedInterval: TypeAlias = Literal[ + "left", "right", "both", "none" + ] # ClosedWindow + IntoExprColumn: TypeAlias = Union["pl.Expr", "pl.Series", str] @overload diff --git a/polars_xdt/utils.py b/polars_xdt/utils.py index f330bd2..f49b04e 100644 --- a/polars_xdt/utils.py +++ b/polars_xdt/utils.py @@ -1,14 +1,11 @@ from __future__ import annotations -import re -from typing import TYPE_CHECKING, Any, Sequence +from typing import TYPE_CHECKING import polars as pl if TYPE_CHECKING: - from pathlib import Path - - from polars.type_aliases import IntoExpr, PolarsDataType + from polars_xdt.typing import IntoExpr, PolarsDataType def parse_into_expr( @@ -50,39 +47,3 @@ def parse_into_expr( expr = pl.lit(expr, dtype=dtype) return expr - - -def register_plugin( - *, - lib: str | Path, - symbol: str, - is_elementwise: bool, - kwargs: dict[str, Any] | None = None, - args: list[IntoExpr], -) -> pl.Expr: - if parse_version(pl.__version__) < parse_version("0.20.16"): - assert isinstance(args[0], pl.Expr) - assert isinstance(lib, str) - return args[0].register_plugin( - lib=lib, - symbol=symbol, - args=args[1:], - kwargs=kwargs, - is_elementwise=is_elementwise, - ) - from polars.plugins import register_plugin_function - - return register_plugin_function( - args=args, - plugin_path=lib, - function_name=symbol, - kwargs=kwargs, - is_elementwise=is_elementwise, - ) - - -def parse_version(version: Sequence[str | int]) -> tuple[int, ...]: - # Simple version parser; split into a tuple of ints for comparison. - if isinstance(version, str): - version = version.split(".") - return tuple(int(re.sub(r"\D", "", str(v))) for v in version) diff --git a/pyproject.toml b/pyproject.toml index 5fe715a..f716732 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["maturin>=1.0,<2.0", "polars>=0.20.6"] +requires = ["maturin>=1.0,<2.0", "polars>=1.5.0"] build-backend = "maturin" [project] diff --git a/src/expressions.rs b/src/expressions.rs index aa9a5fc..5228e94 100644 --- a/src/expressions.rs +++ b/src/expressions.rs @@ -4,12 +4,9 @@ use crate::format_localized::*; use crate::month_delta::*; use crate::timezone::*; use crate::to_julian::*; -use crate::utc_offsets::*; -use chrono_tz::Tz; use polars::prelude::*; use pyo3_polars::derive::polars_expr; use serde::Deserialize; -use std::str::FromStr; #[derive(Deserialize)] pub struct FromLocalDatetimeKwargs { @@ -22,13 +19,6 @@ pub struct FormatLocalizedKwargs { locale: String, } -fn duration_ms(input_fields: &[Field]) -> PolarsResult { - Ok(Field::new( - input_fields[0].name(), - DataType::Duration(TimeUnit::Milliseconds), - )) -} - pub fn to_local_datetime_output(input_fields: &[Field]) -> PolarsResult { let field = input_fields[0].clone(); let dtype = match field.dtype { @@ -37,7 +27,7 @@ pub fn to_local_datetime_output(input_fields: &[Field]) -> PolarsResult { "dtype '{}' not supported", field.dtype ), }; - Ok(Field::new(&field.name, dtype)) + Ok(Field::new(field.name, dtype)) } pub fn from_local_datetime_output( @@ -46,12 +36,14 @@ pub fn from_local_datetime_output( ) -> PolarsResult { let field = input_fields[0].clone(); let dtype = match field.dtype { - DataType::Datetime(unit, _) => DataType::Datetime(unit, Some(kwargs.to_tz)), + DataType::Datetime(unit, _) => { + DataType::Datetime(unit, Some(PlSmallStr::from_str(&kwargs.to_tz))) + } _ => polars_bail!(InvalidOperation: "dtype '{}' not supported", field.dtype ), }; - Ok(Field::new(&field.name, dtype)) + Ok(Field::new(field.name, dtype)) } #[polars_expr(output_type=Int32)] @@ -91,30 +83,6 @@ fn to_julian_date(inputs: &[Series]) -> PolarsResult { impl_to_julian_date(s) } -#[polars_expr(output_type_func=duration_ms)] -fn base_utc_offset(inputs: &[Series]) -> PolarsResult { - let s = &inputs[0]; - match s.dtype() { - DataType::Datetime(time_unit, Some(time_zone)) => { - let time_zone = Tz::from_str(time_zone).unwrap(); - Ok(impl_base_utc_offset(s.datetime()?, time_unit, &time_zone).into_series()) - } - _ => polars_bail!(InvalidOperation: "base_utc_offset only works on Datetime type."), - } -} - -#[polars_expr(output_type_func=duration_ms)] -fn dst_offset(inputs: &[Series]) -> PolarsResult { - let s = &inputs[0]; - match s.dtype() { - DataType::Datetime(time_unit, Some(time_zone)) => { - let time_zone = Tz::from_str(time_zone).unwrap(); - Ok(impl_dst_offset(s.datetime()?, time_unit, &time_zone).into_series()) - } - _ => polars_bail!(InvalidOperation: "base_utc_offset only works on Datetime type."), - } -} - // todo: can we make this bigidx-dependent? #[polars_expr(output_type=UInt32)] fn arg_previous_greater(inputs: &[Series]) -> PolarsResult { diff --git a/src/format_localized.rs b/src/format_localized.rs index f0fb8a8..ae1c9a3 100644 --- a/src/format_localized.rs +++ b/src/format_localized.rs @@ -28,9 +28,8 @@ pub(crate) fn impl_format_localized( let locale = chrono::Locale::try_from(locale).map_err( |_| polars_err!(ComputeError: format!("given locale {} could not be parsed", locale)), )?; - let name = s.name(); - let mut ca: StringChunked = match s.dtype() { + let ca: StringChunked = match s.dtype() { DataType::Date => { let ca = s.date()?; ca.apply_kernel_cast(&|arr| { @@ -87,6 +86,5 @@ pub(crate) fn impl_format_localized( } _ => unreachable!(), }; - ca.rename(name); Ok(ca.into_series()) } diff --git a/src/lib.rs b/src/lib.rs index faa0c37..c66ccd6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,20 +4,16 @@ mod format_localized; mod month_delta; mod timezone; mod to_julian; -mod utc_offsets; use pyo3::types::{PyModule, PyModuleMethods}; use pyo3::{pymodule, Bound, PyResult}; - -#[cfg(target_os = "linux")] -use jemallocator::Jemalloc; - -#[global_allocator] -#[cfg(target_os = "linux")] -static ALLOC: Jemalloc = Jemalloc; +use pyo3_polars::PolarsAllocator; #[pymodule] fn _internal(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add("__version__", env!("CARGO_PKG_VERSION"))?; Ok(()) } + +#[global_allocator] +static ALLOC: PolarsAllocator = PolarsAllocator::new(); diff --git a/src/timezone.rs b/src/timezone.rs index c0e0c66..4186b9a 100644 --- a/src/timezone.rs +++ b/src/timezone.rs @@ -139,6 +139,6 @@ pub fn elementwise_from_local_datetime( } }), }; - let out = out?.into_datetime(datetime.time_unit(), Some(out_tz.to_string())); + let out = out?.into_datetime(datetime.time_unit(), Some(PlSmallStr::from_str(out_tz))); Ok(out) } diff --git a/src/to_julian.rs b/src/to_julian.rs index 91b5a7c..ba5321f 100644 --- a/src/to_julian.rs +++ b/src/to_julian.rs @@ -45,7 +45,7 @@ pub(crate) fn impl_to_julian_date(s: &Series) -> PolarsResult { }) .collect_trusted() }); - Ok(Float64Chunked::from_chunk_iter(ca.name(), chunks).into_series()) + Ok(Float64Chunked::from_chunk_iter(PlSmallStr::EMPTY, chunks).into_series()) } DataType::Datetime(time_unit, time_zone) => { if !(time_zone.is_none() || time_zone.as_deref() == Some("UTC")) { @@ -81,7 +81,7 @@ pub(crate) fn impl_to_julian_date(s: &Series) -> PolarsResult { }) .collect_trusted() }); - Ok(Float64Chunked::from_chunk_iter(ca.name(), chunks).into_series()) + Ok(Float64Chunked::from_chunk_iter(PlSmallStr::EMPTY, chunks).into_series()) } _ => { polars_bail!(InvalidOperation: "polars_xdt to_julian currently only works on Date type. \ diff --git a/src/utc_offsets.rs b/src/utc_offsets.rs deleted file mode 100644 index 59cf1fc..0000000 --- a/src/utc_offsets.rs +++ /dev/null @@ -1,43 +0,0 @@ -use chrono::TimeZone; -use chrono_tz::OffsetComponents; -use chrono_tz::Tz; -use polars::prelude::*; -use polars_arrow::temporal_conversions::{ - timestamp_ms_to_datetime, timestamp_ns_to_datetime, timestamp_us_to_datetime, -}; - -pub(crate) fn impl_base_utc_offset( - ca: &DatetimeChunked, - time_unit: &TimeUnit, - time_zone: &Tz, -) -> DurationChunked { - let timestamp_to_datetime = match time_unit { - TimeUnit::Nanoseconds => timestamp_ns_to_datetime, - TimeUnit::Microseconds => timestamp_us_to_datetime, - TimeUnit::Milliseconds => timestamp_ms_to_datetime, - }; - ca.0.apply_values(|t| { - let ndt = timestamp_to_datetime(t); - let dt = time_zone.from_utc_datetime(&ndt); - dt.offset().base_utc_offset().num_milliseconds() - }) - .into_duration(TimeUnit::Milliseconds) -} - -pub(crate) fn impl_dst_offset( - ca: &DatetimeChunked, - time_unit: &TimeUnit, - time_zone: &Tz, -) -> DurationChunked { - let timestamp_to_datetime = match time_unit { - TimeUnit::Nanoseconds => timestamp_ns_to_datetime, - TimeUnit::Microseconds => timestamp_us_to_datetime, - TimeUnit::Milliseconds => timestamp_ms_to_datetime, - }; - ca.0.apply_values(|t| { - let ndt = timestamp_to_datetime(t); - let dt = time_zone.from_utc_datetime(&ndt); - dt.offset().dst_offset().num_milliseconds() - }) - .into_duration(TimeUnit::Milliseconds) -} diff --git a/tests/test_format_localized.py b/tests/test_format_localized.py index ead01f2..530b074 100644 --- a/tests/test_format_localized.py +++ b/tests/test_format_localized.py @@ -1,8 +1,8 @@ from datetime import date, datetime +from typing import Any import polars as pl import pytest -from polars.type_aliases import TimeUnit import polars_xdt as xdt @@ -28,7 +28,7 @@ ], ) def test_format_localized_datetime( - time_unit: TimeUnit, expected_us: str, expected_ukr: str + time_unit: Any, expected_us: str, expected_ukr: str ) -> None: df = pl.DataFrame( { @@ -82,7 +82,7 @@ def test_tz_aware() -> None: @pytest.mark.parametrize("time_unit", ["ms", "us", "ns"]) -def test_pre_epoch(time_unit: TimeUnit) -> None: +def test_pre_epoch(time_unit: Any) -> None: df = pl.DataFrame( { "date_col": [datetime(1960, 1, 1, 0, 0, 0, 1)], diff --git a/tests/test_timezone.py b/tests/test_timezone.py index d1c4e78..4d8afc0 100644 --- a/tests/test_timezone.py +++ b/tests/test_timezone.py @@ -40,11 +40,11 @@ def test_convert_tz_to_local_datetime( expected = df.with_columns(pl.lit(local_date).alias("local_dt")) - result = df.with_columns( + result = df.lazy().with_columns( xdt.to_local_datetime("date", pl.col("timezone")).alias("local_dt") ) - - assert_frame_equal(result, expected) + assert result.collect_schema() == expected.schema + assert_frame_equal(result.collect(), expected) @pytest.mark.parametrize( @@ -76,13 +76,13 @@ def test_convert_tz_from_local_datetime( pl.lit(date).alias("date").dt.convert_time_zone("Europe/London") ) - result = df.with_columns( + result = df.lazy().with_columns( xdt.from_local_datetime( "local_date", pl.col("timezone"), "Europe/London" ).alias("date") ) - - assert_frame_equal(result, expected) + assert result.collect_schema() == expected.schema + assert_frame_equal(result.collect(), expected) def test_convert_tz_from_local_datetime_literal() -> None: @@ -118,20 +118,14 @@ def test_convert_tz_to_local_datetime_literal() -> None: assert_frame_equal(result, expected) -@pytest.mark.filterwarnings("ignore:Resolving the schema of a LazyFrame") def test_convert_tz_to_local_datetime_schema() -> None: df = pl.LazyFrame({"date": [datetime(2020, 10, 15, tzinfo=timezone.utc)]}) result = df.with_columns( xdt.from_local_datetime("date", "America/New_York", "Asia/Kathmandu") - ).schema["date"] - assert result == pl.Datetime("us", "Asia/Kathmandu") - result = ( - df.with_columns( - xdt.from_local_datetime( - "date", "America/New_York", "Asia/Kathmandu" - ) - ) - .collect() - .schema["date"] ) - assert result == pl.Datetime("us", "Asia/Kathmandu") + assert result.collect_schema()["date"] == pl.Datetime( + "us", "Asia/Kathmandu" + ) + assert result.collect().schema["date"] == pl.Datetime( + "us", "Asia/Kathmandu" + )