Skip to content

Commit

Permalink
perf: make .dt.weekday 20x faster
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli committed Aug 1, 2024
1 parent 5134051 commit ec5cb08
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 25 deletions.
4 changes: 4 additions & 0 deletions crates/polars-arrow/src/temporal_conversions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ pub const MICROSECONDS: i64 = 1_000_000;
pub const NANOSECONDS: i64 = 1_000_000_000;
/// Number of milliseconds in a day (`SECONDS_IN_DAY * MILLISECONDS`).
pub const MILLISECONDS_IN_DAY: i64 = SECONDS_IN_DAY * MILLISECONDS;
/// Number of microseconds in a day (`SECONDS_IN_DAY * MICROSECONDS`).
pub const MICROSECONDS_IN_DAY: i64 = SECONDS_IN_DAY * MICROSECONDS;
/// Number of nanoseconds in a day (`SECONDS_IN_DAY * NANOSECONDS`).
pub const NANOSECONDS_IN_DAY: i64 = SECONDS_IN_DAY * NANOSECONDS;
/// Day number of 1970-01-01 in the Common Era, counting 0001-01-01 as day 1.
///
/// Because the count is 1-based, the elapsed span between 0001-01-01 and
/// 1970-01-01 is `719_162` days, one less than this constant.
// NOTE(review): presumably chosen to line up with chrono's `num_days_from_ce` — confirm.
pub const EPOCH_DAYS_FROM_CE: i32 = 719_163;

Expand Down
7 changes: 0 additions & 7 deletions crates/polars-time/src/chunkedarray/date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,6 @@ pub trait DateMethods: AsDate {
ca.apply_kernel_cast::<Int8Type>(&date_to_month)
}

/// ISO day-of-week of every value in the date column.
///
/// The numbering is Monday = 1 through Sunday = 7. The values are produced by
/// running the `date_to_iso_weekday` kernel over the array returned by
/// `as_date` and casting the output to `Int8Type`.
fn weekday(&self) -> Int8Chunked {
    self.as_date()
        .apply_kernel_cast::<Int8Type>(&date_to_iso_weekday)
}

/// Returns the ISO week number starting from 1.
/// The return value ranges from 1 to 53. (The last week of year differs by years.)
fn week(&self) -> Int8Chunked {
Expand Down
16 changes: 0 additions & 16 deletions crates/polars-time/src/chunkedarray/kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,11 @@ use super::super::windows::calendar::*;
use super::*;

/// ISO-calendar accessors that hand back the narrow integer widths used by the
/// temporal kernels in this module (`i8` / `i32`).
///
/// Implemented in this file for `NaiveDateTime` and `NaiveDate`.
trait PolarsIso {
/// Weekday number counted from Monday (the implementations call
/// `weekday().number_from_monday()`), narrowed to `i8`.
fn p_weekday(&self) -> i8;
/// ISO week number (the implementations call `iso_week().week()`),
/// narrowed to `i8`.
fn week(&self) -> i8;
/// NOTE(review): presumably the ISO week-based year; the implementation is
/// not visible in this hunk — confirm.
fn iso_year(&self) -> i32;
}

impl PolarsIso for NaiveDateTime {
fn p_weekday(&self) -> i8 {
self.weekday().number_from_monday().try_into().unwrap()
}
fn week(&self) -> i8 {
self.iso_week().week().try_into().unwrap()
}
Expand All @@ -32,9 +28,6 @@ impl PolarsIso for NaiveDateTime {
}

impl PolarsIso for NaiveDate {
fn p_weekday(&self) -> i8 {
self.weekday().number_from_monday().try_into().unwrap()
}
fn week(&self) -> i8 {
self.iso_week().week().try_into().unwrap()
}
Expand Down Expand Up @@ -103,15 +96,6 @@ to_temporal_unit!(
ArrowDataType::Int32
);
// Instantiates `to_temporal_unit!` for `date_to_iso_weekday`, the kernel that
// `DateMethods::weekday` passes to `apply_kernel_cast::<Int8Type>`.
// NOTE(review): the macro definition is not visible here. Judging by the
// argument names, the invocation supplies: generated kernel name, `PolarsIso`
// accessor to call, i32 -> datetime conversion, input primitive type, output
// primitive type, and output Arrow data type — confirm against the macro.
#[cfg(feature = "dtype-date")]
to_temporal_unit!(
date_to_iso_weekday,
p_weekday,
date32_to_datetime_opt,
i32,
i8,
ArrowDataType::Int8
);
#[cfg(feature = "dtype-date")]
to_temporal_unit!(
date_to_year,
year,
Expand Down
29 changes: 27 additions & 2 deletions crates/polars-time/src/series/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use std::ops::{Deref, Div};

use arrow::temporal_conversions::{MICROSECONDS_IN_DAY, MILLISECONDS_IN_DAY, NANOSECONDS_IN_DAY};
use polars_core::prelude::arity::unary_elementwise_values;
use polars_core::prelude::*;

use crate::chunkedarray::*;
Expand Down Expand Up @@ -86,9 +88,32 @@ pub trait TemporalMethods: AsSeries {
let s = self.as_series();
match s.dtype() {
#[cfg(feature = "dtype-date")]
DataType::Date => s.date().map(|ca| ca.weekday()),
DataType::Date => s.date().map(|ca| {
// Closed formula to find weekday, no need to go via Chrono.
// The 4 comes from the fact that 1970-01-01 was a Thursday.
// We do an extra `+ 7` then `% 7` to ensure the result is non-negative.
unary_elementwise_values(ca, |t| (((t - 4) % 7 + 7) % 7 + 1) as i8)
}),
#[cfg(feature = "dtype-datetime")]
DataType::Datetime(_, _) => s.datetime().map(|ca| ca.weekday()),
DataType::Datetime(time_unit, time_zone) => s.datetime().map(|ca| {
match time_zone.as_deref() {
Some("UTC") | None => {
// fastpath!
// Same idea as above, but we need to subtract 1 for dates
// before 1970-01-01 with non-zero sub-daily components.
let divisor = match time_unit {
TimeUnit::Milliseconds => MILLISECONDS_IN_DAY,
TimeUnit::Microseconds => MICROSECONDS_IN_DAY,
TimeUnit::Nanoseconds => NANOSECONDS_IN_DAY,
};
unary_elementwise_values(ca, |t| {
let t = t / divisor - ((t < 0 && t % divisor != 0) as i64);
(((t - 4) % 7 + 7) % 7 + 1) as i8
})
},
_ => ca.weekday(),
}
}),
dt => polars_bail!(opq = weekday, dt),
}
}
Expand Down
29 changes: 29 additions & 0 deletions py-polars/tests/unit/datatypes/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
from datetime import date, datetime, time, timedelta, timezone
from typing import TYPE_CHECKING, Any, cast

import hypothesis.strategies as st
import numpy as np
import pandas as pd
import pyarrow as pa
import pytest
from hypothesis import given

import polars as pl
from polars.datatypes import DTYPE_TEMPORAL_UNITS
Expand Down Expand Up @@ -2280,3 +2282,30 @@ def test_misc_precision_any_value_conversion(time_zone: Any) -> None:
def test_pytime_conversion(tm: time) -> None:
s = pl.Series("tm", [tm])
assert s.to_list() == [tm]


@given(
    value=st.datetimes(min_value=datetime(1800, 1, 1), max_value=datetime(2100, 1, 1)),
    time_zone=st.sampled_from(["UTC", "Asia/Kathmandu", "Europe/Amsterdam", None]),
    time_unit=st.sampled_from(["ms", "us", "ns"]),
)
def test_weekday_vs_stdlib_datetime(
    value: datetime, time_zone: str | None, time_unit: TimeUnit
) -> None:
    """Check `.dt.weekday()` against `datetime.isoweekday()` for random inputs.

    The naive `value` is stamped with `time_zone` via `replace_time_zone`,
    which keeps the wall-clock time, so the expected weekday is simply that
    of the naive input.
    """
    result = (
        pl.Series([value], dtype=pl.Datetime(time_unit))
        # Hypothesis may draw a wall-clock time that falls in a DST gap or
        # overlap of the sampled zone. The default policy raises for those,
        # which would make this test flaky, so map them to null instead.
        .dt.replace_time_zone(time_zone, non_existent="null", ambiguous="null")
        .dt.weekday()
        .item()
    )
    if result is None:
        # The drawn local time does not map to a single instant in
        # `time_zone`; there is no weekday to compare.
        return
    expected = value.isoweekday()
    assert result == expected


@given(value=st.dates())
def test_weekday_vs_stdlib_date(value: date) -> None:
    """Check `.dt.weekday()` on a Date series against `date.isoweekday()`."""
    weekday = pl.Series([value]).dt.weekday().item()
    assert weekday == value.isoweekday()

0 comments on commit ec5cb08

Please sign in to comment.