From 19e33865551a7ad5f08df6362ebeeb34cd2cfe87 Mon Sep 17 00:00:00 2001
From: andrew <agossard@gmail.com>
Date: Fri, 2 Aug 2024 08:59:35 -0400
Subject: [PATCH 01/11] allow float `by` column in interpolate_by

---
 .../ops/interpolation/interpolate_by.rs       | 20 ++++++++++++----
 .../unit/operations/test_interpolate_by.py    | 23 +++++++++++++++----
 2 files changed, 35 insertions(+), 8 deletions(-)
diff --git a/crates/polars-ops/src/series/ops/interpolation/interpolate_by.rs b/crates/polars-ops/src/series/ops/interpolation/interpolate_by.rs
index 674cbab514e9..c77d2ad6f157 100644
--- a/crates/polars-ops/src/series/ops/interpolation/interpolate_by.rs
+++ b/crates/polars-ops/src/series/ops/interpolation/interpolate_by.rs
@@ -87,7 +87,7 @@ fn interpolate_impl_by_sorted<T, F, I>(
 ) -> PolarsResult<ChunkedArray<T>>
 where
     T: PolarsNumericType,
-    F: PolarsIntegerType,
+    F: PolarsNumericType,
     I: Fn(T::Native, T::Native, &[F::Native], &mut Vec<T::Native>),
 {
     // This implementation differs from pandas as that boundary None's are not removed.
@@ -169,7 +169,7 @@ fn interpolate_impl_by<T, F, I>(
 ) -> PolarsResult<ChunkedArray<T>>
 where
     T: PolarsNumericType,
-    F: PolarsIntegerType,
+    F: PolarsNumericType,
     I: Fn(T::Native, T::Native, &[F::Native], &mut [T::Native], &[IdxSize]),
 {
     // This implementation differs from pandas as that boundary None's are not removed.
@@ -273,7 +273,7 @@ pub fn interpolate_by(s: &Series, by: &Series, by_is_sorted: bool) -> PolarsResu
     ) -> PolarsResult<Series>
     where
         T: PolarsNumericType,
-        F: PolarsIntegerType,
+        F: PolarsNumericType,
         ChunkedArray<T>: IntoSeries,
     {
         if is_sorted {
@@ -290,6 +290,18 @@ pub fn interpolate_by(s: &Series, by: &Series, by_is_sorted: bool) -> PolarsResu
     }
 
     match (s.dtype(), by.dtype()) {
+        (DataType::Float64, DataType::Float64) => {
+            func(s.f64().unwrap(), by.f64().unwrap(), by_is_sorted)
+        },
+        (DataType::Float64, DataType::Float32) => {
+            func(s.f64().unwrap(), by.f32().unwrap(), by_is_sorted)
+        },
+        (DataType::Float32, DataType::Float64) => {
+            func(s.f32().unwrap(), by.f64().unwrap(), by_is_sorted)
+        },
+        (DataType::Float32, DataType::Float32) => {
+            func(s.f32().unwrap(), by.f32().unwrap(), by_is_sorted)
+        },
         (DataType::Float64, DataType::Int64) => {
             func(s.f64().unwrap(), by.i64().unwrap(), by_is_sorted)
         },
@@ -326,7 +338,7 @@ pub fn interpolate_by(s: &Series, by: &Series, by_is_sorted: bool) -> PolarsResu
         _ => {
             polars_bail!(InvalidOperation: "expected series to be Float64, Float32, \
                 Int64, Int32, UInt64, UInt32, and `by` to be Date, Datetime, Int64, Int32, \
-                UInt64, or UInt32")
+                UInt64, UInt32, Float32 or Float64")
         },
     }
 }
diff --git a/py-polars/tests/unit/operations/test_interpolate_by.py b/py-polars/tests/unit/operations/test_interpolate_by.py
index 423992abeadd..93acff0b6366 100644
--- a/py-polars/tests/unit/operations/test_interpolate_by.py
+++ b/py-polars/tests/unit/operations/test_interpolate_by.py
@@ -28,6 +28,8 @@
         pl.Int32,
         pl.UInt64,
         pl.UInt32,
+        pl.Float32,
+        pl.Float64
     ],
 )
 @pytest.mark.parametrize(
@@ -143,14 +145,16 @@ def test_interpolate_by_trailing_nulls() -> None:
 
 
 @given(data=st.data())
-def test_interpolate_vs_numpy(data: st.DataObject) -> None:
+@pytest.mark.parametrize("x_dtype", [pl.Date, pl.Float64])
+def test_interpolate_vs_numpy(data: st.DataObject, x_dtype) -> None:
+    
     dataframe = (
         data.draw(
             dataframes(
                 [
                     column(
                         "ts",
-                        dtype=pl.Date,
+                        dtype=x_dtype,
                         allow_null=False,
                     ),
                     column(
@@ -166,13 +170,24 @@ def test_interpolate_vs_numpy(data: st.DataObject) -> None:
         .fill_nan(None)
         .unique("ts")
     )
+    
+    if x_dtype == pl.Float64:
+        assume(not dataframe['ts'].is_nan().any())
+        assume(not dataframe['ts'].is_null().any())
+        assume(not dataframe["ts"].is_in([float("-inf"), float("inf")]).any())
+
     assume(not dataframe["value"].is_null().all())
     assume(not dataframe["value"].is_in([float("-inf"), float("inf")]).any())
+
+    dataframe = dataframe.sort('ts')
+
     result = dataframe.select(pl.col("value").interpolate_by("ts"))["value"]
 
     mask = dataframe["value"].is_not_null()
-    x = dataframe["ts"].to_numpy().astype("int64")
-    xp = dataframe["ts"].filter(mask).to_numpy().astype("int64")
+    
+    np_dtype = "int64" if x_dtype == pl.Date else 'float64'
+    x = dataframe["ts"].to_numpy().astype(np_dtype)
+    xp = dataframe["ts"].filter(mask).to_numpy().astype(np_dtype)
     yp = dataframe["value"].filter(mask).to_numpy().astype("float64")
     interp = np.interp(x, xp, yp)
     # Polars preserves nulls on boundaries, but NumPy doesn't.

From f13f211c11442fd1f748fa73519db5752f5d57cd Mon Sep 17 00:00:00 2001
From: andrew <agossard@gmail.com>
Date: Fri, 2 Aug 2024 15:57:29 -0400
Subject: [PATCH 02/11] Fix formatting problems

---
 py-polars/tests/unit/operations/test_interpolate_by.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/py-polars/tests/unit/operations/test_interpolate_by.py b/py-polars/tests/unit/operations/test_interpolate_by.py
index 93acff0b6366..63efa88aeef1 100644
--- a/py-polars/tests/unit/operations/test_interpolate_by.py
+++ b/py-polars/tests/unit/operations/test_interpolate_by.py
@@ -147,7 +147,7 @@ def test_interpolate_by_trailing_nulls() -> None:
 @given(data=st.data())
 @pytest.mark.parametrize("x_dtype", [pl.Date, pl.Float64])
 def test_interpolate_vs_numpy(data: st.DataObject, x_dtype) -> None:
-    
+
     dataframe = (
         data.draw(
             dataframes(
@@ -170,7 +170,7 @@ def test_interpolate_vs_numpy(data: st.DataObject, x_dtype) -> None:
         .fill_nan(None)
         .unique("ts")
     )
-    
+
     if x_dtype == pl.Float64:
         assume(not dataframe['ts'].is_nan().any())
         assume(not dataframe['ts'].is_null().any())
@@ -184,7 +184,7 @@ def test_interpolate_vs_numpy(data: st.DataObject, x_dtype) -> None:
     result = dataframe.select(pl.col("value").interpolate_by("ts"))["value"]
 
     mask = dataframe["value"].is_not_null()
-    
+
     np_dtype = "int64" if x_dtype == pl.Date else 'float64'
     x = dataframe["ts"].to_numpy().astype(np_dtype)
     xp = dataframe["ts"].filter(mask).to_numpy().astype(np_dtype)

From f226b98376819c5a962d6516de71b9aa06cf37d3 Mon Sep 17 00:00:00 2001
From: andrew <agossard@gmail.com>
Date: Fri, 2 Aug 2024 15:59:43 -0400
Subject: [PATCH 03/11] type hint

---
 py-polars/tests/unit/operations/test_interpolate_by.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/py-polars/tests/unit/operations/test_interpolate_by.py b/py-polars/tests/unit/operations/test_interpolate_by.py
index 63efa88aeef1..95f2eae1e81f 100644
--- a/py-polars/tests/unit/operations/test_interpolate_by.py
+++ b/py-polars/tests/unit/operations/test_interpolate_by.py
@@ -146,7 +146,7 @@ def test_interpolate_by_trailing_nulls() -> None:
 
 @given(data=st.data())
 @pytest.mark.parametrize("x_dtype", [pl.Date, pl.Float64])
-def test_interpolate_vs_numpy(data: st.DataObject, x_dtype) -> None:
+def test_interpolate_vs_numpy(data: st.DataObject, x_dtype: pl.DataType) -> None:
 
     dataframe = (
         data.draw(

From 200d09846da7214e491ae2d9bc523eca3044aa62 Mon Sep 17 00:00:00 2001
From: andrew <agossard@gmail.com>
Date: Fri, 2 Aug 2024 23:29:59 -0400
Subject: [PATCH 04/11] more formatting

---
 .../tests/unit/operations/test_interpolate_by.py      | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/py-polars/tests/unit/operations/test_interpolate_by.py b/py-polars/tests/unit/operations/test_interpolate_by.py
index 95f2eae1e81f..f073e5366617 100644
--- a/py-polars/tests/unit/operations/test_interpolate_by.py
+++ b/py-polars/tests/unit/operations/test_interpolate_by.py
@@ -29,7 +29,7 @@
         pl.UInt64,
         pl.UInt32,
         pl.Float32,
-        pl.Float64
+        pl.Float64,
     ],
 )
 @pytest.mark.parametrize(
@@ -147,7 +147,6 @@ def test_interpolate_by_trailing_nulls() -> None:
 @given(data=st.data())
 @pytest.mark.parametrize("x_dtype", [pl.Date, pl.Float64])
 def test_interpolate_vs_numpy(data: st.DataObject, x_dtype: pl.DataType) -> None:
-
     dataframe = (
         data.draw(
             dataframes(
@@ -172,20 +171,20 @@ def test_interpolate_vs_numpy(data: st.DataObject, x_dtype: pl.DataType) -> None
     )
 
     if x_dtype == pl.Float64:
-        assume(not dataframe['ts'].is_nan().any())
-        assume(not dataframe['ts'].is_null().any())
+        assume(not dataframe["ts"].is_nan().any())
+        assume(not dataframe["ts"].is_null().any())
         assume(not dataframe["ts"].is_in([float("-inf"), float("inf")]).any())
 
     assume(not dataframe["value"].is_null().all())
     assume(not dataframe["value"].is_in([float("-inf"), float("inf")]).any())
 
-    dataframe = dataframe.sort('ts')
+    dataframe = dataframe.sort("ts")
 
     result = dataframe.select(pl.col("value").interpolate_by("ts"))["value"]
 
     mask = dataframe["value"].is_not_null()
 
-    np_dtype = "int64" if x_dtype == pl.Date else 'float64'
+    np_dtype = "int64" if x_dtype == pl.Date else "float64"
     x = dataframe["ts"].to_numpy().astype(np_dtype)
     xp = dataframe["ts"].filter(mask).to_numpy().astype(np_dtype)
     yp = dataframe["value"].filter(mask).to_numpy().astype("float64")

From 0e3b77d793c0b6178d6f3ad2ef0e33ad7dcc29fb Mon Sep 17 00:00:00 2001
From: andrew <agossard@gmail.com>
Date: Fri, 2 Aug 2024 23:53:57 -0400
Subject: [PATCH 05/11] less wasteful hypothesis testing parameters

---
 py-polars/tests/unit/operations/test_interpolate_by.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/py-polars/tests/unit/operations/test_interpolate_by.py b/py-polars/tests/unit/operations/test_interpolate_by.py
index f073e5366617..095c00343356 100644
--- a/py-polars/tests/unit/operations/test_interpolate_by.py
+++ b/py-polars/tests/unit/operations/test_interpolate_by.py
@@ -155,6 +155,7 @@ def test_interpolate_vs_numpy(data: st.DataObject, x_dtype: pl.DataType) -> None
                         "ts",
                         dtype=x_dtype,
                         allow_null=False,
+                        strategy=st.floats(allow_nan=False, allow_infinity=False, allow_subnormal=False) if x_dtype == pl.Float64 else None,
                     ),
                     column(
                         "value",

From 04c7c2b4579764222acb282d7d62f04216e95466 Mon Sep 17 00:00:00 2001
From: andrew <agossard@gmail.com>
Date: Sat, 3 Aug 2024 00:07:09 -0400
Subject: [PATCH 06/11] formatting

---
 py-polars/tests/unit/operations/test_interpolate_by.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/py-polars/tests/unit/operations/test_interpolate_by.py b/py-polars/tests/unit/operations/test_interpolate_by.py
index 095c00343356..5c5f89d331a2 100644
--- a/py-polars/tests/unit/operations/test_interpolate_by.py
+++ b/py-polars/tests/unit/operations/test_interpolate_by.py
@@ -147,6 +147,11 @@ def test_interpolate_by_trailing_nulls() -> None:
 @given(data=st.data())
 @pytest.mark.parametrize("x_dtype", [pl.Date, pl.Float64])
 def test_interpolate_vs_numpy(data: st.DataObject, x_dtype: pl.DataType) -> None:
+    if x_dtype == pl.Float64:
+        by_strategy = st.floats(allow_nan=False, allow_infinity=False, allow_subnormal=False)
+    else:
+        by_strategy = None
+
     dataframe = (
         data.draw(
             dataframes(
@@ -155,7 +160,7 @@ def test_interpolate_vs_numpy(data: st.DataObject, x_dtype: pl.DataType) -> None
                         "ts",
                         dtype=x_dtype,
                         allow_null=False,
-                        strategy=st.floats(allow_nan=False, allow_infinity=False, allow_subnormal=False) if x_dtype == pl.Float64 else None,
+                        strategy=by_strategy,
                     ),
                     column(
                         "value",

From 5b25b45c42198379e7bc51485327109a5fc22fae Mon Sep 17 00:00:00 2001
From: andrew <agossard@gmail.com>
Date: Sat, 3 Aug 2024 00:08:58 -0400
Subject: [PATCH 07/11] formatting: wrap st.floats arguments

---
 py-polars/tests/unit/operations/test_interpolate_by.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/py-polars/tests/unit/operations/test_interpolate_by.py b/py-polars/tests/unit/operations/test_interpolate_by.py
index 5c5f89d331a2..4161a2f1c04c 100644
--- a/py-polars/tests/unit/operations/test_interpolate_by.py
+++ b/py-polars/tests/unit/operations/test_interpolate_by.py
@@ -148,7 +148,9 @@ def test_interpolate_by_trailing_nulls() -> None:
 @pytest.mark.parametrize("x_dtype", [pl.Date, pl.Float64])
 def test_interpolate_vs_numpy(data: st.DataObject, x_dtype: pl.DataType) -> None:
     if x_dtype == pl.Float64:
-        by_strategy = st.floats(allow_nan=False, allow_infinity=False, allow_subnormal=False)
+        by_strategy = st.floats(
+            allow_nan=False, allow_infinity=False, allow_subnormal=False
+        )
     else:
         by_strategy = None
 

From 9210a080633c255a6b24054afe8feb576024b5a7 Mon Sep 17 00:00:00 2001
From: andrew <agossard@gmail.com>
Date: Sat, 3 Aug 2024 08:33:43 -0400
Subject: [PATCH 08/11] Add float version of test_interpolate_by_trailing_nulls

---
 .../unit/operations/test_interpolate_by.py    | 25 ++++++++++++++++---
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/py-polars/tests/unit/operations/test_interpolate_by.py b/py-polars/tests/unit/operations/test_interpolate_by.py
index 4161a2f1c04c..7e672890cbc4 100644
--- a/py-polars/tests/unit/operations/test_interpolate_by.py
+++ b/py-polars/tests/unit/operations/test_interpolate_by.py
@@ -118,8 +118,10 @@ def test_interpolate_by_leading_nulls() -> None:
     assert_frame_equal(result, expected)
 
 
-def test_interpolate_by_trailing_nulls() -> None:
-    df = pl.DataFrame(
+@pytest.mark.parametrize("dataset", ["floats", "dates"])
+def test_interpolate_by_trailing_nulls(dataset: str) -> None:
+    input_data = {
+        "dates": pl.DataFrame(
         {
             "times": [
                 date(2020, 1, 1),
@@ -130,10 +132,25 @@ def test_interpolate_by_trailing_nulls() -> None:
                 date(2020, 1, 13),
             ],
             "values": [1, None, None, 5, None, None],
+        }),
+        "floats": pl.DataFrame(
+        {
+            "times": [0.2, 0.4, 0.5, 0.6, 0.9, 1.1],
+            "values": [1, None, None, 5, None, None],
         }
-    )
+        )
+    }
+
+    expected_data = {
+        "dates": pl.DataFrame({"values": [1.0, 1.7999999999999998, 4.6, 5.0, None, None]}),
+        "floats": pl.DataFrame({"values": [1.0, 3.0, 4.0, 5.0, None, None]})
+    }
+
+    df = input_data[dataset]
+    expected = expected_data[dataset]
+
     result = df.select(pl.col("values").interpolate_by("times"))
-    expected = pl.DataFrame({"values": [1.0, 1.7999999999999998, 4.6, 5.0, None, None]})
+
     assert_frame_equal(result, expected)
     result = (
         df.sort("times", descending=True)

From bf3af7abaa70cd43e9c7ad731505e25ce79d1e6e Mon Sep 17 00:00:00 2001
From: andrew <agossard@gmail.com>
Date: Sat, 3 Aug 2024 10:02:09 -0400
Subject: [PATCH 09/11] more formatting

---
 .../unit/operations/test_interpolate_by.py    | 39 ++++++++++---------
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/py-polars/tests/unit/operations/test_interpolate_by.py b/py-polars/tests/unit/operations/test_interpolate_by.py
index 7e672890cbc4..39c6aa64d741 100644
--- a/py-polars/tests/unit/operations/test_interpolate_by.py
+++ b/py-polars/tests/unit/operations/test_interpolate_by.py
@@ -122,28 +122,31 @@ def test_interpolate_by_leading_nulls() -> None:
 def test_interpolate_by_trailing_nulls(dataset: str) -> None:
     input_data = {
         "dates": pl.DataFrame(
-        {
-            "times": [
-                date(2020, 1, 1),
-                date(2020, 1, 3),
-                date(2020, 1, 10),
-                date(2020, 1, 11),
-                date(2020, 1, 12),
-                date(2020, 1, 13),
-            ],
-            "values": [1, None, None, 5, None, None],
-        }),
+            {
+                "times": [
+                    date(2020, 1, 1),
+                    date(2020, 1, 3),
+                    date(2020, 1, 10),
+                    date(2020, 1, 11),
+                    date(2020, 1, 12),
+                    date(2020, 1, 13),
+                ],
+                "values": [1, None, None, 5, None, None],
+            }
+        ),
         "floats": pl.DataFrame(
-        {
-            "times": [0.2, 0.4, 0.5, 0.6, 0.9, 1.1],
-            "values": [1, None, None, 5, None, None],
-        }
-        )
+            {
+                "times": [0.2, 0.4, 0.5, 0.6, 0.9, 1.1],
+                "values": [1, None, None, 5, None, None],
+            }
+        ),
     }
 
     expected_data = {
-        "dates": pl.DataFrame({"values": [1.0, 1.7999999999999998, 4.6, 5.0, None, None]}),
-        "floats": pl.DataFrame({"values": [1.0, 3.0, 4.0, 5.0, None, None]})
+        "dates": pl.DataFrame(
+            {"values": [1.0, 1.7999999999999998, 4.6, 5.0, None, None]}
+        ),
+        "floats": pl.DataFrame({"values": [1.0, 3.0, 4.0, 5.0, None, None]}),
     }
 
     df = input_data[dataset]

From c7c9ca328626242da8999c8f4c3d76ceed0971cb Mon Sep 17 00:00:00 2001
From: andrew <agossard@gmail.com>
Date: Mon, 5 Aug 2024 18:51:38 -0400
Subject: [PATCH 10/11] use @given instead of parametrize

---
 py-polars/tests/unit/operations/test_interpolate_by.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/py-polars/tests/unit/operations/test_interpolate_by.py b/py-polars/tests/unit/operations/test_interpolate_by.py
index 39c6aa64d741..0e981ec09e1f 100644
--- a/py-polars/tests/unit/operations/test_interpolate_by.py
+++ b/py-polars/tests/unit/operations/test_interpolate_by.py
@@ -164,8 +164,7 @@ def test_interpolate_by_trailing_nulls(dataset: str) -> None:
     assert_frame_equal(result, expected)
 
 
-@given(data=st.data())
-@pytest.mark.parametrize("x_dtype", [pl.Date, pl.Float64])
+@given(data=st.data(), x_dtype=st.sampled_from([pl.Date, pl.Float64]))
 def test_interpolate_vs_numpy(data: st.DataObject, x_dtype: pl.DataType) -> None:
     if x_dtype == pl.Float64:
         by_strategy = st.floats(

From c9ff1a858565e9b311592f19fbfdf18dcfe3fa1c Mon Sep 17 00:00:00 2001
From: andrew <agossard@gmail.com>
Date: Thu, 8 Aug 2024 21:41:08 -0400
Subject: [PATCH 11/11] try float bounds on hypothesis test

---
 py-polars/tests/unit/operations/test_interpolate_by.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/py-polars/tests/unit/operations/test_interpolate_by.py b/py-polars/tests/unit/operations/test_interpolate_by.py
index 0e981ec09e1f..98ee656fdaed 100644
--- a/py-polars/tests/unit/operations/test_interpolate_by.py
+++ b/py-polars/tests/unit/operations/test_interpolate_by.py
@@ -168,7 +168,11 @@ def test_interpolate_by_trailing_nulls(dataset: str) -> None:
 def test_interpolate_vs_numpy(data: st.DataObject, x_dtype: pl.DataType) -> None:
     if x_dtype == pl.Float64:
         by_strategy = st.floats(
-            allow_nan=False, allow_infinity=False, allow_subnormal=False
+            min_value=-1e150,
+            max_value=1e150,
+            allow_nan=False,
+            allow_infinity=False,
+            allow_subnormal=False,
         )
     else:
         by_strategy = None