diff --git a/crates/polars-plan/src/dsl/function_expr/strings.rs b/crates/polars-plan/src/dsl/function_expr/strings.rs index 12e8c6c6e53e..039c995557be 100644 --- a/crates/polars-plan/src/dsl/function_expr/strings.rs +++ b/crates/polars-plan/src/dsl/function_expr/strings.rs @@ -844,20 +844,26 @@ fn replace_n<'a>( "replacement value length ({}) does not match string column length ({})", len_val, ca.len(), ); - let literal = literal || is_literal_pat(&pat); + let lit = is_literal_pat(&pat); + let literal_pat = literal || lit; - if literal { + if literal_pat { pat = escape(&pat) } let reg = Regex::new(&pat)?; - let lit = pat.chars().all(|c| !c.is_ascii_punctuation()); let f = |s: &'a str, val: &'a str| { if lit && (s.len() <= 32) { Cow::Owned(s.replacen(&pat, val, 1)) } else { - reg.replace(s, val) + // According to the docs for replace + // when literal = True then capture groups are ignored. + if literal { + reg.replace(s, NoExpand(val)) + } else { + reg.replace(s, val) + } } }; Ok(iter_and_replace(ca, val, f)) diff --git a/py-polars/tests/unit/operations/namespaces/string/test_string.py b/py-polars/tests/unit/operations/namespaces/string/test_string.py index 18a02aef5e6f..fcca5c5987b1 100644 --- a/py-polars/tests/unit/operations/namespaces/string/test_string.py +++ b/py-polars/tests/unit/operations/namespaces/string/test_string.py @@ -1006,7 +1006,7 @@ def test_replace_all() -> None: ) -def test_replace_literal_no_caputures() -> None: +def test_replace_all_literal_no_caputures() -> None: # When using literal = True, capture groups should be disabled # Single row code path in Rust @@ -1034,6 +1034,38 @@ def test_replace_literal_no_caputures() -> None: assert df2.get_column("text2")[1] == "I lost $2 yesterday." +def test_replace_literal_no_caputures() -> None: + # When using literal = True, capture groups should be disabled + + # Single row code path in Rust + df = pl.DataFrame({"text": ["I found yesterday."], "amt": ["$1"]}) + df = df.with_columns( + pl.col("text").str.replace("", pl.col("amt"), literal=True).alias("text2") + ) + assert df.get_column("text2")[0] == "I found $1 yesterday." + + # Multi-row code path in Rust + # A string shorter than 32 chars, + # and one longer than 32 chars to test both sub-paths + df2 = pl.DataFrame( + { + "text": [ + "I found yesterday.", + "I lost yesterday and this string is longer than 32 characters.", + ], + "amt": ["$1", "$2"], + } + ) + df2 = df2.with_columns( + pl.col("text").str.replace("", pl.col("amt"), literal=True).alias("text2") + ) + assert df2.get_column("text2")[0] == "I found $1 yesterday." + assert ( + df2.get_column("text2")[1] + == "I lost $2 yesterday and this string is longer than 32 characters." + ) + + def test_replace_expressions() -> None: df = pl.DataFrame({"foo": ["123 bla 45 asd", "xyz 678 910t"], "value": ["A", "B"]}) out = df.select([pl.col("foo").str.replace(pl.col("foo").first(), pl.col("value"))])