From be87d694d52751fb72461683afc79bbbaf2dc24a Mon Sep 17 00:00:00 2001 From: ion-elgreco <15728914+ion-elgreco@users.noreply.github.com> Date: Fri, 8 Mar 2024 19:51:10 +0100 Subject: [PATCH] only convert configs that have "true" --- crates/core/src/operations/create.rs | 2 +- .../core/src/operations/set_tbl_properties.rs | 19 ++++++++++++++++--- python/tests/test_create.py | 9 ++++++--- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/crates/core/src/operations/create.rs b/crates/core/src/operations/create.rs index 78e9791fd3..555de905bb 100644 --- a/crates/core/src/operations/create.rs +++ b/crates/core/src/operations/create.rs @@ -1,7 +1,7 @@ //! Command for creating a new delta table // https://github.com/delta-io/delta/blob/master/core/src/main/scala/org/apache/spark/sql/delta/commands/CreateDeltaTableCommand.scala -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use std::sync::Arc; use futures::future::BoxFuture; diff --git a/crates/core/src/operations/set_tbl_properties.rs b/crates/core/src/operations/set_tbl_properties.rs index 6ee0db58d7..77ed16dc02 100644 --- a/crates/core/src/operations/set_tbl_properties.rs +++ b/crates/core/src/operations/set_tbl_properties.rs @@ -205,14 +205,22 @@ pub fn apply_properties_to_protocol( } /// Converts existing properties into features if the reader_version is >=3 or writer_version >=3 +/// only converts features that are "true" pub fn convert_properties_to_features( mut new_protocol: Protocol, configuration: &HashMap>, ) -> Protocol { if new_protocol.min_writer_version >= 7 { let mut converted_writer_features = configuration + .iter() + .filter(|(_, value)| { + value.as_ref().map_or(false, |v| { + v.to_ascii_lowercase().parse::().is_ok_and(|v| v) + }) + }) + .collect::>>() .keys() - .map(|key| key.clone().into()) + .map(|key| (*key).clone().into()) .filter(|v| !matches!(v, WriterFeatures::Other(_))) .collect::>(); @@ -233,8 +241,13 @@ pub fn convert_properties_to_features( } if new_protocol.min_reader_version >= 3 { let converted_reader_features = configuration - .keys() - .map(|key| key.clone().into()) + .iter() + .filter(|(_, value)| { + value.as_ref().map_or(false, |v| { + v.to_ascii_lowercase().parse::().is_ok_and(|v| v) + }) + }) + .map(|(key, _)| (*key).clone().into()) .filter(|v| !matches!(v, ReaderFeatures::Other(_))) .collect::>(); match new_protocol.reader_features { diff --git a/python/tests/test_create.py b/python/tests/test_create.py index 243c89b883..51f83b4143 100644 --- a/python/tests/test_create.py +++ b/python/tests/test_create.py @@ -14,7 +14,7 @@ def test_create_roundtrip_metadata(tmp_path: pathlib.Path, sample_data: pa.Table name="test_name", description="test_desc", configuration={ - "delta.appendOnly": "false", + "delta.appendOnly": "true", "delta.logRetentionDuration": "interval 2 days", }, custom_metadata={"userName": "John Doe"}, @@ -25,11 +25,13 @@ def test_create_roundtrip_metadata(tmp_path: pathlib.Path, sample_data: pa.Table assert metadata.name == "test_name" assert metadata.description == "test_desc" assert metadata.configuration == { - "delta.appendOnly": "false", + "delta.appendOnly": "true", "delta.logRetentionDuration": "interval 2 days", } assert dt.history()[0]["userName"] == "John Doe" + assert {*dt.protocol().writer_features} == {"appendOnly", "timestampNtz"} # type: ignore + def test_create_modes(tmp_path: pathlib.Path, sample_data: pa.Table): dt = DeltaTable.create(tmp_path, sample_data.schema, mode="error") @@ -65,6 +67,7 @@ def test_create_schema(tmp_path: pathlib.Path, sample_data: pa.Table): def test_create_with_deletion_vectors_enabled( tmp_path: pathlib.Path, sample_table: pa.Table ): + """append only is set to false so shouldn't be converted to a feature""" dt = DeltaTable.create( tmp_path, sample_table.schema, @@ -87,7 +90,7 @@ def test_create_with_deletion_vectors_enabled( } assert protocol.min_reader_version == 3 assert protocol.min_writer_version == 7 - assert set(protocol.writer_features) == {"deletionVectors", "appendOnly"} # type: ignore + assert protocol.writer_features == ["deletionVectors"] # type: ignore assert protocol.reader_features == ["deletionVectors"] assert dt.history()[0]["userName"] == "John Doe"