diff --git a/crates/core/src/errors.rs b/crates/core/src/errors.rs index e3447cad72..c037566eaf 100644 --- a/crates/core/src/errors.rs +++ b/crates/core/src/errors.rs @@ -236,6 +236,9 @@ pub enum DeltaTableError { #[error("End timestamp {ending_timestamp} is greater than latest commit timestamp")] ChangeDataTimestampGreaterThanCommit { ending_timestamp: DateTime }, + + #[error("No starting version or timestamp provided for CDC")] + NoStartingVersionOrTimestamp, } impl From for DeltaTableError { diff --git a/crates/core/src/operations/load_cdf.rs b/crates/core/src/operations/load_cdf.rs index 3d5bed2d26..9718c3eda1 100644 --- a/crates/core/src/operations/load_cdf.rs +++ b/crates/core/src/operations/load_cdf.rs @@ -36,7 +36,7 @@ pub struct CdfLoadBuilder { /// Columns to project columns: Option>, /// Version to read from - starting_version: i64, + starting_version: Option, /// Version to stop reading at ending_version: Option, /// Starting timestamp of commits to accept @@ -56,7 +56,7 @@ impl CdfLoadBuilder { snapshot, log_store, columns: None, - starting_version: 0, + starting_version: None, ending_version: None, starting_timestamp: None, ending_timestamp: None, @@ -67,7 +67,7 @@ impl CdfLoadBuilder { /// Version to start at (version 0 if not provided) pub fn with_starting_version(mut self, starting_version: i64) -> Self { - self.starting_version = starting_version; + self.starting_version = Some(starting_version); self } @@ -107,6 +107,25 @@ impl CdfLoadBuilder { self } + async fn calculate_earliest_version(&self) -> DeltaResult { + let ts = self.starting_timestamp.unwrap_or(DateTime::UNIX_EPOCH); + for v in 0..self.snapshot.version() { + if let Ok(Some(bytes)) = self.log_store.read_commit_entry(v).await { + if let Ok(actions) = get_actions(v, bytes).await { + if actions.iter().any(|action| match action { + Action::CommitInfo(CommitInfo { + timestamp: Some(t), .. 
+ }) if ts.timestamp_millis() < *t => true, + _ => false, + }) { + return Ok(v); + } + } + } + } + Ok(0) + } + /// This is a rust version of https://github.com/delta-io/delta/blob/master/spark/src/main/scala/org/apache/spark/sql/delta/commands/cdc/CDCReader.scala#L418 /// Which iterates through versions of the delta table collects the relevant actions / commit info and returns those /// groupings for later use. The scala implementation has a lot more edge case handling and read schema checking (and just error checking in general) @@ -118,8 +137,16 @@ impl CdfLoadBuilder { Vec>, Vec>, )> { - let start = self.starting_version; - let latest_version = self.log_store.get_latest_version(0).await?; // Start from 0 since if start > latest commit, the returned commit is not a valid commit + if self.starting_version.is_none() && self.starting_timestamp.is_none() { + return Err(DeltaTableError::NoStartingVersionOrTimestamp); + } + let start = if let Some(s) = self.starting_version { + s + } else { + self.calculate_earliest_version().await? 
+ }; + let latest_version = self.log_store.get_latest_version(start).await?; // Search begins at `start`, not 0; if start > latest commit, the returned commit is not a valid commit (see the range checks below) + let mut end = self.ending_version.unwrap_or(latest_version); let mut change_files: Vec> = vec![]; @@ -130,19 +157,18 @@ impl CdfLoadBuilder { end = latest_version; } - if start > latest_version { + if end < start { return if self.allow_out_of_range { Ok((change_files, add_files, remove_files)) } else { - Err(DeltaTableError::InvalidVersion(start)) + Err(DeltaTableError::ChangeDataInvalidVersionRange { start, end }) }; } - - if end < start { + if start >= latest_version { return if self.allow_out_of_range { Ok((change_files, add_files, remove_files)) } else { - Err(DeltaTableError::ChangeDataInvalidVersionRange { start, end }) + Err(DeltaTableError::InvalidVersion(start)) }; } @@ -151,7 +177,7 @@ impl CdfLoadBuilder { .ending_timestamp .unwrap_or(DateTime::from(SystemTime::now())); - // Check that starting_timestmp is within boundaries of the latest version + // Check that starting_timestamp is within boundaries of the latest version let latest_snapshot_bytes = self .log_store .read_commit_entry(latest_version) @@ -296,6 +322,7 @@ impl CdfLoadBuilder { Some(ScalarValue::Utf8(Some(String::from("insert")))) } + #[inline] fn get_remove_action_type() -> Option { Some(ScalarValue::Utf8(Some(String::from("delete")))) } @@ -520,6 +547,7 @@ pub(crate) mod tests { .await? .load_cdf() .with_session_ctx(ctx.clone()) + .with_starting_version(0) .with_ending_timestamp(starting_timestamp.and_utc()) .build() .await?; @@ -732,6 +760,49 @@ pub(crate) mod tests { Ok(()) } + #[tokio::test] + async fn test_load_vacuumed_table() -> TestResult { + let ending_timestamp = NaiveDateTime::from_str("2024-01-06T15:44:59.570")?; + let ctx = SessionContext::new(); + let table = DeltaOps::try_from_uri("../test/tests/data/checkpoint-cdf-table") + .await? 
+ .load_cdf() + .with_session_ctx(ctx.clone()) + .with_starting_timestamp(ending_timestamp.and_utc()) + .build() + .await?; + + let batches = collect_batches( + table.properties().output_partitioning().partition_count(), + table, + ctx, + ) + .await?; + + assert_batches_sorted_eq! { + ["+----+--------+------------------+-----------------+-------------------------+------------+", + "| id | name | _change_type | _commit_version | _commit_timestamp | birthday |", + "+----+--------+------------------+-----------------+-------------------------+------------+", + "| 11 | Ossama | update_preimage | 5 | 2025-01-06T16:38:19.623 | 2024-12-30 |", + "| 12 | Ossama | update_postimage | 5 | 2025-01-06T16:38:19.623 | 2024-12-30 |", + "| 7 | Dennis | delete | 3 | 2024-01-06T16:44:59.570 | 2023-12-29 |", + "| 14 | Zach | update_preimage | 5 | 2025-01-06T16:38:19.623 | 2023-12-25 |", + "| 15 | Zach | update_postimage | 5 | 2025-01-06T16:38:19.623 | 2023-12-25 |", + "| 13 | Ryan | update_preimage | 5 | 2025-01-06T16:38:19.623 | 2023-12-22 |", + "| 14 | Ryan | update_postimage | 5 | 2025-01-06T16:38:19.623 | 2023-12-22 |", + "| 12 | Nick | update_preimage | 5 | 2025-01-06T16:38:19.623 | 2023-12-29 |", + "| 13 | Nick | update_postimage | 5 | 2025-01-06T16:38:19.623 | 2023-12-29 |", + "| 11 | Ossama | insert | 4 | 2025-01-06T16:33:18.167 | 2024-12-30 |", + "| 12 | Nick | insert | 4 | 2025-01-06T16:33:18.167 | 2023-12-29 |", + "| 13 | Ryan | insert | 4 | 2025-01-06T16:33:18.167 | 2023-12-22 |", + "| 14 | Zach | insert | 4 | 2025-01-06T16:33:18.167 | 2023-12-25 |", + "+----+--------+------------------+-----------------+-------------------------+------------+"], + &batches + } + + Ok(()) + } + #[tokio::test] async fn test_use_remove_actions_for_deletions() -> TestResult { let delta_schema = TestSchemas::simple(); diff --git a/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-22/.cdc-00003-1d50571e-b1c0-46a6-8fc9-575036b63924.c000.snappy.parquet.crc 
b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-22/.cdc-00003-1d50571e-b1c0-46a6-8fc9-575036b63924.c000.snappy.parquet.crc new file mode 100644 index 0000000000..65455b6e20 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-22/.cdc-00003-1d50571e-b1c0-46a6-8fc9-575036b63924.c000.snappy.parquet.crc differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-22/cdc-00000-59fa51a4-edbb-4fc0-a497-6969cdf3966c.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-22/cdc-00000-59fa51a4-edbb-4fc0-a497-6969cdf3966c.c000.snappy.parquet new file mode 100644 index 0000000000..7fb822f3a5 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-22/cdc-00000-59fa51a4-edbb-4fc0-a497-6969cdf3966c.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-22/cdc-00001-308c0cab-92b2-41e1-90bd-9416b10ba6a6.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-22/cdc-00001-308c0cab-92b2-41e1-90bd-9416b10ba6a6.c000.snappy.parquet new file mode 100644 index 0000000000..9420cd532b Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-22/cdc-00001-308c0cab-92b2-41e1-90bd-9416b10ba6a6.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-22/cdc-00002-ea0bad63-f199-42c6-bf85-3b9f5027578c.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-22/cdc-00002-ea0bad63-f199-42c6-bf85-3b9f5027578c.c000.snappy.parquet new file mode 100644 index 0000000000..b90a2592c7 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-22/cdc-00002-ea0bad63-f199-42c6-bf85-3b9f5027578c.c000.snappy.parquet differ diff --git 
a/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-22/cdc-00003-1d50571e-b1c0-46a6-8fc9-575036b63924.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-22/cdc-00003-1d50571e-b1c0-46a6-8fc9-575036b63924.c000.snappy.parquet new file mode 100644 index 0000000000..4a37095b87 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-22/cdc-00003-1d50571e-b1c0-46a6-8fc9-575036b63924.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-23/cdc-00000-fb59d34a-5bd7-4b10-8c41-71e38c07fdc2.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-23/cdc-00000-fb59d34a-5bd7-4b10-8c41-71e38c07fdc2.c000.snappy.parquet new file mode 100644 index 0000000000..50b5da505a Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-23/cdc-00000-fb59d34a-5bd7-4b10-8c41-71e38c07fdc2.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-23/cdc-00001-985fd824-b34a-4f3e-b7e4-90bf8d04898e.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-23/cdc-00001-985fd824-b34a-4f3e-b7e4-90bf8d04898e.c000.snappy.parquet new file mode 100644 index 0000000000..fe6fc81be2 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-23/cdc-00001-985fd824-b34a-4f3e-b7e4-90bf8d04898e.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-23/cdc-00002-831078a2-a13d-4713-aa88-7d5f5228d781.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-23/cdc-00002-831078a2-a13d-4713-aa88-7d5f5228d781.c000.snappy.parquet new file mode 100644 index 0000000000..dca1df96f6 Binary files /dev/null and 
b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-23/cdc-00002-831078a2-a13d-4713-aa88-7d5f5228d781.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-24/cdc-00000-4beb5c26-e34a-470a-a62e-2ecc8dc24035.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-24/cdc-00000-4beb5c26-e34a-470a-a62e-2ecc8dc24035.c000.snappy.parquet new file mode 100644 index 0000000000..4298341a2a Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-24/cdc-00000-4beb5c26-e34a-470a-a62e-2ecc8dc24035.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-24/cdc-00001-a5f1d5a2-e308-406f-af76-3b32bab79832.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-24/cdc-00001-a5f1d5a2-e308-406f-af76-3b32bab79832.c000.snappy.parquet new file mode 100644 index 0000000000..3f6d0df77d Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-24/cdc-00001-a5f1d5a2-e308-406f-af76-3b32bab79832.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-24/cdc-00002-ddca9e04-03ef-4533-a9c8-05c1d4f79d6a.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-24/cdc-00002-ddca9e04-03ef-4533-a9c8-05c1d4f79d6a.c000.snappy.parquet new file mode 100644 index 0000000000..53985a92a1 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-24/cdc-00002-ddca9e04-03ef-4533-a9c8-05c1d4f79d6a.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-25/.cdc-00002-1bf2daf8-1bef-483e-a298-60f36a9f14c7.c000.snappy.parquet.crc 
b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-25/.cdc-00002-1bf2daf8-1bef-483e-a298-60f36a9f14c7.c000.snappy.parquet.crc new file mode 100644 index 0000000000..fbc5980416 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-25/.cdc-00002-1bf2daf8-1bef-483e-a298-60f36a9f14c7.c000.snappy.parquet.crc differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-25/cdc-00002-1bf2daf8-1bef-483e-a298-60f36a9f14c7.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-25/cdc-00002-1bf2daf8-1bef-483e-a298-60f36a9f14c7.c000.snappy.parquet new file mode 100644 index 0000000000..34c6fd1fb7 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-25/cdc-00002-1bf2daf8-1bef-483e-a298-60f36a9f14c7.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-29/.cdc-00001-8a2331ca-2aec-4763-9b72-0ef2ebf20c89.c000.snappy.parquet.crc b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-29/.cdc-00001-8a2331ca-2aec-4763-9b72-0ef2ebf20c89.c000.snappy.parquet.crc new file mode 100644 index 0000000000..8488bf3159 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-29/.cdc-00001-8a2331ca-2aec-4763-9b72-0ef2ebf20c89.c000.snappy.parquet.crc differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-29/cdc-00000-e8760032-5a99-4d37-9739-fc9d4db24308.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-29/cdc-00000-e8760032-5a99-4d37-9739-fc9d4db24308.c000.snappy.parquet new file mode 100644 index 0000000000..0169eb7b25 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-29/cdc-00000-e8760032-5a99-4d37-9739-fc9d4db24308.c000.snappy.parquet differ 
diff --git a/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-29/cdc-00000-ed223ebe-3b27-44af-b2cf-91e882f4c500.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-29/cdc-00000-ed223ebe-3b27-44af-b2cf-91e882f4c500.c000.snappy.parquet new file mode 100644 index 0000000000..ff7097554e Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-29/cdc-00000-ed223ebe-3b27-44af-b2cf-91e882f4c500.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-29/cdc-00001-1aa06a1f-c45f-4227-b0ac-e70b1e2115b1.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-29/cdc-00001-1aa06a1f-c45f-4227-b0ac-e70b1e2115b1.c000.snappy.parquet new file mode 100644 index 0000000000..9d860c43c2 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-29/cdc-00001-1aa06a1f-c45f-4227-b0ac-e70b1e2115b1.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-29/cdc-00001-8a2331ca-2aec-4763-9b72-0ef2ebf20c89.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-29/cdc-00001-8a2331ca-2aec-4763-9b72-0ef2ebf20c89.c000.snappy.parquet new file mode 100644 index 0000000000..1f7d733500 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-29/cdc-00001-8a2331ca-2aec-4763-9b72-0ef2ebf20c89.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-29/cdc-00002-97dc4c5b-3806-4198-99ed-062c0a337c29.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-29/cdc-00002-97dc4c5b-3806-4198-99ed-062c0a337c29.c000.snappy.parquet new file mode 100644 index 0000000000..f126fc421b Binary files /dev/null and 
b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2023-12-29/cdc-00002-97dc4c5b-3806-4198-99ed-062c0a337c29.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2024-12-30/.cdc-00000-66f2943f-f545-4ad5-a29a-d41a6fc0964f.c000.snappy.parquet.crc b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2024-12-30/.cdc-00000-66f2943f-f545-4ad5-a29a-d41a6fc0964f.c000.snappy.parquet.crc new file mode 100644 index 0000000000..255ca601fe Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2024-12-30/.cdc-00000-66f2943f-f545-4ad5-a29a-d41a6fc0964f.c000.snappy.parquet.crc differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2024-12-30/cdc-00000-66f2943f-f545-4ad5-a29a-d41a6fc0964f.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2024-12-30/cdc-00000-66f2943f-f545-4ad5-a29a-d41a6fc0964f.c000.snappy.parquet new file mode 100644 index 0000000000..55dabdf67c Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/_change_data/birthday=2024-12-30/cdc-00000-66f2943f-f545-4ad5-a29a-d41a6fc0964f.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/_delta_log/.00000000000000000003.checkpoint.parquet.crc b/crates/test/tests/data/checkpoint-cdf-table/_delta_log/.00000000000000000003.checkpoint.parquet.crc new file mode 100644 index 0000000000..4d514c4851 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/_delta_log/.00000000000000000003.checkpoint.parquet.crc differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/_delta_log/.00000000000000000004.json.crc b/crates/test/tests/data/checkpoint-cdf-table/_delta_log/.00000000000000000004.json.crc new file mode 100644 index 0000000000..03c45464f8 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/_delta_log/.00000000000000000004.json.crc differ diff 
--git a/crates/test/tests/data/checkpoint-cdf-table/_delta_log/.00000000000000000005.json.crc b/crates/test/tests/data/checkpoint-cdf-table/_delta_log/.00000000000000000005.json.crc new file mode 100644 index 0000000000..b0c77511c4 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/_delta_log/.00000000000000000005.json.crc differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/_delta_log/._last_checkpoint.crc b/crates/test/tests/data/checkpoint-cdf-table/_delta_log/._last_checkpoint.crc new file mode 100644 index 0000000000..fdb702b3ad Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/_delta_log/._last_checkpoint.crc differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/_delta_log/00000000000000000003.checkpoint.parquet b/crates/test/tests/data/checkpoint-cdf-table/_delta_log/00000000000000000003.checkpoint.parquet new file mode 100644 index 0000000000..0eb641eb64 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/_delta_log/00000000000000000003.checkpoint.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/_delta_log/00000000000000000003.json b/crates/test/tests/data/checkpoint-cdf-table/_delta_log/00000000000000000003.json new file mode 100644 index 0000000000..26b9aa78df --- /dev/null +++ b/crates/test/tests/data/checkpoint-cdf-table/_delta_log/00000000000000000003.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1704559499570,"operation":"DELETE","operationParameters":{"predicate":"[\"(name#40 = Dennis)\"]"},"readVersion":2,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRemovedFiles":"1","numRemovedBytes":"917","numCopiedRows":"0","numDeletionVectorsAdded":"0","executionTimeMs":"3479","numDeletionVectorsUpdated":"0","numAddedFiles":"0","numDeletionVectorsRemoved":"0","numAddedChangeFiles":"1","numDeletedRows":"1","scanTimeMs":"3157","numAddedBytes":"0","rewriteTimeMs":"322"},"engineInfo":"Apache-Spark/3.5.0 
Delta-Lake/3.0.0","txnId":"ef48960f-ceb5-4bc2-9b59-8c947083ae58"}} +{"remove":{"path":"birthday=2023-12-29/part-00000-1ca113cd-a94c-46a8-9c5b-b99e676ddd06.c000.snappy.parquet","deletionTimestamp":1704559499540,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{"birthday":"2023-12-29"},"size":917}} +{"cdc":{"path":"_change_data/birthday=2023-12-29/cdc-00000-ed223ebe-3b27-44af-b2cf-91e882f4c500.c000.snappy.parquet","partitionValues":{"birthday":"2023-12-29"},"size":971,"dataChange":false}} diff --git a/crates/test/tests/data/checkpoint-cdf-table/_delta_log/00000000000000000004.json b/crates/test/tests/data/checkpoint-cdf-table/_delta_log/00000000000000000004.json new file mode 100644 index 0000000000..da5838f1dc --- /dev/null +++ b/crates/test/tests/data/checkpoint-cdf-table/_delta_log/00000000000000000004.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1736181198167,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":3,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"4","numOutputRows":"4","numOutputBytes":"2760"},"engineInfo":"Apache-Spark/3.5.1 Delta-Lake/3.2.1","txnId":"05de6624-a123-4c46-bf95-4dcc34b56aff"}} +{"add":{"path":"birthday=2024-12-30/part-00000-735d4a7f-9956-46d5-8955-e9bc3599aa88.c000.snappy.parquet","partitionValues":{"birthday":"2024-12-30"},"size":701,"modificationTime":1736181198024,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":11,\"name\":\"Ossama\"},\"maxValues\":{\"id\":11,\"name\":\"Ossama\"},\"nullCount\":{\"id\":0,\"name\":0}}"}} +{"add":{"path":"birthday=2023-12-29/part-00001-e041c37a-5bac-443c-a8c6-a3713894743d.c000.snappy.parquet","partitionValues":{"birthday":"2023-12-29"},"size":687,"modificationTime":1736181198024,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":12,\"name\":\"Nick\"},\"maxValues\":{\"id\":12,\"name\":\"Nick\"},\"nullCount\":{\"id\":0,\"name\":0}}"}} 
+{"add":{"path":"birthday=2023-12-22/part-00002-fc3f3da0-9475-49db-a5be-f675a10bbe2c.c000.snappy.parquet","partitionValues":{"birthday":"2023-12-22"},"size":686,"modificationTime":1736181198024,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":13,\"name\":\"Ryan\"},\"maxValues\":{\"id\":13,\"name\":\"Ryan\"},\"nullCount\":{\"id\":0,\"name\":0}}"}} +{"add":{"path":"birthday=2023-12-25/part-00003-4f6cd749-bd9f-4a4a-a594-66fc77d41c58.c000.snappy.parquet","partitionValues":{"birthday":"2023-12-25"},"size":686,"modificationTime":1736181198024,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":14,\"name\":\"Zach\"},\"maxValues\":{\"id\":14,\"name\":\"Zach\"},\"nullCount\":{\"id\":0,\"name\":0}}"}} diff --git a/crates/test/tests/data/checkpoint-cdf-table/_delta_log/00000000000000000005.json b/crates/test/tests/data/checkpoint-cdf-table/_delta_log/00000000000000000005.json new file mode 100644 index 0000000000..441da74062 --- /dev/null +++ b/crates/test/tests/data/checkpoint-cdf-table/_delta_log/00000000000000000005.json @@ -0,0 +1,13 @@ +{"commitInfo":{"timestamp":1736181499623,"operation":"UPDATE","operationParameters":{"predicate":"[\"(id#104 >= 11)\"]"},"readVersion":4,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRemovedFiles":"4","numRemovedBytes":"6939","numCopiedRows":"0","numDeletionVectorsAdded":"0","executionTimeMs":"6073","numDeletionVectorsUpdated":"0","scanTimeMs":"5118","numAddedFiles":"4","numUpdatedRows":"4","numDeletionVectorsRemoved":"0","numAddedChangeFiles":"4","numAddedBytes":"3628","rewriteTimeMs":"950"},"engineInfo":"Apache-Spark/3.5.1 Delta-Lake/3.2.1","txnId":"a53a1e14-a31b-43dc-837b-053f3c423cc4"}} 
+{"add":{"path":"birthday=2024-12-30/part-00000-1f959cb4-ae21-4e3c-b9da-e1610fb63cae.c000.snappy.parquet","partitionValues":{"birthday":"2024-12-30"},"size":918,"modificationTime":1736181499498,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":12,\"name\":\"Ossama\"},\"maxValues\":{\"id\":12,\"name\":\"Ossama\"},\"nullCount\":{\"id\":0,\"name\":0}}"}} +{"add":{"path":"birthday=2023-12-29/part-00001-21869311-b18b-4a90-800d-521fdeeb0917.c000.snappy.parquet","partitionValues":{"birthday":"2023-12-29"},"size":904,"modificationTime":1736181499498,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":13,\"name\":\"Nick\"},\"maxValues\":{\"id\":13,\"name\":\"Nick\"},\"nullCount\":{\"id\":0,\"name\":0}}"}} +{"add":{"path":"birthday=2023-12-25/part-00002-90c97264-1f4e-4789-9879-8da4ac3a278c.c000.snappy.parquet","partitionValues":{"birthday":"2023-12-25"},"size":904,"modificationTime":1736181499498,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":15,\"name\":\"Zach\"},\"maxValues\":{\"id\":15,\"name\":\"Zach\"},\"nullCount\":{\"id\":0,\"name\":0}}"}} +{"add":{"path":"birthday=2023-12-22/part-00003-50021c28-2b26-4382-9a0f-63f05671edef.c000.snappy.parquet","partitionValues":{"birthday":"2023-12-22"},"size":902,"modificationTime":1736181499498,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":14,\"name\":\"Ryan\"},\"maxValues\":{\"id\":14,\"name\":\"Ryan\"},\"nullCount\":{\"id\":0,\"name\":0}}"}} +{"cdc":{"path":"_change_data/birthday=2024-12-30/cdc-00000-66f2943f-f545-4ad5-a29a-d41a6fc0964f.c000.snappy.parquet","partitionValues":{"birthday":"2024-12-30"},"size":1056,"dataChange":false}} +{"cdc":{"path":"_change_data/birthday=2023-12-29/cdc-00001-8a2331ca-2aec-4763-9b72-0ef2ebf20c89.c000.snappy.parquet","partitionValues":{"birthday":"2023-12-29"},"size":1041,"dataChange":false}} 
+{"cdc":{"path":"_change_data/birthday=2023-12-25/cdc-00002-1bf2daf8-1bef-483e-a298-60f36a9f14c7.c000.snappy.parquet","partitionValues":{"birthday":"2023-12-25"},"size":1041,"dataChange":false}} +{"cdc":{"path":"_change_data/birthday=2023-12-22/cdc-00003-1d50571e-b1c0-46a6-8fc9-575036b63924.c000.snappy.parquet","partitionValues":{"birthday":"2023-12-22"},"size":1041,"dataChange":false}} +{"remove":{"path":"birthday=2024-12-30/part-00000-735d4a7f-9956-46d5-8955-e9bc3599aa88.c000.snappy.parquet","deletionTimestamp":1736181499597,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{"birthday":"2024-12-30"},"size":701}} +{"remove":{"path":"birthday=2023-12-29/part-00001-e041c37a-5bac-443c-a8c6-a3713894743d.c000.snappy.parquet","deletionTimestamp":1736181499597,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{"birthday":"2023-12-29"},"size":687}} +{"remove":{"path":"birthday=2023-12-25/part-00003-4f6cd749-bd9f-4a4a-a594-66fc77d41c58.c000.snappy.parquet","deletionTimestamp":1736181499597,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{"birthday":"2023-12-25"},"size":686}} +{"remove":{"path":"birthday=2023-12-22/part-00002-fc3f3da0-9475-49db-a5be-f675a10bbe2c.c000.snappy.parquet","deletionTimestamp":1736181499597,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{"birthday":"2023-12-22"},"size":686}} diff --git a/crates/test/tests/data/checkpoint-cdf-table/_delta_log/_last_checkpoint b/crates/test/tests/data/checkpoint-cdf-table/_delta_log/_last_checkpoint new file mode 100644 index 0000000000..15af40928a --- /dev/null +++ b/crates/test/tests/data/checkpoint-cdf-table/_delta_log/_last_checkpoint @@ -0,0 +1 @@ 
+{"version":3,"size":11,"sizeInBytes":18082,"numOfAddFiles":9,"checkpointSchema":{"type":"struct","fields":[{"name":"txn","type":{"type":"struct","fields":[{"name":"appId","type":"string","nullable":true,"metadata":{}},{"name":"version","type":"long","nullable":true,"metadata":{}},{"name":"lastUpdated","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"add","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"modificationTime","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"tags","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}},{"name":"clusteringProvider","type":"string","nullable":true,"metadata":{}},{"name":"stats","type":"string","nullable":true,"metadata":{}},{"name":"partitionValues_parsed","type":{"type":"struct","fields":[{"name":"birthday","type":"date","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"remove","type":{"type":"struct","fields
":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"deletionTimestamp","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"extendedFileMetadata","type":"boolean","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"metaData","type":{"type":"struct","fields":[{"name":"id","type":"string","nullable":true,"metadata":{}},{"name":"name","type":"string","nullable":true,"metadata":{}},{"name":"description","type":"string","nullable":true,"metadata":{}},{"name":"format","type":{"type":"struct","fields":[{"name":"provider","type":"string","nullable":true,"metadata":{}},{"name":"options","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"schemaString","type":"string","nullable":true,"metadata":{}},{"name":"partitionColumns","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"configuration","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":t
rue},"nullable":true,"metadata":{}},{"name":"createdTime","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"protocol","type":{"type":"struct","fields":[{"name":"minReaderVersion","type":"integer","nullable":true,"metadata":{}},{"name":"minWriterVersion","type":"integer","nullable":true,"metadata":{}},{"name":"readerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"writerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"domainMetadata","type":{"type":"struct","fields":[{"name":"domain","type":"string","nullable":true,"metadata":{}},{"name":"configuration","type":"string","nullable":true,"metadata":{}},{"name":"removed","type":"boolean","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"checksum":"d7e1e1a7cb6ef0cb2059567425b7a1c7"} diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-22/.part-00002-fc3f3da0-9475-49db-a5be-f675a10bbe2c.c000.snappy.parquet.crc b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-22/.part-00002-fc3f3da0-9475-49db-a5be-f675a10bbe2c.c000.snappy.parquet.crc new file mode 100644 index 0000000000..0b659f956b Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-22/.part-00002-fc3f3da0-9475-49db-a5be-f675a10bbe2c.c000.snappy.parquet.crc differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-22/.part-00003-50021c28-2b26-4382-9a0f-63f05671edef.c000.snappy.parquet.crc b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-22/.part-00003-50021c28-2b26-4382-9a0f-63f05671edef.c000.snappy.parquet.crc new file mode 100644 index 0000000000..21e00077c8 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-22/.part-00003-50021c28-2b26-4382-9a0f-63f05671edef.c000.snappy.parquet.crc differ diff --git 
a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-22/part-00000-592a7e14-f790-4236-9c61-120d006eb3b8.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-22/part-00000-592a7e14-f790-4236-9c61-120d006eb3b8.c000.snappy.parquet new file mode 100644 index 0000000000..7a24bef8d2 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-22/part-00000-592a7e14-f790-4236-9c61-120d006eb3b8.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-22/part-00000-cd6a8496-3a3c-4ac9-8fba-035e60e71ab2.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-22/part-00000-cd6a8496-3a3c-4ac9-8fba-035e60e71ab2.c000.snappy.parquet new file mode 100644 index 0000000000..f5e6762c81 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-22/part-00000-cd6a8496-3a3c-4ac9-8fba-035e60e71ab2.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-22/part-00001-96c64ea1-3383-42c8-bc83-487a583eb01b.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-22/part-00001-96c64ea1-3383-42c8-bc83-487a583eb01b.c000.snappy.parquet new file mode 100644 index 0000000000..a6c9b9265c Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-22/part-00001-96c64ea1-3383-42c8-bc83-487a583eb01b.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-22/part-00002-93942e85-bb5c-45ff-b334-3a50c28185bb.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-22/part-00002-93942e85-bb5c-45ff-b334-3a50c28185bb.c000.snappy.parquet new file mode 100644 index 0000000000..51ff3c55b0 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-22/part-00002-93942e85-bb5c-45ff-b334-3a50c28185bb.c000.snappy.parquet differ diff --git 
a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-22/part-00002-fc3f3da0-9475-49db-a5be-f675a10bbe2c.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-22/part-00002-fc3f3da0-9475-49db-a5be-f675a10bbe2c.c000.snappy.parquet new file mode 100644 index 0000000000..2fa41fe8ad Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-22/part-00002-fc3f3da0-9475-49db-a5be-f675a10bbe2c.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-22/part-00003-50021c28-2b26-4382-9a0f-63f05671edef.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-22/part-00003-50021c28-2b26-4382-9a0f-63f05671edef.c000.snappy.parquet new file mode 100644 index 0000000000..f3c8c3fdff Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-22/part-00003-50021c28-2b26-4382-9a0f-63f05671edef.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-23/part-00001-723d68a5-94eb-4acc-9db1-e985867a1a6c.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-23/part-00001-723d68a5-94eb-4acc-9db1-e985867a1a6c.c000.snappy.parquet new file mode 100644 index 0000000000..2a814ac262 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-23/part-00001-723d68a5-94eb-4acc-9db1-e985867a1a6c.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-23/part-00002-7c6f102f-6ad1-4e3b-bee3-df831f4abf3c.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-23/part-00002-7c6f102f-6ad1-4e3b-bee3-df831f4abf3c.c000.snappy.parquet new file mode 100644 index 0000000000..5f2ad4cf7e Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-23/part-00002-7c6f102f-6ad1-4e3b-bee3-df831f4abf3c.c000.snappy.parquet differ diff --git 
a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-23/part-00003-98b8082f-db4e-43f8-ac4f-56538beeddae.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-23/part-00003-98b8082f-db4e-43f8-ac4f-56538beeddae.c000.snappy.parquet new file mode 100644 index 0000000000..357f6f12ca Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-23/part-00003-98b8082f-db4e-43f8-ac4f-56538beeddae.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-24/part-00004-218c1bff-cde9-44b2-b7bf-93f2f37c0cb9.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-24/part-00004-218c1bff-cde9-44b2-b7bf-93f2f37c0cb9.c000.snappy.parquet new file mode 100644 index 0000000000..5a85db4f6b Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-24/part-00004-218c1bff-cde9-44b2-b7bf-93f2f37c0cb9.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-24/part-00005-8aeab9bc-7a46-4083-9a85-e4f8d4501a67.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-24/part-00005-8aeab9bc-7a46-4083-9a85-e4f8d4501a67.c000.snappy.parquet new file mode 100644 index 0000000000..317e8a0137 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-24/part-00005-8aeab9bc-7a46-4083-9a85-e4f8d4501a67.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-24/part-00006-53327328-4603-45ad-adb9-21feeeee2c31.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-24/part-00006-53327328-4603-45ad-adb9-21feeeee2c31.c000.snappy.parquet new file mode 100644 index 0000000000..f716e5ddb4 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-24/part-00006-53327328-4603-45ad-adb9-21feeeee2c31.c000.snappy.parquet differ diff --git 
a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-25/.part-00002-90c97264-1f4e-4789-9879-8da4ac3a278c.c000.snappy.parquet.crc b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-25/.part-00002-90c97264-1f4e-4789-9879-8da4ac3a278c.c000.snappy.parquet.crc new file mode 100644 index 0000000000..017e71a370 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-25/.part-00002-90c97264-1f4e-4789-9879-8da4ac3a278c.c000.snappy.parquet.crc differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-25/.part-00003-4f6cd749-bd9f-4a4a-a594-66fc77d41c58.c000.snappy.parquet.crc b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-25/.part-00003-4f6cd749-bd9f-4a4a-a594-66fc77d41c58.c000.snappy.parquet.crc new file mode 100644 index 0000000000..6dd6feceec Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-25/.part-00003-4f6cd749-bd9f-4a4a-a594-66fc77d41c58.c000.snappy.parquet.crc differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-25/part-00002-90c97264-1f4e-4789-9879-8da4ac3a278c.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-25/part-00002-90c97264-1f4e-4789-9879-8da4ac3a278c.c000.snappy.parquet new file mode 100644 index 0000000000..e5bad4e8f9 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-25/part-00002-90c97264-1f4e-4789-9879-8da4ac3a278c.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-25/part-00003-4f6cd749-bd9f-4a4a-a594-66fc77d41c58.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-25/part-00003-4f6cd749-bd9f-4a4a-a594-66fc77d41c58.c000.snappy.parquet new file mode 100644 index 0000000000..03ac5d93fb Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-25/part-00003-4f6cd749-bd9f-4a4a-a594-66fc77d41c58.c000.snappy.parquet 
differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-25/part-00007-8cd4b5a3-b4dd-4bbc-8bb3-721fa82961c6.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-25/part-00007-8cd4b5a3-b4dd-4bbc-8bb3-721fa82961c6.c000.snappy.parquet new file mode 100644 index 0000000000..97740ce49c Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-25/part-00007-8cd4b5a3-b4dd-4bbc-8bb3-721fa82961c6.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-25/part-00008-436dbf31-f213-4b3b-bcc3-5df022ec6b35.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-25/part-00008-436dbf31-f213-4b3b-bcc3-5df022ec6b35.c000.snappy.parquet new file mode 100644 index 0000000000..b690d3363a Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-25/part-00008-436dbf31-f213-4b3b-bcc3-5df022ec6b35.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-25/part-00009-685aacbb-c7ac-4cb2-93f1-6dc27cd2e980.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-25/part-00009-685aacbb-c7ac-4cb2-93f1-6dc27cd2e980.c000.snappy.parquet new file mode 100644 index 0000000000..5cc0801871 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-25/part-00009-685aacbb-c7ac-4cb2-93f1-6dc27cd2e980.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-29/.part-00001-21869311-b18b-4a90-800d-521fdeeb0917.c000.snappy.parquet.crc b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-29/.part-00001-21869311-b18b-4a90-800d-521fdeeb0917.c000.snappy.parquet.crc new file mode 100644 index 0000000000..ed6a8d304b Binary files /dev/null and 
b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-29/.part-00001-21869311-b18b-4a90-800d-521fdeeb0917.c000.snappy.parquet.crc differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-29/.part-00001-e041c37a-5bac-443c-a8c6-a3713894743d.c000.snappy.parquet.crc b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-29/.part-00001-e041c37a-5bac-443c-a8c6-a3713894743d.c000.snappy.parquet.crc new file mode 100644 index 0000000000..a6514cbd72 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-29/.part-00001-e041c37a-5bac-443c-a8c6-a3713894743d.c000.snappy.parquet.crc differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-29/part-00000-1ca113cd-a94c-46a8-9c5b-b99e676ddd06.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-29/part-00000-1ca113cd-a94c-46a8-9c5b-b99e676ddd06.c000.snappy.parquet new file mode 100644 index 0000000000..b24ab63fdc Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-29/part-00000-1ca113cd-a94c-46a8-9c5b-b99e676ddd06.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-29/part-00001-21869311-b18b-4a90-800d-521fdeeb0917.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-29/part-00001-21869311-b18b-4a90-800d-521fdeeb0917.c000.snappy.parquet new file mode 100644 index 0000000000..2309c2e9ca Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-29/part-00001-21869311-b18b-4a90-800d-521fdeeb0917.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-29/part-00001-8334a9a7-7041-4d88-8377-aa36cfe5762f.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-29/part-00001-8334a9a7-7041-4d88-8377-aa36cfe5762f.c000.snappy.parquet new file mode 100644 index 0000000000..c7717e10ca Binary files 
/dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-29/part-00001-8334a9a7-7041-4d88-8377-aa36cfe5762f.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-29/part-00001-e041c37a-5bac-443c-a8c6-a3713894743d.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-29/part-00001-e041c37a-5bac-443c-a8c6-a3713894743d.c000.snappy.parquet new file mode 100644 index 0000000000..46b0d5a34c Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-29/part-00001-e041c37a-5bac-443c-a8c6-a3713894743d.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-29/part-00002-7dd6bbed-a0c1-44f0-b729-42b7d7d7f5ca.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-29/part-00002-7dd6bbed-a0c1-44f0-b729-42b7d7d7f5ca.c000.snappy.parquet new file mode 100644 index 0000000000..c0fd23c794 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2023-12-29/part-00002-7dd6bbed-a0c1-44f0-b729-42b7d7d7f5ca.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2024-12-30/.part-00000-1f959cb4-ae21-4e3c-b9da-e1610fb63cae.c000.snappy.parquet.crc b/crates/test/tests/data/checkpoint-cdf-table/birthday=2024-12-30/.part-00000-1f959cb4-ae21-4e3c-b9da-e1610fb63cae.c000.snappy.parquet.crc new file mode 100644 index 0000000000..8aed5ff5ca Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2024-12-30/.part-00000-1f959cb4-ae21-4e3c-b9da-e1610fb63cae.c000.snappy.parquet.crc differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2024-12-30/.part-00000-735d4a7f-9956-46d5-8955-e9bc3599aa88.c000.snappy.parquet.crc b/crates/test/tests/data/checkpoint-cdf-table/birthday=2024-12-30/.part-00000-735d4a7f-9956-46d5-8955-e9bc3599aa88.c000.snappy.parquet.crc new file mode 100644 index 0000000000..bdb8390c64 
Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2024-12-30/.part-00000-735d4a7f-9956-46d5-8955-e9bc3599aa88.c000.snappy.parquet.crc differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2024-12-30/part-00000-1f959cb4-ae21-4e3c-b9da-e1610fb63cae.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/birthday=2024-12-30/part-00000-1f959cb4-ae21-4e3c-b9da-e1610fb63cae.c000.snappy.parquet new file mode 100644 index 0000000000..d0c7569eff Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2024-12-30/part-00000-1f959cb4-ae21-4e3c-b9da-e1610fb63cae.c000.snappy.parquet differ diff --git a/crates/test/tests/data/checkpoint-cdf-table/birthday=2024-12-30/part-00000-735d4a7f-9956-46d5-8955-e9bc3599aa88.c000.snappy.parquet b/crates/test/tests/data/checkpoint-cdf-table/birthday=2024-12-30/part-00000-735d4a7f-9956-46d5-8955-e9bc3599aa88.c000.snappy.parquet new file mode 100644 index 0000000000..998e3eb477 Binary files /dev/null and b/crates/test/tests/data/checkpoint-cdf-table/birthday=2024-12-30/part-00000-735d4a7f-9956-46d5-8955-e9bc3599aa88.c000.snappy.parquet differ diff --git a/python/src/lib.rs b/python/src/lib.rs index 7c86aeec9e..aed9643e81 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -779,12 +779,12 @@ impl RawDeltaTable { Ok(()) } - #[pyo3(signature = (starting_version = 0, ending_version = None, starting_timestamp = None, ending_timestamp = None, columns = None, allow_out_of_range = false))] + #[pyo3(signature = (starting_version = None, ending_version = None, starting_timestamp = None, ending_timestamp = None, columns = None, allow_out_of_range = false))] #[allow(clippy::too_many_arguments)] pub fn load_cdf( &self, py: Python, - starting_version: i64, + starting_version: Option, ending_version: Option, starting_timestamp: Option, ending_timestamp: Option, @@ -792,9 +792,11 @@ impl RawDeltaTable { allow_out_of_range: bool, ) -> PyResult> { let ctx = 
SessionContext::new(); - let mut cdf_read = CdfLoadBuilder::new(self.log_store()?, self.cloned_state()?) - .with_starting_version(starting_version); + let mut cdf_read = CdfLoadBuilder::new(self.log_store()?, self.cloned_state()?); + if let Some(sv) = starting_version { + cdf_read = cdf_read.with_starting_version(sv); + } if let Some(ev) = ending_version { cdf_read = cdf_read.with_ending_version(ev); } diff --git a/python/src/query.rs b/python/src/query.rs index ce09cf46a8..3a5d979c8e 100644 --- a/python/src/query.rs +++ b/python/src/query.rs @@ -9,7 +9,7 @@ use pyo3::prelude::*; use crate::{error::PythonError, utils::rt, RawDeltaTable}; -/// PyQueryBuilder supports the _experimental_ `QueryBuilder` Pythoh interface which allows users +/// PyQueryBuilder supports the _experimental_ `QueryBuilder` Python interface which allows users /// to take advantage of the [Apache DataFusion](https://datafusion.apache.org) engine already /// present in the Python package. #[pyclass(module = "deltalake._internal")]