From e8e981d4abe8e3be66ac8674f5284a202388a357 Mon Sep 17 00:00:00 2001 From: zml1206 Date: Thu, 9 Nov 2023 17:07:16 +0800 Subject: [PATCH] [KYUUBI #5535][AUTHZ] Support vacuum table command for Delta Lake ### _Why are the changes needed?_ To close #5535. Support vacuum table command for Delta Lake. https://docs.delta.io/latest/delta-utility.html#remove-files-no-longer-referenced-by-a-delta-table Analyzed logical plan on Delta Lake 3.0/Spark 3.5: ``` == Analyzed Logical Plan == path: string VacuumTableCommand false +- ResolvedTable org.apache.spark.sql.delta.catalog.DeltaCatalog@3947bad4, delta_ns.table1_delta, DeltaTableV2(org.apache.spark.sql.SparkSession@2e7075d4,file:/var/folders/gc/c__qhntd7s502txfp0ltxh880000gn/T/spark-warehouse-ba741e85-4c66-46f7-b1a7-10a1f32cd20c/delta_ns.db/table1_delta,Some(CatalogTable( Catalog: spark_catalog Database: delta_ns Table: table1_delta Owner: default_table_owner Created Time: Thu Nov 09 10:45:20 CST 2023 Last Access: UNKNOWN Created By: Spark 3.5.0 Type: MANAGED Provider: delta Location: file:/var/folders/gc/c__qhntd7s502txfp0ltxh880000gn/T/spark-warehouse-ba741e85-4c66-46f7-b1a7-10a1f32cd20c/delta_ns.db/table1_delta Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.SequenceFileInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Partition Provider: Catalog)),Some(delta_ns.table1_delta),None,Map()), [id#818, name#819, gender#820, birthDate#821] ``` Analyzed logical plan before Delta Lake 3.0/Spark 3.5: ``` == Analyzed Logical Plan == path: string VacuumTableCommand `delta_ns`.`table1_delta`, false ``` ### _How was this patch tested?_ - [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [ ] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before making a pull request ### _Was this patch authored or 
co-authored using generative AI tooling?_ No. Closes #5655 from zml1206/KYUUBI-5535. Closes #5535 9500ee730 [zml1206] Support vacuum table command for Delta Lake Authored-by: zml1206 Signed-off-by: Kent Yao --- .../main/resources/table_command_spec.json | 24 +++++++++++++++++++ .../spark/authz/gen/DeltaCommands.scala | 10 +++++++- ...eltaCatalogRangerSparkExtensionSuite.scala | 12 ++++++++++ 3 files changed, 45 insertions(+), 1 deletion(-) diff --git a/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json b/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json index 3c52998cd61..272876d524d 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json +++ b/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json @@ -2042,6 +2042,30 @@ "opType" : "QUERY", "queryDescs" : [ ], "uriDescs" : [ ] +}, { + "classname" : "io.delta.tables.execution.VacuumTableCommand", + "tableDescs" : [ { + "fieldName" : "child", + "fieldExtractor" : "ResolvedTableTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : null, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false + }, { + "fieldName" : "table", + "fieldExtractor" : "TableIdentifierOptionTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : null, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false + } ], + "opType" : "MSCK", + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.delta.commands.DeleteCommand", "tableDescs" : [ { diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/DeltaCommands.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/DeltaCommands.scala index cf9e9a5d846..1c121f6eb5c 100644 --- 
a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/DeltaCommands.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/DeltaCommands.scala @@ -61,9 +61,17 @@ object DeltaCommands extends CommandSpecs[TableCommandSpec] { TableCommandSpec(cmd, Seq(childDesc, tableDesc), ALTERTABLE_COMPACT) } + val VacuumTableCommand = { + val cmd = "io.delta.tables.execution.VacuumTableCommand" + val childDesc = TableDesc("child", classOf[ResolvedTableTableExtractor]) + val tableDesc = TableDesc("table", classOf[TableIdentifierOptionTableExtractor]) + TableCommandSpec(cmd, Seq(childDesc, tableDesc), MSCK) + } + override def specs: Seq[TableCommandSpec] = Seq( DeleteCommand, MergeIntoCommand, OptimizeTableCommand, - UpdateCommand) + UpdateCommand, + VacuumTableCommand) } diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/DeltaCatalogRangerSparkExtensionSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/DeltaCatalogRangerSparkExtensionSuite.scala index c05f7671a93..9c4dc42ff6d 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/DeltaCatalogRangerSparkExtensionSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/DeltaCatalogRangerSparkExtensionSuite.scala @@ -281,6 +281,18 @@ class DeltaCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { doAs(admin, sql(optimizeTableSql)) } } + + test("vacuum table") { + withCleanTmpResources(Seq((s"$namespace1.$table1", "table"), (s"$namespace1", "database"))) { + doAs(admin, sql(s"CREATE DATABASE IF NOT EXISTS $namespace1")) + doAs(admin, sql(createTableSql(namespace1, table1))) + val vacuumTableSql = s"VACUUM $namespace1.$table1" + interceptContains[AccessControlException]( + doAs(someone, sql(vacuumTableSql)))( + 
s"does not have [alter] privilege on [$namespace1/$table1]") + doAs(admin, sql(vacuumTableSql)) + } + } } object DeltaCatalogRangerSparkExtensionSuite {