Update Job DTO, specifically handle 404 (#45)
* Revert "support for multi-arch (#44)"

This reverts commit fa7ba4f.

* fix for remote resource not existing when an ID is set

* expose data_security_mode for cluster settings, queue for jobs

* move data_security_mode to JobSettings
  pass queue through to the API create request

* update CRDs in chart, bump versions
mach-kernel authored Jan 29, 2024
1 parent fa7ba4f commit 7a11aa9
Showing 23 changed files with 240 additions and 35 deletions.
42 changes: 13 additions & 29 deletions .github/workflows/rust.yml
@@ -11,45 +11,34 @@ jobs:
         with:
           api-key: ${{secrets.FOSSA_API_KEY}}
   test:
-    name: Test (x86)
+    name: Build, test (x86)
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-      - uses: Swatinem/rust-cache@v2
-      - uses: houseabsolute/actions-rust-cross@v0
+      - uses: actions-rs/toolchain@v1
         with:
-          command: "test"
           toolchain: stable
-          args: "--test-threads=1"
-  build:
-    name: Build
-    runs-on: ubuntu-latest
-    needs: test
-    strategy:
-      matrix:
-        target: [ aarch64-unknown-linux-gnu, x86_64-unknown-linux-gnu ]
-    steps:
-      - uses: actions/checkout@v4
-      - name: Build binary
-        uses: houseabsolute/actions-rust-cross@v0
+      - uses: Swatinem/rust-cache@v2
+      - uses: actions-rs/cargo@v1
         with:
-          command: "build"
-          target: ${{ matrix.target }}
-          toolchain: stable
-          args: "--locked --release --all-features"
-          strip: true
+          command: test
+          args: -- --test-threads=1
+      - uses: actions-rs/cargo@v1
+        with:
+          command: build
+          args: --release --all-features
       - uses: actions/upload-artifact@v4
         with:
           name: crd_gen
           path: target/release/crd_gen
       - uses: actions/upload-artifact@v4
         with:
           name: databricks_kube
-          path: target/release/databricks_kube
-  publish:
+          path: target/release/databricks_kube
+  publish_image:
     name: Docker
     runs-on: ubuntu-latest
-    needs: build
+    needs: test
     if: github.ref == 'refs/heads/master'
     steps:
       - uses: actions/checkout@v4
@@ -59,10 +48,6 @@ jobs:
           registry: ghcr.io
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
-        with:
-          platforms: linux/amd64,linux/arm64
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
       - name: Docker meta
@@ -83,6 +68,5 @@
         with:
           context: .
           push: true
-          platforms: linux/amd64, linux/arm64
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default.

4 changes: 2 additions & 2 deletions charts/databricks-kube-operator/Chart.yaml
@@ -1,8 +1,8 @@
 apiVersion: v2
-appVersion: 0.6.2
+appVersion: 0.7.0
 name: databricks-kube-operator
 description: A kube-rs operator for managing Databricks API resources
-version: 0.6.2
+version: 0.7.0
 
 home: https://github.com/mach-kernel/databricks-kube-operator
 sources:
40 changes: 40 additions & 0 deletions charts/databricks-kube-operator/templates/crds.yaml
@@ -262,6 +262,17 @@ spec:
                 description: 'An object with key value pairs. The key length must be between 1 and 127 UTF-8 characters, inclusive. The value length must be less than or equal to 255 UTF-8 characters. For a list of all restrictions, see AWS Tag Restrictions: <https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Using_Tags.html#tag-restrictions>'
                 nullable: true
                 type: object
+              data_security_mode:
+                description: Data security mode decides what data governance model to use when accessing data from a cluster.
+                enum:
+                - NONE
+                - SINGLE_USER
+                - USER_ISOLATION
+                - LEGACY_TABLE_ACL
+                - LEGACY_PASSTHROUGH
+                - LEGACY_SINGLE_USER
+                nullable: true
+                type: string
               docker_image:
                 nullable: true
                 properties:
@@ -374,6 +385,10 @@ spec:
                 description: 'The type of runtime engine to use. If not specified, the runtime engine type is inferred based on the `spark_version` value. Allowed values include: * `PHOTON`: Use the Photon runtime engine type. * `STANDARD`: Use the standard runtime engine type. This field is optional.'
                 nullable: true
                 type: string
+              single_user_name:
+                description: Single user name if data_security_mode is SINGLE_USER
+                nullable: true
+                type: string
               spark_conf:
                 additionalProperties: true
                 description: An arbitrary object where the object key is a configuration propery name and the value is a configuration property value.
@@ -410,6 +425,14 @@ spec:
             description: An optional name for the job.
             nullable: true
             type: string
+          queue:
+            description: 'JobSettingsQueue : The queue settings of the job.'
+            nullable: true
+            properties:
+              enabled:
+                nullable: true
+                type: boolean
+            type: object
           schedule:
             nullable: true
             properties:
@@ -716,6 +739,17 @@ spec:
                 description: 'An object with key value pairs. The key length must be between 1 and 127 UTF-8 characters, inclusive. The value length must be less than or equal to 255 UTF-8 characters. For a list of all restrictions, see AWS Tag Restrictions: <https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Using_Tags.html#tag-restrictions>'
                 nullable: true
                 type: object
+              data_security_mode:
+                description: Data security mode decides what data governance model to use when accessing data from a cluster.
+                enum:
+                - NONE
+                - SINGLE_USER
+                - USER_ISOLATION
+                - LEGACY_TABLE_ACL
+                - LEGACY_PASSTHROUGH
+                - LEGACY_SINGLE_USER
+                nullable: true
+                type: string
               docker_image:
                 nullable: true
                 properties:
@@ -828,6 +862,10 @@ spec:
                 description: 'The type of runtime engine to use. If not specified, the runtime engine type is inferred based on the `spark_version` value. Allowed values include: * `PHOTON`: Use the Photon runtime engine type. * `STANDARD`: Use the standard runtime engine type. This field is optional.'
                 nullable: true
                 type: string
+              single_user_name:
+                description: Single user name if data_security_mode is SINGLE_USER
+                nullable: true
+                type: string
               spark_conf:
                 additionalProperties: true
                 description: An arbitrary object where the object key is a configuration propery name and the value is a configuration property value.
@@ -1275,4 +1313,6 @@ spec:
     served: true
     storage: true
     subresources: {}
+
+
 {{- end -}}
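
For reference, here is a sketch of how the new fields could look in a custom resource once these CRDs are installed. This manifest is illustrative rather than taken from the commit: the resource name, task, notebook path, and cluster sizing are assumptions, and field placement follows the schema above (queue under the job settings; data_security_mode and single_user_name under a task's new_cluster).

apiVersion: com.dstancu.databricks/v1
kind: DatabricksJob
metadata:
  name: example-job                          # hypothetical resource name
spec:
  job:
    settings:
      name: example-job
      queue:
        enabled: true                        # new: queue runs when the job is at capacity
      tasks:
        - task_key: main                     # hypothetical task
          notebook_task:
            notebook_path: /Shared/example   # hypothetical notebook
          new_cluster:
            spark_version: 13.3.x-scala2.12  # assumed runtime version
            node_type_id: m5.large           # assumed node type
            num_workers: 1
            data_security_mode: SINGLE_USER  # new enum field from this commit
            single_user_name: someone@example.com  # new: the single user when SINGLE_USER
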
1 change: 1 addition & 0 deletions charts/databricks-kube-operator/values.yaml
@@ -9,5 +9,6 @@ image:
 podAnnotations: {}
 nodeSelector:
   kubernetes.io/os: linux
+  kubernetes.io/arch: amd64
 resources: {}
 affinity: {}
2 changes: 1 addition & 1 deletion databricks-kube/Cargo.toml
@@ -5,7 +5,7 @@ path = "src/crdgen.rs"
 [package]
 name = "databricks_kube"
 default-run = "databricks_kube"
-version = "0.6.2"
+version = "0.7.0"
 edition = "2021"
 
 [dependencies]
1 change: 1 addition & 0 deletions databricks-kube/src/crds/databricks_job.rs
@@ -244,6 +244,7 @@ impl RemoteAPIResource<Job> for DatabricksJob {
                 git_source: job_settings.git_source,
                 format: job_settings.format.map(job_settings_to_create_format),
                 continuous: job_settings.continuous,
+                queue: job_settings.queue,
                 ..JobsCreateRequest::default()
             }
         ).await?;
33 changes: 32 additions & 1 deletion databricks-kube/src/traits/remote_api_resource.rs
@@ -1,6 +1,6 @@
 use std::{fmt::Debug, hash::Hash, pin::Pin, sync::Arc, time::Duration};
 
-use crate::{context::Context, error::DatabricksKubeError};
+use crate::{context::Context, error::{DatabricksKubeError, OpenAPIError}};
 
 use assert_json_diff::assert_json_matches_no_panic;
 use futures::{Future, FutureExt, Stream, StreamExt, TryFutureExt, TryStreamExt};
@@ -55,6 +55,37 @@
 
         match latest_remote {
             Err(DatabricksKubeError::IDUnsetError) => {
+                log::info!(
+                    "Resource {} {} does not have an ID set, creating...",
+                    TCRDType::api_resource().kind,
+                    resource.name_unchecked()
+                );
+
+                let created = resource
+                    .remote_create(context.clone())
+                    .next()
+                    .await
+                    .unwrap()?;
+
+                log::info!(
+                    "Created {} {} in Databricks",
+                    TCRDType::api_resource().kind,
+                    resource.name_unchecked()
+                );
+
+                kube_api
+                    .replace(&resource.name_unchecked(), &PostParams::default(), &created)
+                    .await
+                    .map_err(|e| DatabricksKubeError::ResourceUpdateError(e.to_string()))?;
+
+                log::info!(
+                    "Updated {} {} in K8S",
+                    TCRDType::api_resource().kind,
+                    resource.name_unchecked()
+                );
+            },
+            // TODO: stricter assertion that doesn't rely on dbx error presentation
+            Err(DatabricksKubeError::APIError(OpenAPIError::ResponseError(re))) if re.status == 400 && re.content.contains("does not exist") => {
                 log::info!(
                     "Resource {} {} is missing in Databricks, creating",
                     TCRDType::api_resource().kind,
2 changes: 2 additions & 0 deletions databricks-rust-jobs/.openapi-generator/FILES
@@ -45,6 +45,7 @@ docs/Job.md
 docs/JobCluster.md
 docs/JobEmailNotifications.md
 docs/JobSettings.md
+docs/JobSettingsQueue.md
 docs/JobTask.md
 docs/JobTaskSettings.md
 docs/JobsCreate200Response.md
@@ -176,6 +177,7 @@ src/models/job.rs
 src/models/job_cluster.rs
 src/models/job_email_notifications.rs
 src/models/job_settings.rs
+src/models/job_settings_queue.rs
 src/models/job_task.rs
 src/models/job_task_settings.rs
 src/models/jobs_create_200_response.rs
2 changes: 1 addition & 1 deletion databricks-rust-jobs/.openapi-generator/VERSION
@@ -1 +1 @@
-7.0.1
+7.2.0
1 change: 1 addition & 0 deletions databricks-rust-jobs/README.md
@@ -87,6 +87,7 @@ Class | Method | HTTP request | Description
 - [JobCluster](docs/JobCluster.md)
 - [JobEmailNotifications](docs/JobEmailNotifications.md)
 - [JobSettings](docs/JobSettings.md)
+- [JobSettingsQueue](docs/JobSettingsQueue.md)
 - [JobTask](docs/JobTask.md)
 - [JobTaskSettings](docs/JobTaskSettings.md)
 - [JobsCreate200Response](docs/JobsCreate200Response.md)
1 change: 1 addition & 0 deletions databricks-rust-jobs/docs/JobSettings.md
@@ -6,6 +6,7 @@ Name | Type | Description | Notes
 ------------ | ------------- | ------------- | -------------
 **name** | Option<**String**> | An optional name for the job. | [optional][default to Untitled]
 **tags** | Option<[**::std::collections::HashMap<String, serde_json::Value>**](serde_json::Value.md)> | A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job. | [optional][default to {}]
+**queue** | Option<[**crate::models::JobSettingsQueue**](JobSettings_queue.md)> | | [optional]
 **tasks** | Option<[**Vec<crate::models::JobTaskSettings>**](JobTaskSettings.md)> | A list of task specifications to be executed by this job. | [optional]
 **job_clusters** | Option<[**Vec<crate::models::JobCluster>**](JobCluster.md)> | A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. | [optional]
 **email_notifications** | Option<[**crate::models::JobEmailNotifications**](JobEmailNotifications.md)> | | [optional]
11 changes: 11 additions & 0 deletions databricks-rust-jobs/docs/JobSettingsQueue.md
@@ -0,0 +1,11 @@
+# JobSettingsQueue
+
+## Properties
+
+Name | Type | Description | Notes
+------------ | ------------- | ------------- | -------------
+**enabled** | Option<**bool**> | | [optional]
+
+[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md)
+
+
1 change: 1 addition & 0 deletions databricks-rust-jobs/docs/JobsCreateRequest.md
@@ -6,6 +6,7 @@ Name | Type | Description | Notes
 ------------ | ------------- | ------------- | -------------
 **name** | Option<**String**> | An optional name for the job. | [optional][default to Untitled]
 **tags** | Option<[**::std::collections::HashMap<String, serde_json::Value>**](serde_json::Value.md)> | A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job. | [optional][default to {}]
+**queue** | Option<[**crate::models::JobSettingsQueue**](JobSettings_queue.md)> | | [optional]
 **tasks** | Option<[**Vec<crate::models::JobTaskSettings>**](JobTaskSettings.md)> | A list of task specifications to be executed by this job. | [optional]
 **job_clusters** | Option<[**Vec<crate::models::JobCluster>**](JobCluster.md)> | A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. | [optional]
 **email_notifications** | Option<[**crate::models::JobEmailNotifications**](JobEmailNotifications.md)> | | [optional]
2 changes: 2 additions & 0 deletions databricks-rust-jobs/docs/NewCluster.md
@@ -10,6 +10,8 @@ Name | Type | Description | Notes
 **spark_conf** | Option<[**::std::collections::HashMap<String, serde_json::Value>**](serde_json::Value.md)> | An arbitrary object where the object key is a configuration propery name and the value is a configuration property value. | [optional]
 **aws_attributes** | Option<[**crate::models::AwsAttributes**](AwsAttributes.md)> | | [optional]
 **node_type_id** | Option<**String**> | This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads A list of available node types can be retrieved by using the [List node types](https://docs.databricks.com/dev-tools/api/latest/clusters.html#list-node-types) API call. | [optional]
+**data_security_mode** | Option<**String**> | Data security mode decides what data governance model to use when accessing data from a cluster. | [optional]
+**single_user_name** | Option<**String**> | Single user name if data_security_mode is SINGLE_USER | [optional]
 **driver_node_type_id** | Option<**String**> | The node type of the Spark driver. This field is optional; if unset, the driver node type is set as the same value as `node_type_id` defined above. | [optional]
 **ssh_public_keys** | Option<**Vec<String>**> | SSH public key contents that are added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be specified. | [optional]
 **custom_tags** | Option<[**::std::collections::HashMap<String, serde_json::Value>**](serde_json::Value.md)> | An object with key value pairs. The key length must be between 1 and 127 UTF-8 characters, inclusive. The value length must be less than or equal to 255 UTF-8 characters. For a list of all restrictions, see AWS Tag Restrictions: <https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Using_Tags.html#tag-restrictions> | [optional]
2 changes: 2 additions & 0 deletions databricks-rust-jobs/docs/NewTaskCluster.md
@@ -10,6 +10,8 @@ Name | Type | Description | Notes
 **spark_conf** | Option<[**::std::collections::HashMap<String, serde_json::Value>**](serde_json::Value.md)> | An arbitrary object where the object key is a configuration propery name and the value is a configuration property value. | [optional]
 **aws_attributes** | Option<[**crate::models::AwsAttributes**](AwsAttributes.md)> | | [optional]
 **node_type_id** | Option<**String**> | This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads A list of available node types can be retrieved by using the [List node types](https://docs.databricks.com/dev-tools/api/latest/clusters.html#list-node-types) API call. | [optional]
+**data_security_mode** | Option<**String**> | Data security mode decides what data governance model to use when accessing data from a cluster. | [optional]
+**single_user_name** | Option<**String**> | Single user name if data_security_mode is SINGLE_USER | [optional]
 **driver_node_type_id** | Option<**String**> | The node type of the Spark driver. This field is optional; if unset, the driver node type is set as the same value as `node_type_id` defined above. | [optional]
 **ssh_public_keys** | Option<**Vec<String>**> | SSH public key contents that are added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be specified. | [optional]
 **custom_tags** | Option<[**::std::collections::HashMap<String, serde_json::Value>**](serde_json::Value.md)> | An object with key value pairs. The key length must be between 1 and 127 UTF-8 characters, inclusive. The value length must be less than or equal to 255 UTF-8 characters. For a list of all restrictions, see AWS Tag Restrictions: <https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Using_Tags.html#tag-restrictions> | [optional]
3 changes: 3 additions & 0 deletions databricks-rust-jobs/src/models/job_settings.rs
@@ -20,6 +20,8 @@ pub struct JobSettings {
     /// A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job.
     #[serde(rename = "tags", skip_serializing_if = "Option::is_none")]
     pub tags: Option<::std::collections::HashMap<String, serde_json::Value>>,
+    #[serde(rename = "queue", skip_serializing_if = "Option::is_none")]
+    pub queue: Option<Box<crate::models::JobSettingsQueue>>,
     /// A list of task specifications to be executed by this job.
     #[serde(rename = "tasks", skip_serializing_if = "Option::is_none")]
     pub tasks: Option<Vec<crate::models::JobTaskSettings>>,
@@ -52,6 +54,7 @@ impl JobSettings {
         JobSettings {
             name: None,
             tags: None,
+            queue: None,
             tasks: None,
             job_clusters: None,
             email_notifications: None,
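
Because queue is marked skip_serializing_if = "Option::is_none", it only appears in the serialized jobs/create body when it is set, so existing manifests are unaffected. A minimal sketch of the resulting request payload, rendered as YAML for readability (the job name, task, and notebook path are hypothetical):

name: example-job
queue:
  enabled: true          # maps to JobSettingsQueue { enabled: Some(true) }
tasks:
  - task_key: main
    notebook_task:
      notebook_path: /Shared/example
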
(Diff truncated: the remaining changed files are not rendered here.)
