From 4b02eec604410918b6cc0302e5fcb0711890e5a4 Mon Sep 17 00:00:00 2001 From: Garvit-77 Date: Mon, 13 Jan 2025 19:42:48 +0530 Subject: [PATCH 1/7] resolves kubeflow/training/#2279 Signed-off-by: Garvit-77 --- .../training/user-guides/managedby.md | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 content/en/docs/components/training/user-guides/managedby.md diff --git a/content/en/docs/components/training/user-guides/managedby.md b/content/en/docs/components/training/user-guides/managedby.md new file mode 100644 index 0000000000..0000bf018e --- /dev/null +++ b/content/en/docs/components/training/user-guides/managedby.md @@ -0,0 +1,65 @@ ++++ +title = "spec.managedBy" +Desciption = "Using managedBy feild for MultiKueue" +weight = 60 ++++ + +## Overview + +The `spec.managedBy` field is a new feature introduced for MultiQueue support in the Kubeflow Training Operator. This field allows for more robust management of multi-cluster job dispatching by specifying the managing entity. + +## Prerequisites + +1. Ensure that you have the latest version of the Kubeflow Training Operator installed. +2. Make sure Kueue is compiled against the new operator to leverage the `spec.managedBy` field. + +## Usage + +To use the `spec.managedBy` field in your training jobs, include it in the job specification as shown below: + +```yaml +apiVersion: "kubeflow.org/v1" +kind: "TFJob" +metadata: + name: "example-tfjob" +spec: + managedBy: "kueue" + tfReplicaSpecs: + ... 
+``` + +## Example + +Here is a complete example of a TensorFlow job using the `spec.managedBy` field: + +```yaml +apiVersion: "kubeflow.org/v1" +kind: "TFJob" +metadata: + name: "example-tfjob" +spec: + managedBy: "kueue" + tfReplicaSpecs: + Chief: + replicas: 1 + template: + spec: + containers: + - name: tensorflow + image: tensorflow/tensorflow:latest + args: ["python", "model.py"] + Worker: + replicas: 2 + template: + spec: + containers: + - name: tensorflow + image: tensorflow/tensorflow:latest + args: ["python", "model.py"] +``` + +## Cross-References + +For more details on setting up and using MultiQueue with the Kubeflow Training Operator, refer to the following documentation pages: + +- [Kueue/Kubeflow](https://kueue.sigs.k8s.io/docs/tasks/run/multikueue/kubeflow/) \ No newline at end of file From fe517b1e4332a2dc1edd9cc0390649665c78a1e0 Mon Sep 17 00:00:00 2001 From: Garvit Khandelwal <70192868+Garvit-77@users.noreply.github.com> Date: Fri, 24 Jan 2025 01:39:42 +0530 Subject: [PATCH 2/7] Update content/en/docs/components/training/user-guides/managedby.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Michał Woźniak Signed-off-by: Garvit Khandelwal <70192868+Garvit-77@users.noreply.github.com> --- content/en/docs/components/training/user-guides/managedby.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/en/docs/components/training/user-guides/managedby.md b/content/en/docs/components/training/user-guides/managedby.md index 0000bf018e..0c9d79404a 100644 --- a/content/en/docs/components/training/user-guides/managedby.md +++ b/content/en/docs/components/training/user-guides/managedby.md @@ -1,6 +1,6 @@ +++ title = "spec.managedBy" -Desciption = "Using managedBy feild for MultiKueue" +description = "Using managedBy field for MultiKueue" weight = 60 +++ From af5ac9e6561a89b7b0476a2b95daed569bf71591 Mon Sep 17 00:00:00 2001 From: Garvit Khandelwal 
<70192868+Garvit-77@users.noreply.github.com> Date: Fri, 24 Jan 2025 01:39:56 +0530 Subject: [PATCH 3/7] Update content/en/docs/components/training/user-guides/managedby.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Michał Woźniak Signed-off-by: Garvit Khandelwal <70192868+Garvit-77@users.noreply.github.com> --- content/en/docs/components/training/user-guides/managedby.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/en/docs/components/training/user-guides/managedby.md b/content/en/docs/components/training/user-guides/managedby.md index 0c9d79404a..925659886d 100644 --- a/content/en/docs/components/training/user-guides/managedby.md +++ b/content/en/docs/components/training/user-guides/managedby.md @@ -6,7 +6,7 @@ weight = 60 ## Overview -The `spec.managedBy` field is a new feature introduced for MultiQueue support in the Kubeflow Training Operator. This field allows for more robust management of multi-cluster job dispatching by specifying the managing entity. +The `spec.managedBy` field is a new feature introduced in the Kubeflow Training Operator to support more robust multi-cluster job dispatching by [MultiKueue](https://kueue.sigs.k8s.io/docs/concepts/multikueue/). 
## Prerequisites From 2c2cd7dbd226312da27776275ec6d18bbd11cbc7 Mon Sep 17 00:00:00 2001 From: Garvit Khandelwal <70192868+Garvit-77@users.noreply.github.com> Date: Fri, 24 Jan 2025 01:40:05 +0530 Subject: [PATCH 4/7] Update content/en/docs/components/training/user-guides/managedby.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Michał Woźniak Signed-off-by: Garvit Khandelwal <70192868+Garvit-77@users.noreply.github.com> --- content/en/docs/components/training/user-guides/managedby.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/en/docs/components/training/user-guides/managedby.md b/content/en/docs/components/training/user-guides/managedby.md index 925659886d..048dae80e8 100644 --- a/content/en/docs/components/training/user-guides/managedby.md +++ b/content/en/docs/components/training/user-guides/managedby.md @@ -60,6 +60,6 @@ spec: ## Cross-References -For more details on setting up and using MultiQueue with the Kubeflow Training Operator, refer to the following documentation pages: +For more details on setting up and using MultiKueue with the Kubeflow Training Operator, refer to the following documentation pages: - [Kueue/Kubeflow](https://kueue.sigs.k8s.io/docs/tasks/run/multikueue/kubeflow/) \ No newline at end of file From cbac0ed810a458dc4150a4a405106edc173c2ec4 Mon Sep 17 00:00:00 2001 From: Garvit Khandelwal <70192868+Garvit-77@users.noreply.github.com> Date: Fri, 24 Jan 2025 01:40:16 +0530 Subject: [PATCH 5/7] Update content/en/docs/components/training/user-guides/managedby.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Michał Woźniak Signed-off-by: Garvit Khandelwal <70192868+Garvit-77@users.noreply.github.com> --- content/en/docs/components/training/user-guides/managedby.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/en/docs/components/training/user-guides/managedby.md 
b/content/en/docs/components/training/user-guides/managedby.md index 048dae80e8..5f632289ec 100644 --- a/content/en/docs/components/training/user-guides/managedby.md +++ b/content/en/docs/components/training/user-guides/managedby.md @@ -58,7 +58,7 @@ spec: args: ["python", "model.py"] ``` -## Cross-References +## What's next? For more details on setting up and using MultiKueue with the Kubeflow Training Operator, refer to the following documentation pages: From 713beb764ef2c3138e832a0bd5f7273e60bed009 Mon Sep 17 00:00:00 2001 From: Garvit Khandelwal <70192868+Garvit-77@users.noreply.github.com> Date: Fri, 24 Jan 2025 01:40:24 +0530 Subject: [PATCH 6/7] Update content/en/docs/components/training/user-guides/managedby.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Michał Woźniak Signed-off-by: Garvit Khandelwal <70192868+Garvit-77@users.noreply.github.com> --- content/en/docs/components/training/user-guides/managedby.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/en/docs/components/training/user-guides/managedby.md b/content/en/docs/components/training/user-guides/managedby.md index 5f632289ec..a3ab3dff5d 100644 --- a/content/en/docs/components/training/user-guides/managedby.md +++ b/content/en/docs/components/training/user-guides/managedby.md @@ -23,7 +23,7 @@ kind: "TFJob" metadata: name: "example-tfjob" spec: - managedBy: "kueue" + managedBy: "kueue.x-k8s.io/multikueue" tfReplicaSpecs: ... 
``` From 0df155e35a21ce880f0396edc59b6286205abddf Mon Sep 17 00:00:00 2001 From: Garvit Khandelwal <70192868+Garvit-77@users.noreply.github.com> Date: Fri, 24 Jan 2025 22:15:37 +0530 Subject: [PATCH 7/7] Update content/en/docs/components/training/user-guides/managedby.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Michał Woźniak Signed-off-by: Garvit Khandelwal <70192868+Garvit-77@users.noreply.github.com> --- content/en/docs/components/training/user-guides/managedby.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/en/docs/components/training/user-guides/managedby.md b/content/en/docs/components/training/user-guides/managedby.md index a3ab3dff5d..e8bdb7261f 100644 --- a/content/en/docs/components/training/user-guides/managedby.md +++ b/content/en/docs/components/training/user-guides/managedby.md @@ -38,7 +38,7 @@ kind: "TFJob" metadata: name: "example-tfjob" spec: - managedBy: "kueue" + managedBy: "kueue.x-k8s.io/multikueue" tfReplicaSpecs: Chief: replicas: 1