diff --git a/examples/README.md b/examples/README.md index 9890286175..511de953e1 100644 --- a/examples/README.md +++ b/examples/README.md @@ -42,6 +42,9 @@ md_toc github examples/README.md | sed -e "s/\s-\s/ * /" * [hpc-gke.yaml](#hpc-gkeyaml--) ![core-badge] ![experimental-badge] * [ml-gke](#ml-gkeyaml--) ![core-badge] ![experimental-badge] * [storage-gke](#storage-gkeyaml--) ![core-badge] ![experimental-badge] + * [gke-managed-hyperdisk.yaml](#gke-managed-hyperdiskyaml--) ![core-badge] ![experimental-badge] + * [gke-managed-parallelstore.yaml](#gke-managed-parallelstoreyaml--) ![core-badge] ![experimental-badge] + * [gke-a3-ultragpu.yaml](#gke-a3-ultragpuyaml--) ![core-badge] ![experimental-badge] * [gke-a3-megagpu](#gke-a3-megagpuyaml--) ![core-badge] ![experimental-badge] * [gke-a3-highgpu](#gke-a3-highgpuyaml--) ![core-badge] ![experimental-badge] * [htc-slurm.yaml](#htc-slurmyaml-) ![community-badge] @@ -1141,6 +1144,12 @@ The blueprint contains the following: [gke-managed-parallelstore.yaml]: ../examples/gke-managed-parallelstore.yaml +### [gke-a3-ultragpu.yaml] ![core-badge] ![experimental-badge] + +Refer to [AI Hypercomputer Documentation](https://cloud.google.com/ai-hypercomputer/docs/create/gke-ai-hypercompute#create-cluster) for instructions. + +[gke-a3-ultragpu.yaml]: ../examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml + ### [gke-a3-megagpu.yaml] ![core-badge] ![experimental-badge] This blueprint shows how to provision a GKE cluster with A3 Mega machines in the toolkit. 
diff --git a/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml b/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml index 1d209e732d..d03903bd88 100644 --- a/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml +++ b/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml @@ -97,6 +97,7 @@ deployment_groups: source: modules/scheduler/gke-cluster use: [gke-a3-ultra-net-0] settings: + release_channel: RAPID system_node_pool_machine_type: "e2-standard-16" system_node_pool_disk_size_gb: $(vars.system_node_pool_disk_size_gb) system_node_pool_taints: [] @@ -106,31 +107,13 @@ deployment_groups: master_authorized_networks: - cidr_block: $(vars.authorized_cidr) # Allows your machine to run the kubectl command. Required for multi network setup. display_name: "kubectl-access-network" - additional_networks: - $(concat( - [{ - network=gke-a3-ultra-net-1.network_name, - subnetwork=gke-a3-ultra-net-1.subnetwork_name, - subnetwork_project=vars.project_id, - nic_type="GVNIC", - queue_count=null, - network_ip=null, - stack_type=null, - access_config=[{nat_ip=null, public_ptr_domain_name=null, network_tier=null}], - ipv6_access_config=[], - alias_ip_range=[] - }], - gke-a3-ultra-rdma-net.subnetwork_interfaces_gke - )) - # Cluster versions cannot be updated through the toolkit after creation - # Please manage cluster version from the Google Cloud Console directly - version_prefix: "1.31." 
- release_channel: RAPID maintenance_exclusions: - name: no-minor-or-node-upgrades-indefinite start_time: "2024-12-01T00:00:00Z" end_time: "2025-12-22T00:00:00Z" exclusion_scope: NO_MINOR_OR_NODE_UPGRADES + additional_networks: $(gke-a3-ultra-net-1.additional_networks_gke) + additional_networks_rdma: $(gke-a3-ultra-rdma-net.subnetwork_interfaces_gke) outputs: [instructions] - id: a3-ultragpu-pool @@ -152,22 +135,8 @@ deployment_groups: consume_reservation_type: SPECIFIC_RESERVATION specific_reservations: - name: $(vars.extended_reservation) - additional_networks: - $(concat( - [{ - network=gke-a3-ultra-net-1.network_name, - subnetwork=gke-a3-ultra-net-1.subnetwork_name, - subnetwork_project=vars.project_id, - nic_type="GVNIC", - queue_count=null, - network_ip=null, - stack_type=null, - access_config=[{nat_ip=null, public_ptr_domain_name=null, network_tier=null}], - ipv6_access_config=[], - alias_ip_range=[] - }], - gke-a3-ultra-rdma-net.subnetwork_interfaces_gke - )) + additional_networks: $(gke-a3-ultra-net-1.additional_networks_gke) + additional_networks_rdma: $(gke-a3-ultra-rdma-net.subnetwork_interfaces_gke) outputs: [instructions] - id: workload-manager-install diff --git a/modules/compute/gke-node-pool/README.md b/modules/compute/gke-node-pool/README.md index 1f4f255920..48f01c56e0 100644 --- a/modules/compute/gke-node-pool/README.md +++ b/modules/compute/gke-node-pool/README.md @@ -314,6 +314,7 @@ limitations under the License. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [additional\_networks](#input\_additional\_networks) | Additional network interface details for GKE, if any. Providing additional networks adds additional node networks to the node pool |
list(object({| `[]` | no | +| [additional\_networks\_rdma](#input\_additional\_networks\_rdma) | Additional rdma network interface details for GKE, if any. Providing additional networks adds additional node networks to the node pool |
network = string
subnetwork = string
subnetwork_project = string
network_ip = string
nic_type = string
stack_type = string
queue_count = number
access_config = list(object({
nat_ip = string
network_tier = string
}))
ipv6_access_config = list(object({
network_tier = string
}))
alias_ip_range = list(object({
ip_cidr_range = string
subnetwork_range_name = string
}))
}))
list(object({| `[]` | no | | [auto\_upgrade](#input\_auto\_upgrade) | Whether the nodes will be automatically upgraded. | `bool` | `false` | no | | [autoscaling\_total\_max\_nodes](#input\_autoscaling\_total\_max\_nodes) | Total maximum number of nodes in the NodePool. | `number` | `1000` | no | | [autoscaling\_total\_min\_nodes](#input\_autoscaling\_total\_min\_nodes) | Total minimum number of nodes in the NodePool. | `number` | `0` | no | diff --git a/modules/compute/gke-node-pool/main.tf b/modules/compute/gke-node-pool/main.tf index 8662cc4427..f223241240 100644 --- a/modules/compute/gke-node-pool/main.tf +++ b/modules/compute/gke-node-pool/main.tf @@ -27,6 +27,13 @@ locals { } } +locals { + all_additional_networks = concat( + var.additional_networks, + var.additional_networks_rdma + ) +} + module "gpu" { source = "../../internal/gpu-definition" @@ -228,7 +235,7 @@ resource "google_container_node_pool" "node_pool" { network_config { dynamic "additional_node_network_configs" { - for_each = var.additional_networks + for_each = local.all_additional_networks content { network = additional_node_network_configs.value.network diff --git a/modules/compute/gke-node-pool/variables.tf b/modules/compute/gke-node-pool/variables.tf index b15fc3f3ef..5c1d789d05 100644 --- a/modules/compute/gke-node-pool/variables.tf +++ b/modules/compute/gke-node-pool/variables.tf @@ -345,6 +345,32 @@ variable "additional_networks" { nullable = false } +variable "additional_networks_rdma" { + description = "Additional rdma network interface details for GKE, if any. 
Providing additional networks adds additional node networks to the node pool" + default = [] + type = list(object({ + network = string + subnetwork = string + subnetwork_project = string + network_ip = string + nic_type = string + stack_type = string + queue_count = number + access_config = list(object({ + nat_ip = string + network_tier = string + })) + ipv6_access_config = list(object({ + network_tier = string + })) + alias_ip_range = list(object({ + ip_cidr_range = string + subnetwork_range_name = string + })) + })) + nullable = false +} + variable "reservation_affinity" { description = <<-EOT Reservation resource to consume. When targeting SPECIFIC_RESERVATION, specific_reservations needs be specified. diff --git a/modules/network/vpc/README.md b/modules/network/vpc/README.md index d2275d19be..2ae7f81cb5 100644 --- a/modules/network/vpc/README.md +++ b/modules/network/vpc/README.md @@ -224,6 +224,7 @@ limitations under the License. | Name | Description | |------|-------------| +| [additional\_networks\_gke](#output\_additional\_networks\_gke) | Full list of subnetwork objects belonging to the new VPC network (compatible with gke-node-pool) | | [nat\_ips](#output\_nat\_ips) | External IPs of the Cloud NAT from which outbound internet traffic will arrive (empty list if no NAT is used) | | [network\_id](#output\_network\_id) | ID of the new VPC network | | [network\_name](#output\_network\_name) | Name of the new VPC network | diff --git a/modules/network/vpc/main.tf b/modules/network/vpc/main.tf index 2662757043..268ff7e563 100644 --- a/modules/network/vpc/main.tf +++ b/modules/network/vpc/main.tf @@ -167,6 +167,21 @@ locals { for secondary_range in var.secondary_ranges_list : secondary_range.subnetwork_name => secondary_range.ranges } + + output_additional_networks_gke = [ + { + network = local.network_name + subnetwork = local.output_primary_subnetwork_name + subnetwork_project = var.project_id + network_ip = null + nic_type = "GVNIC" + stack_type = null + 
queue_count = null + access_config = [] + ipv6_access_config = [] + alias_ip_range = [] + } + ] } module "vpc" { diff --git a/modules/network/vpc/outputs.tf b/modules/network/vpc/outputs.tf index c2ee6bdf6b..db8f05975f 100644 --- a/modules/network/vpc/outputs.tf +++ b/modules/network/vpc/outputs.tf @@ -66,3 +66,9 @@ output "nat_ips" { description = "External IPs of the Cloud NAT from which outbound internet traffic will arrive (empty list if no NAT is used)" value = flatten([for ipmod in module.nat_ip_addresses : ipmod.addresses]) } + +output "additional_networks_gke" { + description = "Full list of subnetwork objects belonging to the new VPC network (compatible with gke-node-pool)" + value = local.output_additional_networks_gke + depends_on = [module.vpc, module.cloud_router] +} diff --git a/modules/scheduler/gke-cluster/README.md b/modules/scheduler/gke-cluster/README.md index 84dfed0d7e..8a26daa8b9 100644 --- a/modules/scheduler/gke-cluster/README.md +++ b/modules/scheduler/gke-cluster/README.md @@ -140,6 +140,7 @@ limitations under the License. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [additional\_networks](#input\_additional\_networks) | Additional network interface details for GKE, if any. Providing additional networks enables multi networking and creates relevat network objects on the cluster. |
network = string
subnetwork = string
subnetwork_project = string
network_ip = string
nic_type = string
stack_type = string
queue_count = number
access_config = list(object({
nat_ip = string
network_tier = string
}))
ipv6_access_config = list(object({
network_tier = string
}))
alias_ip_range = list(object({
ip_cidr_range = string
subnetwork_range_name = string
}))
}))
list(object({| `[]` | no | +| [additional\_networks\_rdma](#input\_additional\_networks\_rdma) | Additional rdma network interface details for GKE, if any. Providing additional networks enables multi networking and creates relevant network objects on the cluster. |
network = string
subnetwork = string
subnetwork_project = string
network_ip = string
nic_type = string
stack_type = string
queue_count = number
access_config = list(object({
nat_ip = string
network_tier = string
}))
ipv6_access_config = list(object({
network_tier = string
}))
alias_ip_range = list(object({
ip_cidr_range = string
subnetwork_range_name = string
}))
}))
list(object({| `[]` | no | | [authenticator\_security\_group](#input\_authenticator\_security\_group) | The name of the RBAC security group for use with Google security groups in Kubernetes RBAC. Group name must be in format gke-security-groups@yourdomain.com | `string` | `null` | no | | [autoscaling\_profile](#input\_autoscaling\_profile) | (Beta) Optimize for utilization or availability when deciding to remove nodes. Can be BALANCED or OPTIMIZE\_UTILIZATION. | `string` | `"OPTIMIZE_UTILIZATION"` | no | | [cluster\_availability\_type](#input\_cluster\_availability\_type) | Type of cluster availability. Possible values are: {REGIONAL, ZONAL} | `string` | `"REGIONAL"` | no | diff --git a/modules/scheduler/gke-cluster/main.tf b/modules/scheduler/gke-cluster/main.tf index 21dfc1c40e..52b9ce98e3 100644 --- a/modules/scheduler/gke-cluster/main.tf +++ b/modules/scheduler/gke-cluster/main.tf @@ -27,6 +27,13 @@ locals { } } +locals { + all_additional_networks = concat( + var.additional_networks, + var.additional_networks_rdma + ) +} + locals { dash = var.prefix_with_deployment_name && var.name_suffix != "" ? "-" : "" prefix = var.prefix_with_deployment_name ? var.deployment_name : "" @@ -41,7 +48,7 @@ locals { sa_email = coalesce(var.service_account_email, local.default_sa_email) # additional VPCs enable multi networking - derived_enable_multi_networking = coalesce(var.enable_multi_networking, length(var.additional_networks) > 0) + derived_enable_multi_networking = coalesce(var.enable_multi_networking, length(local.all_additional_networks) > 0) # multi networking needs enabled Dataplane v2 derived_enable_dataplane_v2 = coalesce(var.enable_dataplane_v2, local.derived_enable_multi_networking) @@ -234,7 +241,7 @@ resource "google_container_cluster" "gke_cluster" { error_message = "'enable_dataplane_v2' cannot be false when enabling multi networking." 
} precondition { - condition = coalesce(var.enable_multi_networking, true) || length(var.additional_networks) == 0 + condition = coalesce(var.enable_multi_networking, true) || length(local.all_additional_networks) == 0 error_message = "'enable_multi_networking' cannot be false when using multivpc module, which passes additional_networks." } } @@ -383,7 +390,7 @@ module "kubectl_apply" { gke_cluster_exists = true apply_manifests = flatten([ - for idx, network_info in var.additional_networks : [ + for idx, network_info in local.all_additional_networks : [ { source = "${path.module}/templates/gke-network-paramset.yaml.tftpl", template_vars = { diff --git a/modules/scheduler/gke-cluster/variables.tf b/modules/scheduler/gke-cluster/variables.tf index a181c58239..e1e087bb4d 100644 --- a/modules/scheduler/gke-cluster/variables.tf +++ b/modules/scheduler/gke-cluster/variables.tf @@ -370,6 +370,31 @@ variable "additional_networks" { })) } +variable "additional_networks_rdma" { + description = "Additional rdma network interface details for GKE, if any. Providing additional networks enables multi networking and creates relevant network objects on the cluster." + default = [] + type = list(object({ + network = string + subnetwork = string + subnetwork_project = string + network_ip = string + nic_type = string + stack_type = string + queue_count = number + access_config = list(object({ + nat_ip = string + network_tier = string + })) + ipv6_access_config = list(object({ + network_tier = string + })) + alias_ip_range = list(object({ + ip_cidr_range = string + subnetwork_range_name = string + })) + })) +} + variable "cluster_reference_type" { description = "How the google_container_node_pool.system_node_pools refers to the cluster. Possible values are: {SELF_LINK, NAME}" default = "SELF_LINK"
network = string
subnetwork = string
subnetwork_project = string
network_ip = string
nic_type = string
stack_type = string
queue_count = number
access_config = list(object({
nat_ip = string
network_tier = string
}))
ipv6_access_config = list(object({
network_tier = string
}))
alias_ip_range = list(object({
ip_cidr_range = string
subnetwork_range_name = string
}))
}))