Website: https://www.cast.ai
- Terraform 0.13+
A module to connect an EKS cluster to CAST AI.
Requires the `castai/castai` and `hashicorp/aws` providers to be configured.
// Full usage example: connects an EKS cluster to CAST AI and declares one node
// configuration, one node template and the autoscaler settings.
module "castai-eks-cluster" {
  source = "castai/eks-cluster/castai"

  aws_account_id      = var.aws_account_id
  aws_cluster_region  = var.cluster_region
  aws_cluster_name    = var.cluster_id
  aws_assume_role_arn = module.castai-eks-role-iam.role_arn

  // Deprecated attribute, shown only for backward compatibility; use autoscaler_settings (below) instead.
  autoscaler_policies_json = var.autoscaler_policies_json

  // Default node configuration will be used for all CAST provisioned nodes unless specific configuration is requested.
  // The reference uses this module's own name ("castai-eks-cluster") to read its castai_node_configurations output.
  default_node_configuration = module.castai-eks-cluster.castai_node_configurations["default"]

  node_configurations = {
    default = {
      subnets                   = module.vpc.private_subnets
      dns_cluster_ip            = "10.100.0.10"
      instance_profile_role_arn = var.instance_profile_arn
      ssh_public_key            = var.ssh_public_key
      security_groups = [
        module.eks.node_security_group_id,
      ]
      tags = {
        "team" : "core"
      }
      init_script = base64encode(var.init_script)
      docker_config = jsonencode({
        "insecure-registries"      = ["registry.com:5000"],
        "max-concurrent-downloads" = 10
      })
      kubelet_config = jsonencode({
        "registryBurst" : 20,
        "registryPullQPS" : 10
      })
      container_runtime = "dockerd"
    }
  }

  node_templates = {
    spot_tmpl = {
      // Must point at a configuration created by this module, hence the "castai-eks-cluster" module name.
      configuration_id = module.castai-eks-cluster.castai_node_configurations["default"]
      should_taint     = true

      custom_labels = {
        custom-label-key-1 = "custom-label-value-1"
        custom-label-key-2 = "custom-label-value-2"
      }

      custom_taints = [
        {
          key   = "custom-taint-key-1"
          value = "custom-taint-value-1"
        },
        {
          key   = "custom-taint-key-2"
          value = "custom-taint-value-2"
        }
      ]

      constraints = {
        fallback_restore_rate_seconds = 1800
        spot                          = true
        use_spot_fallbacks            = true
        min_cpu                       = 4
        max_cpu                       = 100
        instance_families = {
          exclude = ["m5"]
        }
        compute_optimized_state = "disabled"
        storage_optimized_state = "disabled"
        is_gpu_only             = false
        architectures           = ["amd64"]
      }
    }
  }

  autoscaler_settings = {
    enabled                                 = true
    node_templates_partial_matching_enabled = false

    unschedulable_pods = {
      enabled = true

      headroom = {
        enabled           = true
        cpu_percentage    = 10
        memory_percentage = 10
      }

      headroom_spot = {
        enabled           = true
        cpu_percentage    = 10
        memory_percentage = 10
      }
    }

    node_downscaler = {
      enabled = true

      empty_nodes = {
        enabled = true
      }

      evictor = {
        aggressive_mode = false
        // Duration string; matches the "5m10s" value used by the other examples in this README.
        cycle_interval            = "5m10s"
        dry_run                   = false
        enabled                   = true
        node_grace_period_minutes = 10
        scoped_mode               = false
      }
    }

    cluster_limits = {
      enabled = true

      cpu = {
        max_cores = 20
        min_cores = 1
      }
    }
  }
}
Existing configuration:
// Legacy layout: node settings are passed directly as top-level module arguments.
module "castai-eks-cluster" {
// ...
subnets = module.vpc.private_subnets
dns_cluster_ip = "10.100.0.10"
instance_profile_role_arn = var.instance_profile_arn
ssh_public_key = var.ssh_public_key
// The replacement configuration in this README passes the same value as
// security_groups inside node_configurations.
override_security_groups = [
module.eks.node_security_group_id,
]
tags = {
"team" : "core"
}
}
New configuration:
// New layout: node settings are grouped under a named entry of node_configurations.
module "castai-eks-cluster" {
  // ...

  // Default node configuration will be used for all CAST provisioned nodes unless specific configuration is requested.
  // The reference uses this module's own name ("castai-eks-cluster") to read its castai_node_configurations output.
  default_node_configuration = module.castai-eks-cluster.castai_node_configurations["default"]

  node_configurations = {
    default = {
      subnets                   = module.vpc.private_subnets
      dns_cluster_ip            = "10.100.0.10"
      instance_profile_role_arn = var.instance_profile_arn
      ssh_public_key            = var.ssh_public_key
      security_groups = [
        module.eks.node_security_group_id,
      ]
      tags = {
        "team" : "core"
      }
    }
  }
}
Existing configuration:
// Legacy layout: spot-related settings live in the "spotInstances" section of
// the autoscaler policies JSON.
module "castai-eks-cluster" {
// ...
node_templates = {
// ...
}
// Policies are passed as a raw JSON string via an indented heredoc (<<-EOT).
autoscaler_policies_json = <<-EOT
{
"enabled": true,
"unschedulablePods": {
"enabled": true
},
"spotInstances": {
"enabled": true,
"clouds": ["aws"],
"spotBackups": {
"enabled": true
},
"spotDiversityEnabled": false,
"spotDiversityPriceIncreaseLimitPercent": 20,
"spotInterruptionPredictions": {
"enabled": true,
"type": "AWSRebalanceRecommendations"
}
},
"nodeDownscaler": {
"enabled": true,
"emptyNodes": {
"enabled": true
},
"evictor": {
"aggressiveMode": true,
"cycleInterval": "5m10s",
"dryRun": false,
"enabled": true,
"nodeGracePeriodMinutes": 10,
"scopedMode": false
}
}
}
EOT
}
New configuration:
// New layout: spot settings are expressed as constraints of the default node
// template, and the policies JSON no longer contains a "spotInstances" section.
module "castai-eks-cluster" {
// ...
node_templates = {
default_by_castai = {
name = "default-by-castai"
configuration_id = module.castai-eks-cluster.castai_node_configurations["default"]
// Marks this template as the default one.
is_default = true
should_taint = false
constraints = {
on_demand = true
spot = true
use_spot_fallbacks = true
enable_spot_diversity = false
spot_diversity_price_increase_limit_percent = 20
spot_interruption_predictions_enabled = true
spot_interruption_predictions_type = "aws-rebalance-recommendations"
}
}
}
// Remaining (non-spot) policies are still passed as a raw JSON heredoc string.
autoscaler_policies_json = <<-EOT
{
"enabled": true,
"unschedulablePods": {
"enabled": true
},
"nodeDownscaler": {
"enabled": true,
"emptyNodes": {
"enabled": true
},
"evictor": {
"aggressiveMode": true,
"cycleInterval": "5m10s",
"dryRun": false,
"enabled": true,
"nodeGracePeriodMinutes": 10,
"scopedMode": false
}
}
}
EOT
}
Version 7.x.x changed:
- Removed the `custom_label` attribute from the `castai_node_template` resource. Use `custom_labels` instead.
Old configuration:
// Old form: a single custom_label object with separate key and value attributes.
module "castai-eks-cluster" {
// ...
node_templates = {
spot_tmpl = {
custom_label = {
key = "custom-label-key-1"
value = "custom-label-value-1"
}
}
}
}
New configuration:
// New form: custom_labels is a map from label key to label value.
module "castai-eks-cluster" {
// ...
node_templates = {
spot_tmpl = {
custom_labels = {
custom-label-key-1 = "custom-label-value-1"
}
}
}
}
Version 8.x.x changed:
- Removed the `compute_optimized` and `storage_optimized` attributes from the `constraints` object of the `castai_node_template` resource. Use `compute_optimized_state` and `storage_optimized_state` instead.
Old configuration:
// Old form: boolean compute_optimized / storage_optimized flags inside constraints.
module "castai-eks-cluster" {
node_templates = {
spot_tmpl = {
constraints = {
compute_optimized = false
storage_optimized = true
}
}
}
}
New configuration:
// New form: string-valued *_state attributes; in this example the former
// false becomes "disabled" and the former true becomes "enabled".
module "castai-eks-cluster" {
node_templates = {
spot_tmpl = {
constraints = {
compute_optimized_state = "disabled"
storage_optimized_state = "enabled"
}
}
}
}
Version 9.3.x changed:
- Deprecated the `autoscaler_policies_json` attribute. Use `autoscaler_settings` instead.
Old configuration:
// Deprecated form: autoscaler policies supplied as a raw JSON heredoc string
// with camelCase keys.
module "castai-eks-cluster" {
autoscaler_policies_json = <<-EOT
{
"enabled": true,
"unschedulablePods": {
"enabled": true
},
"nodeDownscaler": {
"enabled": true,
"emptyNodes": {
"enabled": true
},
"evictor": {
"aggressiveMode": false,
"cycleInterval": "5m10s",
"dryRun": false,
"enabled": true,
"nodeGracePeriodMinutes": 10,
"scopedMode": false
}
},
"nodeTemplatesPartialMatchingEnabled": false,
"clusterLimits": {
"cpu": {
"maxCores": 20,
"minCores": 1
},
"enabled": true
}
}
EOT
}
New configuration:
// Replacement form: the same policy values expressed as the autoscaler_settings
// object, using snake_case attribute names instead of camelCase JSON keys.
module "castai-eks-cluster" {
autoscaler_settings = {
enabled = true
node_templates_partial_matching_enabled = false
unschedulable_pods = {
enabled = true
}
node_downscaler = {
enabled = true
empty_nodes = {
enabled = true
}
evictor = {
aggressive_mode = false
cycle_interval = "5m10s"
dry_run = false
enabled = true
node_grace_period_minutes = 10
scoped_mode = false
}
}
cluster_limits = {
enabled = true
cpu = {
max_cores = 20
min_cores = 1
}
}
}
}
Usage examples are located in the Terraform provider repository.
terraform-docs markdown table . --output-file README.md
Name | Version |
---|---|
terraform | >= 0.13 |
aws | >= 2.49 |
castai | ~> 7.4.0 |
helm | >= 2.0.0 |
Name | Version |
---|---|
castai | 7.4.0 |
helm | 2.13.2 |
null | 3.2.2 |
No modules.
Name | Type |
---|---|
castai_autoscaler.castai_autoscaler_policies | resource |
castai_eks_cluster.my_castai_cluster | resource |
castai_node_configuration.this | resource |
castai_node_configuration_default.this | resource |
castai_node_template.this | resource |
helm_release.castai_agent | resource |
helm_release.castai_cluster_controller | resource |
helm_release.castai_cluster_controller_self_managed | resource |
helm_release.castai_egressd | resource |
helm_release.castai_egressd_self_managed | resource |
helm_release.castai_evictor | resource |
helm_release.castai_evictor_ext | resource |
helm_release.castai_evictor_self_managed | resource |
helm_release.castai_kvisor | resource |
helm_release.castai_kvisor_self_managed | resource |
helm_release.castai_pod_pinner | resource |
helm_release.castai_pod_pinner_self_managed | resource |
helm_release.castai_spot_handler | resource |
helm_release.castai_workload_autoscaler | resource |
helm_release.castai_workload_autoscaler_self_managed | resource |
null_resource.wait_for_cluster | resource |
Name | Description | Type | Default | Required |
---|---|---|---|---|
agent_aws_access_key_id | AWS access key for CAST AI agent to fetch instance details. | string |
"" |
no |
agent_aws_iam_service_account_role_arn | Arn of the role to be used by CAST AI agent to fetch instance details. Only readonly AmazonEC2ReadOnlyAccess is needed. | string |
"" |
no |
agent_aws_secret_access_key | AWS access key secret for CAST AI agent to fetch instance details. | string |
"" |
no |
agent_values | List of YAML formatted string with agent values | list(string) |
[] |
no |
agent_version | Version of castai-agent helm chart. Default latest | string |
null |
no |
api_grpc_addr | CAST AI GRPC API address | string |
"api-grpc.cast.ai:443" |
no |
api_url | URL of alternative CAST AI API to be used during development or testing | string |
"https://api.cast.ai" |
no |
autoscaler_policies_json | Optional json object to override CAST AI cluster autoscaler policies. Deprecated, use autoscaler_settings instead. |
string |
null |
no |
autoscaler_policy_overrides | Optional Autoscaler policy definitions to override current autoscaler settings | any |
null |
no |
aws_account_id | ID of AWS account the cluster is located in. | string |
n/a | yes |
aws_assume_role_arn | Arn of the role to be used by CAST AI for IAM access | string |
null |
no |
aws_cluster_name | Name of the cluster to be connected to CAST AI. | string |
n/a | yes |
aws_cluster_region | Region of the cluster to be connected to CAST AI. | string |
n/a | yes |
castai_api_token | Optional CAST AI API token created in console.cast.ai API Access keys section. Used only when wait_for_cluster_ready is set to true |
string |
"" |
no |
castai_components_labels | Optional additional Kubernetes labels for CAST AI pods | map(any) |
{} |
no |
cluster_controller_values | List of YAML formatted string with cluster-controller values | list(string) |
[] |
no |
cluster_controller_version | Version of castai-cluster-controller helm chart. Default latest | string |
null |
no |
default_node_configuration | ID of the default node configuration | string |
n/a | yes |
delete_nodes_on_disconnect | Optionally delete Cast AI created nodes when the cluster is destroyed | bool |
false |
no |
egressd_values | List of YAML formatted string with egressd values | list(string) |
[] |
no |
egressd_version | Version of castai-egressd helm chart. Default latest | string |
null |
no |
evictor_ext_values | List of YAML formatted string with evictor-ext values | list(string) |
[] |
no |
evictor_ext_version | Version of castai-evictor-ext chart. Default latest | string |
null |
no |
evictor_values | List of YAML formatted string with evictor values | list(string) |
[] |
no |
evictor_version | Version of castai-evictor chart. Default latest | string |
null |
no |
grpc_url | gRPC endpoint used by pod-pinner | string |
"grpc.cast.ai:443" |
no |
install_egressd | Optional flag for installation of Egressd (Network cost monitoring) (https://docs.cast.ai/docs/network-cost) | bool |
false |
no |
install_security_agent | Optional flag for installation of security agent (https://docs.cast.ai/product-overview/console/security-insights/) | bool |
false |
no |
install_workload_autoscaler | Optional flag for installation of workload autoscaler (https://docs.cast.ai/docs/workload-autoscaling-configuration) | bool |
false |
no |
kvisor_values | List of YAML formatted string with kvisor values | list(string) |
[] |
no |
kvisor_version | Version of kvisor chart. Default latest | string |
null |
no |
kvisor_controller_extra_args | Map of extra arguments for the kvisor controller | map(string) |
{ kube-linter-enabled = true image-scan-enabled = true kube-bench-enabled = true kube-bench-cloud-provider = eks } |
no |
node_configurations | Map of EKS node configurations to create | any |
{} |
no |
node_templates | Map of node templates to create | any |
{} |
no |
pod_pinner_version | Version of pod-pinner helm chart. Default latest | string |
null |
no |
self_managed | Whether CAST AI components' upgrades are managed by a customer; by default upgrades are managed by the CAST AI central system. | bool |
false |
no |
spot_handler_values | List of YAML formatted string with spot-handler values | list(string) |
[] |
no |
spot_handler_version | Version of castai-spot-handler helm chart. Default latest | string |
null |
no |
wait_for_cluster_ready | Wait for cluster to be ready before finishing the module execution, this option requires castai_api_token to be set |
bool |
false |
no |
workload_autoscaler_values | List of YAML formatted string with cluster-workload-autoscaler values | list(string) |
[] |
no |
workload_autoscaler_version | Version of castai-workload-autoscaler helm chart. Default latest | string |
null |
no |
Name | Description |
---|---|
castai_node_configurations | Map of node configurations ids by name |
castai_node_templates | Map of node template by name |
cluster_id | CAST AI cluster id, which can be used for accessing cluster data using API |