From f4e3faa846fefda180718688761ac9e06a49f754 Mon Sep 17 00:00:00 2001 From: Radu Gheorghe Date: Wed, 4 May 2022 14:51:39 +0300 Subject: [PATCH] first pass at disk-based scale-up and down --- docs/zalando.org_elasticsearchdatasets.yaml | 8 ++++ ...ndo.org_elasticsearchdatasets_v1beta1.yaml | 8 ++++ operator/autoscaler.go | 41 +++++++++++++++++++ pkg/apis/zalando.org/v1/types.go | 6 +++ 4 files changed, 63 insertions(+) diff --git a/docs/zalando.org_elasticsearchdatasets.yaml b/docs/zalando.org_elasticsearchdatasets.yaml index 86e8aa82..b6dc390d 100644 --- a/docs/zalando.org_elasticsearchdatasets.yaml +++ b/docs/zalando.org_elasticsearchdatasets.yaml @@ -102,6 +102,10 @@ spec: format: int64 minimum: 0 type: integer + scaleDownDiskUsagePercentBoundary: + format: int32 + minimum: 0 + type: integer scaleDownThresholdDurationSeconds: format: int64 minimum: 0 @@ -114,6 +118,10 @@ spec: format: int64 minimum: 0 type: integer + scaleUpDiskUsagePercentBoundary: + format: int32 + minimum: 0 + type: integer scaleUpThresholdDurationSeconds: format: int64 minimum: 0 diff --git a/docs/zalando.org_elasticsearchdatasets_v1beta1.yaml b/docs/zalando.org_elasticsearchdatasets_v1beta1.yaml index ab73c012..071c9a90 100644 --- a/docs/zalando.org_elasticsearchdatasets_v1beta1.yaml +++ b/docs/zalando.org_elasticsearchdatasets_v1beta1.yaml @@ -103,6 +103,10 @@ spec: format: int64 minimum: 0 type: integer + scaleDownDiskUsagePercentBoundary: + format: int32 + minimum: 0 + type: integer scaleDownThresholdDurationSeconds: format: int64 minimum: 0 @@ -115,6 +119,10 @@ spec: format: int64 minimum: 0 type: integer + scaleUpDiskUsagePercentBoundary: + format: int32 + minimum: 0 + type: integer scaleUpThresholdDurationSeconds: format: int64 minimum: 0 diff --git a/operator/autoscaler.go b/operator/autoscaler.go index 5b473140..7a854cdf 100644 --- a/operator/autoscaler.go +++ b/operator/autoscaler.go @@ -126,6 +126,23 @@ func (as *AutoScaler) scalingHint() ScalingDirection { as.logger.Info("Not scaling up, currently in cool-down period.") } } + + // check if disk usage is below threshold + // TODO(Radu) is there a way to avoid calling _cat/nodes all the time? maybe just once per loop? + // or just store it somewhere with a timeout? + esNodes, err := as.esClient.GetNodes() + if err != nil { + as.logger.Errorf("Can't get ES Nodes: %v", err) + } else { + maxDiskUsagePercent := as.getMaxDiskUsage(esNodes) + scaleDownDiskUsagePercentBoundary := float64(scaling.ScaleDownDiskUsagePercentBoundary) + if maxDiskUsagePercent < scaleDownDiskUsagePercentBoundary { + as.logger.Debugf("Scaling hint DOWN because max disk usage %.2f is lower than the threshold %.2f", + maxDiskUsagePercent, scaleDownDiskUsagePercentBoundary) + return DOWN + } + } + return NONE } @@ -268,6 +285,30 @@ func (as *AutoScaler) scaleUpOrDown(esIndices map[string]ESIndex, scalingHint Sc } } + // independent of scaling direction: if we run out of disk, we try to scale up + esNodes, err := as.esClient.GetNodes() + if err != nil { + as.logger.Errorf("Can't get ES Nodes: %v", err) + } else { + maxDiskUsagePercent := as.getMaxDiskUsage(esNodes) + scaleUpDiskUsagePercentBoundary := float64(scalingSpec.ScaleUpDiskUsagePercentBoundary) + if maxDiskUsagePercent > scaleUpDiskUsagePercentBoundary { + // TODO(Radu) compute how many nodes we need instead of increasing by one? + newDesiredNodeReplicas := as.ensureBoundsNodeReplicas(currentDesiredNodeReplicas + 1) + + scalingMessage := fmt.Sprintf("Scaling up to %d nodes because %.2f is higher than %.2f", + newDesiredNodeReplicas, maxDiskUsagePercent, scaleUpDiskUsagePercentBoundary) + + as.logger.Debug(scalingMessage) + + return &ScalingOperation{ + ScalingDirection: UP, + NodeReplicas: &newDesiredNodeReplicas, + Description: scalingMessage, + } + } + } + // independent of the scaling direction: in case there are indices with < MinIndexReplicas, we try to scale these indices. if len(newDesiredIndexReplicas) > 0 { return &ScalingOperation{ diff --git a/pkg/apis/zalando.org/v1/types.go b/pkg/apis/zalando.org/v1/types.go index 1aa50315..d16dccf7 100644 --- a/pkg/apis/zalando.org/v1/types.go +++ b/pkg/apis/zalando.org/v1/types.go @@ -155,6 +155,9 @@ type ElasticsearchDataSetScaling struct { ScaleUpCPUBoundary int32 `json:"scaleUpCPUBoundary"` // +kubebuilder:validation:Minimum=0 // +optional + ScaleUpDiskUsagePercentBoundary int32 `json:"scaleUpDiskUsagePercentBoundary"` + // +kubebuilder:validation:Minimum=0 + // +optional ScaleUpThresholdDurationSeconds int64 `json:"scaleUpThresholdDurationSeconds"` // +kubebuilder:validation:Minimum=0 // +optional @@ -164,6 +167,9 @@ type ElasticsearchDataSetScaling struct { ScaleDownCPUBoundary int32 `json:"scaleDownCPUBoundary"` // +kubebuilder:validation:Minimum=0 // +optional + ScaleDownDiskUsagePercentBoundary int32 `json:"scaleDownDiskUsagePercentBoundary"` + // +kubebuilder:validation:Minimum=0 + // +optional ScaleDownThresholdDurationSeconds int64 `json:"scaleDownThresholdDurationSeconds"` // +kubebuilder:validation:Minimum=0 // +optional