Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

first pass at disk-based scale-up and down #259

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions docs/zalando.org_elasticsearchdatasets.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ spec:
format: int64
minimum: 0
type: integer
scaleDownDiskUsagePercentBoundary:
format: int32
minimum: 0
type: integer
scaleDownThresholdDurationSeconds:
format: int64
minimum: 0
Expand All @@ -114,6 +118,10 @@ spec:
format: int64
minimum: 0
type: integer
scaleUpDiskUsagePercentBoundary:
format: int32
minimum: 0
type: integer
scaleUpThresholdDurationSeconds:
format: int64
minimum: 0
Expand Down
8 changes: 8 additions & 0 deletions docs/zalando.org_elasticsearchdatasets_v1beta1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,10 @@ spec:
format: int64
minimum: 0
type: integer
scaleDownDiskUsagePercentBoundary:
format: int32
minimum: 0
type: integer
scaleDownThresholdDurationSeconds:
format: int64
minimum: 0
Expand All @@ -115,6 +119,10 @@ spec:
format: int64
minimum: 0
type: integer
scaleUpDiskUsagePercentBoundary:
format: int32
minimum: 0
type: integer
scaleUpThresholdDurationSeconds:
format: int64
minimum: 0
Expand Down
41 changes: 41 additions & 0 deletions operator/autoscaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,23 @@ func (as *AutoScaler) scalingHint() ScalingDirection {
as.logger.Info("Not scaling up, currently in cool-down period.")
}
}

// Check whether the maximum disk usage reported for the ES nodes is below the scale-down boundary.
// TODO(Radu): is there a way to avoid calling _cat/nodes every time? Maybe call it just once per loop,
// or cache the result somewhere with a timeout?
esNodes, err := as.esClient.GetNodes()
if err != nil {
as.logger.Errorf("Can't get ES Nodes: %v", err)
} else {
maxDiskUsagePercent := as.getMaxDiskUsage(esNodes)
scaleDownDiskUsagePercentBoundary := float64(scaling.ScaleDownDiskUsagePercentBoundary)
if maxDiskUsagePercent < scaleDownDiskUsagePercentBoundary {
as.logger.Debugf("Scaling hint DOWN because max disk usage %.2f is lower than the threshold %.2f",
maxDiskUsagePercent, scaleDownDiskUsagePercentBoundary)
return DOWN
}
}

return NONE
}

Expand Down Expand Up @@ -268,6 +285,30 @@ func (as *AutoScaler) scaleUpOrDown(esIndices map[string]ESIndex, scalingHint Sc
}
}

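// Independent of the scaling direction: if the maximum disk usage exceeds the scale-up boundary, we try to scale up by one node.
// NOTE(review): the boundary field is optional; if it is left unset it presumably decodes to 0, in which case
// this check would hold for any non-zero disk usage. Confirm that a default or guard is applied elsewhere.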
esNodes, err := as.esClient.GetNodes()
if err != nil {
as.logger.Errorf("Can't get ES Nodes: %v", err)
} else {
maxDiskUsagePercent := as.getMaxDiskUsage(esNodes)
scaleUpDiskUsagePercentBoundary := float64(scalingSpec.ScaleUpDiskUsagePercentBoundary)
if maxDiskUsagePercent > scaleUpDiskUsagePercentBoundary {
// TODO(Radu) compute how many nodes we need instead of increasing by one?
newDesiredNodeReplicas := as.ensureBoundsNodeReplicas(currentDesiredNodeReplicas + 1)

scalingMessage := fmt.Sprintf("Scaling up to %d nodes because %.2f is higher than %.2f",
newDesiredNodeReplicas, maxDiskUsagePercent, scaleUpDiskUsagePercentBoundary)

as.logger.Debug(scalingMessage)

return &ScalingOperation{
ScalingDirection: UP,
NodeReplicas: &newDesiredNodeReplicas,
Description: scalingMessage,
}
}
}

// independent of the scaling direction: in case there are indices with < MinIndexReplicas, we try to scale these indices.
if len(newDesiredIndexReplicas) > 0 {
return &ScalingOperation{
Expand Down
6 changes: 6 additions & 0 deletions pkg/apis/zalando.org/v1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,9 @@ type ElasticsearchDataSetScaling struct {
ScaleUpCPUBoundary int32 `json:"scaleUpCPUBoundary"`
// +kubebuilder:validation:Minimum=0
// +optional
ScaleUpDiskUsagePercentBoundary int32 `json:"scaleUpDiskUsagePercentBoundary"`
// +kubebuilder:validation:Minimum=0
// +optional
ScaleUpThresholdDurationSeconds int64 `json:"scaleUpThresholdDurationSeconds"`
// +kubebuilder:validation:Minimum=0
// +optional
Expand All @@ -164,6 +167,9 @@ type ElasticsearchDataSetScaling struct {
ScaleDownCPUBoundary int32 `json:"scaleDownCPUBoundary"`
// +kubebuilder:validation:Minimum=0
// +optional
ScaleDownDiskUsagePercentBoundary int32 `json:"scaleDownDiskUsagePercentBoundary"`
// +kubebuilder:validation:Minimum=0
// +optional
ScaleDownThresholdDurationSeconds int64 `json:"scaleDownThresholdDurationSeconds"`
// +kubebuilder:validation:Minimum=0
// +optional
Expand Down