Skip to content

Commit

Permalink
Fix cache miss hit on resource recreation (#1818)
Browse files Browse the repository at this point in the history
The k8s client is configured to use selectors
based on label to watch only a subset of
certain resources to limit the memory consumption.

IF the user explictly removes that label
from one of the watched object, the client
cache will miss it and so the operator will try
recreating it failing then with AlreadyExists.

Let's explictly detect this corner case and
fix it setting the missing label with
a custom client to bypass the cache.

Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=2032837

Signed-off-by: Simone Tiraboschi <[email protected]>

Co-authored-by: Simone Tiraboschi <[email protected]>
  • Loading branch information
kubevirt-bot and tiraboschi authored Mar 15, 2022
1 parent 4700fae commit 9c826c9
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 1 deletion.
5 changes: 5 additions & 0 deletions hack/run-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -50,5 +50,10 @@ KUBECTL_BINARY=${KUBECTL_BINARY} ./hack/check_defaults.sh
# check golden images
KUBECTL_BINARY=${KUBECTL_BINARY} ./hack/check_golden_images.sh

# check if HCO is able to correctly add back a label used as a label selector
${KUBECTL_BINARY} label priorityclass kubevirt-cluster-critical app-
sleep 10
[[ $(${KUBECTL_BINARY} get priorityclass kubevirt-cluster-critical -o=jsonpath='{.metadata.labels.app}') == 'kubevirt-hyperconverged' ]]

# Check the webhook, to see if it allow deleteing of the HyperConverged CR
./hack/retry.sh 10 30 "${KUBECTL_BINARY} delete hco -n ${INSTALLED_NAMESPACE} kubevirt-hyperconverged"
47 changes: 46 additions & 1 deletion pkg/controller/operands/operand.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import (
"reflect"
"strings"

"sigs.k8s.io/controller-runtime/pkg/client/config"

conditionsv1 "github.com/openshift/custom-resource-status/conditions/v1"

jsonpatch "github.com/evanphx/json-patch"
Expand Down Expand Up @@ -93,7 +95,16 @@ func (h *genericOperand) ensure(req *common.HcoRequest) *EnsureResult {
found := h.hooks.getEmptyCr()
err = h.Client.Get(req.Ctx, key, found)
if err != nil {
return h.createNewCr(req, err, cr, res)
result := h.createNewCr(req, err, cr, res)

if apierrors.IsAlreadyExists(result.Err) {
// we failed trying to create it due to a caching error
// or we neither tried because we know that the object is already there for sure,
// but we cannot get it due to a bad cache hit.
// Let's try updating it bypassing the client cache mechanism
return h.handleExistingCrSkipCache(req, key, found, cr, res)
}
return result
}

return h.handleExistingCr(req, key, found, cr, res)
Expand Down Expand Up @@ -126,6 +137,40 @@ func (h *genericOperand) handleExistingCr(req *common.HcoRequest, key client.Obj
return res.SetUpgradeDone(req.ComponentUpgradeInProgress)
}

func (h *genericOperand) handleExistingCrSkipCache(req *common.HcoRequest, key client.ObjectKey, found client.Object, cr client.Object, res *EnsureResult) *EnsureResult {
cfg, configerr := config.GetConfig()
if configerr != nil {
req.Logger.Error(configerr, "failed creating a config for a custom client")
return &EnsureResult{
Err: configerr,
}
}
apiClient, acerr := client.New(cfg, client.Options{
Scheme: h.Scheme,
})
if acerr != nil {
req.Logger.Error(acerr, "failed creating a custom client to bypass the cache")
return &EnsureResult{
Err: acerr,
}
}
geterr := apiClient.Get(req.Ctx, key, found)
if geterr != nil {
req.Logger.Error(geterr, "failed trying to get the object bypassing the cache")
return &EnsureResult{
Err: geterr,
}
}
originalClient := h.Client
// this is not exactly thread safe,
// but we are not supposed to call twice in parallel
// the handler for a single CR
h.Client = apiClient
existingcrresult := h.handleExistingCr(req, key, found, cr, res)
h.Client = originalClient
return existingcrresult
}

func (h *genericOperand) completeEnsureOperands(req *common.HcoRequest, opr hcoOperandHooks, found client.Object, res *EnsureResult) *EnsureResult {
// Handle KubeVirt resource conditions
isReady := handleComponentConditions(req, h.crType, opr.getConditions(found))
Expand Down

0 comments on commit 9c826c9

Please sign in to comment.