From 7307c524d835333410172d31d5fa27041d6f968d Mon Sep 17 00:00:00 2001 From: i065450 Date: Sun, 27 Oct 2024 18:09:59 +0200 Subject: [PATCH 1/7] remove rate limit error from state --- controllers/servicebinding_controller.go | 8 +++---- controllers/serviceinstance_controller.go | 6 ++--- internal/utils/condition_utils.go | 27 +++++++++++++---------- internal/utils/condition_utils_test.go | 4 ++-- internal/utils/controller_util.go | 4 ++-- 5 files changed, 26 insertions(+), 23 deletions(-) diff --git a/controllers/servicebinding_controller.go b/controllers/servicebinding_controller.go index d2b5f7a3..90acb6cd 100644 --- a/controllers/servicebinding_controller.go +++ b/controllers/servicebinding_controller.go @@ -262,7 +262,7 @@ func (r *ServiceBindingReconciler) createBinding(ctx context.Context, smClient s _, bindingParameters, err := utils.BuildSMRequestParameters(serviceBinding.Namespace, serviceBinding.Spec.ParametersFrom, serviceBinding.Spec.Parameters) if err != nil { log.Error(err, "failed to parse smBinding parameters") - return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.CREATE, err.Error(), serviceBinding) + return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.CREATE, err, serviceBinding) } smBinding, operationURL, bindErr := smClient.Bind(&smClientTypes.ServiceBinding{ @@ -284,7 +284,7 @@ func (r *ServiceBindingReconciler) createBinding(ctx context.Context, smClient s if operationURL != "" { var bindingID string if bindingID = sm.ExtractBindingID(operationURL); len(bindingID) == 0 { - return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.CREATE, fmt.Sprintf("failed to extract smBinding ID from operation URL %s", operationURL), serviceBinding) + return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.CREATE, fmt.Errorf("failed to extract smBinding ID from operation URL %s", operationURL), serviceBinding) } serviceBinding.Status.BindingID = bindingID @@ -361,7 +361,7 @@ func (r *ServiceBindingReconciler) delete(ctx context.Context, serviceBinding *v operationURL, unbindErr := smClient.Unbind(serviceBinding.Status.BindingID, nil, utils.BuildUserInfo(ctx, serviceBinding.Spec.UserInfo)) if unbindErr != nil { // delete will proceed anyway - return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.DELETE, unbindErr.Error(), serviceBinding) + return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.DELETE, unbindErr, serviceBinding) } if operationURL != "" { @@ -847,7 +847,7 @@ func (r *ServiceBindingReconciler) handleSecretError(ctx context.Context, op smC log := utils.GetLogger(ctx) log.Error(err, fmt.Sprintf("failed to store secret %s for binding %s", binding.Spec.SecretName, binding.Name)) if apierrors.ReasonForError(err) == metav1.StatusReasonUnknown { - return utils.MarkAsNonTransientError(ctx, r.Client, op, err.Error(), binding) + return utils.MarkAsNonTransientError(ctx, r.Client, op, err, binding) } return utils.MarkAsTransientError(ctx, r.Client, op, err, binding) } diff --git a/controllers/serviceinstance_controller.go b/controllers/serviceinstance_controller.go index c5cea1b6..8d1532e7 100644 --- a/controllers/serviceinstance_controller.go +++ b/controllers/serviceinstance_controller.go @@ -176,7 +176,7 @@ func (r *ServiceInstanceReconciler) createInstance(ctx context.Context, smClient if err != nil { // if parameters are invalid there is nothing we can do, the user should fix it according to the error message in the condition log.Error(err, "failed to parse instance parameters") - return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.CREATE, err.Error(), serviceInstance) + return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.CREATE, err, serviceInstance) } provision, provisionErr := smClient.Provision(&smClientTypes.ServiceInstance{ @@ -231,7 +231,7 @@ func (r *ServiceInstanceReconciler) updateInstance(ctx context.Context, smClient _, instanceParameters, err := utils.BuildSMRequestParameters(serviceInstance.Namespace, serviceInstance.Spec.ParametersFrom, serviceInstance.Spec.Parameters) if err != nil { log.Error(err, "failed to parse instance parameters") - return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.UPDATE, fmt.Sprintf("failed to parse parameters: %v", err.Error()), serviceInstance) + return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.UPDATE, err, serviceInstance) } _, operationURL, err := smClient.UpdateInstance(serviceInstance.Status.InstanceID, &smClientTypes.ServiceInstance{ @@ -296,7 +296,7 @@ func (r *ServiceInstanceReconciler) deleteInstance(ctx context.Context, serviceI operationURL, deprovisionErr := smClient.Deprovision(serviceInstance.Status.InstanceID, nil, utils.BuildUserInfo(ctx, serviceInstance.Spec.UserInfo)) if deprovisionErr != nil { // delete will proceed anyway - return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.DELETE, deprovisionErr.Error(), serviceInstance) + return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.DELETE, deprovisionErr, serviceInstance) } if operationURL != "" { diff --git a/internal/utils/condition_utils.go b/internal/utils/condition_utils.go index ae43ec31..2e76ff70 100644 --- a/internal/utils/condition_utils.go +++ b/internal/utils/condition_utils.go @@ -178,19 +178,22 @@ func SetFailureConditions(operationType smClientTypes.OperationCategory, errorMe object.SetConditions(conditions) } -func MarkAsNonTransientError(ctx context.Context, k8sClient client.Client, operationType smClientTypes.OperationCategory, errMsg string, object common.SAPBTPResource) (ctrl.Result, error) { +func MarkAsNonTransientError(ctx context.Context, k8sClient client.Client, operationType smClientTypes.OperationCategory, err error, object common.SAPBTPResource) (ctrl.Result, error) { log := GetLogger(ctx) - SetFailureConditions(operationType, errMsg, object) - if operationType != smClientTypes.DELETE { - log.Info(fmt.Sprintf("operation %s of %s encountered a non transient error %s, giving up operation :(", operationType, object.GetControllerName(), errMsg)) - } - object.SetObservedGeneration(object.GetGeneration()) - err := UpdateStatus(ctx, k8sClient, object) - if err != nil { - return ctrl.Result{}, err - } - if operationType == smClientTypes.DELETE { - return ctrl.Result{}, fmt.Errorf(errMsg) + if smError, ok := err.(*sm.ServiceManagerError); !ok || smError.StatusCode != http.StatusTooManyRequests { + errMsg := err.Error() + SetFailureConditions(operationType, errMsg, object) + if operationType != smClientTypes.DELETE { + log.Info(fmt.Sprintf("operation %s of %s encountered a non transient error %s, giving up operation :(", operationType, object.GetControllerName(), errMsg)) + } + object.SetObservedGeneration(object.GetGeneration()) + updateErr := UpdateStatus(ctx, k8sClient, object) + if updateErr != nil { + return ctrl.Result{}, updateErr + } + if operationType == smClientTypes.DELETE { + return ctrl.Result{}, fmt.Errorf(errMsg) + } } return ctrl.Result{}, nil } diff --git a/internal/utils/condition_utils_test.go b/internal/utils/condition_utils_test.go index b11c0de6..a044eaf7 100644 --- a/internal/utils/condition_utils_test.go +++ b/internal/utils/condition_utils_test.go @@ -1,6 +1,7 @@ package utils import ( + "fmt" "net/http" "github.com/SAP/sap-btp-service-operator/api/common" @@ -168,9 +169,8 @@ var _ = Describe("Condition Utils", func() { Context("MarkAsNonTransientError", func() { It("should mark as non-transient error and update status", func() { operationType := smClientTypes.CREATE - errorMessage := "Non-transient error" - result, err := MarkAsNonTransientError(ctx, k8sClient, operationType, errorMessage, resource) + result, err := MarkAsNonTransientError(ctx, k8sClient, operationType, fmt.Errorf("Non-transient error"), resource) Expect(err).ToNot(HaveOccurred()) Expect(result).To(Equal(ctrl.Result{})) }) diff --git a/internal/utils/controller_util.go b/internal/utils/controller_util.go index 83e5381d..c18bd9de 100644 --- a/internal/utils/controller_util.go +++ b/internal/utils/controller_util.go @@ -134,13 +134,13 @@ func HandleError(ctx context.Context, k8sClient client.Client, operationType smC ok := errors.As(err, &smError) if !ok { log.Info("unable to cast error to SM error, will be treated as non transient") - return MarkAsNonTransientError(ctx, k8sClient, operationType, err.Error(), resource) + return MarkAsNonTransientError(ctx, k8sClient, operationType, err, resource) } if ignoreNonTransient || IsTransientError(smError, log) { return MarkAsTransientError(ctx, k8sClient, operationType, smError, resource) } - return MarkAsNonTransientError(ctx, k8sClient, operationType, smError.Error(), resource) + return MarkAsNonTransientError(ctx, k8sClient, operationType, smError, resource) } func IsTransientError(smError *sm.ServiceManagerError, log logr.Logger) bool { From 091734fa99a8a855f9a038828e12f48e5a802f38 Mon Sep 17 00:00:00 2001 From: i065450 Date: Mon, 28 Oct 2024 08:40:49 +0200 Subject: [PATCH 2/7] remove rate limit error from state --- internal/utils/condition_utils.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/internal/utils/condition_utils.go b/internal/utils/condition_utils.go index 2e76ff70..c032b263 100644 --- a/internal/utils/condition_utils.go +++ b/internal/utils/condition_utils.go @@ -2,6 +2,7 @@ package utils import ( "context" + "errors" "fmt" "net/http" @@ -180,7 +181,8 @@ func SetFailureConditions(operationType smClientTypes.OperationCategory, errorMe func MarkAsNonTransientError(ctx context.Context, k8sClient client.Client, operationType smClientTypes.OperationCategory, err error, object common.SAPBTPResource) (ctrl.Result, error) { log := GetLogger(ctx) - if smError, ok := err.(*sm.ServiceManagerError); !ok || smError.StatusCode != http.StatusTooManyRequests { + var smError *sm.ServiceManagerError + if !errors.As(err, &smError) || smError.StatusCode != http.StatusTooManyRequests { errMsg := err.Error() SetFailureConditions(operationType, errMsg, object) if operationType != smClientTypes.DELETE { @@ -191,9 +193,9 @@ func MarkAsNonTransientError(ctx context.Context, k8sClient client.Client, opera if updateErr != nil { return ctrl.Result{}, updateErr } - if operationType == smClientTypes.DELETE { - return ctrl.Result{}, fmt.Errorf(errMsg) - } + } + if operationType == smClientTypes.DELETE { + return ctrl.Result{}, err } return ctrl.Result{}, nil } From 23ff1e7613fe9ab2c66062267aaa290f7d86f707 Mon Sep 17 00:00:00 2001 From: I501080 Date: Mon, 11 Nov 2024 09:27:27 +0200 Subject: [PATCH 3/7] handle delete error --- controllers/servicebinding_controller.go | 5 ++- controllers/serviceinstance_controller.go | 6 ++-- internal/utils/condition_utils.go | 39 +++++++---------------- internal/utils/controller_util.go | 25 +++++++++++---- 4 files changed, 35 insertions(+), 40 deletions(-) diff --git a/controllers/servicebinding_controller.go b/controllers/servicebinding_controller.go index 90acb6cd..fef2f309 100644 --- a/controllers/servicebinding_controller.go +++ b/controllers/servicebinding_controller.go @@ -278,7 +278,7 @@ func (r *ServiceBindingReconciler) createBinding(ctx context.Context, smClient s if bindErr != nil { log.Error(err, "failed to create service binding", "serviceInstanceID", serviceInstance.Status.InstanceID) - return utils.HandleError(ctx, r.Client, smClientTypes.CREATE, bindErr, serviceBinding, true) + return utils.HandleError(ctx, r.Client, smClientTypes.CREATE, bindErr, serviceBinding) } if operationURL != "" { @@ -360,8 +360,7 @@ func (r *ServiceBindingReconciler) delete(ctx context.Context, serviceBinding *v log.Info(fmt.Sprintf("Deleting binding with id %v from SM", serviceBinding.Status.BindingID)) operationURL, unbindErr := smClient.Unbind(serviceBinding.Status.BindingID, nil, utils.BuildUserInfo(ctx, serviceBinding.Spec.UserInfo)) if unbindErr != nil { - // delete will proceed anyway - return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.DELETE, unbindErr, serviceBinding) + return utils.HandleDeleteError(ctx, r.Client, unbindErr, serviceBinding) } if operationURL != "" { diff --git a/controllers/serviceinstance_controller.go b/controllers/serviceinstance_controller.go index 8d1532e7..2cb1d798 100644 --- a/controllers/serviceinstance_controller.go +++ b/controllers/serviceinstance_controller.go @@ -193,7 +193,7 @@ func (r *ServiceInstanceReconciler) createInstance(ctx context.Context, smClient if provisionErr != nil { log.Error(provisionErr, "failed to create service instance", "serviceOfferingName", serviceInstance.Spec.ServiceOfferingName, "servicePlanName", serviceInstance.Spec.ServicePlanName) - return utils.HandleError(ctx, r.Client, smClientTypes.CREATE, provisionErr, serviceInstance, true) + return utils.HandleError(ctx, r.Client, smClientTypes.CREATE, provisionErr, serviceInstance) } serviceInstance.Status.InstanceID = provision.InstanceID @@ -242,7 +242,7 @@ func (r *ServiceInstanceReconciler) updateInstance(ctx context.Context, smClient if err != nil { log.Error(err, fmt.Sprintf("failed to update service instance with ID %s", serviceInstance.Status.InstanceID)) - return utils.HandleError(ctx, r.Client, smClientTypes.UPDATE, err, serviceInstance, true) + return utils.HandleError(ctx, r.Client, smClientTypes.UPDATE, err, serviceInstance) } if operationURL != "" { @@ -296,7 +296,7 @@ func (r *ServiceInstanceReconciler) deleteInstance(ctx context.Context, serviceI operationURL, deprovisionErr := smClient.Deprovision(serviceInstance.Status.InstanceID, nil, utils.BuildUserInfo(ctx, serviceInstance.Spec.UserInfo)) if deprovisionErr != nil { // delete will proceed anyway - return utils.MarkAsNonTransientError(ctx, r.Client, smClientTypes.DELETE, deprovisionErr, serviceInstance) + return utils.HandleDeleteError(ctx, r.Client, deprovisionErr, serviceInstance) } if operationURL != "" { diff --git a/internal/utils/condition_utils.go b/internal/utils/condition_utils.go index c032b263..13998561 100644 --- a/internal/utils/condition_utils.go +++ b/internal/utils/condition_utils.go @@ -2,12 +2,8 @@ package utils import ( "context" - "errors" "fmt" - "net/http" - "github.com/SAP/sap-btp-service-operator/api/common" - "github.com/SAP/sap-btp-service-operator/client/sm" smClientTypes "github.com/SAP/sap-btp-service-operator/client/sm/types" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -181,37 +177,24 @@ func SetFailureConditions(operationType smClientTypes.OperationCategory, errorMe func MarkAsNonTransientError(ctx context.Context, k8sClient client.Client, operationType smClientTypes.OperationCategory, err error, object common.SAPBTPResource) (ctrl.Result, error) { log := GetLogger(ctx) - var smError *sm.ServiceManagerError - if !errors.As(err, &smError) || smError.StatusCode != http.StatusTooManyRequests { - errMsg := err.Error() - SetFailureConditions(operationType, errMsg, object) - if operationType != smClientTypes.DELETE { - log.Info(fmt.Sprintf("operation %s of %s encountered a non transient error %s, giving up operation :(", operationType, object.GetControllerName(), errMsg)) - } - object.SetObservedGeneration(object.GetGeneration()) - updateErr := UpdateStatus(ctx, k8sClient, object) - if updateErr != nil { - return ctrl.Result{}, updateErr - } + errMsg := err.Error() + SetFailureConditions(operationType, errMsg, object) + if operationType != smClientTypes.DELETE { + log.Info(fmt.Sprintf("operation %s of %s encountered a non transient error %s, giving up operation :(", operationType, object.GetControllerName(), errMsg)) } - if operationType == smClientTypes.DELETE { - return ctrl.Result{}, err - } - return ctrl.Result{}, nil + object.SetObservedGeneration(object.GetGeneration()) + return ctrl.Result{}, UpdateStatus(ctx, k8sClient, object) } func MarkAsTransientError(ctx context.Context, k8sClient client.Client, operationType smClientTypes.OperationCategory, err error, object common.SAPBTPResource) (ctrl.Result, error) { log := GetLogger(ctx) - //DO NOT REMOVE - 429 error is not reflected to the status - if smError, ok := err.(*sm.ServiceManagerError); !ok || smError.StatusCode != http.StatusTooManyRequests { - SetInProgressConditions(ctx, operationType, err.Error(), object) - log.Info(fmt.Sprintf("operation %s of %s encountered a transient error %s, retrying operation :)", operationType, object.GetControllerName(), err.Error())) - if updateErr := UpdateStatus(ctx, k8sClient, object); updateErr != nil { - return ctrl.Result{}, updateErr - } + SetInProgressConditions(ctx, operationType, err.Error(), object) + log.Info(fmt.Sprintf("operation %s of %s encountered a transient error %s, retrying operation :)", operationType, object.GetControllerName(), err.Error())) + if updateErr := UpdateStatus(ctx, k8sClient, object); updateErr != nil { + return ctrl.Result{}, updateErr } - return ctrl.Result{}, err + return ctrl.Result{}, UpdateStatus(ctx, k8sClient, object) } // blocked condition marks to the user that action from his side is required, this is considered as in progress operation diff --git a/internal/utils/controller_util.go b/internal/utils/controller_util.go index c18bd9de..2be92c6a 100644 --- a/internal/utils/controller_util.go +++ b/internal/utils/controller_util.go @@ -128,19 +128,32 @@ func GetLogger(ctx context.Context) logr.Logger { return ctx.Value(LogKey{}).(logr.Logger) } -func HandleError(ctx context.Context, k8sClient client.Client, operationType smClientTypes.OperationCategory, err error, resource common.SAPBTPResource, ignoreNonTransient bool) (ctrl.Result, error) { +func HandleError(ctx context.Context, k8sClient client.Client, operationType smClientTypes.OperationCategory, err error, resource common.SAPBTPResource) (ctrl.Result, error) { log := GetLogger(ctx) var smError *sm.ServiceManagerError - ok := errors.As(err, &smError) - if !ok { + if ok := errors.As(err, &smError); !ok { log.Info("unable to cast error to SM error, will be treated as non transient") return MarkAsNonTransientError(ctx, k8sClient, operationType, err, resource) } - if ignoreNonTransient || IsTransientError(smError, log) { - return MarkAsTransientError(ctx, k8sClient, operationType, smError, resource) + + if smError.StatusCode == http.StatusTooManyRequests { + log.Info(fmt.Sprintf("SM returned 429 (%s), requeueing...", smError.Error())) + return ctrl.Result{Requeue: true}, nil } + return MarkAsTransientError(ctx, k8sClient, operationType, smError, resource) +} - return MarkAsNonTransientError(ctx, k8sClient, operationType, smError, resource) +func HandleDeleteError(ctx context.Context, k8sClient client.Client, err error, object common.SAPBTPResource) (ctrl.Result, error) { + log := GetLogger(ctx) + log.Info(fmt.Sprintf("handling delete error: %v", err)) + var smError *sm.ServiceManagerError + if !errors.As(err, &smError) || smError.StatusCode != http.StatusTooManyRequests { + if _, updateErr := MarkAsNonTransientError(ctx, k8sClient, smClientTypes.DELETE, err, object); updateErr != nil { + log.Error(updateErr, "failed to update resource status") + return ctrl.Result{}, updateErr + } + } + return ctrl.Result{}, err } func IsTransientError(smError *sm.ServiceManagerError, log logr.Logger) bool { From 3d1d487dd888224b52ebda8972a20ef6786cb8e3 Mon Sep 17 00:00:00 2001 From: I501080 Date: Mon, 11 Nov 2024 10:37:39 +0200 Subject: [PATCH 4/7] handle delete error --- internal/utils/condition_utils.go | 8 +++----- internal/utils/controller_util.go | 3 ++- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/internal/utils/condition_utils.go b/internal/utils/condition_utils.go index 13998561..227dc356 100644 --- a/internal/utils/condition_utils.go +++ b/internal/utils/condition_utils.go @@ -178,23 +178,21 @@ func SetFailureConditions(operationType smClientTypes.OperationCategory, errorMe func MarkAsNonTransientError(ctx context.Context, k8sClient client.Client, operationType smClientTypes.OperationCategory, err error, object common.SAPBTPResource) (ctrl.Result, error) { log := GetLogger(ctx) errMsg := err.Error() + log.Info(fmt.Sprintf("operation %s of %s encountered a non transient error %s, setting failure conditions", operationType, object.GetControllerName(), errMsg)) SetFailureConditions(operationType, errMsg, object) - if operationType != smClientTypes.DELETE { - log.Info(fmt.Sprintf("operation %s of %s encountered a non transient error %s, giving up operation :(", operationType, object.GetControllerName(), errMsg)) - } object.SetObservedGeneration(object.GetGeneration()) return ctrl.Result{}, UpdateStatus(ctx, k8sClient, object) } func MarkAsTransientError(ctx context.Context, k8sClient client.Client, operationType smClientTypes.OperationCategory, err error, object common.SAPBTPResource) (ctrl.Result, error) { log := GetLogger(ctx) - SetInProgressConditions(ctx, operationType, err.Error(), object) log.Info(fmt.Sprintf("operation %s of %s encountered a transient error %s, retrying operation :)", operationType, object.GetControllerName(), err.Error())) + SetInProgressConditions(ctx, operationType, err.Error(), object) if updateErr := UpdateStatus(ctx, k8sClient, object); updateErr != nil { return ctrl.Result{}, updateErr } - return ctrl.Result{}, UpdateStatus(ctx, k8sClient, object) + return ctrl.Result{}, err } // blocked condition marks to the user that action from his side is required, this is considered as in progress operation diff --git a/internal/utils/controller_util.go b/internal/utils/controller_util.go index 2be92c6a..02ffad23 100644 --- a/internal/utils/controller_util.go +++ b/internal/utils/controller_util.go @@ -132,7 +132,7 @@ func HandleError(ctx context.Context, k8sClient client.Client, operationType smC log := GetLogger(ctx) var smError *sm.ServiceManagerError if ok := errors.As(err, &smError); !ok { - log.Info("unable to cast error to SM error, will be treated as non transient") + log.Info(fmt.Sprintf("unable to cast error to SM error, will be treated as non transient. (error: %v)", err)) return MarkAsNonTransientError(ctx, k8sClient, operationType, err, resource) } @@ -140,6 +140,7 @@ func HandleError(ctx context.Context, k8sClient client.Client, operationType smC log.Info(fmt.Sprintf("SM returned 429 (%s), requeueing...", smError.Error())) return ctrl.Result{Requeue: true}, nil } + log.Info(fmt.Sprintf("SM returned error: %s", smError.Error())) return MarkAsTransientError(ctx, k8sClient, operationType, smError, resource) } From 430c7db2e2c1592ff55f2e2f700095fe438580f4 Mon Sep 17 00:00:00 2001 From: I501080 Date: Mon, 11 Nov 2024 11:41:41 +0200 Subject: [PATCH 5/7] . --- internal/utils/condition_utils.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/utils/condition_utils.go b/internal/utils/condition_utils.go index 227dc356..6d82cc40 100644 --- a/internal/utils/condition_utils.go +++ b/internal/utils/condition_utils.go @@ -3,6 +3,7 @@ package utils import ( "context" "fmt" + "github.com/SAP/sap-btp-service-operator/api/common" smClientTypes "github.com/SAP/sap-btp-service-operator/client/sm/types" "k8s.io/apimachinery/pkg/api/meta" From e7f01d120ad088be8e3c50f9354c0fa0cb6423b9 Mon Sep 17 00:00:00 2001 From: I501080 Date: Mon, 11 Nov 2024 12:33:26 +0200 Subject: [PATCH 6/7] . --- client/sm/client.go | 12 ++--- controllers/servicebinding_controller_test.go | 7 ++- internal/utils/controller_util.go | 45 +++++++++++++------ 3 files changed, 42 insertions(+), 22 deletions(-) diff --git a/client/sm/client.go b/client/sm/client.go index f813da53..8b595a6b 100644 --- a/client/sm/client.go +++ b/client/sm/client.go @@ -70,10 +70,11 @@ type Client interface { } type ServiceManagerError struct { - ErrorType string `json:"error,omitempty"` - Description string `json:"description,omitempty"` - StatusCode int `json:"-"` - BrokerError *common.HTTPStatusCodeError `json:"broker_error,omitempty"` + ErrorType string `json:"error,omitempty"` + Description string `json:"description,omitempty"` + StatusCode int `json:"-"` + BrokerError *common.HTTPStatusCodeError `json:"broker_error,omitempty"` + ResponseHeaders http.Header `json:"-"` } func (e *ServiceManagerError) Error() string { @@ -590,7 +591,8 @@ func handleResponseError(response *http.Response) error { } smError := &ServiceManagerError{ - StatusCode: response.StatusCode, + StatusCode: response.StatusCode, + ResponseHeaders: response.Header, } _ = json.Unmarshal(body, &smError) diff --git a/controllers/servicebinding_controller_test.go b/controllers/servicebinding_controller_test.go index 0d8d2a3c..1d1033e9 100644 --- a/controllers/servicebinding_controller_test.go +++ b/controllers/servicebinding_controller_test.go @@ -4,11 +4,10 @@ import ( "context" "encoding/json" "errors" - "net/http" - "strings" - "github.com/lithammer/dedent" authv1 "k8s.io/api/authentication/v1" + "net/http" + "strings" "github.com/SAP/sap-btp-service-operator/api/common" "github.com/SAP/sap-btp-service-operator/internal/utils" @@ -400,7 +399,7 @@ var _ = Describe("ServiceBinding controller", func() { }) }) - When("SM returned transient error(429)", func() { + When("SM returned transient error(429) without retry-after header", func() { BeforeEach(func() { errorMessage = "too many requests" fakeClient.BindReturnsOnCall(0, nil, "", &sm.ServiceManagerError{ diff --git a/internal/utils/controller_util.go b/internal/utils/controller_util.go index 02ffad23..f216d6b9 100644 --- a/internal/utils/controller_util.go +++ b/internal/utils/controller_util.go @@ -7,6 +7,7 @@ import ( "fmt" "net/http" "strings" + "time" "github.com/SAP/sap-btp-service-operator/api/common" "github.com/SAP/sap-btp-service-operator/client/sm" @@ -131,28 +132,46 @@ func GetLogger(ctx context.Context) logr.Logger { func HandleError(ctx context.Context, k8sClient client.Client, operationType smClientTypes.OperationCategory, err error, resource common.SAPBTPResource) (ctrl.Result, error) { log := GetLogger(ctx) var smError *sm.ServiceManagerError - if ok := errors.As(err, &smError); !ok { - log.Info(fmt.Sprintf("unable to cast error to SM error, will be treated as non transient. (error: %v)", err)) - return MarkAsNonTransientError(ctx, k8sClient, operationType, err, resource) + if ok := errors.As(err, &smError); ok { + if smError.StatusCode == http.StatusTooManyRequests { + log.Info(fmt.Sprintf("SM returned 429 (%s), requeueing...", smError.Error())) + return handleRateLimitError(smError, log) + } + + log.Info(fmt.Sprintf("SM returned error: %s", smError.Error())) + return MarkAsTransientError(ctx, k8sClient, operationType, smError, resource) } - if smError.StatusCode == http.StatusTooManyRequests { - log.Info(fmt.Sprintf("SM returned 429 (%s), requeueing...", smError.Error())) - return ctrl.Result{Requeue: true}, nil + log.Info(fmt.Sprintf("unable to cast error to SM error, will be treated as non transient. (error: %v)", err)) + return MarkAsNonTransientError(ctx, k8sClient, operationType, err, resource) +} + +func handleRateLimitError(smError *sm.ServiceManagerError, log logr.Logger) (ctrl.Result, error) { + retryAfterStr, ok := smError.ResponseHeaders["Retry-After"] + if ok { + log.Info(fmt.Sprintf("SM returned 429 with Retry-After: %s, requeueing after it...", retryAfterStr[0])) + retryAfter, err := time.Parse(time.RFC1123, retryAfterStr[0]) + if err == nil { + timeToRequeue := time.Until(retryAfter) + log.Info(fmt.Sprintf("requeueing after %.2f minutes", timeToRequeue.Minutes())) + return ctrl.Result{RequeueAfter: timeToRequeue}, nil + } } - log.Info(fmt.Sprintf("SM returned error: %s", smError.Error())) - return MarkAsTransientError(ctx, k8sClient, operationType, smError, resource) + + return ctrl.Result{Requeue: true}, nil } func HandleDeleteError(ctx context.Context, k8sClient client.Client, err error, object common.SAPBTPResource) (ctrl.Result, error) { log := GetLogger(ctx) log.Info(fmt.Sprintf("handling delete error: %v", err)) var smError *sm.ServiceManagerError - if !errors.As(err, &smError) || smError.StatusCode != http.StatusTooManyRequests { - if _, updateErr := MarkAsNonTransientError(ctx, k8sClient, smClientTypes.DELETE, err, object); updateErr != nil { - log.Error(updateErr, "failed to update resource status") - return ctrl.Result{}, updateErr - } + if errors.As(err, &smError) && smError.StatusCode == http.StatusTooManyRequests { + return handleRateLimitError(smError, log) + } + + if _, updateErr := MarkAsNonTransientError(ctx, k8sClient, smClientTypes.DELETE, err, object); updateErr != nil { + log.Error(updateErr, "failed to update resource status") + return ctrl.Result{}, updateErr } return ctrl.Result{}, err } From a942ed548489f8d4c13a832f19c0e2dd6121cc4f Mon Sep 17 00:00:00 2001 From: I501080 Date: Mon, 11 Nov 2024 16:30:10 +0200 Subject: [PATCH 7/7] . --- internal/utils/controller_util.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/internal/utils/controller_util.go b/internal/utils/controller_util.go index f216d6b9..d368ae0e 100644 --- a/internal/utils/controller_util.go +++ b/internal/utils/controller_util.go @@ -147,13 +147,15 @@ func HandleError(ctx context.Context, k8sClient client.Client, operationType smC } func handleRateLimitError(smError *sm.ServiceManagerError, log logr.Logger) (ctrl.Result, error) { - retryAfterStr, ok := smError.ResponseHeaders["Retry-After"] - if ok { - log.Info(fmt.Sprintf("SM returned 429 with Retry-After: %s, requeueing after it...", retryAfterStr[0])) - retryAfter, err := time.Parse(time.RFC1123, retryAfterStr[0]) - if err == nil { + retryAfterStr := smError.ResponseHeaders.Get("Retry-After") + if len(retryAfterStr) > 0 { + log.Info(fmt.Sprintf("SM returned 429 with Retry-After: %s, requeueing after it...", retryAfterStr)) + retryAfter, err := time.Parse(time.DateTime, retryAfterStr[:len(time.DateTime)]) // format 2024-11-11 14:59:33 +0000 UTC + if err != nil { + log.Error(err, "failed to parse Retry-After header, using default requeue time") + } else { timeToRequeue := time.Until(retryAfter) - log.Info(fmt.Sprintf("requeueing after %.2f minutes", timeToRequeue.Minutes())) + log.Info(fmt.Sprintf("requeueing after %d minutes, %d seconds", int(timeToRequeue.Minutes()), int(timeToRequeue.Seconds())%60)) return ctrl.Result{RequeueAfter: timeToRequeue}, nil } }